Home | History | Annotate | Line # | Download | only in zfs
      1    1.1      haad /*
      2    1.1      haad  * CDDL HEADER START
      3    1.1      haad  *
      4    1.1      haad  * The contents of this file are subject to the terms of the
      5    1.1      haad  * Common Development and Distribution License (the "License").
      6    1.1      haad  * You may not use this file except in compliance with the License.
      7    1.1      haad  *
      8    1.1      haad  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9    1.1      haad  * or http://www.opensolaris.org/os/licensing.
     10    1.1      haad  * See the License for the specific language governing permissions
     11    1.1      haad  * and limitations under the License.
     12    1.1      haad  *
     13    1.1      haad  * When distributing Covered Code, include this CDDL HEADER in each
     14    1.1      haad  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15    1.1      haad  * If applicable, add the following below this CDDL HEADER, with the
     16    1.1      haad  * fields enclosed by brackets "[]" replaced with your own identifying
     17    1.1      haad  * information: Portions Copyright [yyyy] [name of copyright owner]
     18    1.1      haad  *
     19    1.1      haad  * CDDL HEADER END
     20    1.1      haad  */
     21    1.1      haad /*
     22   1.27       chs  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
     23   1.27       chs  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
     24   1.27       chs  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
     25   1.27       chs  * Copyright (c) 2014 Integros [integros.com]
     26    1.1      haad  */
     27    1.1      haad 
     28    1.1      haad /* Portions Copyright 2007 Jeremy Teo */
     29   1.27       chs /* Portions Copyright 2010 Robert Milkowski */
     30    1.1      haad 
     31    1.1      haad #include <sys/types.h>
     32    1.1      haad #include <sys/param.h>
     33    1.1      haad #include <sys/time.h>
     34    1.1      haad #include <sys/systm.h>
     35    1.1      haad #include <sys/sysmacros.h>
     36    1.1      haad #include <sys/resource.h>
     37    1.1      haad #include <sys/vfs.h>
     38   1.27       chs #include <sys/vm.h>
     39    1.1      haad #include <sys/vnode.h>
     40    1.1      haad #include <sys/file.h>
     41    1.1      haad #include <sys/stat.h>
     42    1.1      haad #include <sys/kmem.h>
     43    1.1      haad #include <sys/taskq.h>
     44    1.1      haad #include <sys/uio.h>
     45    1.1      haad #include <sys/atomic.h>
     46    1.2      haad #include <sys/namei.h>
     47    1.1      haad #include <sys/mman.h>
     48    1.1      haad #include <sys/cmn_err.h>
     49    1.1      haad #include <sys/errno.h>
     50    1.1      haad #include <sys/unistd.h>
     51    1.1      haad #include <sys/zfs_dir.h>
     52    1.1      haad #include <sys/zfs_ioctl.h>
     53    1.1      haad #include <sys/fs/zfs.h>
     54    1.1      haad #include <sys/dmu.h>
     55   1.27       chs #include <sys/dmu_objset.h>
     56    1.1      haad #include <sys/spa.h>
     57    1.1      haad #include <sys/txg.h>
     58    1.1      haad #include <sys/dbuf.h>
     59    1.1      haad #include <sys/zap.h>
     60   1.27       chs #include <sys/sa.h>
     61    1.1      haad #include <sys/dirent.h>
     62    1.1      haad #include <sys/policy.h>
     63    1.1      haad #include <sys/sunddi.h>
     64    1.1      haad #include <sys/filio.h>
     65   1.27       chs #include <sys/sid.h>
     66    1.1      haad #include <sys/zfs_ctldir.h>
     67    1.1      haad #include <sys/zfs_fuid.h>
     68   1.27       chs #include <sys/zfs_sa.h>
     69    1.1      haad #include <sys/dnlc.h>
     70    1.1      haad #include <sys/zfs_rlock.h>
     71    1.2      haad #include <sys/buf.h>
     72    1.2      haad #include <sys/sched.h>
     73    1.2      haad #include <sys/acl.h>
     74   1.27       chs #include <sys/extdirent.h>
     75   1.27       chs 
     76   1.27       chs #ifdef __FreeBSD__
     77   1.27       chs #include <sys/kidmap.h>
     78   1.27       chs #include <sys/bio.h>
     79   1.27       chs #include <vm/vm_param.h>
     80   1.27       chs #endif
     81    1.2      haad 
     82    1.2      haad #ifdef __NetBSD__
     83   1.28  riastrad #include <dev/mm.h>
     84   1.48   hannken #include <miscfs/fifofs/fifo.h>
     85    1.2      haad #include <miscfs/genfs/genfs.h>
     86   1.27       chs #include <miscfs/genfs/genfs_node.h>
     87   1.27       chs #include <uvm/uvm_extern.h>
     88   1.41   hannken #include <sys/fstrans.h>
     89   1.45   hannken #include <sys/malloc.h>
     90   1.27       chs 
     91   1.27       chs uint_t zfs_putpage_key;
     92    1.2      haad #endif
     93    1.1      haad 
     94    1.1      haad /*
     95    1.1      haad  * Programming rules.
     96    1.1      haad  *
     97    1.1      haad  * Each vnode op performs some logical unit of work.  To do this, the ZPL must
     98    1.1      haad  * properly lock its in-core state, create a DMU transaction, do the work,
     99    1.1      haad  * record this work in the intent log (ZIL), commit the DMU transaction,
    100    1.1      haad  * and wait for the intent log to commit if it is a synchronous operation.
    101    1.1      haad  * Moreover, the vnode ops must work in both normal and log replay context.
    102    1.1      haad  * The ordering of events is important to avoid deadlocks and references
    103    1.1      haad  * to freed memory.  The example below illustrates the following Big Rules:
    104    1.1      haad  *
    105   1.27       chs  *  (1)	A check must be made in each zfs thread for a mounted file system.
    106    1.1      haad  *	This is done avoiding races using ZFS_ENTER(zfsvfs).
    107   1.27       chs  *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
    108   1.27       chs  *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
    109   1.27       chs  *	can return EIO from the calling function.
    110    1.1      haad  *
    111    1.1      haad  *  (2)	VN_RELE() should always be the last thing except for zil_commit()
    112    1.1      haad  *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
    113    1.1      haad  *	First, if it's the last reference, the vnode/znode
    114    1.1      haad  *	can be freed, so the zp may point to freed memory.  Second, the last
    115    1.1      haad  *	reference will call zfs_zinactive(), which may induce a lot of work --
    116    1.1      haad  *	pushing cached pages (which acquires range locks) and syncing out
    117    1.1      haad  *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
    118    1.1      haad  *	which could deadlock the system if you were already holding one.
    119    1.2      haad  *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
    120    1.1      haad  *
    121    1.1      haad  *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
    122    1.1      haad  *	as they can span dmu_tx_assign() calls.
    123    1.1      haad  *
    124   1.27       chs  *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
    125   1.27       chs  *      dmu_tx_assign().  This is critical because we don't want to block
    126   1.27       chs  *      while holding locks.
    127   1.27       chs  *
    128   1.27       chs  *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
    129   1.27       chs  *	reduces lock contention and CPU usage when we must wait (note that if
    130   1.27       chs  *	throughput is constrained by the storage, nearly every transaction
    131   1.27       chs  *	must wait).
    132   1.27       chs  *
    133   1.27       chs  *      Note, in particular, that if a lock is sometimes acquired before
    134   1.27       chs  *      the tx assigns, and sometimes after (e.g. z_lock), then failing
    135   1.27       chs  *      to use a non-blocking assign can deadlock the system.  The scenario:
    136    1.1      haad  *
    137    1.1      haad  *	Thread A has grabbed a lock before calling dmu_tx_assign().
    138    1.1      haad  *	Thread B is in an already-assigned tx, and blocks for this lock.
    139    1.1      haad  *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
    140    1.1      haad  *	forever, because the previous txg can't quiesce until B's tx commits.
    141    1.1      haad  *
    142    1.1      haad  *	If dmu_tx_assign() returns ERESTART after a TXG_NOWAIT assign,
    143   1.27       chs  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
    144   1.27       chs  *	calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
    145   1.27       chs  *	to indicate that this operation has already called dmu_tx_wait().
    146   1.27       chs  *	This will ensure that we don't retry forever, waiting a short bit
    147   1.27       chs  *	each time.
    148    1.1      haad  *
    149    1.1      haad  *  (5)	If the operation succeeded, generate the intent log entry for it
    150    1.1      haad  *	before dropping locks.  This ensures that the ordering of events
    151    1.1      haad  *	in the intent log matches the order in which they actually occurred.
    152   1.27       chs  *	During ZIL replay the zfs_log_* functions will update the sequence
    153    1.4      haad  *	number to indicate the zil transaction has replayed.
    154    1.1      haad  *
    155    1.1      haad  *  (6)	At the end of each vnode op, the DMU tx must always commit,
    156    1.1      haad  *	regardless of whether there were any errors.
    157    1.1      haad  *
    158   1.27       chs  *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
    159    1.1      haad  *	to ensure that synchronous semantics are provided when necessary.
    160    1.1      haad  *
    161    1.1      haad  * In general, this is how things should be ordered in each vnode op:
    162    1.1      haad  *
    163    1.1      haad  *	ZFS_ENTER(zfsvfs);		// exit if unmounted
    164    1.1      haad  * top:
    165   1.27       chs  *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
    166    1.1      haad  *	rw_enter(...);			// grab any other locks you need
    167    1.1      haad  *	tx = dmu_tx_create(...);	// get DMU tx
    168    1.1      haad  *	dmu_tx_hold_*();		// hold each object you might modify
    169   1.27       chs  *	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
    170    1.1      haad  *	if (error) {
    171    1.1      haad  *		rw_exit(...);		// drop locks
    172   1.27       chs  *		zfs_dirent_unlock(dl);	// unlock directory entry
    173    1.1      haad  *		VN_RELE(...);		// release held vnodes
    174    1.4      haad  *		if (error == ERESTART) {
    175   1.27       chs  *			waited = B_TRUE;
    176    1.1      haad  *			dmu_tx_wait(tx);
    177    1.1      haad  *			dmu_tx_abort(tx);
    178    1.1      haad  *			goto top;
    179    1.1      haad  *		}
    180    1.1      haad  *		dmu_tx_abort(tx);	// abort DMU tx
    181    1.1      haad  *		ZFS_EXIT(zfsvfs);	// finished in zfs
    182    1.1      haad  *		return (error);		// really out of space
    183    1.1      haad  *	}
    184    1.1      haad  *	error = do_real_work();		// do whatever this VOP does
    185    1.1      haad  *	if (error == 0)
    186    1.1      haad  *		zfs_log_*(...);		// on success, make ZIL entry
    187    1.1      haad  *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
    188    1.1      haad  *	rw_exit(...);			// drop locks
    189   1.27       chs  *	zfs_dirent_unlock(dl);		// unlock directory entry
    190    1.1      haad  *	VN_RELE(...);			// release held vnodes
    191   1.27       chs  *	zil_commit(zilog, foid);	// synchronous when necessary
    192    1.1      haad  *	ZFS_EXIT(zfsvfs);		// finished in zfs
    193    1.1      haad  *	return (error);			// done, report error
    194    1.1      haad  */
    195    1.1      haad 
    196    1.1      haad /* ARGSUSED */
                         /*
                          * Open-time checks for the vnode *vpp opened with flags `flag`.
                          *
                          * Returns EPERM when the znode has ZFS_APPENDONLY set and the open
                          * asks for FWRITE without FAPPEND, EACCES when fs_vscan() rejects the
                          * file, and 0 otherwise.  ZFS_ENTER()/ZFS_VERIFY_ZP() may return
                          * early on their own (see rule 1 in "Programming rules" above).
                          * For FSYNC/FDSYNC opens the znode's z_sync_cnt is incremented;
                          * zfs_close() performs the matching decrement.
                          */
    197    1.1      haad static int
    198    1.1      haad zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
    199    1.1      haad {
    200    1.1      haad 	znode_t	*zp = VTOZ(*vpp);
    201   1.27       chs 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    202    1.1      haad 
    203   1.27       chs 	ZFS_ENTER(zfsvfs);
    204   1.27       chs 	ZFS_VERIFY_ZP(zp);
    205   1.27       chs 
                         	/* Append-only files may only be opened for writing with FAPPEND. */
    206   1.27       chs 	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
    207    1.1      haad 	    ((flag & FAPPEND) == 0)) {
    208   1.27       chs 		ZFS_EXIT(zfsvfs);
    209   1.27       chs 		return (SET_ERROR(EPERM));
    210    1.1      haad 	}
    211    1.1      haad 
                         	/*
                         	 * Scan on open only when z_vscan is set, zfs_has_ctldir(zp) is
                         	 * false, and the file is a non-empty regular file that is not
                         	 * flagged ZFS_AV_QUARANTINED.
                         	 */
    212    1.1      haad 	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
    213    1.1      haad 	    ZTOV(zp)->v_type == VREG &&
    214   1.27       chs 	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
    215   1.27       chs 		if (fs_vscan(*vpp, cr, 0) != 0) {
    216   1.27       chs 			ZFS_EXIT(zfsvfs);
    217   1.27       chs 			return (SET_ERROR(EACCES));
    218   1.27       chs 		}
    219   1.27       chs 	}
    220    1.1      haad 
    221   1.99      yamt 	/*
    222   1.99      yamt 	 * Keep a count of the synchronous opens in the znode. On first
    223   1.99      yamt 	 * synchronous open we must convert all previous async transactions
    224   1.99      yamt 	 * into sync to keep correct ordering.
    225   1.99      yamt 	 */
    226   1.99      yamt 	if (flag & (FSYNC | FDSYNC)) {
                         		/* Only the 0 -> 1 transition triggers the conversion. */
    227   1.99      yamt 		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
    228   1.99      yamt 			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
    229   1.99      yamt 	}
    230    1.1      haad 
    231   1.27       chs 	ZFS_EXIT(zfsvfs);
    232    1.1      haad 	return (0);
    233    1.1      haad }
    234    1.1      haad 
    235    1.1      haad /* ARGSUSED */
                         /*
                          * Close-time processing for vp.
                          *
                          * Drops the calling process's locks and shares on vp, decrements
                          * the znode's z_sync_cnt when the file was opened FSYNC/FDSYNC and
                          * count == 1, and runs fs_vscan() on eligible files.  Returns 0
                          * unless ZFS_ENTER()/ZFS_VERIFY_ZP() return early.
                          *
                          * NOTE(review): count == 1 presumably identifies the last close of
                          * this open file -- confirm against the VOP_CLOSE contract.
                          */
    236    1.1      haad static int
    237    1.1      haad zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
    238    1.1      haad     caller_context_t *ct)
    239    1.1      haad {
    240    1.1      haad 	znode_t	*zp = VTOZ(vp);
    241    1.4      haad 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
    242    1.1      haad 
    243    1.1      haad 	/*
    244    1.1      haad 	 * Clean up any locks held by this process on the vp.
                         	 * This runs before ZFS_ENTER(), so it happens even when
                         	 * ZFS_ENTER()/ZFS_VERIFY_ZP() make the function return early.
    245    1.1      haad 	 */
    246    1.1      haad 	cleanlocks(vp, ddi_get_pid(), 0);
    247    1.1      haad 	cleanshares(vp, ddi_get_pid());
    248    1.1      haad 
    249    1.4      haad 	ZFS_ENTER(zfsvfs);
    250    1.4      haad 	ZFS_VERIFY_ZP(zp);
    251    1.4      haad 
    252    1.4      haad 	/* Decrement the synchronous opens in the znode */
    253    1.4      haad 	if ((flag & (FSYNC | FDSYNC)) && (count == 1))
    254    1.4      haad 		atomic_dec_32(&zp->z_sync_cnt);
    255    1.4      haad 
                         	/*
                         	 * Same eligibility test as in zfs_open(); here fs_vscan() is
                         	 * called with 1 as its last argument and its success is
                         	 * asserted with VERIFY() rather than reported to the caller.
                         	 */
    256    1.1      haad 	if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
    257    1.1      haad 	    ZTOV(zp)->v_type == VREG &&
    258   1.27       chs 	    !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
    259    1.1      haad 		VERIFY(fs_vscan(vp, cr, 1) == 0);
    260    1.1      haad 
    261   1.18  riastrad 	ZFS_EXIT(zfsvfs);
    262    1.1      haad 	return (0);
    263    1.1      haad }
    264    1.1      haad 
    265    1.1      haad /*
    266    1.1      haad  * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
    267    1.1      haad  * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
                          *
                          * Any cmd other than _FIO_SEEK_HOLE is treated as a data search.
                          *
                          * Returns ENXIO when *off is at or beyond the file size or when
                          * dmu_offset_next() reports ESRCH; otherwise returns whatever
                          * dmu_offset_next() returned.  *off is updated only when the offset
                          * found is not below the starting offset.
                          *
                          * This function does not call ZFS_ENTER() itself; its caller in this
                          * file, zfs_ioctl(), does so before calling it.
    268    1.1      haad  */
    269    1.1      haad static int
    270    1.2      haad zfs_holey(vnode_t *vp, u_long cmd, offset_t *off)
    271    1.1      haad {
    272    1.1      haad 	znode_t	*zp = VTOZ(vp);
    273    1.1      haad 	uint64_t noff = (uint64_t)*off; /* new offset */
    274    1.1      haad 	uint64_t file_sz;
    275    1.1      haad 	int error;
    276    1.1      haad 	boolean_t hole;
    277    1.1      haad 
                         	/* Searching from at or past end-of-file can never succeed. */
    278   1.27       chs 	file_sz = zp->z_size;
    279    1.1      haad 	if (noff >= file_sz)  {
    280   1.27       chs 		return (SET_ERROR(ENXIO));
    281    1.1      haad 	}
    282    1.1      haad 
    283    1.1      haad 	if (cmd == _FIO_SEEK_HOLE)
    284    1.1      haad 		hole = B_TRUE;
    285    1.1      haad 	else
    286    1.1      haad 		hole = B_FALSE;
    287    1.1      haad 
    288    1.1      haad 	error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);
    289    1.1      haad 
    290   1.27       chs 	if (error == ESRCH)
    291   1.27       chs 		return (SET_ERROR(ENXIO));
    292   1.27       chs 
    293   1.27       chs 	/*
    294   1.27       chs 	 * We could find a hole that begins after the logical end-of-file,
    295   1.27       chs 	 * because dmu_offset_next() only works on whole blocks.  If the
    296   1.27       chs 	 * EOF falls mid-block, then indicate that the "virtual hole"
    297   1.27       chs 	 * at the end of the file begins at the logical EOF, rather than
    298   1.27       chs 	 * at the end of the last block.
    299   1.27       chs 	 */
    300   1.27       chs 	if (noff > file_sz) {
    301   1.27       chs 		ASSERT(hole);
    302   1.27       chs 		noff = file_sz;
    303    1.1      haad 	}
    304    1.1      haad 
                         	/* Never move the caller's offset backwards; leave *off as is. */
    305    1.1      haad 	if (noff < *off)
    306    1.1      haad 		return (error);
    307    1.1      haad 	*off = noff;
    308    1.1      haad 	return (error);
    309    1.1      haad }
    310    1.1      haad 
    311   1.27       chs /* ARGSUSED */
                         /*
                          * ioctl handler for ZFS vnodes.
                          *
                          *	_FIOFFS, _FIOGDIO, _FIOSDIO
                          *		Accepted and ignored; return 0.
                          *	_FIO_SEEK_DATA, _FIO_SEEK_HOLE
                          *		Read an offset_t from `data`, run zfs_holey() on it and
                          *		store the resulting offset back through `data`.
                          *	_FIO_COUNT_FILLED (illumos builds only)
                          *		Wait for the object to be synced, then copy out its
                          *		doi_fill_count.
                          *
                          * Any other command returns ENOTTY.
                          */
    312    1.1      haad static int
    313    1.2      haad zfs_ioctl(vnode_t *vp, u_long com, intptr_t data, int flag, cred_t *cred,
    314    1.1      haad     int *rvalp, caller_context_t *ct)
    315    1.1      haad {
    316    1.1      haad 	offset_t off;
    317   1.27       chs 	offset_t ndata;
    318   1.27       chs 	dmu_object_info_t doi;
    319    1.1      haad 	int error;
    320    1.1      haad 	zfsvfs_t *zfsvfs;
    321    1.1      haad 	znode_t *zp;
    322   1.27       chs 
    323    1.1      haad 	switch (com) {
    324    1.1      haad 	case _FIOFFS:
    325   1.27       chs 	{
    326    1.2      haad 		return (0);
    327   1.27       chs 
    328    1.1      haad 		/*
    329    1.1      haad 		 * The following two ioctls are used by bfu.  Faking out,
    330    1.1      haad 		 * necessary to avoid bfu errors.
    331    1.1      haad 		 */
    332   1.27       chs 	}
    333    1.1      haad 	case _FIOGDIO:
    334    1.1      haad 	case _FIOSDIO:
    335   1.27       chs 	{
    336    1.1      haad 		return (0);
    337   1.27       chs 	}
    338   1.27       chs 
    339    1.1      haad 	case _FIO_SEEK_DATA:
    340    1.1      haad 	case _FIO_SEEK_HOLE:
    341   1.27       chs 	{
    342   1.27       chs #ifdef illumos
    343    1.1      haad 		if (ddi_copyin((void *)data, &off, sizeof (off), flag))
    344   1.27       chs 			return (SET_ERROR(EFAULT));
    345   1.27       chs #else
                         		/*
                         		 * Non-illumos builds dereference `data` directly instead
                         		 * of using ddi_copyin().  NOTE(review): this assumes the
                         		 * caller hands in a kernel-addressable buffer -- confirm.
                         		 */
    346   1.27       chs 		off = *(offset_t *)data;
    347   1.27       chs #endif
    348    1.1      haad 		zp = VTOZ(vp);
    349    1.1      haad 		zfsvfs = zp->z_zfsvfs;
    350    1.1      haad 		ZFS_ENTER(zfsvfs);
    351    1.1      haad 		ZFS_VERIFY_ZP(zp);
    352   1.27       chs 
    353    1.1      haad 		/* offset parameter is in/out */
    354    1.1      haad 		error = zfs_holey(vp, com, &off);
    355    1.1      haad 		ZFS_EXIT(zfsvfs);
    356    1.1      haad 		if (error)
    357    1.1      haad 			return (error);
    358   1.27       chs #ifdef illumos
    359    1.1      haad 		if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
    360   1.27       chs 			return (SET_ERROR(EFAULT));
    361   1.27       chs #else
    362   1.27       chs 		*(offset_t *)data = off;
    363   1.27       chs #endif
    364   1.27       chs 		return (0);
    365   1.27       chs 	}
    366   1.27       chs #ifdef illumos
    367   1.27       chs 	case _FIO_COUNT_FILLED:
    368   1.27       chs 	{
    369   1.27       chs 		/*
    370   1.27       chs 		 * _FIO_COUNT_FILLED adds a new ioctl command which
    371   1.27       chs 		 * exposes the number of filled blocks in a
    372   1.27       chs 		 * ZFS object.
    373   1.27       chs 		 */
    374   1.27       chs 		zp = VTOZ(vp);
    375   1.27       chs 		zfsvfs = zp->z_zfsvfs;
    376   1.27       chs 		ZFS_ENTER(zfsvfs);
    377   1.27       chs 		ZFS_VERIFY_ZP(zp);
    378   1.27       chs 
    379   1.27       chs 		/*
    380   1.27       chs 		 * Wait for all dirty blocks for this object
    381   1.27       chs 		 * to get synced out to disk, and the DMU info
    382   1.27       chs 		 * updated.
    383   1.27       chs 		 */
    384   1.27       chs 		error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id);
    385   1.27       chs 		if (error) {
    386   1.27       chs 			ZFS_EXIT(zfsvfs);
    387   1.27       chs 			return (error);
    388   1.27       chs 		}
    389   1.27       chs 
    390   1.27       chs 		/*
    391   1.27       chs 		 * Retrieve fill count from DMU object.
    392   1.27       chs 		 */
    393   1.27       chs 		error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi);
    394   1.27       chs 		if (error) {
    395   1.27       chs 			ZFS_EXIT(zfsvfs);
    396   1.27       chs 			return (error);
    397   1.27       chs 		}
    398   1.27       chs 
    399   1.27       chs 		ndata = doi.doi_fill_count;
    400   1.27       chs 
                         		/* Leave ZFS before copying the result out to the caller. */
    401   1.27       chs 		ZFS_EXIT(zfsvfs);
    402   1.27       chs 		if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag))
    403   1.27       chs 			return (SET_ERROR(EFAULT));
    404    1.1      haad 		return (0);
    405   1.27       chs 	}
    406    1.2      haad #endif
    407    1.1      haad 	}
                         	/* No case matched: the command is not supported here. */
    408   1.27       chs 	return (SET_ERROR(ENOTTY));
    409   1.27       chs }
    410   1.27       chs 
    411   1.27       chs #ifdef __FreeBSD__
    412   1.27       chs static vm_page_t
    413   1.27       chs page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
    414   1.27       chs {
    415   1.27       chs 	vm_object_t obj;
    416   1.27       chs 	vm_page_t pp;
    417   1.27       chs 	int64_t end;
    418   1.27       chs 
    419   1.27       chs 	/*
    420   1.27       chs 	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
    421   1.27       chs 	 * aligned boundaries, if the range is not aligned.  As a result a
    422   1.27       chs 	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
    423   1.27       chs 	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
    424   1.27       chs 	 * the whole page would be considred clean despite have some dirty data.
    425   1.27       chs 	 * For this reason we should shrink the range to DEV_BSIZE aligned
    426   1.27       chs 	 * boundaries before calling vm_page_clear_dirty.
    427   1.27       chs 	 */
    428   1.27       chs 	end = rounddown2(off + nbytes, DEV_BSIZE);
    429   1.27       chs 	off = roundup2(off, DEV_BSIZE);
    430   1.27       chs 	nbytes = end - off;
    431   1.27       chs 
    432   1.27       chs 	obj = vp->v_object;
    433   1.27       chs 	zfs_vmobject_assert_wlocked(obj);
    434   1.27       chs 
    435   1.27       chs 	for (;;) {
    436   1.27       chs 		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
    437   1.27       chs 		    pp->valid) {
    438   1.27       chs 			if (vm_page_xbusied(pp)) {
    439   1.27       chs 				/*
    440   1.27       chs 				 * Reference the page before unlocking and
    441   1.27       chs 				 * sleeping so that the page daemon is less
    442   1.27       chs 				 * likely to reclaim it.
    443   1.27       chs 				 */
    444   1.27       chs 				vm_page_reference(pp);
    445   1.27       chs 				vm_page_lock(pp);
    446   1.27       chs 				zfs_vmobject_wunlock(obj);
    447   1.27       chs 				vm_page_busy_sleep(pp, "zfsmwb", true);
    448   1.27       chs 				zfs_vmobject_wlock(obj);
    449   1.27       chs 				continue;
    450   1.27       chs 			}
    451   1.27       chs 			vm_page_sbusy(pp);
    452   1.27       chs 		} else if (pp != NULL) {
    453   1.27       chs 			ASSERT(!pp->valid);
    454   1.27       chs 			pp = NULL;
    455   1.27       chs 		}
    456   1.27       chs 
    457   1.27       chs 		if (pp != NULL) {
    458   1.27       chs 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
    459   1.27       chs 			vm_object_pip_add(obj, 1);
    460   1.27       chs 			pmap_remove_write(pp);
    461   1.27       chs 			if (nbytes != 0)
    462   1.27       chs 				vm_page_clear_dirty(pp, off, nbytes);
    463   1.27       chs 		}
    464   1.27       chs 		break;
    465   1.27       chs 	}
    466   1.27       chs 	return (pp);
    467   1.27       chs }
    468   1.27       chs 
    469   1.27       chs static void
    470   1.27       chs page_unbusy(vm_page_t pp)
    471   1.27       chs {
    472   1.27       chs 
                         	/*
                         	 * Undo page_busy(): drop the shared-busy state it took with
                         	 * vm_page_sbusy() and the paging-in-progress count it added
                         	 * to the page's object with vm_object_pip_add().
                         	 */
    473   1.27       chs 	vm_page_sunbusy(pp);
    474   1.27       chs 	vm_object_pip_subtract(pp->object, 1);
    475   1.27       chs }
    476   1.27       chs 
    477   1.27       chs static vm_page_t
    478   1.27       chs page_hold(vnode_t *vp, int64_t start)
    479   1.27       chs {
                         	/*
                         	 * Look up the page of vp's VM object that covers byte offset
                         	 * `start` and, if it is valid, take a hold on it with
                         	 * vm_page_hold() under the page lock.
                         	 *
                         	 * While the page is exclusively busied the object lock is
                         	 * dropped, the thread sleeps, and the lookup is retried.
                         	 * Returns the held page, or NULL when there is no page or it
                         	 * is not valid.  The caller must hold the object write lock
                         	 * (asserted below) and release the page with page_unhold().
                         	 */
    480   1.27       chs 	vm_object_t obj;
    481   1.27       chs 	vm_page_t pp;
    482   1.27       chs 
    483   1.27       chs 	obj = vp->v_object;
    484   1.27       chs 	zfs_vmobject_assert_wlocked(obj);
    485   1.27       chs 
    486   1.27       chs 	for (;;) {
    487   1.27       chs 		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
    488   1.27       chs 		    pp->valid) {
    489   1.27       chs 			if (vm_page_xbusied(pp)) {
    490   1.27       chs 				/*
    491   1.27       chs 				 * Reference the page before unlocking and
    492   1.27       chs 				 * sleeping so that the page daemon is less
    493   1.27       chs 				 * likely to reclaim it.
    494   1.27       chs 				 */
    495   1.27       chs 				vm_page_reference(pp);
    496   1.27       chs 				vm_page_lock(pp);
    497   1.27       chs 				zfs_vmobject_wunlock(obj);
    498   1.27       chs 				vm_page_busy_sleep(pp, "zfsmwb", true);
    499   1.27       chs 				zfs_vmobject_wlock(obj);
    500   1.27       chs 				continue;
    501   1.27       chs 			}
    502   1.27       chs 
    503   1.27       chs 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
    504   1.27       chs 			vm_page_lock(pp);
    505   1.27       chs 			vm_page_hold(pp);
    506   1.27       chs 			vm_page_unlock(pp);
    507   1.27       chs 
    508   1.27       chs 		} else
    509   1.27       chs 			pp = NULL;
    510   1.27       chs 		break;
    511   1.27       chs 	}
    512   1.27       chs 	return (pp);
    513   1.27       chs }
    514   1.27       chs 
    515   1.27       chs static void
    516   1.27       chs page_unhold(vm_page_t pp)
    517   1.27       chs {
    518   1.27       chs 
                         	/*
                         	 * Release a hold taken by page_hold(); like the hold itself,
                         	 * the release is done under the page lock.
                         	 */
    519   1.27       chs 	vm_page_lock(pp);
    520   1.27       chs 	vm_page_unhold(pp);
    521   1.27       chs 	vm_page_unlock(pp);
    522    1.1      haad }
    523    1.1      haad 
    524    1.1      haad /*
    525    1.1      haad  * When a file is memory mapped, we must keep the IO data synchronized
    526    1.1      haad  * between the DMU cache and the memory mapped pages.  What this means:
    527    1.1      haad  *
    528    1.1      haad  * On Write:	If we find a memory mapped page, we write to *both*
    529    1.1      haad  *		the page and the dmu buffer.
    530    1.1      haad  */
    531    1.4      haad static void
    532   1.27       chs update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    533   1.27       chs     int segflg, dmu_tx_t *tx)
    534    1.1      haad {
    535   1.27       chs 	vm_object_t obj;
    536   1.27       chs 	struct sf_buf *sf;
    537   1.27       chs 	caddr_t va;
    538   1.27       chs 	int off;
    539   1.27       chs 
    540   1.27       chs 	ASSERT(segflg != UIO_NOCOPY);
    541   1.27       chs 	ASSERT(vp->v_mount != NULL);
    542   1.27       chs 	obj = vp->v_object;
    543   1.27       chs 	ASSERT(obj != NULL);
    544    1.1      haad 
    545    1.1      haad 	off = start & PAGEOFFSET;
    546   1.27       chs 	zfs_vmobject_wlock(obj);
    547    1.1      haad 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
    548   1.27       chs 		vm_page_t pp;
    549   1.27       chs 		int nbytes = imin(PAGESIZE - off, len);
    550   1.27       chs 
    551   1.27       chs 		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
    552   1.27       chs 			zfs_vmobject_wunlock(obj);
    553    1.1      haad 
    554   1.27       chs 			va = zfs_map_page(pp, &sf);
    555   1.27       chs 			(void) dmu_read(os, oid, start+off, nbytes,
    556   1.27       chs 			    va+off, DMU_READ_PREFETCH);;
    557   1.27       chs 			zfs_unmap_page(sf);
    558    1.1      haad 
    559   1.27       chs 			zfs_vmobject_wlock(obj);
    560   1.27       chs 			page_unbusy(pp);
    561    1.1      haad 		}
    562    1.4      haad 		len -= nbytes;
    563    1.1      haad 		off = 0;
    564    1.1      haad 	}
    565   1.27       chs 	vm_object_pip_wakeupn(obj, 0);
    566   1.27       chs 	zfs_vmobject_wunlock(obj);
    567   1.27       chs }
    568   1.27       chs 
    569   1.27       chs /*
    570   1.27       chs  * Read with UIO_NOCOPY flag means that sendfile(2) requests
    571   1.27       chs  * ZFS to populate a range of page cache pages with data.
    572   1.27       chs  *
    573   1.27       chs  * NOTE: this function could be optimized to pre-allocate
    574   1.27       chs  * all pages in advance, drain exclusive busy on all of them,
    575   1.27       chs  * map them into contiguous KVA region and populate them
    576   1.27       chs  * in one single dmu_read() call.
    577   1.27       chs  */
    578   1.27       chs static int
    579   1.27       chs mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio)
    580   1.27       chs {
    581   1.27       chs 	znode_t *zp = VTOZ(vp);
    582   1.27       chs 	objset_t *os = zp->z_zfsvfs->z_os;
    583   1.27       chs 	struct sf_buf *sf;
    584   1.27       chs 	vm_object_t obj;
    585   1.27       chs 	vm_page_t pp;
    586   1.27       chs 	int64_t start;
    587   1.27       chs 	caddr_t va;
    588   1.27       chs 	int len = nbytes;
    589   1.27       chs 	int off;
    590   1.27       chs 	int error = 0;
    591    1.4      haad 
    592   1.27       chs 	ASSERT(uio->uio_segflg == UIO_NOCOPY);
    593   1.27       chs 	ASSERT(vp->v_mount != NULL);
    594   1.27       chs 	obj = vp->v_object;
    595   1.27       chs 	ASSERT(obj != NULL);
    596   1.27       chs 	ASSERT((uio->uio_loffset & PAGEOFFSET) == 0);
    597   1.27       chs 
    598   1.27       chs 	zfs_vmobject_wlock(obj);
    599   1.27       chs 	for (start = uio->uio_loffset; len > 0; start += PAGESIZE) {
    600   1.27       chs 		int bytes = MIN(PAGESIZE, len);
    601   1.27       chs 
    602   1.27       chs 		pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY |
    603   1.27       chs 		    VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
    604   1.27       chs 		if (pp->valid == 0) {
    605   1.27       chs 			zfs_vmobject_wunlock(obj);
    606   1.27       chs 			va = zfs_map_page(pp, &sf);
    607   1.27       chs 			error = dmu_read(os, zp->z_id, start, bytes, va,
    608   1.27       chs 			    DMU_READ_PREFETCH);
    609   1.27       chs 			if (bytes != PAGESIZE && error == 0)
    610   1.27       chs 				bzero(va + bytes, PAGESIZE - bytes);
    611   1.27       chs 			zfs_unmap_page(sf);
    612   1.27       chs 			zfs_vmobject_wlock(obj);
    613   1.27       chs 			vm_page_sunbusy(pp);
    614   1.27       chs 			vm_page_lock(pp);
    615   1.27       chs 			if (error) {
    616   1.27       chs 				if (pp->wire_count == 0 && pp->valid == 0 &&
    617   1.27       chs 				    !vm_page_busied(pp))
    618   1.27       chs 					vm_page_free(pp);
    619   1.27       chs 			} else {
    620   1.27       chs 				pp->valid = VM_PAGE_BITS_ALL;
    621   1.27       chs 				vm_page_activate(pp);
    622   1.27       chs 			}
    623   1.27       chs 			vm_page_unlock(pp);
    624   1.27       chs 		} else {
    625   1.27       chs 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
    626   1.27       chs 			vm_page_sunbusy(pp);
    627   1.27       chs 		}
    628   1.27       chs 		if (error)
    629   1.27       chs 			break;
    630   1.27       chs 		uio->uio_resid -= bytes;
    631   1.27       chs 		uio->uio_offset += bytes;
    632   1.27       chs 		len -= bytes;
    633   1.27       chs 	}
    634   1.27       chs 	zfs_vmobject_wunlock(obj);
    635    1.1      haad 	return (error);
    636    1.1      haad }
    637    1.1      haad 
    638    1.1      haad /*
    639    1.1      haad  * When a file is memory mapped, we must keep the IO data synchronized
    640    1.1      haad  * between the DMU cache and the memory mapped pages.  What this means:
    641    1.1      haad  *
    642    1.1      haad  * On Read:	We "read" preferentially from memory mapped pages,
    643    1.1      haad  *		else we default from the dmu buffer.
    644    1.1      haad  *
    645    1.1      haad  * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
    646   1.27       chs  *	 the file is memory mapped.
    647    1.1      haad  */
    648    1.1      haad static int
    649    1.1      haad mappedread(vnode_t *vp, int nbytes, uio_t *uio)
    650    1.1      haad {
    651    1.1      haad 	znode_t *zp = VTOZ(vp);
    652    1.2      haad 	vm_object_t obj;
    653   1.27       chs 	int64_t start;
    654    1.2      haad 	caddr_t va;
    655    1.1      haad 	int len = nbytes;
    656   1.27       chs 	int off;
    657    1.1      haad 	int error = 0;
    658    1.2      haad 
    659    1.2      haad 	ASSERT(vp->v_mount != NULL);
    660    1.2      haad 	obj = vp->v_object;
    661    1.2      haad 	ASSERT(obj != NULL);
    662    1.1      haad 
    663    1.1      haad 	start = uio->uio_loffset;
    664    1.1      haad 	off = start & PAGEOFFSET;
    665   1.27       chs 	zfs_vmobject_wlock(obj);
    666   1.27       chs 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
    667   1.27       chs 		vm_page_t pp;
    668   1.27       chs 		uint64_t bytes = MIN(PAGESIZE - off, len);
    669   1.27       chs 
    670   1.27       chs 		if (pp = page_hold(vp, start)) {
    671   1.27       chs 			struct sf_buf *sf;
    672   1.27       chs 			caddr_t va;
    673   1.27       chs 
    674   1.27       chs 			zfs_vmobject_wunlock(obj);
    675   1.27       chs 			va = zfs_map_page(pp, &sf);
    676   1.27       chs #ifdef illumos
    677   1.27       chs 			error = uiomove(va + off, bytes, UIO_READ, uio);
    678   1.27       chs #else
    679   1.27       chs 			error = vn_io_fault_uiomove(va + off, bytes, uio);
    680   1.27       chs #endif
    681   1.27       chs 			zfs_unmap_page(sf);
    682   1.27       chs 			zfs_vmobject_wlock(obj);
    683   1.27       chs 			page_unhold(pp);
    684   1.27       chs 		} else {
    685   1.27       chs 			zfs_vmobject_wunlock(obj);
    686   1.27       chs 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
    687   1.27       chs 			    uio, bytes);
    688   1.27       chs 			zfs_vmobject_wlock(obj);
    689   1.27       chs 		}
    690   1.27       chs 		len -= bytes;
    691   1.27       chs 		off = 0;
    692   1.27       chs 		if (error)
    693   1.27       chs 			break;
    694   1.27       chs 	}
    695   1.27       chs 	zfs_vmobject_wunlock(obj);
    696   1.27       chs 	return (error);
    697   1.27       chs }
    698   1.27       chs #endif /* __FreeBSD__ */
    699   1.27       chs 
    700   1.27       chs #ifdef __NetBSD__
    701   1.27       chs 
    702   1.27       chs caddr_t
    703   1.27       chs zfs_map_page(page_t *pp, enum seg_rw rw)
    704   1.27       chs {
    705   1.27       chs 	vaddr_t va;
    706   1.27       chs 	int flags;
    707   1.27       chs 
    708   1.27       chs #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
    709   1.27       chs 	if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va))
    710   1.27       chs 		return (caddr_t)va;
    711   1.27       chs #endif
    712   1.27       chs 
    713   1.27       chs 	flags = UVMPAGER_MAPIN_WAITOK |
    714   1.27       chs 		(rw == S_READ ? UVMPAGER_MAPIN_WRITE : UVMPAGER_MAPIN_READ);
    715   1.27       chs 	va = uvm_pagermapin(&pp, 1, flags);
    716   1.27       chs 	return (caddr_t)va;
    717   1.27       chs }
    718   1.27       chs 
    719   1.27       chs void
    720   1.27       chs zfs_unmap_page(page_t *pp, caddr_t addr)
    721   1.27       chs {
    722   1.27       chs 
    723   1.27       chs #ifdef __HAVE_MM_MD_DIRECT_MAPPED_PHYS
    724   1.27       chs 	vaddr_t va;
    725   1.27       chs 
    726   1.27       chs 	if (mm_md_direct_mapped_phys(VM_PAGE_TO_PHYS(pp), &va))
    727   1.27       chs 		return;
    728   1.27       chs #endif
    729   1.27       chs 	uvm_pagermapout((vaddr_t)addr, 1);
    730   1.27       chs }
    731   1.27       chs 
    732   1.27       chs static int
    733   1.27       chs mappedread(vnode_t *vp, int nbytes, uio_t *uio)
    734   1.27       chs {
    735   1.27       chs 	znode_t *zp = VTOZ(vp);
    736   1.27       chs 	struct uvm_object *uobj = &vp->v_uobj;
    737   1.62        ad 	krwlock_t *rw = uobj->vmobjlock;
    738   1.27       chs 	int64_t start;
    739   1.27       chs 	caddr_t va;
    740   1.27       chs 	size_t len = nbytes;
    741   1.27       chs 	int off;
    742   1.27       chs 	int error = 0;
    743   1.27       chs 	int npages, found;
    744   1.83      yamt 	void *buf = NULL;
    745   1.27       chs 
    746   1.27       chs 	start = uio->uio_loffset;
    747   1.27       chs 	off = start & PAGEOFFSET;
    748   1.27       chs 
    749    1.1      haad 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
    750   1.27       chs 		page_t *pp;
    751    1.1      haad 		uint64_t bytes = MIN(PAGESIZE - off, len);
    752   1.83      yamt retry:
    753   1.27       chs 		pp = NULL;
    754   1.27       chs 		npages = 1;
    755   1.62        ad 		rw_enter(rw, RW_WRITER);
    756   1.55        ad 		found = uvn_findpages(uobj, start, &npages, &pp, NULL,
    757   1.55        ad 		    UFP_NOALLOC);
    758   1.62        ad 		rw_exit(rw);
    759   1.27       chs 
    760   1.27       chs 		if (found) {
    761   1.83      yamt 			if (buf != NULL) {
    762   1.83      yamt 				va = zfs_map_page(pp, S_READ);
    763   1.83      yamt 				memcpy(buf, va + off, bytes);
    764   1.83      yamt 				zfs_unmap_page(pp, va);
    765   1.83      yamt 			}
    766   1.68        ad 			rw_enter(rw, RW_WRITER);
    767   1.68        ad 			uvm_page_unbusy(&pp, 1);
    768   1.68        ad 			rw_exit(rw);
    769   1.83      yamt 			if (buf == NULL) {
    770   1.83      yamt 				buf = kmem_alloc(PAGESIZE, KM_SLEEP);
    771   1.83      yamt 				goto retry;
    772   1.83      yamt 			}
    773   1.83      yamt 			error = uiomove(buf, bytes, UIO_READ, uio);
    774    1.1      haad 		} else {
    775   1.27       chs 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
    776   1.27       chs 			    uio, bytes);
    777    1.1      haad 		}
    778   1.27       chs 
    779    1.1      haad 		len -= bytes;
    780    1.1      haad 		off = 0;
    781    1.1      haad 		if (error)
    782    1.1      haad 			break;
    783    1.1      haad 	}
    784   1.83      yamt 	if (buf != NULL) {
    785   1.83      yamt 		kmem_free(buf, PAGESIZE);
    786   1.83      yamt 	}
    787    1.1      haad 	return (error);
    788    1.1      haad }
    789   1.27       chs 
    790   1.27       chs static void
    791   1.27       chs update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid,
    792   1.27       chs     int segflg, dmu_tx_t *tx)
    793   1.27       chs {
    794   1.27       chs 	struct uvm_object *uobj = &vp->v_uobj;
    795   1.62        ad 	krwlock_t *rw = uobj->vmobjlock;
    796   1.27       chs 	caddr_t va;
    797   1.56        ad 	int off, status;
    798   1.27       chs 
    799   1.27       chs 	ASSERT(vp->v_mount != NULL);
    800   1.27       chs 
    801   1.62        ad 	rw_enter(rw, RW_WRITER);
    802   1.27       chs 
    803   1.27       chs 	off = start & PAGEOFFSET;
    804   1.27       chs 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
    805   1.27       chs 		page_t *pp;
    806   1.27       chs 		int nbytes = MIN(PAGESIZE - off, len);
    807   1.27       chs 		int npages, found;
    808   1.27       chs 
    809   1.27       chs 		pp = NULL;
    810   1.27       chs 		npages = 1;
    811   1.55        ad 		found = uvn_findpages(uobj, start, &npages, &pp, NULL,
    812   1.55        ad 		    UFP_NOALLOC);
    813   1.27       chs 		if (found) {
    814   1.84      yamt 			if (nbytes == PAGESIZE) {
    815   1.84      yamt 				/*
    816   1.84      yamt 				 * We're about to zap the page's contents
    817   1.84      yamt 				 * and don't care about any existing
    818   1.84      yamt 				 * modifications.  We must keep track of
    819   1.84      yamt 				 * any new modifications past this point.
    820   1.84      yamt 				 * Clear the modified bit in the pmap, and
    821   1.84      yamt 				 * if the page is marked dirty revert to
    822   1.84      yamt 				 * tracking the modified bit.
    823   1.84      yamt 				 */
    824   1.84      yamt 				switch (uvm_pagegetdirty(pp)) {
    825   1.84      yamt 				case UVM_PAGE_STATUS_DIRTY:
    826   1.84      yamt 					/* Does pmap_clear_modify(). */
    827   1.84      yamt 					uvm_pagemarkdirty(pp, UVM_PAGE_STATUS_UNKNOWN);
    828   1.84      yamt 					break;
    829   1.84      yamt 				case UVM_PAGE_STATUS_UNKNOWN:
    830   1.84      yamt 					pmap_clear_modify(pp);
    831   1.84      yamt 					break;
    832   1.84      yamt 				case UVM_PAGE_STATUS_CLEAN:
    833   1.84      yamt 					/* Nothing to do. */
    834   1.84      yamt 					break;
    835   1.84      yamt 				}
    836   1.56        ad 			}
    837   1.62        ad 			rw_exit(rw);
    838   1.27       chs 
    839   1.27       chs 			va = zfs_map_page(pp, S_WRITE);
    840   1.27       chs 			(void) dmu_read(os, oid, start + off, nbytes,
    841   1.27       chs 			    va + off, DMU_READ_PREFETCH);
    842   1.27       chs 			zfs_unmap_page(pp, va);
    843   1.27       chs 
    844   1.62        ad 			rw_enter(rw, RW_WRITER);
    845   1.27       chs 			uvm_page_unbusy(&pp, 1);
    846   1.27       chs 		}
    847   1.27       chs 		len -= nbytes;
    848   1.27       chs 		off = 0;
    849   1.27       chs 	}
    850   1.62        ad 	rw_exit(rw);
    851   1.27       chs }
    852   1.27       chs #endif /* __NetBSD__ */
    853   1.27       chs 
    854    1.1      haad offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
    855    1.1      haad 
    856    1.1      haad /*
    857    1.1      haad  * Read bytes from specified file into supplied buffer.
    858    1.1      haad  *
    859    1.1      haad  *	IN:	vp	- vnode of file to be read from.
    860    1.1      haad  *		uio	- structure supplying read location, range info,
    861    1.1      haad  *			  and return buffer.
    862    1.1      haad  *		ioflag	- SYNC flags; used to provide FRSYNC semantics.
    863    1.1      haad  *		cr	- credentials of caller.
    864    1.1      haad  *		ct	- caller context
    865    1.1      haad  *
    866    1.1      haad  *	OUT:	uio	- updated offset and range, buffer filled.
    867    1.1      haad  *
    868   1.27       chs  *	RETURN:	0 on success, error code on failure.
    869    1.1      haad  *
    870    1.1      haad  * Side Effects:
    871    1.1      haad  *	vp - atime updated if byte count > 0
    872    1.1      haad  */
    873    1.1      haad /* ARGSUSED */
    874    1.1      haad static int
    875    1.1      haad zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
    876    1.1      haad {
    877    1.1      haad 	znode_t		*zp = VTOZ(vp);
    878    1.1      haad 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
    879    1.1      haad 	ssize_t		n, nbytes;
    880   1.27       chs 	int		error = 0;
    881    1.1      haad 	rl_t		*rl;
    882    1.4      haad 	xuio_t		*xuio = NULL;
    883    1.1      haad 
    884    1.1      haad 	ZFS_ENTER(zfsvfs);
    885    1.1      haad 	ZFS_VERIFY_ZP(zp);
    886    1.1      haad 
    887   1.27       chs 	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
    888    1.1      haad 		ZFS_EXIT(zfsvfs);
    889   1.27       chs 		return (SET_ERROR(EACCES));
    890    1.1      haad 	}
    891    1.1      haad 
    892    1.1      haad 	/*
    893    1.1      haad 	 * Validate file offset
    894    1.1      haad 	 */
    895    1.1      haad 	if (uio->uio_loffset < (offset_t)0) {
    896    1.1      haad 		ZFS_EXIT(zfsvfs);
    897   1.27       chs 		return (SET_ERROR(EINVAL));
    898    1.1      haad 	}
    899    1.1      haad 
    900    1.1      haad 	/*
    901    1.1      haad 	 * Fasttrack empty reads
    902    1.1      haad 	 */
    903    1.1      haad 	if (uio->uio_resid == 0) {
    904    1.1      haad 		ZFS_EXIT(zfsvfs);
    905    1.1      haad 		return (0);
    906    1.1      haad 	}
    907    1.1      haad 
    908    1.1      haad 	/*
    909    1.1      haad 	 * Check for mandatory locks
    910    1.1      haad 	 */
    911   1.27       chs 	if (MANDMODE(zp->z_mode)) {
    912    1.1      haad 		if (error = chklock(vp, FREAD,
    913    1.1      haad 		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
    914    1.1      haad 			ZFS_EXIT(zfsvfs);
    915    1.1      haad 			return (error);
    916    1.1      haad 		}
    917    1.1      haad 	}
    918    1.1      haad 
    919    1.1      haad 	/*
    920    1.1      haad 	 * If we're in FRSYNC mode, sync out this znode before reading it.
    921    1.1      haad 	 */
    922   1.27       chs 	if (zfsvfs->z_log &&
    923   1.27       chs 	    (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
    924   1.27       chs 		zil_commit(zfsvfs->z_log, zp->z_id);
    925    1.1      haad 
    926    1.1      haad 	/*
    927    1.1      haad 	 * Lock the range against changes.
    928    1.1      haad 	 */
    929    1.1      haad 	rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
    930    1.1      haad 
    931    1.1      haad 	/*
    932    1.1      haad 	 * If we are reading past end-of-file we can skip
    933    1.1      haad 	 * to the end; but we might still need to set atime.
    934    1.1      haad 	 */
    935   1.27       chs 	if (uio->uio_loffset >= zp->z_size) {
    936    1.1      haad 		error = 0;
    937    1.1      haad 		goto out;
    938    1.1      haad 	}
    939    1.1      haad 
    940   1.27       chs 	ASSERT(uio->uio_loffset < zp->z_size);
    941   1.27       chs 	n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
    942   1.27       chs 
    943   1.27       chs #ifdef illumos
    944    1.4      haad 	if ((uio->uio_extflg == UIO_XUIO) &&
    945    1.4      haad 	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
    946    1.4      haad 		int nblk;
    947    1.4      haad 		int blksz = zp->z_blksz;
    948    1.4      haad 		uint64_t offset = uio->uio_loffset;
    949    1.4      haad 
    950    1.4      haad 		xuio = (xuio_t *)uio;
    951    1.4      haad 		if ((ISP2(blksz))) {
    952    1.4      haad 			nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
    953    1.4      haad 			    blksz)) / blksz;
    954    1.4      haad 		} else {
    955    1.4      haad 			ASSERT(offset + n <= blksz);
    956    1.4      haad 			nblk = 1;
    957    1.4      haad 		}
    958    1.4      haad 		(void) dmu_xuio_init(xuio, nblk);
    959    1.1      haad 
    960    1.4      haad 		if (vn_has_cached_data(vp)) {
    961    1.4      haad 			/*
    962    1.4      haad 			 * For simplicity, we always allocate a full buffer
    963    1.4      haad 			 * even if we only expect to read a portion of a block.
    964    1.4      haad 			 */
    965    1.4      haad 			while (--nblk >= 0) {
    966    1.4      haad 				(void) dmu_xuio_add(xuio,
    967   1.27       chs 				    dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
    968   1.27       chs 				    blksz), 0, blksz);
    969    1.4      haad 			}
    970    1.4      haad 		}
    971    1.4      haad 	}
    972   1.27       chs #endif	/* illumos */
    973   1.27       chs 
    974    1.1      haad 	while (n > 0) {
    975    1.1      haad 		nbytes = MIN(n, zfs_read_chunk_size -
    976    1.1      haad 		    P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
    977    1.1      haad 
    978   1.27       chs #ifdef __FreeBSD__
    979   1.27       chs 		if (uio->uio_segflg == UIO_NOCOPY)
    980   1.27       chs 			error = mappedread_sf(vp, nbytes, uio);
    981   1.27       chs 		else
    982   1.27       chs #endif /* __FreeBSD__ */
    983   1.27       chs 		if (vn_has_cached_data(vp)) {
    984   1.27       chs 			error = mappedread(vp, nbytes, uio);
    985   1.27       chs 		} else {
    986   1.27       chs 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
    987   1.27       chs 			    uio, nbytes);
    988   1.27       chs 		}
    989    1.1      haad 		if (error) {
    990    1.1      haad 			/* convert checksum errors into IO errors */
    991    1.1      haad 			if (error == ECKSUM)
    992   1.27       chs 				error = SET_ERROR(EIO);
    993    1.1      haad 			break;
    994    1.1      haad 		}
    995    1.1      haad 
    996    1.1      haad 		n -= nbytes;
    997    1.1      haad 	}
    998    1.1      haad out:
    999    1.1      haad 	zfs_range_unlock(rl);
   1000    1.1      haad 
   1001    1.1      haad 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
   1002    1.1      haad 	ZFS_EXIT(zfsvfs);
   1003    1.1      haad 	return (error);
   1004    1.1      haad }
   1005    1.1      haad 
   1006    1.1      haad /*
   1007    1.1      haad  * Write the bytes to a file.
   1008    1.1      haad  *
   1009    1.1      haad  *	IN:	vp	- vnode of file to be written to.
   1010    1.1      haad  *		uio	- structure supplying write location, range info,
   1011    1.1      haad  *			  and data buffer.
   1012   1.27       chs  *		ioflag	- FAPPEND, FSYNC, and/or FDSYNC.  FAPPEND is
   1013   1.27       chs  *			  set if in append mode.
   1014    1.1      haad  *		cr	- credentials of caller.
   1015    1.1      haad  *		ct	- caller context (NFS/CIFS fem monitor only)
   1016    1.1      haad  *
   1017    1.1      haad  *	OUT:	uio	- updated offset and range.
   1018    1.1      haad  *
   1019   1.27       chs  *	RETURN:	0 on success, error code on failure.
   1020    1.1      haad  *
   1021    1.1      haad  * Timestamps:
   1022    1.1      haad  *	vp - ctime|mtime updated if byte count > 0
   1023    1.1      haad  */
   1024   1.27       chs 
   1025    1.1      haad /* ARGSUSED */
   1026    1.1      haad static int
   1027    1.1      haad zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
   1028    1.1      haad {
   1029    1.1      haad 	znode_t		*zp = VTOZ(vp);
   1030    1.2      haad 	rlim64_t	limit = MAXOFFSET_T;
   1031    1.1      haad 	ssize_t		start_resid = uio->uio_resid;
   1032    1.1      haad 	ssize_t		tx_bytes;
   1033    1.1      haad 	uint64_t	end_size;
   1034    1.1      haad 	dmu_tx_t	*tx;
   1035    1.1      haad 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   1036    1.1      haad 	zilog_t		*zilog;
   1037    1.1      haad 	offset_t	woff;
   1038    1.1      haad 	ssize_t		n, nbytes;
   1039    1.1      haad 	rl_t		*rl;
   1040    1.1      haad 	int		max_blksz = zfsvfs->z_max_blksz;
   1041   1.27       chs 	int		error = 0;
   1042    1.4      haad 	arc_buf_t	*abuf;
   1043   1.27       chs 	iovec_t		*aiov = NULL;
   1044    1.4      haad 	xuio_t		*xuio = NULL;
   1045    1.4      haad 	int		i_iov = 0;
   1046    1.4      haad 	int		iovcnt = uio->uio_iovcnt;
   1047    1.4      haad 	iovec_t		*iovp = uio->uio_iov;
   1048    1.4      haad 	int		write_eof;
   1049   1.27       chs 	int		count = 0;
   1050   1.27       chs 	sa_bulk_attr_t	bulk[4];
   1051   1.27       chs 	uint64_t	mtime[2], ctime[2];
   1052   1.27       chs 	int		segflg;
   1053   1.99      yamt 	boolean_t	commit;
   1054    1.1      haad 
   1055   1.27       chs #ifdef __NetBSD__
   1056   1.27       chs 	segflg = VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ?
   1057   1.27       chs 		UIO_SYSSPACE : UIO_USERSPACE;
   1058   1.27       chs #else
   1059   1.27       chs 	segflg = uio->uio_segflg;
   1060   1.27       chs #endif
   1061    1.2      haad 
   1062    1.1      haad 	/*
   1063    1.1      haad 	 * Fasttrack empty write
   1064    1.1      haad 	 */
   1065    1.1      haad 	n = start_resid;
   1066    1.1      haad 	if (n == 0)
   1067    1.1      haad 		return (0);
   1068    1.1      haad 
   1069    1.1      haad 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
   1070    1.1      haad 		limit = MAXOFFSET_T;
   1071    1.1      haad 
   1072    1.1      haad 	ZFS_ENTER(zfsvfs);
   1073    1.1      haad 	ZFS_VERIFY_ZP(zp);
   1074    1.1      haad 
   1075   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
   1076   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
   1077   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
   1078   1.27       chs 	    &zp->z_size, 8);
   1079   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
   1080   1.27       chs 	    &zp->z_pflags, 8);
   1081   1.27       chs 
   1082    1.1      haad 	/*
   1083   1.27       chs 	 * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our
   1084   1.27       chs 	 * callers might not be able to detect properly that we are read-only,
   1085   1.27       chs 	 * so check it explicitly here.
   1086    1.1      haad 	 */
   1087   1.27       chs 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
   1088    1.1      haad 		ZFS_EXIT(zfsvfs);
   1089   1.27       chs 		return (SET_ERROR(EROFS));
   1090    1.1      haad 	}
   1091   1.27       chs 
   1092   1.27       chs 	/*
   1093   1.27       chs 	 * If immutable or not appending then return EPERM
   1094   1.27       chs 	 */
   1095   1.27       chs 	if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
   1096   1.27       chs 	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
   1097   1.27       chs 	    (uio->uio_loffset < zp->z_size))) {
   1098   1.27       chs 		ZFS_EXIT(zfsvfs);
   1099   1.27       chs 		return (SET_ERROR(EPERM));
   1100   1.27       chs 	}
   1101   1.27       chs 
   1102   1.27       chs 	zilog = zfsvfs->z_log;
   1103    1.1      haad 
   1104    1.1      haad 	/*
   1105    1.4      haad 	 * Validate file offset
   1106    1.4      haad 	 */
   1107   1.27       chs 	woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
   1108    1.4      haad 	if (woff < 0) {
   1109    1.4      haad 		ZFS_EXIT(zfsvfs);
   1110   1.27       chs 		return (SET_ERROR(EINVAL));
   1111    1.4      haad 	}
   1112    1.4      haad 
   1113    1.4      haad 	/*
   1114    1.4      haad 	 * Check for mandatory locks before calling zfs_range_lock()
   1115    1.4      haad 	 * in order to prevent a deadlock with locks set via fcntl().
   1116    1.4      haad 	 */
   1117   1.27       chs 	if (MANDMODE((mode_t)zp->z_mode) &&
   1118    1.4      haad 	    (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
   1119    1.4      haad 		ZFS_EXIT(zfsvfs);
   1120    1.4      haad 		return (error);
   1121    1.4      haad 	}
   1122    1.4      haad 
   1123   1.27       chs #ifdef illumos
   1124    1.4      haad 	/*
   1125    1.1      haad 	 * Pre-fault the pages to ensure slow (eg NFS) pages
   1126    1.1      haad 	 * don't hold up txg.
   1127    1.4      haad 	 * Skip this if uio contains loaned arc_buf.
   1128    1.1      haad 	 */
   1129   1.27       chs 	if ((uio->uio_extflg == UIO_XUIO) &&
   1130   1.27       chs 	    (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
   1131   1.27       chs 		xuio = (xuio_t *)uio;
   1132   1.27       chs 	else
   1133   1.27       chs 		uio_prefaultpages(MIN(n, max_blksz), uio);
   1134   1.27       chs #endif
   1135    1.1      haad 
   1136    1.1      haad 	/*
   1137    1.1      haad 	 * If in append mode, set the io offset pointer to eof.
   1138    1.1      haad 	 */
   1139   1.27       chs 	if (ioflag & FAPPEND) {
   1140    1.1      haad 		/*
   1141    1.4      haad 		 * Obtain an appending range lock to guarantee file append
   1142    1.4      haad 		 * semantics.  We reset the write offset once we have the lock.
   1143    1.1      haad 		 */
   1144    1.1      haad 		rl = zfs_range_lock(zp, 0, n, RL_APPEND);
   1145    1.4      haad 		woff = rl->r_off;
   1146    1.1      haad 		if (rl->r_len == UINT64_MAX) {
   1147    1.4      haad 			/*
   1148    1.4      haad 			 * We overlocked the file because this write will cause
   1149    1.4      haad 			 * the file block size to increase.
   1150    1.4      haad 			 * Note that zp_size cannot change with this lock held.
   1151    1.4      haad 			 */
   1152   1.27       chs 			woff = zp->z_size;
   1153    1.1      haad 		}
   1154    1.4      haad 		uio->uio_loffset = woff;
   1155    1.1      haad 	} else {
   1156    1.1      haad 		/*
   1157    1.4      haad 		 * Note that if the file block size will change as a result of
   1158    1.4      haad 		 * this write, then this range lock will lock the entire file
   1159    1.4      haad 		 * so that we can re-write the block safely.
   1160    1.1      haad 		 */
   1161    1.1      haad 		rl = zfs_range_lock(zp, woff, n, RL_WRITER);
   1162    1.1      haad 	}
   1163    1.1      haad 
   1164   1.27       chs #ifdef illumos
   1165    1.1      haad 	if (woff >= limit) {
   1166    1.1      haad 		zfs_range_unlock(rl);
   1167    1.1      haad 		ZFS_EXIT(zfsvfs);
   1168   1.27       chs 		return (SET_ERROR(EFBIG));
   1169   1.27       chs 	}
   1170   1.27       chs 
   1171   1.27       chs #endif
   1172   1.27       chs #ifdef __FreeBSD__
   1173   1.27       chs 	if (vn_rlimit_fsize(vp, uio, uio->uio_td)) {
   1174   1.27       chs 		zfs_range_unlock(rl);
   1175   1.27       chs 		ZFS_EXIT(zfsvfs);
   1176   1.27       chs 		return (SET_ERROR(EFBIG));
   1177    1.1      haad 	}
   1178   1.27       chs #endif
   1179   1.27       chs #ifdef __NetBSD__
   1180   1.27       chs 	/* XXXNETBSD we might need vn_rlimit_fsize() too here eventually */
   1181   1.27       chs #endif
   1182    1.1      haad 
   1183    1.1      haad 	if ((woff + n) > limit || woff > (limit - n))
   1184    1.1      haad 		n = limit - woff;
   1185    1.1      haad 
   1186    1.4      haad 	/* Will this write extend the file length? */
   1187   1.27       chs 	write_eof = (woff + n > zp->z_size);
   1188    1.4      haad 
   1189   1.27       chs 	end_size = MAX(zp->z_size, woff + n);
   1190    1.1      haad 
   1191   1.99      yamt 	commit = ((ioflag & (FSYNC | FDSYNC)) != 0 ||
   1192   1.99      yamt 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS);
   1193   1.99      yamt 
   1194    1.1      haad 	/*
   1195    1.1      haad 	 * Write the file in reasonable size chunks.  Each chunk is written
   1196    1.1      haad 	 * in a separate transaction; this keeps the intent log records small
   1197    1.1      haad 	 * and allows us to do more fine-grained space accounting.
   1198    1.1      haad 	 */
   1199    1.1      haad 	while (n > 0) {
   1200    1.4      haad 		abuf = NULL;
   1201    1.4      haad 		woff = uio->uio_loffset;
   1202   1.27       chs 		if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
   1203   1.27       chs 		    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
   1204    1.4      haad 			if (abuf != NULL)
   1205    1.4      haad 				dmu_return_arcbuf(abuf);
   1206   1.27       chs 			error = SET_ERROR(EDQUOT);
   1207    1.4      haad 			break;
   1208    1.4      haad 		}
   1209    1.4      haad 
   1210    1.4      haad 		if (xuio && abuf == NULL) {
   1211    1.4      haad 			ASSERT(i_iov < iovcnt);
   1212    1.4      haad 			aiov = &iovp[i_iov];
   1213    1.4      haad 			abuf = dmu_xuio_arcbuf(xuio, i_iov);
   1214    1.4      haad 			dmu_xuio_clear(xuio, i_iov);
   1215    1.4      haad 			DTRACE_PROBE3(zfs_cp_write, int, i_iov,
   1216    1.4      haad 			    iovec_t *, aiov, arc_buf_t *, abuf);
   1217    1.4      haad 			ASSERT((aiov->iov_base == abuf->b_data) ||
   1218    1.4      haad 			    ((char *)aiov->iov_base - (char *)abuf->b_data +
   1219    1.4      haad 			    aiov->iov_len == arc_buf_size(abuf)));
   1220    1.4      haad 			i_iov++;
   1221    1.4      haad 		} else if (abuf == NULL && n >= max_blksz &&
   1222   1.27       chs 		    woff >= zp->z_size &&
   1223    1.4      haad 		    P2PHASE(woff, max_blksz) == 0 &&
   1224    1.4      haad 		    zp->z_blksz == max_blksz) {
   1225    1.4      haad 			/*
   1226    1.4      haad 			 * This write covers a full block.  "Borrow" a buffer
   1227    1.4      haad 			 * from the dmu so that we can fill it before we enter
   1228    1.4      haad 			 * a transaction.  This avoids the possibility of
   1229    1.4      haad 			 * holding up the transaction if the data copy hangs
   1230    1.4      haad 			 * up on a pagefault (e.g., from an NFS server mapping).
   1231    1.4      haad 			 */
   1232   1.36   hannken #if defined(illumos) || defined(__NetBSD__)
   1233    1.4      haad 			size_t cbytes;
   1234   1.27       chs #endif
   1235    1.4      haad 
   1236   1.27       chs 			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
   1237   1.27       chs 			    max_blksz);
   1238    1.4      haad 			ASSERT(abuf != NULL);
   1239    1.4      haad 			ASSERT(arc_buf_size(abuf) == max_blksz);
   1240   1.36   hannken #if defined(illumos) || defined(__NetBSD__)
   1241    1.4      haad 			if (error = uiocopy(abuf->b_data, max_blksz,
   1242    1.4      haad 			    UIO_WRITE, uio, &cbytes)) {
   1243    1.4      haad 				dmu_return_arcbuf(abuf);
   1244    1.4      haad 				break;
   1245    1.4      haad 			}
   1246    1.4      haad 			ASSERT(cbytes == max_blksz);
   1247   1.27       chs #endif
   1248   1.27       chs #ifdef __FreeBSD__
   1249   1.27       chs 			ssize_t resid = uio->uio_resid;
   1250   1.27       chs 
   1251   1.27       chs 			error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
   1252   1.27       chs 			if (error != 0) {
   1253   1.27       chs 				uio->uio_offset -= resid - uio->uio_resid;
   1254   1.27       chs 				uio->uio_resid = resid;
   1255   1.27       chs 				dmu_return_arcbuf(abuf);
   1256   1.27       chs 				break;
   1257   1.27       chs 			}
   1258   1.27       chs #endif
   1259    1.4      haad 		}
   1260    1.4      haad 
   1261    1.1      haad 		/*
   1262    1.1      haad 		 * Start a transaction.
   1263    1.1      haad 		 */
   1264    1.1      haad 		tx = dmu_tx_create(zfsvfs->z_os);
   1265   1.27       chs 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   1266    1.1      haad 		dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
   1267   1.27       chs 		zfs_sa_upgrade_txholds(tx, zp);
   1268   1.27       chs 		error = dmu_tx_assign(tx, TXG_WAIT);
   1269    1.1      haad 		if (error) {
   1270    1.1      haad 			dmu_tx_abort(tx);
   1271    1.4      haad 			if (abuf != NULL)
   1272    1.4      haad 				dmu_return_arcbuf(abuf);
   1273    1.1      haad 			break;
   1274    1.1      haad 		}
   1275    1.1      haad 
   1276    1.1      haad 		/*
   1277    1.1      haad 		 * If zfs_range_lock() over-locked we grow the blocksize
   1278    1.1      haad 		 * and then reduce the lock range.  This will only happen
   1279    1.1      haad 		 * on the first iteration since zfs_range_reduce() will
   1280    1.1      haad 		 * shrink down r_len to the appropriate size.
   1281    1.1      haad 		 */
   1282    1.1      haad 		if (rl->r_len == UINT64_MAX) {
   1283    1.1      haad 			uint64_t new_blksz;
   1284    1.1      haad 
   1285    1.1      haad 			if (zp->z_blksz > max_blksz) {
   1286   1.27       chs 				/*
   1287   1.27       chs 				 * File's blocksize is already larger than the
   1288   1.27       chs 				 * "recordsize" property.  Only let it grow to
   1289   1.27       chs 				 * the next power of 2.
   1290   1.27       chs 				 */
   1291    1.1      haad 				ASSERT(!ISP2(zp->z_blksz));
   1292   1.27       chs 				new_blksz = MIN(end_size,
   1293   1.27       chs 				    1 << highbit64(zp->z_blksz));
   1294    1.1      haad 			} else {
   1295    1.1      haad 				new_blksz = MIN(end_size, max_blksz);
   1296    1.1      haad 			}
   1297    1.1      haad 			zfs_grow_blocksize(zp, new_blksz, tx);
   1298    1.1      haad 			zfs_range_reduce(rl, woff, n);
   1299    1.1      haad 		}
   1300    1.1      haad 
   1301    1.1      haad 		/*
   1302    1.1      haad 		 * XXX - should we really limit each write to z_max_blksz?
   1303    1.1      haad 		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
   1304    1.1      haad 		 */
   1305    1.1      haad 		nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
   1306    1.2      haad 
   1307   1.27       chs 		if (woff + nbytes > zp->z_size)
   1308   1.27       chs 			vnode_pager_setsize(vp, woff + nbytes);
   1309   1.27       chs 
   1310    1.4      haad 		if (abuf == NULL) {
   1311    1.4      haad 			tx_bytes = uio->uio_resid;
   1312   1.27       chs 			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
   1313   1.27       chs 			    uio, nbytes, tx);
   1314    1.4      haad 			tx_bytes -= uio->uio_resid;
   1315    1.1      haad 		} else {
   1316    1.4      haad 			tx_bytes = nbytes;
   1317    1.4      haad 			ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
   1318    1.4      haad 			/*
   1319    1.4      haad 			 * If this is not a full block write, but we are
   1320    1.4      haad 			 * extending the file past EOF and this data starts
   1321    1.4      haad 			 * block-aligned, use assign_arcbuf().  Otherwise,
   1322    1.4      haad 			 * write via dmu_write().
   1323    1.4      haad 			 */
   1324    1.4      haad 			if (tx_bytes < max_blksz && (!write_eof ||
   1325    1.4      haad 			    aiov->iov_base != abuf->b_data)) {
   1326    1.4      haad 				ASSERT(xuio);
   1327    1.4      haad 				dmu_write(zfsvfs->z_os, zp->z_id, woff,
   1328    1.4      haad 				    aiov->iov_len, aiov->iov_base, tx);
   1329    1.4      haad 				dmu_return_arcbuf(abuf);
   1330    1.4      haad 				xuio_stat_wbuf_copied();
   1331    1.4      haad 			} else {
   1332    1.4      haad 				ASSERT(xuio || tx_bytes == max_blksz);
   1333   1.27       chs 				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
   1334   1.27       chs 				    woff, abuf, tx);
   1335    1.4      haad 			}
   1336   1.36   hannken #if defined(illumos) || defined(__NetBSD__)
   1337    1.4      haad 			ASSERT(tx_bytes <= uio->uio_resid);
   1338    1.4      haad 			uioskip(uio, tx_bytes);
   1339   1.27       chs #endif
   1340    1.1      haad 		}
   1341    1.4      haad 		if (tx_bytes && vn_has_cached_data(vp)) {
   1342   1.27       chs 			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
   1343   1.27       chs 			    zp->z_id, segflg, tx);
   1344    1.4      haad 		}
   1345   1.27       chs 
   1346    1.1      haad 		/*
   1347    1.1      haad 		 * If we made no progress, we're done.  If we made even
   1348    1.1      haad 		 * partial progress, update the znode and ZIL accordingly.
   1349    1.1      haad 		 */
   1350    1.1      haad 		if (tx_bytes == 0) {
   1351   1.27       chs 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
   1352   1.27       chs 			    (void *)&zp->z_size, sizeof (uint64_t), tx);
   1353    1.1      haad 			dmu_tx_commit(tx);
   1354    1.1      haad 			ASSERT(error != 0);
   1355    1.1      haad 			break;
   1356    1.1      haad 		}
   1357    1.1      haad 
   1358    1.1      haad 		/*
   1359    1.1      haad 		 * Clear Set-UID/Set-GID bits on successful write if not
   1360    1.1      haad 		 * privileged and at least one of the execute bits is set.
   1361    1.1      haad 		 *
   1362    1.1      haad 		 * It would be nice to do this after all writes have
   1363    1.1      haad 		 * been done, but that would still expose the ISUID/ISGID
   1364    1.1      haad 		 * to another app after the partial write is committed.
   1365    1.1      haad 		 *
   1366    1.1      haad 		 * Note: we don't call zfs_fuid_map_id() here because
   1367    1.1      haad 		 * user 0 is not an ephemeral uid.
   1368    1.1      haad 		 */
   1369    1.1      haad 		mutex_enter(&zp->z_acl_lock);
   1370   1.27       chs 		if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
   1371    1.1      haad 		    (S_IXUSR >> 6))) != 0 &&
   1372   1.27       chs 		    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
   1373   1.27       chs 		    secpolicy_vnode_setid_retain(vp, cr,
   1374   1.27       chs 		    (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
   1375   1.27       chs 			uint64_t newmode;
   1376   1.27       chs 			zp->z_mode &= ~(S_ISUID | S_ISGID);
   1377   1.27       chs 			newmode = zp->z_mode;
   1378   1.27       chs 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
   1379   1.27       chs 			    (void *)&newmode, sizeof (uint64_t), tx);
   1380   1.70   hannken #ifdef __NetBSD__
   1381   1.86      yamt 			if (zfsvfs->z_use_namecache)
   1382   1.86      yamt 				cache_enter_id(vp, zp->z_mode, zp->z_uid,
   1383   1.86      yamt 				    zp->z_gid, true);
   1384   1.70   hannken #endif
   1385    1.1      haad 		}
   1386    1.1      haad 		mutex_exit(&zp->z_acl_lock);
   1387    1.1      haad 
   1388   1.27       chs 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
   1389   1.27       chs 		    B_TRUE);
   1390    1.1      haad 
   1391    1.1      haad 		/*
   1392    1.1      haad 		 * Update the file size (zp_size) if it has changed;
   1393    1.1      haad 		 * account for possible concurrent updates.
   1394    1.1      haad 		 */
   1395   1.27       chs 		while ((end_size = zp->z_size) < uio->uio_loffset) {
   1396   1.27       chs 			(void) atomic_cas_64(&zp->z_size, end_size,
   1397    1.1      haad 			    uio->uio_loffset);
   1398   1.27       chs #ifdef illumos
   1399   1.27       chs 			ASSERT(error == 0);
   1400   1.27       chs #else
   1401   1.27       chs 			ASSERT(error == 0 || error == EFAULT);
   1402   1.27       chs #endif
   1403   1.27       chs 		}
   1404   1.27       chs 		/*
   1405   1.27       chs 		 * If we are replaying and eof is non zero then force
   1406   1.27       chs 		 * the file size to the specified eof. Note, there's no
   1407   1.27       chs 		 * concurrency during replay.
   1408   1.27       chs 		 */
   1409   1.27       chs 		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
   1410   1.27       chs 			zp->z_size = zfsvfs->z_replay_eof;
   1411   1.27       chs 
   1412   1.27       chs 		if (error == 0)
   1413   1.27       chs 			error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
   1414   1.27       chs 		else
   1415   1.27       chs 			(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
   1416   1.27       chs 
   1417   1.99      yamt 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, commit);
   1418    1.1      haad 		dmu_tx_commit(tx);
   1419    1.1      haad 
   1420    1.1      haad 		if (error != 0)
   1421    1.1      haad 			break;
   1422    1.1      haad 		ASSERT(tx_bytes == nbytes);
   1423    1.1      haad 		n -= nbytes;
   1424   1.27       chs 
   1425   1.27       chs #ifdef illumos
   1426   1.27       chs 		if (!xuio && n > 0)
   1427   1.27       chs 			uio_prefaultpages(MIN(n, max_blksz), uio);
   1428   1.27       chs #endif
   1429    1.1      haad 	}
   1430    1.1      haad 
   1431    1.1      haad 	zfs_range_unlock(rl);
   1432    1.1      haad 
   1433    1.1      haad 	/*
   1434    1.1      haad 	 * If we're in replay mode, or we made no progress, return error.
   1435    1.1      haad 	 * Otherwise, it's at least a partial write, so it's successful.
   1436    1.1      haad 	 */
   1437    1.4      haad 	if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
   1438    1.1      haad 		ZFS_EXIT(zfsvfs);
   1439    1.1      haad 		return (error);
   1440    1.1      haad 	}
   1441    1.1      haad 
   1442   1.27       chs #ifdef __FreeBSD__
   1443   1.27       chs 	/*
   1444   1.27       chs 	 * EFAULT means that at least one page of the source buffer was not
   1445   1.27       chs 	 * available.  VFS will re-try remaining I/O upon this error.
   1446   1.27       chs 	 */
   1447   1.27       chs 	if (error == EFAULT) {
   1448   1.27       chs 		ZFS_EXIT(zfsvfs);
   1449   1.27       chs 		return (error);
   1450   1.27       chs 	}
   1451   1.27       chs #endif
   1452   1.27       chs 
   1453   1.99      yamt 	if (commit)
   1454   1.27       chs 		zil_commit(zilog, zp->z_id);
   1455    1.1      haad 
   1456    1.1      haad 	ZFS_EXIT(zfsvfs);
   1457    1.1      haad 	return (0);
   1458    1.1      haad }
   1459    1.1      haad 
   1460    1.1      haad void
   1461    1.4      haad zfs_get_done(zgd_t *zgd, int error)
   1462    1.1      haad {
   1463    1.4      haad 	znode_t *zp = zgd->zgd_private;
   1464    1.4      haad 	objset_t *os = zp->z_zfsvfs->z_os;
   1465    1.4      haad 
   1466    1.4      haad 	if (zgd->zgd_db)
   1467    1.4      haad 		dmu_buf_rele(zgd->zgd_db, zgd);
   1468    1.4      haad 
   1469    1.4      haad 	zfs_range_unlock(zgd->zgd_rl);
   1470    1.1      haad 
   1471    1.2      haad 	/*
   1472    1.2      haad 	 * Release the vnode asynchronously as we currently have the
   1473    1.2      haad 	 * txg stopped from syncing.
   1474    1.2      haad 	 */
   1475   1.92      yamt 	VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
   1476    1.4      haad 
   1477    1.4      haad 	if (error == 0 && zgd->zgd_bp)
   1478    1.4      haad 		zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
   1479    1.4      haad 
   1480    1.1      haad 	kmem_free(zgd, sizeof (zgd_t));
   1481    1.1      haad }
   1482    1.1      haad 
   1483    1.4      haad #ifdef DEBUG
/*
 * Fault-injection switch, only present in DEBUG builds.  While nonzero,
 * the next indirect-write pass through zfs_get_data() fails with EIO and
 * resets this flag to 0.
 */
   1484    1.4      haad static int zil_fault_io = 0;
   1485    1.4      haad #endif
   1486    1.4      haad 
   1487    1.1      haad /*
   1488    1.1      haad  * Get data to generate a TX_WRITE intent log record.
 *
 *	IN:	arg	- the zfsvfs_t the record belongs to.
 *		lr	- write log record; lr_foid, lr_offset and lr_length
 *			  identify the data.
 *		buf	- destination for an immediate write, or NULL to
 *			  request an indirect write.
 *		zio	- passed through to dmu_sync() for indirect writes.
 *
 *	OUT:	buf	- filled by dmu_read() for an immediate write.
 *		lr	- for an indirect write, lr_blkptr may be filled in
 *			  and lrc_txtype may be changed to TX_WRITE2.
 *
 *	RETURN:	0 on success.  ENOENT if the object cannot be obtained, has
 *		been unlinked, or the record's offset is at or beyond the
 *		file size.  Otherwise the error from dmu_read(),
 *		dmu_buf_hold() or dmu_sync() (EALREADY from dmu_sync() is
 *		turned into success).
 *
 * When dmu_sync() succeeds this function returns without cleaning up;
 * zfs_get_done() is then run as the dmu_sync() callback.  On every other
 * path after the zgd is allocated, zfs_get_done() is called here.
   1489    1.1      haad  */
   1490    1.1      haad int
   1491    1.1      haad zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
   1492    1.1      haad {
   1493    1.1      haad 	zfsvfs_t *zfsvfs = arg;
   1494    1.1      haad 	objset_t *os = zfsvfs->z_os;
   1495    1.1      haad 	znode_t *zp;
   1496    1.4      haad 	uint64_t object = lr->lr_foid;
   1497    1.4      haad 	uint64_t offset = lr->lr_offset;
   1498    1.4      haad 	uint64_t size = lr->lr_length;
   1499    1.4      haad 	blkptr_t *bp = &lr->lr_blkptr;
   1500    1.1      haad 	dmu_buf_t *db;
   1501    1.1      haad 	zgd_t *zgd;
   1502    1.1      haad 	int error = 0;
   1503    1.1      haad 
   1504    1.4      haad 	ASSERT(zio != NULL);
   1505    1.4      haad 	ASSERT(size != 0);
   1506    1.1      haad 
   1507    1.1      haad 	/*
   1508    1.1      haad 	 * Nothing to do if the file has been removed
   1509    1.1      haad 	 */
   1510   1.92      yamt 	if (zfs_zget(zfsvfs, object, &zp) != 0)
   1511   1.27       chs 		return (SET_ERROR(ENOENT));
   1512    1.1      haad 	if (zp->z_unlinked) {
   1513    1.2      haad 		/*
   1514    1.2      haad 		 * Release the vnode asynchronously as we currently have the
   1515    1.2      haad 		 * txg stopped from syncing.
   1516    1.2      haad 		 */
   1517   1.92      yamt 		VN_RELE_ASYNC(ZTOV(zp),
   1518    1.4      haad 		    dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
   1519   1.27       chs 		return (SET_ERROR(ENOENT));
   1520    1.1      haad 	}
   1521    1.1      haad 
	/*
	 * From here on the zgd carries the znode (and later the range lock
	 * and dbuf) so that zfs_get_done() can release them.
	 */
   1522    1.4      haad 	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
   1523    1.4      haad 	zgd->zgd_zilog = zfsvfs->z_log;
   1524    1.4      haad 	zgd->zgd_private = zp;
   1525    1.4      haad 
   1526    1.1      haad 	/*
   1527    1.1      haad 	 * Write records come in two flavors: immediate and indirect.
   1528    1.1      haad 	 * For small writes it's cheaper to store the data with the
   1529    1.1      haad 	 * log record (immediate); for large writes it's cheaper to
   1530    1.1      haad 	 * sync the data and get a pointer to it (indirect) so that
   1531    1.1      haad 	 * we don't have to write the data twice.
   1532    1.1      haad 	 */
   1533    1.1      haad 	if (buf != NULL) { /* immediate write */
   1534    1.4      haad 		zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
   1535    1.1      haad 		/* test for truncation needs to be done while range locked */
   1536   1.27       chs 		if (offset >= zp->z_size) {
   1537   1.27       chs 			error = SET_ERROR(ENOENT);
   1538    1.4      haad 		} else {
   1539    1.4      haad 			error = dmu_read(os, object, offset, size, buf,
   1540    1.4      haad 			    DMU_READ_NO_PREFETCH);
   1541    1.1      haad 		}
   1542    1.4      haad 		ASSERT(error == 0 || error == ENOENT);
   1543    1.1      haad 	} else { /* indirect write */
   1544    1.1      haad 		/*
   1545    1.1      haad 		 * Have to lock the whole block to ensure when it's
   1546    1.1      haad 		 * written out and its checksum is being calculated
   1547    1.1      haad 		 * that no one can change the data. We need to re-check
   1548    1.1      haad 		 * blocksize after we get the lock in case it's changed!
   1549    1.1      haad 		 */
   1550    1.1      haad 		for (;;) {
   1551    1.4      haad 			uint64_t blkoff;
   1552    1.4      haad 			size = zp->z_blksz;
			/*
			 * Round offset down to the start of its block; when
			 * the block size is not a power of two the lock
			 * starts at offset 0.
			 */
   1553    1.4      haad 			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
   1554    1.4      haad 			offset -= blkoff;
   1555    1.4      haad 			zgd->zgd_rl = zfs_range_lock(zp, offset, size,
   1556    1.4      haad 			    RL_READER);
   1557    1.4      haad 			if (zp->z_blksz == size)
   1558    1.1      haad 				break;
			/* Block size changed: undo the rounding and retry. */
   1559    1.4      haad 			offset += blkoff;
   1560    1.4      haad 			zfs_range_unlock(zgd->zgd_rl);
   1561    1.1      haad 		}
   1562    1.1      haad 		/* test for truncation needs to be done while range locked */
   1563   1.27       chs 		if (lr->lr_offset >= zp->z_size)
   1564   1.27       chs 			error = SET_ERROR(ENOENT);
   1565    1.4      haad #ifdef DEBUG
		/* One-shot injected failure, see zil_fault_io. */
   1566    1.4      haad 		if (zil_fault_io) {
   1567   1.27       chs 			error = SET_ERROR(EIO);
   1568    1.4      haad 			zil_fault_io = 0;
   1569    1.1      haad 		}
   1570    1.4      haad #endif
   1571    1.1      haad 		if (error == 0)
   1572   1.27       chs 			error = dmu_buf_hold(os, object, offset, zgd, &db,
   1573   1.27       chs 			    DMU_READ_NO_PREFETCH);
   1574    1.4      haad 
   1575    1.4      haad 		if (error == 0) {
			/*
			 * If the dbuf already has a block pointer, copy it
			 * into the log record before syncing.
			 */
   1576   1.27       chs 			blkptr_t *obp = dmu_buf_get_blkptr(db);
   1577   1.27       chs 			if (obp) {
   1578   1.27       chs 				ASSERT(BP_IS_HOLE(bp));
   1579   1.27       chs 				*bp = *obp;
   1580   1.27       chs 			}
   1581   1.27       chs 
   1582    1.4      haad 			zgd->zgd_db = db;
   1583    1.4      haad 			zgd->zgd_bp = bp;
   1584    1.4      haad 
   1585    1.4      haad 			ASSERT(db->db_offset == offset);
   1586    1.4      haad 			ASSERT(db->db_size == size);
   1587    1.4      haad 
   1588    1.4      haad 			error = dmu_sync(zio, lr->lr_common.lrc_txg,
   1589    1.4      haad 			    zfs_get_done, zgd);
   1590    1.4      haad 			ASSERT(error || lr->lr_length <= zp->z_blksz);
   1591    1.4      haad 
   1592    1.4      haad 			/*
   1593    1.4      haad 			 * On success, we need to wait for the write I/O
   1594    1.4      haad 			 * initiated by dmu_sync() to complete before we can
   1595    1.4      haad 			 * release this dbuf.  We will finish everything up
   1596    1.4      haad 			 * in the zfs_get_done() callback.
   1597    1.4      haad 			 */
   1598    1.4      haad 			if (error == 0)
   1599    1.4      haad 				return (0);
   1600    1.4      haad 
			/*
			 * EALREADY is not treated as a failure: the record
			 * is re-typed as TX_WRITE2 and cleanup proceeds
			 * below with error 0.
			 */
   1601    1.4      haad 			if (error == EALREADY) {
   1602    1.4      haad 				lr->lr_common.lrc_txtype = TX_WRITE2;
   1603    1.4      haad 				error = 0;
   1604    1.4      haad 			}
   1605    1.4      haad 		}
   1606    1.1      haad 	}
   1607    1.4      haad 
   1608    1.4      haad 	zfs_get_done(zgd, error);
   1609    1.4      haad 
   1610    1.1      haad 	return (error);
   1611    1.1      haad }
   1612    1.1      haad 
   1613    1.1      haad /*ARGSUSED*/
   1614    1.1      haad static int
   1615    1.1      haad zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
   1616    1.1      haad     caller_context_t *ct)
   1617    1.1      haad {
   1618    1.1      haad 	znode_t *zp = VTOZ(vp);
   1619    1.1      haad 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   1620    1.1      haad 	int error;
   1621    1.1      haad 
   1622    1.1      haad 	ZFS_ENTER(zfsvfs);
   1623    1.1      haad 	ZFS_VERIFY_ZP(zp);
   1624    1.1      haad 
   1625    1.1      haad 	if (flag & V_ACE_MASK)
   1626    1.1      haad 		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
   1627    1.1      haad 	else
   1628    1.1      haad 		error = zfs_zaccess_rwx(zp, mode, flag, cr);
   1629    1.1      haad 
   1630    1.1      haad 	ZFS_EXIT(zfsvfs);
   1631    1.1      haad 	return (error);
   1632    1.1      haad }
   1633    1.1      haad 
   1634   1.27       chs #ifdef __FreeBSD__
/*
 * Callback handed to vn_vget_ino_gen() by zfs_lookup_lock() for "..".
 *
 *	mp	- mount point [UNUSED].
 *	arg	- the vnode to lock, passed through as an opaque pointer.
 *	lkflags	- lock flags for vn_lock().
 *	vpp	- receives the vnode from arg.
 *
 * Returns the vn_lock() result; on failure the vnode reference is dropped.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	struct vnode *target = arg;
	int error;

	*vpp = target;
	error = vn_lock(target, lkflags);
	if (error != 0) {
		vrele(target);
	}
	return (error);
}
   1646   1.27       chs 
   1647   1.27       chs static int
   1648   1.27       chs zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
   1649   1.27       chs {
   1650   1.27       chs 	znode_t *zdp = VTOZ(dvp);
   1651   1.27       chs 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
   1652   1.27       chs 	int error;
   1653   1.27       chs 	int ltype;
   1654   1.27       chs 
   1655   1.27       chs 	ASSERT_VOP_LOCKED(dvp, __func__);
   1656   1.27       chs #ifdef DIAGNOSTIC
   1657   1.27       chs 	if ((zdp->z_pflags & ZFS_XATTR) == 0)
   1658   1.27       chs 		VERIFY(!RRM_LOCK_HELD(&zfsvfs->z_teardown_lock));
   1659   1.27       chs #endif
   1660   1.27       chs 
   1661   1.27       chs 	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
   1662   1.27       chs 		ASSERT3P(dvp, ==, vp);
   1663   1.27       chs 		vref(dvp);
   1664   1.27       chs 		ltype = lkflags & LK_TYPE_MASK;
   1665   1.27       chs 		if (ltype != VOP_ISLOCKED(dvp)) {
   1666   1.27       chs 			if (ltype == LK_EXCLUSIVE)
   1667   1.27       chs 				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
   1668   1.27       chs 			else /* if (ltype == LK_SHARED) */
   1669   1.27       chs 				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
   1670   1.27       chs 
   1671   1.27       chs 			/*
   1672   1.27       chs 			 * Relock for the "." case could leave us with
   1673   1.27       chs 			 * reclaimed vnode.
   1674   1.27       chs 			 */
   1675   1.27       chs 			if (dvp->v_iflag & VI_DOOMED) {
   1676   1.27       chs 				vrele(dvp);
   1677   1.27       chs 				return (SET_ERROR(ENOENT));
   1678   1.27       chs 			}
   1679   1.27       chs 		}
   1680   1.27       chs 		return (0);
   1681   1.27       chs 	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
   1682   1.27       chs 		/*
   1683   1.27       chs 		 * Note that in this case, dvp is the child vnode, and we
   1684   1.27       chs 		 * are looking up the parent vnode - exactly reverse from
   1685   1.27       chs 		 * normal operation.  Unlocking dvp requires some rather
   1686   1.27       chs 		 * tricky unlock/relock dance to prevent mp from being freed;
   1687   1.27       chs 		 * use vn_vget_ino_gen() which takes care of all that.
   1688   1.27       chs 		 *
   1689   1.27       chs 		 * XXX Note that there is a time window when both vnodes are
   1690   1.27       chs 		 * unlocked.  It is possible, although highly unlikely, that
   1691   1.27       chs 		 * during that window the parent-child relationship between
   1692   1.27       chs 		 * the vnodes may change, for example, get reversed.
   1693   1.27       chs 		 * In that case we would have a wrong lock order for the vnodes.
   1694   1.27       chs 		 * All other filesystems seem to ignore this problem, so we
   1695   1.27       chs 		 * do the same here.
   1696   1.27       chs 		 * A potential solution could be implemented as follows:
   1697   1.27       chs 		 * - using LK_NOWAIT when locking the second vnode and retrying
   1698   1.27       chs 		 *   if necessary
   1699   1.27       chs 		 * - checking that the parent-child relationship still holds
   1700   1.27       chs 		 *   after locking both vnodes and retrying if it doesn't
   1701   1.27       chs 		 */
   1702   1.27       chs 		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
   1703   1.27       chs 		return (error);
   1704   1.27       chs 	} else {
   1705   1.27       chs 		error = vn_lock(vp, lkflags);
   1706   1.27       chs 		if (error != 0)
   1707   1.27       chs 			vrele(vp);
   1708   1.27       chs 		return (error);
   1709   1.27       chs 	}
   1710   1.27       chs }
   1711   1.27       chs 
   1712   1.27       chs /*
   1713   1.27       chs  * Lookup an entry in a directory, or an extended attribute directory.
   1714   1.27       chs  * If it exists, return a held vnode reference for it.
   1715   1.27       chs  *
   1716   1.27       chs  *	IN:	dvp	- vnode of directory to search.
   1717   1.27       chs  *		nm	- name of entry to lookup.
   1718   1.27       chs  *		pnp	- full pathname to lookup [UNUSED].
   1719   1.27       chs  *		flags	- LOOKUP_XATTR set if looking for an attribute.
   1720   1.27       chs  *		rdir	- root directory vnode [UNUSED].
   1721   1.27       chs  *		cr	- credentials of caller.
   1722   1.27       chs  *		ct	- caller context
   1723   1.27       chs  *
   1724   1.27       chs  *	OUT:	vpp	- vnode of located entry, NULL if not found.
   1725   1.27       chs  *
   1726   1.27       chs  *	RETURN:	0 on success, error code on failure.
   1727   1.27       chs  *
   1728   1.27       chs  * Timestamps:
   1729   1.27       chs  *	NA
   1730   1.27       chs  */
   1731   1.27       chs /* ARGSUSED */
   1732   1.27       chs static int
   1733   1.27       chs zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp,
   1734   1.27       chs     int nameiop, cred_t *cr, kthread_t *td, int flags)
   1735   1.27       chs {
   1736   1.27       chs 	znode_t *zdp = VTOZ(dvp);
   1737   1.27       chs 	znode_t *zp;
   1738   1.27       chs 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
   1739   1.27       chs 	int	error = 0;
   1740   1.27       chs 
   1741   1.27       chs 	/* fast path (should be redundant with vfs namecache) */
   1742   1.27       chs 	if (!(flags & LOOKUP_XATTR)) {
   1743   1.27       chs 		if (dvp->v_type != VDIR) {
   1744   1.27       chs 			return (SET_ERROR(ENOTDIR));
   1745   1.27       chs 		} else if (zdp->z_sa_hdl == NULL) {
   1746   1.27       chs 			return (SET_ERROR(EIO));
   1747   1.27       chs 		}
   1748   1.27       chs 	}
   1749   1.27       chs 
   1750   1.27       chs 	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
   1751   1.27       chs 
   1752   1.27       chs 	ZFS_ENTER(zfsvfs);
   1753   1.27       chs 	ZFS_VERIFY_ZP(zdp);
   1754   1.27       chs 
   1755   1.27       chs 	*vpp = NULL;
   1756   1.27       chs 
   1757   1.27       chs 	if (flags & LOOKUP_XATTR) {
   1758   1.27       chs #ifdef TODO
   1759   1.27       chs 		/*
   1760   1.27       chs 		 * If the xattr property is off, refuse the lookup request.
   1761   1.27       chs 		 */
   1762   1.27       chs 		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
   1763   1.27       chs 			ZFS_EXIT(zfsvfs);
   1764   1.27       chs 			return (SET_ERROR(EINVAL));
   1765   1.27       chs 		}
   1766   1.27       chs #endif
   1767   1.27       chs 
   1768   1.27       chs 		/*
   1769   1.27       chs 		 * We don't allow recursive attributes..
   1770   1.27       chs 		 * Maybe someday we will.
   1771   1.27       chs 		 */
   1772   1.27       chs 		if (zdp->z_pflags & ZFS_XATTR) {
   1773   1.27       chs 			ZFS_EXIT(zfsvfs);
   1774   1.27       chs 			return (SET_ERROR(EINVAL));
   1775   1.27       chs 		}
   1776   1.27       chs 
   1777   1.27       chs 		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
   1778   1.27       chs 			ZFS_EXIT(zfsvfs);
   1779   1.27       chs 			return (error);
   1780   1.27       chs 		}
   1781   1.27       chs 
   1782   1.27       chs 		/*
   1783   1.27       chs 		 * Do we have permission to get into attribute directory?
   1784   1.27       chs 		 */
   1785   1.27       chs 		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
   1786   1.27       chs 		    B_FALSE, cr)) {
   1787   1.27       chs 			vrele(*vpp);
   1788   1.27       chs 			*vpp = NULL;
   1789   1.27       chs 		}
   1790   1.27       chs 
   1791   1.27       chs 		ZFS_EXIT(zfsvfs);
   1792   1.27       chs 		return (error);
   1793   1.27       chs 	}
   1794   1.27       chs 
   1795   1.27       chs 	/*
   1796   1.27       chs 	 * Check accessibility of directory.
   1797   1.27       chs 	 */
   1798   1.27       chs 	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
   1799   1.27       chs 		ZFS_EXIT(zfsvfs);
   1800   1.27       chs 		return (error);
   1801   1.27       chs 	}
   1802   1.27       chs 
   1803   1.27       chs 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
   1804   1.27       chs 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   1805   1.27       chs 		ZFS_EXIT(zfsvfs);
   1806   1.27       chs 		return (SET_ERROR(EILSEQ));
   1807   1.27       chs 	}
   1808   1.27       chs 
   1809   1.27       chs 
   1810   1.27       chs 	/*
   1811   1.27       chs 	 * First handle the special cases.
   1812   1.27       chs 	 */
   1813   1.27       chs 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
   1814   1.27       chs 		/*
   1815   1.27       chs 		 * If we are a snapshot mounted under .zfs, return
   1816   1.27       chs 		 * the vp for the snapshot directory.
   1817   1.27       chs 		 */
   1818   1.27       chs 		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
   1819   1.27       chs 			struct componentname cn;
   1820   1.27       chs 			vnode_t *zfsctl_vp;
   1821   1.27       chs 			int ltype;
   1822   1.27       chs 
   1823   1.27       chs 			ZFS_EXIT(zfsvfs);
   1824   1.27       chs 			ltype = VOP_ISLOCKED(dvp);
   1825   1.27       chs 			VOP_UNLOCK(dvp, 0);
   1826   1.27       chs 			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
   1827   1.27       chs 			    &zfsctl_vp);
   1828   1.27       chs 			if (error == 0) {
   1829   1.27       chs 				cn.cn_nameptr = "snapshot";
   1830   1.27       chs 				cn.cn_namelen = strlen(cn.cn_nameptr);
   1831   1.27       chs 				cn.cn_nameiop = cnp->cn_nameiop;
   1832   1.27       chs 				cn.cn_flags = cnp->cn_flags;
   1833   1.27       chs 				cn.cn_lkflags = cnp->cn_lkflags;
   1834   1.27       chs 				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
   1835   1.27       chs 				vput(zfsctl_vp);
   1836   1.27       chs 			}
   1837   1.27       chs 			vn_lock(dvp, ltype | LK_RETRY);
   1838   1.27       chs 			return (error);
   1839   1.27       chs 		}
   1840   1.27       chs 	}
   1841   1.27       chs 	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
   1842   1.27       chs 		ZFS_EXIT(zfsvfs);
   1843   1.27       chs 		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
   1844   1.27       chs 			return (SET_ERROR(ENOTSUP));
   1845   1.27       chs 		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
   1846   1.27       chs 		return (error);
   1847   1.27       chs 	}
   1848   1.27       chs 
   1849   1.27       chs 	/*
   1850   1.27       chs 	 * The loop is retry the lookup if the parent-child relationship
   1851   1.27       chs 	 * changes during the dot-dot locking complexities.
   1852   1.27       chs 	 */
   1853   1.27       chs 	for (;;) {
   1854   1.27       chs 		uint64_t parent;
   1855   1.27       chs 
   1856   1.27       chs 		error = zfs_dirlook(zdp, nm, &zp);
   1857   1.27       chs 		if (error == 0)
   1858   1.27       chs 			*vpp = ZTOV(zp);
   1859   1.27       chs 
   1860   1.27       chs 		ZFS_EXIT(zfsvfs);
   1861   1.27       chs 		if (error != 0)
   1862   1.27       chs 			break;
   1863   1.27       chs 
   1864   1.27       chs 		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
   1865   1.27       chs 		if (error != 0) {
   1866   1.27       chs 			/*
   1867   1.27       chs 			 * If we've got a locking error, then the vnode
   1868   1.27       chs 			 * got reclaimed because of a force unmount.
   1869   1.27       chs 			 * We never enter doomed vnodes into the name cache.
   1870   1.27       chs 			 */
   1871   1.27       chs 			*vpp = NULL;
   1872   1.27       chs 			return (error);
   1873   1.27       chs 		}
   1874   1.27       chs 
   1875   1.27       chs 		if ((cnp->cn_flags & ISDOTDOT) == 0)
   1876   1.27       chs 			break;
   1877   1.27       chs 
   1878   1.27       chs 		ZFS_ENTER(zfsvfs);
   1879   1.27       chs 		if (zdp->z_sa_hdl == NULL) {
   1880   1.27       chs 			error = SET_ERROR(EIO);
   1881   1.27       chs 		} else {
   1882   1.27       chs 			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
   1883   1.27       chs 			    &parent, sizeof (parent));
   1884   1.27       chs 		}
   1885   1.27       chs 		if (error != 0) {
   1886   1.27       chs 			ZFS_EXIT(zfsvfs);
   1887   1.27       chs 			vput(ZTOV(zp));
   1888   1.27       chs 			break;
   1889   1.27       chs 		}
   1890   1.27       chs 		if (zp->z_id == parent) {
   1891   1.27       chs 			ZFS_EXIT(zfsvfs);
   1892   1.27       chs 			break;
   1893   1.27       chs 		}
   1894   1.27       chs 		vput(ZTOV(zp));
   1895   1.27       chs 	}
   1896   1.27       chs 
   1897   1.27       chs out:
   1898   1.27       chs 	if (error != 0)
   1899   1.27       chs 		*vpp = NULL;
   1900   1.27       chs 
   1901   1.27       chs 	/* Translate errors and add SAVENAME when needed. */
   1902   1.27       chs 	if (cnp->cn_flags & ISLASTCN) {
   1903   1.27       chs 		switch (nameiop) {
   1904   1.27       chs 		case CREATE:
   1905   1.27       chs 		case RENAME:
   1906   1.27       chs 			if (error == ENOENT) {
   1907   1.27       chs 				error = EJUSTRETURN;
   1908   1.27       chs 				cnp->cn_flags |= SAVENAME;
   1909   1.27       chs 				break;
   1910   1.27       chs 			}
   1911   1.27       chs 			/* FALLTHROUGH */
   1912   1.27       chs 		case DELETE:
   1913   1.27       chs 			if (error == 0)
   1914   1.27       chs 				cnp->cn_flags |= SAVENAME;
   1915   1.27       chs 			break;
   1916   1.27       chs 		}
   1917   1.27       chs 	}
   1918   1.27       chs 
   1919   1.27       chs 	/* Insert name into cache (as non-existent) if appropriate. */
   1920   1.27       chs 	if (zfsvfs->z_use_namecache &&
   1921   1.27       chs 	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
   1922   1.27       chs 		cache_enter(dvp, NULL, cnp);
   1923   1.27       chs 
   1924   1.27       chs 	/* Insert name into cache if appropriate. */
   1925   1.27       chs 	if (zfsvfs->z_use_namecache &&
   1926   1.27       chs 	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
   1927   1.27       chs 		if (!(cnp->cn_flags & ISLASTCN) ||
   1928   1.27       chs 		    (nameiop != DELETE && nameiop != RENAME)) {
   1929   1.27       chs 			cache_enter(dvp, *vpp, cnp);
   1930   1.27       chs 		}
   1931   1.27       chs 	}
   1932   1.27       chs 
   1933   1.27       chs 	return (error);
   1934   1.27       chs }
   1935   1.27       chs #endif /* __FreeBSD__ */
   1936   1.27       chs 
   1937   1.27       chs #ifdef __NetBSD__
   1938    1.1      haad /*
   1939    1.4      haad  * If vnode is for a device return a specfs vnode instead.
   1940    1.4      haad  */
   1941    1.4      haad static int
   1942    1.4      haad specvp_check(vnode_t **vpp, cred_t *cr)
   1943    1.4      haad {
   1944    1.4      haad 	int error = 0;
   1945    1.4      haad 
   1946    1.4      haad 	if (IS_DEVVP(*vpp)) {
   1947    1.4      haad 		struct vnode *svp;
   1948    1.4      haad 
   1949    1.4      haad 		svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
   1950    1.4      haad 		VN_RELE(*vpp);
   1951    1.4      haad 		if (svp == NULL)
   1952    1.4      haad 			error = ENOSYS;
   1953    1.4      haad 		*vpp = svp;
   1954    1.4      haad 	}
   1955    1.4      haad 	return (error);
   1956    1.4      haad }
   1957    1.4      haad 
   1958    1.4      haad /*
   1959    1.1      haad  * Lookup an entry in a directory, or an extended attribute directory.
   1960    1.1      haad  * If it exists, return a held vnode reference for it.
   1961    1.1      haad  *
   1962    1.1      haad  *	IN:	dvp	- vnode of directory to search.
   1963    1.1      haad  *		nm	- name of entry to lookup.
   1964    1.1      haad  *		pnp	- full pathname to lookup [UNUSED].
   1965    1.1      haad  *		flags	- LOOKUP_XATTR set if looking for an attribute.
   1966    1.1      haad  *		rdir	- root directory vnode [UNUSED].
   1967    1.1      haad  *		cr	- credentials of caller.
   1968    1.1      haad  *		ct	- caller context
   1969    1.1      haad  *		direntflags - directory lookup flags
   1970    1.1      haad  *		realpnp - returned pathname.
   1971    1.1      haad  *
   1972    1.1      haad  *	OUT:	vpp	- vnode of located entry, NULL if not found.
   1973    1.1      haad  *
   1974    1.1      haad  *	RETURN:	0 if success
   1975    1.1      haad  *		error code if failure
   1976    1.1      haad  *
   1977    1.1      haad  * Timestamps:
   1978    1.1      haad  *	NA
   1979    1.1      haad  */
   1980    1.1      haad /* ARGSUSED */
   1981    1.1      haad static int
   1982   1.44   hannken zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, int flags,
   1983   1.44   hannken     struct componentname *cnp, int nameiop, cred_t *cr)
   1984    1.1      haad {
   1985    1.1      haad 	znode_t *zdp = VTOZ(dvp);
   1986   1.27       chs 	znode_t *zp;
   1987    1.1      haad 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
   1988    1.4      haad 	int	error = 0;
   1989    1.4      haad 
   1990    1.4      haad 	/* fast path */
   1991   1.27       chs 	if (!(flags & LOOKUP_XATTR)) {
   1992    1.4      haad 		if (dvp->v_type != VDIR) {
   1993    1.4      haad 			return (ENOTDIR);
   1994   1.27       chs 		} else if (zdp->z_sa_hdl == NULL) {
   1995   1.27       chs 			return (SET_ERROR(EIO));
   1996    1.4      haad 		}
   1997    1.4      haad 
   1998    1.4      haad 		if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
   1999    1.4      haad 			error = zfs_fastaccesschk_execute(zdp, cr);
   2000    1.4      haad 			if (!error) {
   2001    1.4      haad 				*vpp = dvp;
   2002    1.4      haad 				VN_HOLD(*vpp);
   2003    1.4      haad 				return (0);
   2004    1.4      haad 			}
   2005    1.4      haad 			return (error);
   2006    1.4      haad 		} else {
   2007    1.4      haad 			vnode_t *tvp = dnlc_lookup(dvp, nm);
   2008    1.4      haad 
   2009    1.4      haad 			if (tvp) {
   2010    1.4      haad 				error = zfs_fastaccesschk_execute(zdp, cr);
   2011    1.4      haad 				if (error) {
   2012    1.4      haad 					VN_RELE(tvp);
   2013    1.4      haad 					return (error);
   2014    1.4      haad 				}
   2015    1.4      haad 				if (tvp == DNLC_NO_VNODE) {
   2016    1.4      haad 					VN_RELE(tvp);
   2017    1.4      haad 					return (ENOENT);
   2018    1.4      haad 				} else {
   2019    1.4      haad 					*vpp = tvp;
   2020    1.4      haad 					return (specvp_check(vpp, cr));
   2021    1.4      haad 				}
   2022    1.4      haad 			}
   2023    1.4      haad 		}
   2024    1.4      haad 	}
   2025    1.4      haad 
   2026    1.4      haad 	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
   2027    1.1      haad 
   2028    1.1      haad 	ZFS_ENTER(zfsvfs);
   2029    1.1      haad 	ZFS_VERIFY_ZP(zdp);
   2030    1.1      haad 
   2031    1.1      haad 	*vpp = NULL;
   2032   1.12  riastrad 
   2033    1.1      haad 	if (flags & LOOKUP_XATTR) {
   2034    1.2      haad #ifdef TODO
   2035    1.1      haad 		/*
   2036    1.1      haad 		 * If the xattr property is off, refuse the lookup request.
   2037    1.1      haad 		 */
   2038    1.1      haad 		if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
   2039    1.1      haad 			ZFS_EXIT(zfsvfs);
   2040    1.1      haad 			return (EINVAL);
   2041    1.1      haad 		}
   2042    1.2      haad #endif
   2043    1.1      haad 
   2044    1.1      haad 		/*
   2045    1.1      haad 		 * We don't allow recursive attributes..
   2046    1.1      haad 		 * Maybe someday we will.
   2047    1.1      haad 		 */
   2048   1.27       chs 		if (zdp->z_pflags & ZFS_XATTR) {
   2049    1.1      haad 			ZFS_EXIT(zfsvfs);
   2050    1.1      haad 			return (EINVAL);
   2051    1.1      haad 		}
   2052    1.1      haad 
   2053    1.1      haad 		if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
   2054    1.1      haad 			ZFS_EXIT(zfsvfs);
   2055    1.1      haad 			return (error);
   2056    1.1      haad 		}
   2057    1.1      haad 
   2058    1.1      haad 		/*
   2059    1.1      haad 		 * Do we have permission to get into attribute directory?
   2060    1.1      haad 		 */
   2061    1.1      haad 		if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
   2062    1.1      haad 		    B_FALSE, cr)) {
   2063    1.1      haad 			VN_RELE(*vpp);
   2064    1.1      haad 			*vpp = NULL;
   2065    1.1      haad 		}
   2066    1.1      haad 
   2067    1.1      haad 		ZFS_EXIT(zfsvfs);
   2068    1.1      haad 		return (error);
   2069    1.1      haad 	}
   2070    1.1      haad 
   2071    1.1      haad 	if (dvp->v_type != VDIR) {
   2072    1.1      haad 		ZFS_EXIT(zfsvfs);
   2073    1.1      haad 		return (ENOTDIR);
   2074    1.1      haad 	}
   2075    1.1      haad 
   2076    1.1      haad 	/*
   2077    1.1      haad 	 * Check accessibility of directory.
   2078    1.1      haad 	 */
   2079    1.2      haad 	if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
   2080    1.2      haad 		ZFS_EXIT(zfsvfs);
   2081    1.2      haad 		return (error);
   2082    1.2      haad 	}
   2083    1.2      haad 
   2084    1.1      haad 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
   2085    1.1      haad 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   2086    1.1      haad 		ZFS_EXIT(zfsvfs);
   2087    1.1      haad 		return (EILSEQ);
   2088    1.1      haad 	}
   2089    1.1      haad 
   2090   1.44   hannken 	/*
   2091   1.44   hannken 	 * First handle the special cases.
   2092   1.44   hannken 	 */
   2093   1.44   hannken 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
   2094   1.44   hannken 		/*
   2095   1.44   hannken 		 * If we are a snapshot mounted under .zfs, return
   2096   1.44   hannken 		 * the vp for the snapshot directory.
   2097   1.44   hannken 		 */
   2098   1.44   hannken 		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
   2099   1.44   hannken 			ZFS_EXIT(zfsvfs);
   2100   1.44   hannken 			error = zfsctl_snapshot(zfsvfs->z_parent, vpp);
   2101   1.44   hannken 
   2102   1.44   hannken 			return (error);
   2103   1.44   hannken 		}
   2104   1.44   hannken 	}
   2105   1.44   hannken 	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
   2106   1.44   hannken 		ZFS_EXIT(zfsvfs);
   2107   1.44   hannken 		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
   2108   1.44   hannken 			return (SET_ERROR(ENOTSUP));
   2109   1.44   hannken 		error = zfsctl_root(zfsvfs, vpp);
   2110   1.44   hannken 		return (error);
   2111   1.44   hannken 	}
   2112   1.44   hannken 
   2113   1.27       chs 	error = zfs_dirlook(zdp, nm, &zp);
   2114   1.27       chs 	if (error == 0) {
   2115   1.27       chs 		*vpp = ZTOV(zp);
   2116    1.4      haad 		error = specvp_check(vpp, cr);
   2117   1.27       chs 	}
   2118    1.1      haad 
   2119    1.1      haad 	ZFS_EXIT(zfsvfs);
   2120    1.1      haad 	return (error);
   2121    1.1      haad }
   2122   1.27       chs #endif
   2123    1.1      haad 
   2124    1.1      haad /*
   2125    1.1      haad  * Attempt to create a new entry in a directory.  If the entry
   2126    1.1      haad  * already exists, truncate the file if permissible, else return
   2127    1.1      haad  * an error.  Return the vp of the created or trunc'd file.
   2128    1.1      haad  *
   2129    1.1      haad  *	IN:	dvp	- vnode of directory to put new file entry in.
   2130    1.1      haad  *		name	- name of new file entry.
   2131    1.1      haad  *		vap	- attributes of new file.
   2132    1.1      haad  *		excl	- flag indicating exclusive or non-exclusive mode.
   2133    1.1      haad  *		mode	- mode to open file with.
   2134    1.1      haad  *		cr	- credentials of caller.
   2135    1.1      haad  *		flag	- large file flag [UNUSED].
   2136    1.1      haad  *		ct	- caller context
   2137   1.27       chs  *		vsecp	- ACL to be set
   2138    1.1      haad  *
   2139    1.1      haad  *	OUT:	vpp	- vnode of created or trunc'd entry.
   2140    1.1      haad  *
   2141   1.27       chs  *	RETURN:	0 on success, error code on failure.
   2142    1.1      haad  *
   2143    1.1      haad  * Timestamps:
   2144    1.1      haad  *	dvp - ctime|mtime updated if new entry created
   2145    1.1      haad  *	 vp - ctime|mtime always, atime if new
   2146    1.1      haad  */
   2147    1.1      haad 
   2148    1.1      haad /* ARGSUSED */
   2149    1.1      haad static int
   2150    1.2      haad zfs_create(vnode_t *dvp, char *name, vattr_t *vap, int excl, int mode,
   2151   1.27       chs     vnode_t **vpp, cred_t *cr, kthread_t *td)
   2152    1.1      haad {
   2153    1.1      haad 	znode_t		*zp, *dzp = VTOZ(dvp);
   2154    1.1      haad 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
   2155    1.1      haad 	zilog_t		*zilog;
   2156    1.1      haad 	objset_t	*os;
   2157    1.1      haad 	dmu_tx_t	*tx;
   2158    1.1      haad 	int		error;
   2159   1.27       chs 	ksid_t		*ksid;
   2160   1.27       chs 	uid_t		uid;
   2161   1.27       chs 	gid_t		gid = crgetgid(cr);
   2162   1.27       chs 	zfs_acl_ids_t   acl_ids;
   2163   1.27       chs 	boolean_t	fuid_dirtied;
   2164    1.2      haad 	void		*vsecp = NULL;
   2165    1.2      haad 	int		flag = 0;
   2166   1.27       chs 	uint64_t	txtype;
   2167    1.1      haad 
   2168    1.1      haad 	/*
   2169    1.1      haad 	 * If we have an ephemeral id, ACL, or XVATTR then
   2170    1.1      haad 	 * make sure file system is at proper version
   2171    1.1      haad 	 */
   2172    1.1      haad 
   2173   1.27       chs 	ksid = crgetsid(cr, KSID_OWNER);
   2174   1.27       chs 	if (ksid)
   2175   1.27       chs 		uid = ksid_getid(ksid);
   2176   1.27       chs 	else
   2177   1.27       chs 		uid = crgetuid(cr);
   2178   1.27       chs 
   2179    1.1      haad 	if (zfsvfs->z_use_fuids == B_FALSE &&
   2180    1.1      haad 	    (vsecp || (vap->va_mask & AT_XVATTR) ||
   2181   1.27       chs 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
   2182   1.27       chs 		return (SET_ERROR(EINVAL));
   2183    1.1      haad 
   2184    1.1      haad 	ZFS_ENTER(zfsvfs);
   2185    1.1      haad 	ZFS_VERIFY_ZP(dzp);
   2186    1.1      haad 	os = zfsvfs->z_os;
   2187    1.1      haad 	zilog = zfsvfs->z_log;
   2188    1.1      haad 
   2189    1.1      haad 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
   2190    1.1      haad 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   2191    1.1      haad 		ZFS_EXIT(zfsvfs);
   2192   1.27       chs 		return (SET_ERROR(EILSEQ));
   2193    1.1      haad 	}
   2194    1.1      haad 
   2195    1.1      haad 	if (vap->va_mask & AT_XVATTR) {
   2196   1.27       chs 		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
   2197    1.1      haad 		    crgetuid(cr), cr, vap->va_type)) != 0) {
   2198    1.1      haad 			ZFS_EXIT(zfsvfs);
   2199    1.1      haad 			return (error);
   2200    1.1      haad 		}
   2201    1.1      haad 	}
   2202   1.27       chs 
   2203    1.1      haad 	*vpp = NULL;
   2204    1.1      haad 
   2205    1.2      haad 	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
   2206    1.2      haad 		vap->va_mode &= ~S_ISVTX;
   2207    1.1      haad 
   2208   1.27       chs 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
   2209   1.27       chs 	if (error) {
   2210   1.27       chs 		ZFS_EXIT(zfsvfs);
   2211   1.27       chs 		return (error);
   2212   1.27       chs 	}
   2213   1.27       chs 	ASSERT3P(zp, ==, NULL);
   2214   1.27       chs 
   2215   1.27       chs 	/*
   2216   1.27       chs 	 * Create a new file object and update the directory
   2217   1.27       chs 	 * to reference it.
   2218   1.27       chs 	 */
   2219   1.27       chs 	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
   2220   1.27       chs 		goto out;
   2221   1.27       chs 	}
   2222    1.1      haad 
   2223   1.27       chs 	/*
   2224   1.27       chs 	 * We only support the creation of regular files in
   2225   1.27       chs 	 * extended attribute directories.
   2226   1.27       chs 	 */
   2227    1.1      haad 
   2228   1.27       chs 	if ((dzp->z_pflags & ZFS_XATTR) &&
   2229   1.27       chs 	    (vap->va_type != VREG)) {
   2230   1.27       chs 		error = SET_ERROR(EINVAL);
   2231   1.27       chs 		goto out;
   2232    1.1      haad 	}
   2233    1.1      haad 
   2234   1.27       chs 	if ((error = zfs_acl_ids_create(dzp, 0, vap,
   2235   1.27       chs 	    cr, vsecp, &acl_ids)) != 0)
   2236   1.27       chs 		goto out;
   2237    1.1      haad 
   2238   1.27       chs 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
   2239   1.27       chs 		zfs_acl_ids_free(&acl_ids);
   2240   1.27       chs 		error = SET_ERROR(EDQUOT);
   2241   1.27       chs 		goto out;
   2242   1.27       chs 	}
   2243    1.1      haad 
   2244   1.27       chs 	getnewvnode_reserve(1);
   2245    1.4      haad 
   2246   1.27       chs 	tx = dmu_tx_create(os);
   2247    1.4      haad 
   2248   1.27       chs 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
   2249   1.27       chs 	    ZFS_SA_BASE_ATTR_SIZE);
   2250    1.4      haad 
   2251   1.27       chs 	fuid_dirtied = zfsvfs->z_fuid_dirty;
   2252   1.27       chs 	if (fuid_dirtied)
   2253   1.27       chs 		zfs_fuid_txhold(zfsvfs, tx);
   2254   1.27       chs 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
   2255   1.27       chs 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
   2256   1.27       chs 	if (!zfsvfs->z_use_sa &&
   2257   1.27       chs 	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
   2258   1.27       chs 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
   2259   1.27       chs 		    0, acl_ids.z_aclp->z_acl_bytes);
   2260   1.27       chs 	}
   2261   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   2262   1.27       chs 	if (error) {
   2263   1.27       chs 		zfs_acl_ids_free(&acl_ids);
   2264   1.27       chs 		dmu_tx_abort(tx);
   2265   1.27       chs 		getnewvnode_drop_reserve();
   2266   1.27       chs 		ZFS_EXIT(zfsvfs);
   2267   1.27       chs 		return (error);
   2268   1.27       chs 	}
   2269   1.27       chs 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
   2270    1.4      haad 
   2271   1.27       chs 	if (fuid_dirtied)
   2272   1.27       chs 		zfs_fuid_sync(zfsvfs, tx);
   2273    1.1      haad 
   2274   1.27       chs 	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
   2275   1.27       chs 	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
   2276   1.27       chs 	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
   2277   1.27       chs 	    vsecp, acl_ids.z_fuidp, vap);
   2278   1.27       chs 	zfs_acl_ids_free(&acl_ids);
   2279   1.27       chs 	dmu_tx_commit(tx);
   2280    1.1      haad 
   2281   1.27       chs 	getnewvnode_drop_reserve();
   2282    1.1      haad 
   2283    1.1      haad out:
   2284   1.27       chs 	if (error == 0) {
   2285    1.1      haad 		*vpp = ZTOV(zp);
   2286    1.1      haad 	}
   2287    1.1      haad 
   2288   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   2289   1.27       chs 		zil_commit(zilog, 0);
   2290   1.27       chs 
   2291    1.1      haad 	ZFS_EXIT(zfsvfs);
   2292    1.1      haad 	return (error);
   2293    1.1      haad }
   2294    1.1      haad 
   2295    1.1      haad /*
   2296    1.1      haad  * Remove an entry from a directory.
   2297    1.1      haad  *
   2298    1.1      haad  *	IN:	dvp	- vnode of directory to remove entry from.
   2299    1.1      haad  *		name	- name of entry to remove.
   2300    1.1      haad  *		cr	- credentials of caller.
   2301    1.1      haad  *		ct	- caller context
   2302    1.1      haad  *		flags	- case flags
   2303    1.1      haad  *
   2304   1.27       chs  *	RETURN:	0 on success, error code on failure.
   2305    1.1      haad  *
   2306    1.1      haad  * Timestamps:
   2307    1.1      haad  *	dvp - ctime|mtime
   2308    1.1      haad  *	 vp - ctime (if nlink > 0)
   2309    1.1      haad  */
   2310   1.27       chs 
   2311    1.1      haad /*ARGSUSED*/
   2312    1.1      haad static int
   2313   1.27       chs zfs_remove(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
   2314    1.1      haad {
   2315   1.27       chs 	znode_t		*dzp = VTOZ(dvp);
   2316   1.27       chs 	znode_t		*zp = VTOZ(vp);
   2317   1.27       chs 	znode_t		*xzp;
   2318    1.1      haad 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
   2319    1.1      haad 	zilog_t		*zilog;
   2320    1.1      haad 	uint64_t	acl_obj, xattr_obj;
   2321   1.27       chs 	uint64_t	obj = 0;
   2322    1.1      haad 	dmu_tx_t	*tx;
   2323    1.1      haad 	boolean_t	unlinked, toobig = FALSE;
   2324    1.1      haad 	uint64_t	txtype;
   2325    1.1      haad 	int		error;
   2326    1.1      haad 
   2327    1.1      haad 	ZFS_ENTER(zfsvfs);
   2328    1.1      haad 	ZFS_VERIFY_ZP(dzp);
   2329   1.27       chs 	ZFS_VERIFY_ZP(zp);
   2330    1.1      haad 	zilog = zfsvfs->z_log;
   2331   1.27       chs 	zp = VTOZ(vp);
   2332    1.1      haad 
   2333   1.27       chs 	xattr_obj = 0;
   2334   1.27       chs 	xzp = NULL;
   2335    1.1      haad 
   2336    1.1      haad 	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
   2337    1.1      haad 		goto out;
   2338    1.1      haad 	}
   2339    1.1      haad 
   2340    1.1      haad 	/*
   2341    1.1      haad 	 * Need to use rmdir for removing directories.
   2342    1.1      haad 	 */
   2343    1.1      haad 	if (vp->v_type == VDIR) {
   2344   1.27       chs 		error = SET_ERROR(EPERM);
   2345    1.1      haad 		goto out;
   2346    1.1      haad 	}
   2347    1.1      haad 
   2348    1.1      haad 	vnevent_remove(vp, dvp, name, ct);
   2349    1.1      haad 
   2350   1.27       chs 	obj = zp->z_id;
   2351    1.1      haad 
   2352   1.27       chs 	/* are there any extended attributes? */
   2353   1.27       chs 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
   2354   1.27       chs 	    &xattr_obj, sizeof (xattr_obj));
   2355   1.27       chs 	if (error == 0 && xattr_obj) {
   2356   1.27       chs 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
   2357   1.27       chs 		ASSERT0(error);
   2358   1.27       chs 	}
   2359    1.1      haad 
   2360    1.1      haad 	/*
   2361    1.1      haad 	 * We may delete the znode now, or we may put it in the unlinked set;
   2362    1.1      haad 	 * it depends on whether we're the last link, and on whether there are
   2363    1.1      haad 	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
   2364    1.1      haad 	 * allow for either case.
   2365    1.1      haad 	 */
   2366    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   2367    1.1      haad 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
   2368   1.27       chs 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   2369   1.27       chs 	zfs_sa_upgrade_txholds(tx, zp);
   2370   1.27       chs 	zfs_sa_upgrade_txholds(tx, dzp);
   2371   1.27       chs 
   2372   1.27       chs 	if (xzp) {
   2373   1.27       chs 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
   2374   1.27       chs 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
   2375    1.1      haad 	}
   2376    1.1      haad 
   2377    1.1      haad 	/* charge as an update -- would be nice not to charge at all */
   2378    1.1      haad 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
   2379    1.1      haad 
   2380   1.27       chs 	/*
   2381   1.27       chs 	 * Mark this transaction as typically resulting in a net free of space
   2382   1.27       chs 	 */
   2383   1.27       chs 	dmu_tx_mark_netfree(tx);
   2384   1.27       chs 
   2385   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   2386    1.1      haad 	if (error) {
   2387    1.1      haad 		dmu_tx_abort(tx);
   2388    1.1      haad 		ZFS_EXIT(zfsvfs);
   2389    1.1      haad 		return (error);
   2390    1.1      haad 	}
   2391    1.1      haad 
   2392    1.1      haad 	/*
   2393    1.1      haad 	 * Remove the directory entry.
   2394    1.1      haad 	 */
   2395   1.27       chs 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
   2396    1.1      haad 
   2397    1.1      haad 	if (error) {
   2398    1.1      haad 		dmu_tx_commit(tx);
   2399    1.1      haad 		goto out;
   2400    1.1      haad 	}
   2401    1.1      haad 
   2402   1.27       chs 	if (unlinked) {
   2403    1.1      haad 		zfs_unlinked_add(zp, tx);
   2404   1.27       chs 		vp->v_vflag |= VV_NOSYNC;
   2405    1.1      haad 	}
   2406    1.1      haad 
   2407    1.1      haad 	txtype = TX_REMOVE;
   2408   1.27       chs 	zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
   2409    1.1      haad 
   2410    1.1      haad 	dmu_tx_commit(tx);
   2411    1.1      haad out:
   2412    1.1      haad 
   2413   1.27       chs 	if (xzp)
   2414   1.27       chs 		vrele(ZTOV(xzp));
   2415    1.1      haad 
   2416   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   2417   1.27       chs 		zil_commit(zilog, 0);
   2418    1.1      haad 
   2419    1.1      haad 	ZFS_EXIT(zfsvfs);
   2420    1.1      haad 	return (error);
   2421    1.1      haad }
   2422    1.1      haad 
   2423    1.1      haad /*
   2424    1.1      haad  * Create a new directory and insert it into dvp using the name
   2425    1.1      haad  * provided.  Return a pointer to the inserted directory.
   2426    1.1      haad  *
   2427    1.1      haad  *	IN:	dvp	- vnode of directory to add subdir to.
   2428    1.1      haad  *		dirname	- name of new directory.
   2429    1.1      haad  *		vap	- attributes of new directory.
   2430    1.1      haad  *		cr	- credentials of caller.
   2431    1.1      haad  *		ct	- caller context
   2432   1.27       chs  *		flags	- case flags
   2433    1.1      haad  *		vsecp	- ACL to be set
   2434    1.1      haad  *
   2435    1.1      haad  *	OUT:	vpp	- vnode of created directory.
   2436    1.1      haad  *
   2437   1.27       chs  *	RETURN:	0 on success, error code on failure.
   2438    1.1      haad  *
   2439    1.1      haad  * Timestamps:
   2440    1.1      haad  *	dvp - ctime|mtime updated
   2441    1.1      haad  *	 vp - ctime|mtime|atime updated
   2442    1.1      haad  */
   2443    1.1      haad /*ARGSUSED*/
   2444    1.1      haad static int
   2445   1.27       chs zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr)
   2446    1.1      haad {
   2447    1.1      haad 	znode_t		*zp, *dzp = VTOZ(dvp);
   2448    1.1      haad 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
   2449    1.1      haad 	zilog_t		*zilog;
   2450    1.1      haad 	uint64_t	txtype;
   2451    1.1      haad 	dmu_tx_t	*tx;
   2452    1.1      haad 	int		error;
   2453   1.27       chs 	ksid_t		*ksid;
   2454   1.27       chs 	uid_t		uid;
   2455   1.27       chs 	gid_t		gid = crgetgid(cr);
   2456   1.27       chs 	zfs_acl_ids_t   acl_ids;
   2457    1.4      haad 	boolean_t	fuid_dirtied;
   2458    1.1      haad 
   2459    1.1      haad 	ASSERT(vap->va_type == VDIR);
   2460    1.1      haad 
   2461    1.1      haad 	/*
   2462    1.1      haad 	 * If we have an ephemeral id, ACL, or XVATTR then
   2463    1.1      haad 	 * make sure file system is at proper version
   2464    1.1      haad 	 */
   2465    1.1      haad 
   2466   1.27       chs 	ksid = crgetsid(cr, KSID_OWNER);
   2467   1.27       chs 	if (ksid)
   2468   1.27       chs 		uid = ksid_getid(ksid);
   2469   1.27       chs 	else
   2470   1.27       chs 		uid = crgetuid(cr);
   2471    1.1      haad 	if (zfsvfs->z_use_fuids == B_FALSE &&
   2472   1.27       chs 	    ((vap->va_mask & AT_XVATTR) ||
   2473   1.27       chs 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
   2474   1.27       chs 		return (SET_ERROR(EINVAL));
   2475    1.1      haad 
   2476    1.1      haad 	ZFS_ENTER(zfsvfs);
   2477    1.1      haad 	ZFS_VERIFY_ZP(dzp);
   2478    1.1      haad 	zilog = zfsvfs->z_log;
   2479    1.1      haad 
   2480   1.27       chs 	if (dzp->z_pflags & ZFS_XATTR) {
   2481    1.1      haad 		ZFS_EXIT(zfsvfs);
   2482   1.27       chs 		return (SET_ERROR(EINVAL));
   2483    1.1      haad 	}
   2484    1.1      haad 
   2485    1.1      haad 	if (zfsvfs->z_utf8 && u8_validate(dirname,
   2486    1.1      haad 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   2487    1.1      haad 		ZFS_EXIT(zfsvfs);
   2488   1.27       chs 		return (SET_ERROR(EILSEQ));
   2489    1.1      haad 	}
   2490    1.1      haad 
   2491   1.27       chs 	if (vap->va_mask & AT_XVATTR) {
   2492   1.27       chs 		if ((error = secpolicy_xvattr(dvp, (xvattr_t *)vap,
   2493    1.1      haad 		    crgetuid(cr), cr, vap->va_type)) != 0) {
   2494    1.1      haad 			ZFS_EXIT(zfsvfs);
   2495    1.1      haad 			return (error);
   2496    1.1      haad 		}
   2497   1.27       chs 	}
   2498   1.27       chs 
   2499   1.27       chs 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
   2500   1.27       chs 	    NULL, &acl_ids)) != 0) {
   2501   1.27       chs 		ZFS_EXIT(zfsvfs);
   2502   1.27       chs 		return (error);
   2503   1.27       chs 	}
   2504    1.1      haad 
   2505    1.1      haad 	/*
   2506    1.1      haad 	 * First make sure the new directory doesn't exist.
   2507   1.27       chs 	 *
   2508   1.27       chs 	 * Existence is checked first to make sure we don't return
   2509   1.27       chs 	 * EACCES instead of EEXIST which can cause some applications
   2510   1.27       chs 	 * to fail.
   2511    1.1      haad 	 */
   2512    1.1      haad 	*vpp = NULL;
   2513    1.1      haad 
   2514   1.27       chs 	if (error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW)) {
   2515   1.27       chs 		zfs_acl_ids_free(&acl_ids);
   2516    1.1      haad 		ZFS_EXIT(zfsvfs);
   2517    1.1      haad 		return (error);
   2518    1.1      haad 	}
   2519   1.27       chs 	ASSERT3P(zp, ==, NULL);
   2520    1.1      haad 
   2521    1.1      haad 	if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
   2522   1.27       chs 		zfs_acl_ids_free(&acl_ids);
   2523    1.1      haad 		ZFS_EXIT(zfsvfs);
   2524    1.1      haad 		return (error);
   2525    1.1      haad 	}
   2526    1.1      haad 
   2527    1.4      haad 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
   2528    1.4      haad 		zfs_acl_ids_free(&acl_ids);
   2529    1.4      haad 		ZFS_EXIT(zfsvfs);
   2530   1.27       chs 		return (SET_ERROR(EDQUOT));
   2531    1.1      haad 	}
   2532    1.4      haad 
   2533    1.1      haad 	/*
   2534    1.1      haad 	 * Add a new entry to the directory.
   2535    1.1      haad 	 */
   2536   1.27       chs 	getnewvnode_reserve(1);
   2537    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   2538    1.1      haad 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
   2539    1.1      haad 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
   2540    1.4      haad 	fuid_dirtied = zfsvfs->z_fuid_dirty;
   2541    1.4      haad 	if (fuid_dirtied)
   2542    1.4      haad 		zfs_fuid_txhold(zfsvfs, tx);
   2543   1.27       chs 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
   2544   1.27       chs 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
   2545   1.27       chs 		    acl_ids.z_aclp->z_acl_bytes);
   2546   1.27       chs 	}
   2547   1.27       chs 
   2548   1.27       chs 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
   2549   1.27       chs 	    ZFS_SA_BASE_ATTR_SIZE);
   2550   1.27       chs 
   2551   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   2552    1.1      haad 	if (error) {
   2553    1.4      haad 		zfs_acl_ids_free(&acl_ids);
   2554    1.1      haad 		dmu_tx_abort(tx);
   2555   1.27       chs 		getnewvnode_drop_reserve();
   2556    1.1      haad 		ZFS_EXIT(zfsvfs);
   2557    1.1      haad 		return (error);
   2558    1.1      haad 	}
   2559    1.1      haad 
   2560    1.1      haad 	/*
   2561    1.1      haad 	 * Create new node.
   2562    1.1      haad 	 */
   2563   1.27       chs 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
   2564    1.1      haad 
   2565    1.4      haad 	if (fuid_dirtied)
   2566    1.4      haad 		zfs_fuid_sync(zfsvfs, tx);
   2567   1.27       chs 
   2568    1.1      haad 	/*
   2569    1.1      haad 	 * Now put new name in parent dir.
   2570    1.1      haad 	 */
   2571   1.27       chs 	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
   2572    1.1      haad 
   2573    1.1      haad 	*vpp = ZTOV(zp);
   2574    1.1      haad 
   2575   1.27       chs 	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
   2576   1.27       chs 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
   2577    1.4      haad 	    acl_ids.z_fuidp, vap);
   2578    1.1      haad 
   2579    1.4      haad 	zfs_acl_ids_free(&acl_ids);
   2580   1.27       chs 
   2581    1.1      haad 	dmu_tx_commit(tx);
   2582    1.1      haad 
   2583   1.27       chs 	getnewvnode_drop_reserve();
   2584   1.27       chs 
   2585   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   2586   1.27       chs 		zil_commit(zilog, 0);
   2587    1.1      haad 
   2588    1.1      haad 	ZFS_EXIT(zfsvfs);
   2589    1.1      haad 	return (0);
   2590    1.1      haad }
   2591    1.1      haad 
   2592    1.1      haad /*
   2593    1.1      haad  * Remove a directory subdir entry.  If the current working
   2594    1.1      haad  * directory is the same as the subdir to be removed, the
   2595    1.1      haad  * remove will fail.
   2596    1.1      haad  *
   2597    1.1      haad  *	IN:	dvp	- vnode of directory to remove from.
   2598    1.1      haad  *		name	- name of directory to be removed.
   2599    1.1      haad  *		cwd	- vnode of current working directory.
   2600    1.1      haad  *		cr	- credentials of caller.
   2601    1.1      haad  *		ct	- caller context
   2602    1.1      haad  *		flags	- case flags
   2603    1.1      haad  *
   2604   1.27       chs  *	RETURN:	0 on success, error code on failure.
   2605    1.1      haad  *
   2606    1.1      haad  * Timestamps:
   2607    1.1      haad  *	dvp - ctime|mtime updated
   2608    1.1      haad  */
   2609    1.1      haad /*ARGSUSED*/
   2610    1.1      haad static int
   2611   1.27       chs zfs_rmdir(vnode_t *dvp, vnode_t *vp, char *name, cred_t *cr)
   2612    1.1      haad {
   2613    1.1      haad 	znode_t		*dzp = VTOZ(dvp);
   2614   1.27       chs 	znode_t		*zp = VTOZ(vp);
   2615    1.1      haad 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
   2616    1.1      haad 	zilog_t		*zilog;
   2617    1.1      haad 	dmu_tx_t	*tx;
   2618    1.1      haad 	int		error;
   2619    1.1      haad 
   2620    1.1      haad 	ZFS_ENTER(zfsvfs);
   2621    1.1      haad 	ZFS_VERIFY_ZP(dzp);
   2622   1.27       chs 	ZFS_VERIFY_ZP(zp);
   2623    1.1      haad 	zilog = zfsvfs->z_log;
   2624    1.1      haad 
   2625    1.1      haad 
   2626    1.1      haad 	if (error = zfs_zaccess_delete(dzp, zp, cr)) {
   2627    1.1      haad 		goto out;
   2628    1.1      haad 	}
   2629    1.1      haad 
   2630    1.1      haad 	if (vp->v_type != VDIR) {
   2631   1.27       chs 		error = SET_ERROR(ENOTDIR);
   2632    1.1      haad 		goto out;
   2633    1.1      haad 	}
   2634    1.1      haad 
   2635    1.1      haad 	vnevent_rmdir(vp, dvp, name, ct);
   2636    1.1      haad 
   2637    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   2638    1.1      haad 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
   2639   1.27       chs 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   2640    1.1      haad 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
   2641   1.27       chs 	zfs_sa_upgrade_txholds(tx, zp);
   2642   1.27       chs 	zfs_sa_upgrade_txholds(tx, dzp);
   2643   1.27       chs 	dmu_tx_mark_netfree(tx);
   2644   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   2645    1.1      haad 	if (error) {
   2646    1.1      haad 		dmu_tx_abort(tx);
   2647    1.1      haad 		ZFS_EXIT(zfsvfs);
   2648    1.1      haad 		return (error);
   2649    1.1      haad 	}
   2650    1.1      haad 
   2651    1.2      haad 	cache_purge(dvp);
   2652    1.2      haad 
   2653   1.27       chs 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
   2654    1.1      haad 
   2655    1.1      haad 	if (error == 0) {
   2656    1.1      haad 		uint64_t txtype = TX_RMDIR;
   2657   1.27       chs 		zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
   2658    1.1      haad 	}
   2659    1.1      haad 
   2660    1.1      haad 	dmu_tx_commit(tx);
   2661    1.1      haad 
   2662   1.27       chs 	cache_purge(vp);
   2663    1.1      haad out:
   2664   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   2665   1.27       chs 		zil_commit(zilog, 0);
   2666    1.1      haad 
   2667    1.1      haad 	ZFS_EXIT(zfsvfs);
   2668    1.1      haad 	return (error);
   2669    1.1      haad }
   2670    1.1      haad 
   2671    1.1      haad /*
   2672    1.1      haad  * Read as many directory entries as will fit into the provided
   2673    1.1      haad  * buffer from the given directory cursor position (specified in
   2674   1.27       chs  * the uio structure).
   2675    1.1      haad  *
   2676    1.1      haad  *	IN:	vp	- vnode of directory to read.
   2677    1.1      haad  *		uio	- structure supplying read location, range info,
   2678    1.1      haad  *			  and return buffer.
   2679    1.1      haad  *		cr	- credentials of caller.
   2680    1.1      haad  *		ct	- caller context
   2681    1.1      haad  *		flags	- case flags
   2682    1.1      haad  *
   2683    1.1      haad  *	OUT:	uio	- updated offset and range, buffer filled.
   2684    1.1      haad  *		eofp	- set to true if end-of-file detected.
   2685    1.1      haad  *
   2686   1.27       chs  *	RETURN:	0 on success, error code on failure.
   2687    1.1      haad  *
   2688    1.1      haad  * Timestamps:
   2689    1.1      haad  *	vp - atime updated
   2690    1.1      haad  *
   2691    1.1      haad  * Note that the low 4 bits of the cookie returned by zap is always zero.
   2692    1.1      haad  * This allows us to use the low range for "special" directory entries:
   2693    1.1      haad  * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
   2694    1.1      haad  * we use the offset 2 for the '.zfs' directory.
   2695    1.1      haad  */
   2696    1.1      haad /* ARGSUSED */
   2697    1.1      haad static int
   2698   1.27       chs zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, int *ncookies, off_t **cookies)
   2699    1.1      haad {
   2700    1.1      haad 	znode_t		*zp = VTOZ(vp);
   2701    1.1      haad 	iovec_t		*iovp;
   2702    1.1      haad 	edirent_t	*eodp;
   2703    1.1      haad 	dirent64_t	*odp;
   2704    1.1      haad 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   2705    1.1      haad 	objset_t	*os;
   2706    1.1      haad 	caddr_t		outbuf;
   2707    1.1      haad 	size_t		bufsize;
   2708    1.1      haad 	zap_cursor_t	zc;
   2709    1.1      haad 	zap_attribute_t	zap;
   2710    1.1      haad 	uint_t		bytes_wanted;
   2711    1.1      haad 	uint64_t	offset; /* must be unsigned; checks for < 1 */
   2712   1.27       chs 	uint64_t	parent;
   2713    1.1      haad 	int		local_eof;
   2714    1.1      haad 	int		outcount;
   2715    1.1      haad 	int		error;
   2716    1.1      haad 	uint8_t		prefetch;
   2717    1.1      haad 	boolean_t	check_sysattrs;
   2718    1.2      haad 	uint8_t		type;
   2719   1.27       chs 	int		ncooks = 0;
   2720   1.27       chs 	off_t		*cooks = NULL;
   2721    1.2      haad 	int		flags = 0;
   2722   1.27       chs #ifdef __FreeBSD__
   2723   1.27       chs 	boolean_t user = uio->uio_segflg != UIO_SYSSPACE;
   2724   1.27       chs #endif
   2725   1.27       chs #ifdef __NetBSD__
   2726   1.27       chs 	boolean_t user = !VMSPACE_IS_KERNEL_P(uio->uio_vmspace);
   2727   1.27       chs #endif
   2728    1.1      haad 
   2729    1.1      haad 	ZFS_ENTER(zfsvfs);
   2730    1.1      haad 	ZFS_VERIFY_ZP(zp);
   2731    1.1      haad 
   2732   1.27       chs 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
   2733   1.27       chs 	    &parent, sizeof (parent))) != 0) {
   2734   1.27       chs 		ZFS_EXIT(zfsvfs);
   2735   1.27       chs 		return (error);
   2736   1.27       chs 	}
   2737   1.27       chs 
   2738    1.1      haad 	/*
   2739    1.1      haad 	 * If we are not given an eof variable,
   2740    1.1      haad 	 * use a local one.
   2741    1.1      haad 	 */
   2742    1.1      haad 	if (eofp == NULL)
   2743    1.1      haad 		eofp = &local_eof;
   2744    1.1      haad 
   2745    1.1      haad 	/*
   2746    1.1      haad 	 * Check for valid iov_len.
   2747    1.1      haad 	 */
   2748    1.1      haad 	if (uio->uio_iov->iov_len <= 0) {
   2749    1.1      haad 		ZFS_EXIT(zfsvfs);
   2750   1.27       chs 		return (SET_ERROR(EINVAL));
   2751    1.1      haad 	}
   2752    1.1      haad 
   2753    1.1      haad 	/*
   2754    1.1      haad 	 * Quit if directory has been removed (posix)
   2755    1.1      haad 	 */
   2756    1.1      haad 	if ((*eofp = zp->z_unlinked) != 0) {
   2757    1.1      haad 		ZFS_EXIT(zfsvfs);
   2758    1.1      haad 		return (0);
   2759    1.1      haad 	}
   2760    1.1      haad 
   2761    1.1      haad 	error = 0;
   2762    1.1      haad 	os = zfsvfs->z_os;
   2763    1.1      haad 	offset = uio->uio_loffset;
   2764    1.1      haad 	prefetch = zp->z_zn_prefetch;
   2765   1.27       chs 
   2766    1.1      haad 	/*
   2767    1.1      haad 	 * Initialize the iterator cursor.
   2768    1.1      haad 	 */
   2769    1.1      haad 	if (offset <= 3) {
   2770    1.1      haad 		/*
   2771    1.1      haad 		 * Start iteration from the beginning of the directory.
   2772    1.1      haad 		 */
   2773    1.1      haad 		zap_cursor_init(&zc, os, zp->z_id);
   2774    1.1      haad 	} else {
   2775    1.1      haad 		/*
   2776    1.1      haad 		 * The offset is a serialized cursor.
   2777    1.1      haad 		 */
   2778    1.1      haad 		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
   2779    1.1      haad 	}
   2780    1.1      haad 
   2781    1.1      haad 	/*
   2782    1.1      haad 	 * Get space to change directory entries into fs independent format.
   2783    1.1      haad 	 */
   2784    1.1      haad 	iovp = uio->uio_iov;
   2785    1.1      haad 	bytes_wanted = iovp->iov_len;
   2786   1.27       chs 	if (user || uio->uio_iovcnt != 1) {
   2787    1.1      haad 		bufsize = bytes_wanted;
   2788    1.1      haad 		outbuf = kmem_alloc(bufsize, KM_SLEEP);
   2789    1.1      haad 		odp = (struct dirent64 *)outbuf;
   2790    1.1      haad 	} else {
   2791    1.1      haad 		bufsize = bytes_wanted;
   2792   1.27       chs 		outbuf = NULL;
   2793    1.1      haad 		odp = (struct dirent64 *)iovp->iov_base;
   2794    1.1      haad 	}
   2795    1.1      haad 	eodp = (struct edirent *)odp;
   2796    1.1      haad 
   2797    1.2      haad 	if (ncookies != NULL) {
   2798    1.2      haad 		/*
   2799    1.2      haad 		 * Minimum entry size is dirent size and 1 byte for a file name.
   2800    1.2      haad 		 */
   2801   1.27       chs #ifdef __FreeBSD__
   2802   1.27       chs 		ncooks = uio->uio_resid / (sizeof(struct dirent) - sizeof(((struct dirent *)NULL)->d_name) + 1);
   2803   1.27       chs 		cooks = malloc(ncooks * sizeof(u_long), M_TEMP, M_WAITOK);
   2804   1.27       chs #endif
   2805   1.27       chs #ifdef __NetBSD__
   2806    1.2      haad 		ncooks = uio->uio_resid / _DIRENT_MINSIZE(odp);
   2807   1.45   hannken 		cooks = malloc(ncooks * sizeof(off_t), M_TEMP, M_WAITOK);
   2808   1.27       chs #endif
   2809    1.2      haad 		*cookies = cooks;
   2810    1.2      haad 		*ncookies = ncooks;
   2811    1.2      haad 	}
   2812    1.2      haad 
   2813    1.1      haad 	/*
   2814    1.1      haad 	 * If this VFS supports the system attribute view interface; and
   2815    1.1      haad 	 * we're looking at an extended attribute directory; and we care
   2816    1.1      haad 	 * about normalization conflicts on this vfs; then we must check
   2817    1.1      haad 	 * for normalization conflicts with the sysattr name space.
   2818    1.1      haad 	 */
   2819    1.2      haad #ifdef TODO
   2820    1.1      haad 	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
   2821    1.1      haad 	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
   2822    1.1      haad 	    (flags & V_RDDIR_ENTFLAGS);
   2823    1.2      haad #else
   2824    1.2      haad 	check_sysattrs = 0;
   2825    1.2      haad #endif
   2826    1.1      haad 
   2827    1.1      haad 	/*
   2828    1.1      haad 	 * Transform to file-system independent format
   2829    1.1      haad 	 */
   2830    1.1      haad 	outcount = 0;
   2831    1.1      haad 	while (outcount < bytes_wanted) {
   2832    1.1      haad 		ino64_t objnum;
   2833    1.1      haad 		ushort_t reclen;
   2834   1.27       chs 		off64_t *next = NULL;
   2835    1.1      haad 
   2836    1.1      haad 		/*
   2837    1.1      haad 		 * Special case `.', `..', and `.zfs'.
   2838    1.1      haad 		 */
   2839    1.1      haad 		if (offset == 0) {
   2840    1.1      haad 			(void) strcpy(zap.za_name, ".");
   2841    1.1      haad 			zap.za_normalization_conflict = 0;
   2842    1.1      haad 			objnum = zp->z_id;
   2843    1.2      haad 			type = DT_DIR;
   2844    1.1      haad 		} else if (offset == 1) {
   2845    1.1      haad 			(void) strcpy(zap.za_name, "..");
   2846    1.1      haad 			zap.za_normalization_conflict = 0;
   2847   1.27       chs 			objnum = parent;
   2848    1.2      haad 			type = DT_DIR;
   2849    1.1      haad 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
   2850    1.1      haad 			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
   2851    1.1      haad 			zap.za_normalization_conflict = 0;
   2852    1.1      haad 			objnum = ZFSCTL_INO_ROOT;
   2853    1.2      haad 			type = DT_DIR;
   2854    1.1      haad 		} else {
   2855    1.1      haad 			/*
   2856    1.1      haad 			 * Grab next entry.
   2857    1.1      haad 			 */
   2858    1.1      haad 			if (error = zap_cursor_retrieve(&zc, &zap)) {
   2859    1.1      haad 				if ((*eofp = (error == ENOENT)) != 0)
   2860    1.1      haad 					break;
   2861    1.1      haad 				else
   2862    1.1      haad 					goto update;
   2863    1.1      haad 			}
   2864    1.1      haad 
   2865    1.1      haad 			if (zap.za_integer_length != 8 ||
   2866    1.1      haad 			    zap.za_num_integers != 1) {
   2867    1.1      haad 				cmn_err(CE_WARN, "zap_readdir: bad directory "
   2868    1.1      haad 				    "entry, obj = %lld, offset = %lld\n",
   2869    1.1      haad 				    (u_longlong_t)zp->z_id,
   2870    1.1      haad 				    (u_longlong_t)offset);
   2871   1.27       chs 				error = SET_ERROR(ENXIO);
   2872    1.1      haad 				goto update;
   2873    1.1      haad 			}
   2874    1.1      haad 
   2875    1.1      haad 			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
   2876    1.1      haad 			/*
   2877    1.1      haad 			 * MacOS X can extract the object type here such as:
   2878    1.1      haad 			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
   2879    1.1      haad 			 */
   2880    1.2      haad 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
   2881    1.1      haad 
   2882    1.1      haad 			if (check_sysattrs && !zap.za_normalization_conflict) {
   2883    1.2      haad #ifdef TODO
   2884    1.1      haad 				zap.za_normalization_conflict =
   2885    1.1      haad 				    xattr_sysattr_casechk(zap.za_name);
   2886    1.2      haad #else
   2887    1.2      haad 				panic("%s:%u: TODO", __func__, __LINE__);
   2888    1.2      haad #endif
   2889    1.1      haad 			}
   2890    1.1      haad 		}
   2891    1.1      haad 
   2892    1.4      haad 		if (flags & V_RDDIR_ACCFILTER) {
   2893    1.4      haad 			/*
   2894    1.4      haad 			 * If we have no access at all, don't include
   2895    1.4      haad 			 * this entry in the returned information
   2896    1.4      haad 			 */
   2897    1.4      haad 			znode_t	*ezp;
   2898    1.4      haad 			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
   2899    1.4      haad 				goto skip_entry;
   2900    1.4      haad 			if (!zfs_has_access(ezp, cr)) {
   2901   1.27       chs 				vrele(ZTOV(ezp));
   2902    1.4      haad 				goto skip_entry;
   2903    1.4      haad 			}
   2904   1.27       chs 			vrele(ZTOV(ezp));
   2905    1.4      haad 		}
   2906    1.4      haad 
   2907    1.1      haad 		if (flags & V_RDDIR_ENTFLAGS)
   2908    1.1      haad 			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
   2909    1.1      haad 		else
   2910   1.27       chs 			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
   2911    1.1      haad 
   2912    1.1      haad 		/*
   2913    1.1      haad 		 * Will this entry fit in the buffer?
   2914    1.1      haad 		 */
   2915    1.1      haad 		if (outcount + reclen > bufsize) {
   2916    1.1      haad 			/*
   2917    1.1      haad 			 * Did we manage to fit anything in the buffer?
   2918    1.1      haad 			 */
   2919    1.1      haad 			if (!outcount) {
   2920   1.27       chs 				error = SET_ERROR(EINVAL);
   2921    1.1      haad 				goto update;
   2922    1.1      haad 			}
   2923    1.1      haad 			break;
   2924    1.1      haad 		}
   2925    1.1      haad 		if (flags & V_RDDIR_ENTFLAGS) {
   2926    1.1      haad 			/*
   2927    1.1      haad 			 * Add extended flag entry:
   2928    1.1      haad 			 */
   2929    1.1      haad 			eodp->ed_ino = objnum;
   2930    1.1      haad 			eodp->ed_reclen = reclen;
   2931    1.1      haad 			/* NOTE: ed_off is the offset for the *next* entry */
   2932    1.1      haad 			next = &(eodp->ed_off);
   2933    1.1      haad 			eodp->ed_eflags = zap.za_normalization_conflict ?
   2934    1.1      haad 			    ED_CASE_CONFLICT : 0;
   2935    1.1      haad 			(void) strncpy(eodp->ed_name, zap.za_name,
   2936    1.1      haad 			    EDIRENT_NAMELEN(reclen));
   2937    1.1      haad 			eodp = (edirent_t *)((intptr_t)eodp + reclen);
   2938    1.1      haad 		} else {
   2939    1.1      haad 			/*
   2940    1.1      haad 			 * Add normal entry:
   2941    1.1      haad 			 */
   2942    1.1      haad 			odp->d_ino = objnum;
   2943    1.1      haad 			odp->d_reclen = reclen;
   2944    1.2      haad 			odp->d_namlen = strlen(zap.za_name);
   2945    1.2      haad 			(void) strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
   2946    1.2      haad 			odp->d_type = type;
   2947    1.1      haad 			odp = (dirent64_t *)((intptr_t)odp + reclen);
   2948    1.1      haad 		}
   2949    1.1      haad 		outcount += reclen;
   2950    1.1      haad 
   2951   1.27       chs 		ASSERT(outcount <= bufsize);
   2952    1.1      haad 
   2953    1.1      haad 		/* Prefetch znode */
   2954    1.1      haad 		if (prefetch)
   2955   1.27       chs 			dmu_prefetch(os, objnum, 0, 0, 0,
   2956   1.27       chs 			    ZIO_PRIORITY_SYNC_READ);
   2957    1.1      haad 
   2958    1.4      haad 	skip_entry:
   2959    1.1      haad 		/*
   2960    1.1      haad 		 * Move to the next entry, fill in the previous offset.
   2961    1.1      haad 		 */
   2962    1.1      haad 		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
   2963    1.1      haad 			zap_cursor_advance(&zc);
   2964    1.1      haad 			offset = zap_cursor_serialize(&zc);
   2965    1.1      haad 		} else {
   2966    1.1      haad 			offset += 1;
   2967    1.1      haad 		}
   2968    1.2      haad 
   2969    1.2      haad 		if (cooks != NULL) {
   2970    1.2      haad 			*cooks++ = offset;
   2971    1.2      haad 			ncooks--;
   2972   1.27       chs #ifdef __FreeBSD__
   2973   1.27       chs 			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
   2974   1.27       chs #endif
   2975   1.27       chs #ifdef __NetBSD__
   2976   1.27       chs 			KASSERTMSG(ncooks >= 0, "ncooks=%d", ncooks);
   2977   1.27       chs #endif
   2978    1.2      haad 		}
   2979    1.1      haad 	}
   2980    1.1      haad 	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
   2981    1.1      haad 
   2982    1.2      haad 	/* Subtract unused cookies */
   2983    1.2      haad 	if (ncookies != NULL)
   2984    1.2      haad 		*ncookies -= ncooks;
   2985    1.2      haad 
   2986   1.27       chs 	if (!user && uio->uio_iovcnt == 1) {
   2987    1.1      haad 		iovp->iov_base += outcount;
   2988    1.1      haad 		iovp->iov_len -= outcount;
   2989    1.1      haad 		uio->uio_resid -= outcount;
   2990   1.27       chs 	} else if (error = uiomove(outbuf, (size_t)outcount, UIO_READ, uio)) {
   2991    1.1      haad 		/*
   2992    1.1      haad 		 * Reset the pointer.
   2993    1.1      haad 		 */
   2994    1.1      haad 		offset = uio->uio_loffset;
   2995    1.1      haad 	}
   2996    1.1      haad 
   2997    1.1      haad update:
   2998    1.1      haad 	zap_cursor_fini(&zc);
   2999   1.27       chs 	if (user || uio->uio_iovcnt != 1)
   3000    1.1      haad 		kmem_free(outbuf, bufsize);
   3001    1.1      haad 
   3002    1.1      haad 	if (error == ENOENT)
   3003    1.1      haad 		error = 0;
   3004    1.1      haad 
   3005    1.1      haad 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
   3006    1.1      haad 
   3007    1.1      haad 	uio->uio_loffset = offset;
   3008    1.1      haad 	ZFS_EXIT(zfsvfs);
   3009    1.2      haad 	if (error != 0 && cookies != NULL) {
   3010   1.27       chs #ifdef __FreeBSD__
   3011   1.27       chs 		free(*cookies, M_TEMP);
   3012   1.27       chs #endif
   3013   1.27       chs #ifdef __NetBSD__
   3014   1.27       chs 		kmem_free(*cookies, ncooks * sizeof(off_t));
   3015   1.27       chs #endif
   3016    1.2      haad 		*cookies = NULL;
   3017    1.2      haad 		*ncookies = 0;
   3018    1.2      haad 	}
   3019    1.1      haad 	return (error);
   3020    1.1      haad }
   3021    1.1      haad 
   3022    1.1      haad static int
   3023    1.1      haad zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
   3024    1.1      haad {
   3025    1.1      haad 	znode_t	*zp = VTOZ(vp);
   3026    1.1      haad 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   3027    1.1      haad 
   3028   1.27       chs 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
   3029   1.27       chs 		ZFS_ENTER(zfsvfs);
   3030   1.27       chs 		ZFS_VERIFY_ZP(zp);
   3031    1.2      haad 
   3032   1.27       chs #ifdef __NetBSD__
   3033   1.27       chs 		if (!zp->z_unlinked)
   3034   1.27       chs #endif
   3035   1.27       chs 		zil_commit(zfsvfs->z_log, zp->z_id);
   3036   1.27       chs 		ZFS_EXIT(zfsvfs);
   3037    1.2      haad 	}
   3038    1.1      haad 	return (0);
   3039    1.1      haad }
   3040    1.1      haad 
   3041    1.1      haad 
   3042    1.1      haad /*
   3043    1.1      haad  * Get the requested file attributes and place them in the provided
   3044    1.1      haad  * vattr structure.
   3045    1.1      haad  *
   3046    1.1      haad  *	IN:	vp	- vnode of file.
   3047    1.1      haad  *		vap	- va_mask identifies requested attributes.
   3048    1.1      haad  *			  If AT_XVATTR set, then optional attrs are requested
   3049    1.1      haad  *		flags	- ATTR_NOACLCHECK (CIFS server context)
   3050    1.1      haad  *		cr	- credentials of caller.
   3051    1.1      haad  *		ct	- caller context
   3052    1.1      haad  *
   3053    1.1      haad  *	OUT:	vap	- attribute values.
   3054    1.1      haad  *
   3055   1.27       chs  *	RETURN:	0 (always succeeds).
   3056    1.1      haad  */
   3057    1.1      haad /* ARGSUSED */
   3058    1.1      haad static int
   3059    1.1      haad zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   3060    1.1      haad     caller_context_t *ct)
   3061    1.1      haad {
   3062    1.1      haad 	znode_t *zp = VTOZ(vp);
   3063    1.1      haad 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   3064    1.1      haad 	int	error = 0;
   3065    1.2      haad 	uint32_t blksize;
   3066    1.2      haad 	u_longlong_t nblocks;
   3067    1.1      haad 	uint64_t links;
   3068   1.27       chs 	uint64_t mtime[2], ctime[2], crtime[2], rdev;
   3069    1.1      haad 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
   3070    1.1      haad 	xoptattr_t *xoap = NULL;
   3071    1.1      haad 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
   3072   1.27       chs 	sa_bulk_attr_t bulk[4];
   3073   1.27       chs 	int count = 0;
   3074    1.1      haad 
   3075    1.1      haad 	ZFS_ENTER(zfsvfs);
   3076    1.1      haad 	ZFS_VERIFY_ZP(zp);
   3077   1.27       chs 
   3078   1.27       chs 	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
   3079   1.27       chs 
   3080   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
   3081   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
   3082   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
   3083   1.27       chs 	if (vp->v_type == VBLK || vp->v_type == VCHR)
   3084   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
   3085   1.27       chs 		    &rdev, 8);
   3086   1.27       chs 
   3087   1.27       chs 	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
   3088   1.27       chs 		ZFS_EXIT(zfsvfs);
   3089   1.27       chs 		return (error);
   3090   1.27       chs 	}
   3091    1.1      haad 
   3092    1.1      haad 	/*
   3093    1.1      haad 	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
   3094    1.1      haad 	 * Also, if we are the owner don't bother, since owner should
   3095    1.1      haad 	 * always be allowed to read basic attributes of file.
   3096    1.1      haad 	 */
   3097   1.27       chs 	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
   3098   1.27       chs 	    (vap->va_uid != crgetuid(cr))) {
   3099    1.1      haad 		if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
   3100    1.1      haad 		    skipaclchk, cr)) {
   3101    1.1      haad 			ZFS_EXIT(zfsvfs);
   3102    1.1      haad 			return (error);
   3103    1.1      haad 		}
   3104    1.1      haad 	}
   3105    1.1      haad 
   3106    1.1      haad 	/*
   3107    1.1      haad 	 * Return all attributes.  It's cheaper to provide the answer
   3108    1.1      haad 	 * than to determine whether we were asked the question.
   3109    1.1      haad 	 */
   3110   1.27       chs 
   3111   1.27       chs 	vap->va_type = IFTOVT(zp->z_mode);
   3112   1.27       chs 	vap->va_mode = zp->z_mode & ~S_IFMT;
   3113   1.27       chs #ifdef illumos
   3114   1.27       chs 	vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
   3115   1.27       chs #endif
   3116   1.27       chs #ifdef __FreeBSD__
   3117   1.27       chs 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
   3118   1.44   hannken 	vap->va_nodeid = zp->z_id;
   3119   1.27       chs #endif
   3120   1.27       chs #ifdef __NetBSD__
   3121   1.98      yamt 	/*
   3122   1.98      yamt 	 * note: f_fsid is a signed long.
   3123   1.98      yamt 	 * we don't want sign extension here.
   3124   1.98      yamt 	 */
   3125   1.98      yamt 	vap->va_fsid = (uint32_t)vp->v_mount->mnt_stat.f_fsid;
   3126   1.44   hannken 	vap->va_nodeid = zp->z_id;
   3127   1.44   hannken 	/*
   3128   1.44   hannken 	 * If we are a snapshot mounted under .zfs, return
   3129   1.44   hannken 	 * the object id of the snapshot to make getcwd happy.
   3130   1.44   hannken 	 */
   3131   1.44   hannken 	if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
   3132   1.44   hannken 		vnode_t *cvp = vp->v_mount->mnt_vnodecovered;
   3133   1.44   hannken 
   3134   1.44   hannken 		if (cvp && zfsctl_is_node(cvp))
   3135   1.44   hannken 			vap->va_nodeid = dmu_objset_id(zfsvfs->z_os);
   3136   1.44   hannken 	}
   3137   1.27       chs #endif
   3138    1.1      haad 	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
   3139   1.27       chs 		links = zp->z_links + 1;
   3140    1.1      haad 	else
   3141   1.27       chs 		links = zp->z_links;
   3142   1.75    simonb 	/* XXX NetBSD: use LINK_MAX when that value matches 32-bit nlink_t */
   3143   1.75    simonb 	vap->va_nlink = MIN(links, UINT32_MAX);	/* nlink_t limit! */
   3144   1.27       chs 	vap->va_size = zp->z_size;
   3145   1.27       chs #ifdef illumos
   3146   1.27       chs 	vap->va_rdev = vp->v_rdev;
   3147   1.27       chs #else
   3148   1.27       chs 	if (vp->v_type == VBLK || vp->v_type == VCHR)
   3149   1.27       chs 		vap->va_rdev = zfs_cmpldev(rdev);
   3150   1.27       chs #endif
   3151    1.1      haad 	vap->va_seq = zp->z_seq;
   3152    1.2      haad 	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
   3153   1.27       chs      	vap->va_filerev = zp->z_seq;
   3154    1.1      haad 
   3155    1.1      haad 	/*
   3156    1.1      haad 	 * Add in any requested optional attributes and the create time.
   3157    1.1      haad 	 * Also set the corresponding bits in the returned attribute bitmap.
   3158    1.1      haad 	 */
   3159    1.1      haad 	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
   3160    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
   3161    1.1      haad 			xoap->xoa_archive =
   3162   1.27       chs 			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
   3163    1.1      haad 			XVA_SET_RTN(xvap, XAT_ARCHIVE);
   3164    1.1      haad 		}
   3165    1.1      haad 
   3166    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
   3167    1.1      haad 			xoap->xoa_readonly =
   3168   1.27       chs 			    ((zp->z_pflags & ZFS_READONLY) != 0);
   3169    1.1      haad 			XVA_SET_RTN(xvap, XAT_READONLY);
   3170    1.1      haad 		}
   3171    1.1      haad 
   3172    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
   3173    1.1      haad 			xoap->xoa_system =
   3174   1.27       chs 			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
   3175    1.1      haad 			XVA_SET_RTN(xvap, XAT_SYSTEM);
   3176    1.1      haad 		}
   3177    1.1      haad 
   3178    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
   3179    1.1      haad 			xoap->xoa_hidden =
   3180   1.27       chs 			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
   3181    1.1      haad 			XVA_SET_RTN(xvap, XAT_HIDDEN);
   3182    1.1      haad 		}
   3183    1.1      haad 
   3184    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
   3185    1.1      haad 			xoap->xoa_nounlink =
   3186   1.27       chs 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
   3187    1.1      haad 			XVA_SET_RTN(xvap, XAT_NOUNLINK);
   3188    1.1      haad 		}
   3189    1.1      haad 
   3190    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
   3191    1.1      haad 			xoap->xoa_immutable =
   3192   1.27       chs 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
   3193    1.1      haad 			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
   3194    1.1      haad 		}
   3195    1.1      haad 
   3196    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
   3197    1.1      haad 			xoap->xoa_appendonly =
   3198   1.27       chs 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
   3199    1.1      haad 			XVA_SET_RTN(xvap, XAT_APPENDONLY);
   3200    1.1      haad 		}
   3201    1.1      haad 
   3202    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
   3203    1.1      haad 			xoap->xoa_nodump =
   3204   1.27       chs 			    ((zp->z_pflags & ZFS_NODUMP) != 0);
   3205    1.1      haad 			XVA_SET_RTN(xvap, XAT_NODUMP);
   3206    1.1      haad 		}
   3207    1.1      haad 
   3208    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
   3209    1.1      haad 			xoap->xoa_opaque =
   3210   1.27       chs 			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
   3211    1.1      haad 			XVA_SET_RTN(xvap, XAT_OPAQUE);
   3212    1.1      haad 		}
   3213    1.1      haad 
   3214    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
   3215    1.1      haad 			xoap->xoa_av_quarantined =
   3216   1.27       chs 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
   3217    1.1      haad 			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
   3218    1.1      haad 		}
   3219    1.1      haad 
   3220    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
   3221    1.1      haad 			xoap->xoa_av_modified =
   3222   1.27       chs 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
   3223    1.1      haad 			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
   3224    1.1      haad 		}
   3225    1.1      haad 
   3226    1.1      haad 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
   3227   1.27       chs 		    vp->v_type == VREG) {
   3228   1.27       chs 			zfs_sa_get_scanstamp(zp, xvap);
   3229   1.27       chs 		}
   3230    1.1      haad 
   3231   1.27       chs 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
   3232   1.27       chs 			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
   3233   1.27       chs 			XVA_SET_RTN(xvap, XAT_REPARSE);
   3234   1.27       chs 		}
   3235   1.27       chs 		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
   3236   1.27       chs 			xoap->xoa_generation = zp->z_gen;
   3237   1.27       chs 			XVA_SET_RTN(xvap, XAT_GEN);
   3238    1.1      haad 		}
   3239    1.1      haad 
   3240   1.27       chs 		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
   3241   1.27       chs 			xoap->xoa_offline =
   3242   1.27       chs 			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
   3243   1.27       chs 			XVA_SET_RTN(xvap, XAT_OFFLINE);
   3244    1.1      haad 		}
   3245    1.4      haad 
   3246   1.27       chs 		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
   3247   1.27       chs 			xoap->xoa_sparse =
   3248   1.27       chs 			    ((zp->z_pflags & ZFS_SPARSE) != 0);
   3249   1.27       chs 			XVA_SET_RTN(xvap, XAT_SPARSE);
   3250    1.4      haad 		}
   3251    1.1      haad 	}
   3252    1.1      haad 
   3253   1.27       chs 	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
   3254   1.27       chs 	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
   3255   1.27       chs 	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
   3256   1.27       chs 	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
   3257    1.1      haad 
   3258    1.1      haad 
   3259   1.27       chs 	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
   3260    1.2      haad 	vap->va_blksize = blksize;
   3261    1.2      haad 	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
   3262    1.1      haad 
   3263    1.1      haad 	if (zp->z_blksz == 0) {
   3264    1.1      haad 		/*
   3265    1.1      haad 		 * Block size hasn't been set; suggest maximal I/O transfers.
   3266    1.1      haad 		 */
   3267    1.1      haad 		vap->va_blksize = zfsvfs->z_max_blksz;
   3268    1.1      haad 	}
   3269    1.1      haad 
   3270    1.1      haad 	ZFS_EXIT(zfsvfs);
   3271    1.1      haad 	return (0);
   3272    1.1      haad }
   3273    1.1      haad 
   3274    1.1      haad /*
   3275    1.1      haad  * Set the file attributes to the values contained in the
   3276    1.1      haad  * vattr structure.
   3277    1.1      haad  *
   3278    1.1      haad  *	IN:	vp	- vnode of file to be modified.
   3279    1.1      haad  *		vap	- new attribute values.
   3280    1.1      haad  *			  If AT_XVATTR set, then optional attrs are being set
   3281    1.1      haad  *		flags	- ATTR_UTIME set if non-default time values provided.
   3282    1.1      haad  *			- ATTR_NOACLCHECK (CIFS context only).
   3283    1.1      haad  *		cr	- credentials of caller.
   3284    1.1      haad  *		ct	- caller context
   3285    1.1      haad  *
   3286   1.27       chs  *	RETURN:	0 on success, error code on failure.
   3287   1.27       chs  *
   3288    1.1      haad  * Timestamps:
   3289    1.1      haad  *	vp - ctime updated, mtime updated if size changed.
   3290    1.1      haad  */
   3291    1.1      haad /* ARGSUSED */
   3292    1.1      haad static int
   3293    1.1      haad zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
   3294   1.27       chs     caller_context_t *ct)
   3295    1.1      haad {
   3296    1.1      haad 	znode_t		*zp = VTOZ(vp);
   3297    1.1      haad 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   3298    1.1      haad 	zilog_t		*zilog;
   3299    1.1      haad 	dmu_tx_t	*tx;
   3300    1.1      haad 	vattr_t		oldva;
   3301    1.4      haad 	xvattr_t	tmpxvattr;
   3302    1.1      haad 	uint_t		mask = vap->va_mask;
   3303   1.27       chs 	uint_t		saved_mask = 0;
   3304   1.27       chs 	uint64_t	saved_mode;
   3305    1.1      haad 	int		trim_mask = 0;
   3306    1.1      haad 	uint64_t	new_mode;
   3307    1.4      haad 	uint64_t	new_uid, new_gid;
   3308   1.27       chs 	uint64_t	xattr_obj;
   3309   1.27       chs 	uint64_t	mtime[2], ctime[2];
   3310    1.1      haad 	znode_t		*attrzp;
   3311    1.1      haad 	int		need_policy = FALSE;
   3312   1.27       chs 	int		err, err2;
   3313    1.1      haad 	zfs_fuid_info_t *fuidp = NULL;
   3314    1.1      haad 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
   3315    1.1      haad 	xoptattr_t	*xoap;
   3316   1.27       chs 	zfs_acl_t	*aclp;
   3317    1.1      haad 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
   3318   1.27       chs 	boolean_t	fuid_dirtied = B_FALSE;
   3319   1.27       chs 	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
   3320   1.27       chs 	int		count = 0, xattr_count = 0;
   3321    1.2      haad 
   3322    1.1      haad 	if (mask == 0)
   3323    1.1      haad 		return (0);
   3324    1.1      haad 
   3325    1.1      haad 	if (mask & AT_NOSET)
   3326   1.27       chs 		return (SET_ERROR(EINVAL));
   3327    1.1      haad 
   3328    1.1      haad 	ZFS_ENTER(zfsvfs);
   3329    1.1      haad 	ZFS_VERIFY_ZP(zp);
   3330    1.1      haad 
   3331    1.1      haad 	zilog = zfsvfs->z_log;
   3332    1.1      haad 
   3333    1.1      haad 	/*
   3334    1.1      haad 	 * Make sure that if we have ephemeral uid/gid or xvattr specified
   3335    1.1      haad 	 * Make sure that if an ephemeral uid/gid or xvattr is specified,
   3336    1.1      haad 	 * the file system is at the proper version level.
   3337    1.1      haad 
   3338    1.1      haad 	if (zfsvfs->z_use_fuids == B_FALSE &&
   3339    1.1      haad 	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
   3340    1.1      haad 	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
   3341    1.1      haad 	    (mask & AT_XVATTR))) {
   3342    1.1      haad 		ZFS_EXIT(zfsvfs);
   3343   1.27       chs 		return (SET_ERROR(EINVAL));
   3344    1.1      haad 	}
   3345    1.1      haad 
   3346    1.1      haad 	if (mask & AT_SIZE && vp->v_type == VDIR) {
   3347    1.1      haad 		ZFS_EXIT(zfsvfs);
   3348   1.27       chs 		return (SET_ERROR(EISDIR));
   3349    1.1      haad 	}
   3350    1.1      haad 
   3351    1.1      haad 	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
   3352    1.1      haad 		ZFS_EXIT(zfsvfs);
   3353   1.27       chs 		return (SET_ERROR(EINVAL));
   3354    1.1      haad 	}
   3355    1.1      haad 
   3356    1.1      haad 	/*
   3357    1.1      haad 	 * If this is an xvattr_t, then get a pointer to the structure of
   3358    1.1      haad 	 * optional attributes.  If this is NULL, then we have a vattr_t.
   3359    1.1      haad 	 */
   3360    1.1      haad 	xoap = xva_getxoptattr(xvap);
   3361    1.1      haad 
   3362    1.4      haad 	xva_init(&tmpxvattr);
   3363    1.4      haad 
   3364    1.1      haad 	/*
   3365    1.1      haad 	 * Immutable files can only alter immutable bit and atime
   3366    1.1      haad 	 */
   3367   1.27       chs 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
   3368    1.1      haad 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
   3369    1.1      haad 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
   3370    1.1      haad 		ZFS_EXIT(zfsvfs);
   3371   1.27       chs 		return (SET_ERROR(EPERM));
   3372    1.1      haad 	}
   3373    1.1      haad 
   3374   1.27       chs 	if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
   3375    1.1      haad 		ZFS_EXIT(zfsvfs);
   3376   1.27       chs 		return (SET_ERROR(EPERM));
   3377    1.1      haad 	}
   3378    1.1      haad 
   3379    1.1      haad 	/*
   3380    1.1      haad 	 * Verify timestamps don't overflow 32 bits.
   3381    1.1      haad 	 * ZFS can handle large timestamps, but 32bit syscalls can't
   3382    1.1      haad 	 * handle times greater than 2039.  This check should be removed
   3383    1.1      haad 	 * once large timestamps are fully supported.
   3384    1.1      haad 	 */
   3385    1.1      haad 	if (mask & (AT_ATIME | AT_MTIME)) {
   3386    1.1      haad 		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
   3387    1.1      haad 		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
   3388    1.1      haad 			ZFS_EXIT(zfsvfs);
   3389   1.27       chs 			return (SET_ERROR(EOVERFLOW));
   3390    1.1      haad 		}
   3391    1.1      haad 	}
   3392   1.27       chs 	if (xoap && (mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
   3393   1.27       chs 	    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
   3394   1.27       chs 		ZFS_EXIT(zfsvfs);
   3395   1.27       chs 		return (SET_ERROR(EOVERFLOW));
   3396   1.27       chs 	}
   3397    1.1      haad 
   3398    1.1      haad 	attrzp = NULL;
   3399   1.27       chs 	aclp = NULL;
   3400    1.1      haad 
   3401    1.4      haad 	/* Can this be moved to before the top label? */
   3402    1.1      haad 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
   3403    1.1      haad 		ZFS_EXIT(zfsvfs);
   3404   1.27       chs 		return (SET_ERROR(EROFS));
   3405    1.1      haad 	}
   3406    1.1      haad 
   3407    1.1      haad 	/*
   3408    1.1      haad 	 * First validate permissions
   3409    1.1      haad 	 */
   3410   1.27       chs 
   3411    1.1      haad 	if (mask & AT_SIZE) {
   3412    1.1      haad 		/*
   3413    1.1      haad 		 * XXX - Note, we are not providing any open
   3414    1.1      haad 		 * mode flags here (like FNDELAY), so we may
   3415    1.1      haad 		 * block if there are locks present... this
   3416    1.1      haad 		 * should be addressed in openat().
   3417    1.1      haad 		 */
   3418    1.1      haad 		/* XXX - would it be OK to generate a log record here? */
   3419    1.1      haad 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
   3420    1.1      haad 		if (err) {
   3421    1.1      haad 			ZFS_EXIT(zfsvfs);
   3422    1.1      haad 			return (err);
   3423    1.1      haad 		}
   3424    1.1      haad 	}
   3425   1.27       chs 
   3426    1.1      haad 	if (mask & (AT_ATIME|AT_MTIME) ||
   3427    1.1      haad 	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
   3428    1.1      haad 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
   3429    1.1      haad 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
   3430   1.27       chs 	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
   3431   1.27       chs 	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
   3432    1.1      haad 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
   3433   1.27       chs 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
   3434    1.1      haad 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
   3435    1.1      haad 		    skipaclchk, cr);
   3436   1.27       chs 	}
   3437    1.1      haad 
   3438    1.1      haad 	if (mask & (AT_UID|AT_GID)) {
   3439    1.1      haad 		int	idmask = (mask & (AT_UID|AT_GID));
   3440    1.1      haad 		int	take_owner;
   3441    1.1      haad 		int	take_group;
   3442    1.1      haad 
   3443    1.1      haad 		/*
   3444    1.1      haad 		 * NOTE: even if a new mode is being set,
   3445    1.1      haad 		 * we may clear S_ISUID/S_ISGID bits.
   3446    1.1      haad 		 */
   3447    1.1      haad 
   3448    1.1      haad 		if (!(mask & AT_MODE))
   3449   1.27       chs 			vap->va_mode = zp->z_mode;
   3450    1.1      haad 
   3451    1.1      haad 		/*
   3452    1.1      haad 		 * Take ownership or chgrp to group we are a member of
   3453    1.1      haad 		 */
   3454    1.1      haad 
   3455    1.1      haad 		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
   3456    1.1      haad 		take_group = (mask & AT_GID) &&
   3457    1.1      haad 		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
   3458    1.1      haad 
   3459    1.1      haad 		/*
   3460    1.1      haad 		 * If both AT_UID and AT_GID are set then take_owner and
   3461    1.1      haad 		 * take_group must both be set in order to allow taking
   3462    1.1      haad 		 * ownership.
   3463    1.1      haad 		 *
   3464    1.1      haad 		 * Otherwise, send the check through secpolicy_vnode_setattr()
   3465    1.1      haad 		 *
   3466    1.1      haad 		 */
   3467   1.27       chs 
   3468    1.1      haad 		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
   3469    1.1      haad 		    ((idmask == AT_UID) && take_owner) ||
   3470    1.1      haad 		    ((idmask == AT_GID) && take_group)) {
   3471    1.1      haad 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
   3472    1.1      haad 			    skipaclchk, cr) == 0) {
   3473    1.1      haad 				/*
   3474    1.1      haad 				 * Remove setuid/setgid for non-privileged users
   3475    1.1      haad 				 */
   3476   1.27       chs 				secpolicy_setid_clear(vap, vp, cr);
   3477    1.1      haad 				trim_mask = (mask & (AT_UID|AT_GID));
   3478    1.1      haad 			} else {
   3479    1.1      haad 				need_policy =  TRUE;
   3480    1.1      haad 			}
   3481    1.1      haad 		} else {
   3482    1.1      haad 			need_policy =  TRUE;
   3483    1.1      haad 		}
   3484    1.1      haad 	}
   3485    1.1      haad 
   3486   1.27       chs 	oldva.va_mode = zp->z_mode;
   3487    1.1      haad 	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
   3488    1.1      haad 	if (mask & AT_XVATTR) {
   3489    1.4      haad 		/*
   3490    1.4      haad 		 * Update xvattr mask to include only those attributes
   3491    1.4      haad 		 * that are actually changing.
   3492    1.4      haad 		 *
   3493    1.4      haad 		 * the bits will be restored prior to actually setting
   3494    1.4      haad 		 * the attributes so the caller thinks they were set.
   3495    1.4      haad 		 */
   3496    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
   3497    1.4      haad 			if (xoap->xoa_appendonly !=
   3498   1.27       chs 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
   3499    1.4      haad 				need_policy = TRUE;
   3500    1.4      haad 			} else {
   3501    1.4      haad 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
   3502    1.4      haad 				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
   3503    1.4      haad 			}
   3504    1.4      haad 		}
   3505    1.4      haad 
   3506    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
   3507    1.4      haad 			if (xoap->xoa_nounlink !=
   3508   1.27       chs 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
   3509    1.4      haad 				need_policy = TRUE;
   3510    1.4      haad 			} else {
   3511    1.4      haad 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
   3512    1.4      haad 				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
   3513    1.4      haad 			}
   3514    1.4      haad 		}
   3515    1.4      haad 
   3516    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
   3517    1.4      haad 			if (xoap->xoa_immutable !=
   3518   1.27       chs 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
   3519    1.4      haad 				need_policy = TRUE;
   3520    1.4      haad 			} else {
   3521    1.4      haad 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
   3522    1.4      haad 				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
   3523    1.4      haad 			}
   3524    1.4      haad 		}
   3525    1.4      haad 
   3526    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
   3527    1.4      haad 			if (xoap->xoa_nodump !=
   3528   1.27       chs 			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
   3529   1.77    simonb #if 0
   3530   1.77    simonb 				/*
   3531   1.77    simonb                                  * XXXSB - zfs_netbsd_setattr()
   3532   1.77    simonb                                  * has already checked if this
   3533   1.77    simonb                                  * request is authorised, and our
   3534   1.77    simonb                                  * secpolicy_xvattr() doesn't check
   3535   1.77    simonb                                  * kauth chflags.  Fix this when we
   3536   1.77    simonb                                  * migrate to openzfs.
   3537   1.77    simonb 				 */
   3538    1.4      haad 				need_policy = TRUE;
   3539   1.77    simonb #endif
   3540    1.4      haad 			} else {
   3541    1.4      haad 				XVA_CLR_REQ(xvap, XAT_NODUMP);
   3542    1.4      haad 				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
   3543    1.4      haad 			}
   3544    1.4      haad 		}
   3545    1.4      haad 
   3546    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
   3547    1.4      haad 			if (xoap->xoa_av_modified !=
   3548   1.27       chs 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
   3549    1.4      haad 				need_policy = TRUE;
   3550    1.4      haad 			} else {
   3551    1.4      haad 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
   3552    1.4      haad 				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
   3553    1.4      haad 			}
   3554    1.4      haad 		}
   3555    1.4      haad 
   3556    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
   3557    1.4      haad 			if ((vp->v_type != VREG &&
   3558    1.4      haad 			    xoap->xoa_av_quarantined) ||
   3559    1.4      haad 			    xoap->xoa_av_quarantined !=
   3560   1.27       chs 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
   3561    1.4      haad 				need_policy = TRUE;
   3562    1.4      haad 			} else {
   3563    1.4      haad 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
   3564    1.4      haad 				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
   3565    1.4      haad 			}
   3566    1.4      haad 		}
   3567    1.4      haad 
   3568    1.4      haad 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
   3569    1.4      haad 			ZFS_EXIT(zfsvfs);
   3570   1.27       chs 			return (SET_ERROR(EPERM));
   3571    1.4      haad 		}
   3572    1.4      haad 
   3573    1.4      haad 		if (need_policy == FALSE &&
   3574    1.4      haad 		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
   3575    1.4      haad 		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
   3576    1.1      haad 			need_policy = TRUE;
   3577    1.1      haad 		}
   3578    1.1      haad 	}
   3579    1.4      haad 
   3580    1.1      haad 	if (mask & AT_MODE) {
   3581    1.1      haad 		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
   3582    1.1      haad 			err = secpolicy_setid_setsticky_clear(vp, vap,
   3583    1.1      haad 			    &oldva, cr);
   3584    1.1      haad 			if (err) {
   3585    1.1      haad 				ZFS_EXIT(zfsvfs);
   3586    1.1      haad 				return (err);
   3587    1.1      haad 			}
   3588    1.1      haad 			trim_mask |= AT_MODE;
   3589    1.1      haad 		} else {
   3590    1.1      haad 			need_policy = TRUE;
   3591    1.1      haad 		}
   3592    1.1      haad 	}
   3593    1.1      haad 
   3594    1.1      haad 	if (need_policy) {
   3595    1.1      haad 		/*
   3596    1.1      haad 		 * If trim_mask is set then take ownership
   3597    1.1      haad 		 * has been granted or write_acl is present and user
   3598    1.1      haad 		 * has the ability to modify mode.  In that case remove
   3599    1.1      haad 		 * UID|GID and/or MODE from mask so that
   3600    1.1      haad 		 * secpolicy_vnode_setattr() doesn't revoke it.
   3601    1.1      haad 		 */
   3602    1.1      haad 
   3603    1.1      haad 		if (trim_mask) {
   3604    1.1      haad 			saved_mask = vap->va_mask;
   3605    1.1      haad 			vap->va_mask &= ~trim_mask;
   3606   1.27       chs 			if (trim_mask & AT_MODE) {
   3607   1.27       chs 				/*
   3608   1.27       chs 				 * Save the mode, as secpolicy_vnode_setattr()
   3609   1.27       chs 				 * will overwrite it with oldva.va_mode.
   3610   1.27       chs 				 */
   3611   1.27       chs 				saved_mode = vap->va_mode;
   3612   1.27       chs 			}
   3613    1.1      haad 		}
   3614    1.1      haad 		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
   3615    1.1      haad 		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
   3616    1.1      haad 		if (err) {
   3617    1.1      haad 			ZFS_EXIT(zfsvfs);
   3618    1.1      haad 			return (err);
   3619    1.1      haad 		}
   3620    1.1      haad 
   3621   1.27       chs 		if (trim_mask) {
   3622    1.1      haad 			vap->va_mask |= saved_mask;
   3623   1.27       chs 			if (trim_mask & AT_MODE) {
   3624   1.27       chs 				/*
   3625   1.27       chs 				 * Recover the mode after
   3626   1.27       chs 				 * secpolicy_vnode_setattr().
   3627   1.27       chs 				 */
   3628   1.27       chs 				vap->va_mode = saved_mode;
   3629   1.27       chs 			}
   3630   1.27       chs 		}
   3631    1.1      haad 	}
   3632   1.27       chs 
   3633    1.1      haad 	/*
   3634    1.1      haad 	 * secpolicy_vnode_setattr, or take ownership may have
   3635    1.1      haad 	 * changed va_mask
   3636    1.1      haad 	 */
   3637    1.1      haad 	mask = vap->va_mask;
   3638    1.1      haad 
   3639   1.27       chs 	if ((mask & (AT_UID | AT_GID))) {
   3640   1.27       chs 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
   3641   1.27       chs 		    &xattr_obj, sizeof (xattr_obj));
   3642   1.27       chs 
   3643   1.27       chs 		if (err == 0 && xattr_obj) {
   3644   1.27       chs 			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
   3645   1.27       chs 			if (err == 0) {
   3646   1.27       chs 				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
   3647   1.27       chs 				if (err != 0)
   3648   1.27       chs 					vrele(ZTOV(attrzp));
   3649   1.27       chs 			}
   3650   1.27       chs 			if (err)
   3651   1.27       chs 				goto out2;
   3652   1.27       chs 		}
   3653   1.27       chs 		if (mask & AT_UID) {
   3654   1.27       chs 			new_uid = zfs_fuid_create(zfsvfs,
   3655   1.27       chs 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
   3656   1.27       chs 			if (new_uid != zp->z_uid &&
   3657   1.27       chs 			    zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
   3658   1.27       chs 				if (attrzp)
   3659   1.27       chs 					vput(ZTOV(attrzp));
   3660   1.27       chs 				err = SET_ERROR(EDQUOT);
   3661   1.27       chs 				goto out2;
   3662   1.27       chs 			}
   3663   1.27       chs 		}
   3664   1.27       chs 
   3665   1.27       chs 		if (mask & AT_GID) {
   3666   1.27       chs 			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
   3667   1.27       chs 			    cr, ZFS_GROUP, &fuidp);
   3668   1.27       chs 			if (new_gid != zp->z_gid &&
   3669   1.27       chs 			    zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
   3670   1.27       chs 				if (attrzp)
   3671   1.27       chs 					vput(ZTOV(attrzp));
   3672   1.27       chs 				err = SET_ERROR(EDQUOT);
   3673   1.27       chs 				goto out2;
   3674   1.27       chs 			}
   3675   1.27       chs 		}
   3676   1.27       chs 	}
   3677    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   3678    1.1      haad 
   3679    1.1      haad 	if (mask & AT_MODE) {
   3680   1.27       chs 		uint64_t pmode = zp->z_mode;
   3681   1.27       chs 		uint64_t acl_obj;
   3682   1.27       chs 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
   3683    1.1      haad 
   3684   1.27       chs 		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
   3685   1.27       chs 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
   3686   1.27       chs 			err = SET_ERROR(EPERM);
   3687   1.27       chs 			goto out;
   3688   1.27       chs 		}
   3689    1.1      haad 
   3690    1.4      haad 		if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
   3691    1.4      haad 			goto out;
   3692   1.27       chs 
   3693   1.27       chs 		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
   3694   1.27       chs 			/*
   3695   1.27       chs 			 * Are we upgrading ACL from old V0 format
   3696   1.27       chs 			 * to V1 format?
   3697   1.27       chs 			 */
   3698   1.27       chs 			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
   3699   1.27       chs 			    zfs_znode_acl_version(zp) ==
   3700    1.1      haad 			    ZFS_ACL_VERSION_INITIAL) {
   3701   1.27       chs 				dmu_tx_hold_free(tx, acl_obj, 0,
   3702    1.1      haad 				    DMU_OBJECT_END);
   3703    1.1      haad 				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
   3704    1.1      haad 				    0, aclp->z_acl_bytes);
   3705    1.1      haad 			} else {
   3706   1.27       chs 				dmu_tx_hold_write(tx, acl_obj, 0,
   3707    1.1      haad 				    aclp->z_acl_bytes);
   3708    1.1      haad 			}
   3709   1.27       chs 		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
   3710    1.1      haad 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
   3711    1.1      haad 			    0, aclp->z_acl_bytes);
   3712    1.1      haad 		}
   3713   1.27       chs 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
   3714   1.27       chs 	} else {
   3715   1.27       chs 		if ((mask & AT_XVATTR) &&
   3716   1.27       chs 		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
   3717   1.27       chs 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
   3718   1.27       chs 		else
   3719   1.27       chs 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   3720   1.27       chs 	}
   3721   1.27       chs 
   3722   1.27       chs 	if (attrzp) {
   3723   1.27       chs 		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
   3724    1.1      haad 	}
   3725    1.1      haad 
   3726   1.27       chs 	fuid_dirtied = zfsvfs->z_fuid_dirty;
   3727   1.27       chs 	if (fuid_dirtied)
   3728   1.27       chs 		zfs_fuid_txhold(zfsvfs, tx);
   3729    1.4      haad 
   3730   1.27       chs 	zfs_sa_upgrade_txholds(tx, zp);
   3731    1.1      haad 
   3732   1.27       chs 	err = dmu_tx_assign(tx, TXG_WAIT);
   3733   1.27       chs 	if (err)
   3734    1.4      haad 		goto out;
   3735    1.1      haad 
   3736   1.27       chs 	count = 0;
   3737    1.1      haad 	/*
   3738    1.1      haad 	 * Set each attribute requested.
   3739    1.1      haad 	 * We group settings according to the locks they need to acquire.
   3740    1.1      haad 	 *
   3741    1.1      haad 	 * Note: you cannot set ctime directly, although it will be
   3742    1.1      haad 	 * updated as a side-effect of calling this function.
   3743    1.1      haad 	 */
   3744    1.1      haad 
   3745   1.27       chs 	if (mask & (AT_UID|AT_GID|AT_MODE))
   3746   1.27       chs 		mutex_enter(&zp->z_acl_lock);
   3747   1.27       chs 
   3748   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
   3749   1.27       chs 	    &zp->z_pflags, sizeof (zp->z_pflags));
   3750   1.27       chs 
   3751   1.27       chs 	if (attrzp) {
   3752   1.27       chs 		if (mask & (AT_UID|AT_GID|AT_MODE))
   3753   1.27       chs 			mutex_enter(&attrzp->z_acl_lock);
   3754   1.27       chs 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
   3755   1.27       chs 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
   3756   1.27       chs 		    sizeof (attrzp->z_pflags));
   3757   1.27       chs 	}
   3758   1.27       chs 
   3759   1.27       chs 	if (mask & (AT_UID|AT_GID)) {
   3760   1.27       chs 
   3761   1.27       chs 		if (mask & AT_UID) {
   3762   1.27       chs 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
   3763   1.27       chs 			    &new_uid, sizeof (new_uid));
   3764   1.27       chs 			zp->z_uid = new_uid;
   3765   1.27       chs 			if (attrzp) {
   3766   1.27       chs 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
   3767   1.27       chs 				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
   3768   1.27       chs 				    sizeof (new_uid));
   3769   1.27       chs 				attrzp->z_uid = new_uid;
   3770   1.27       chs 			}
   3771   1.27       chs 		}
   3772   1.27       chs 
   3773   1.27       chs 		if (mask & AT_GID) {
   3774   1.27       chs 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
   3775   1.27       chs 			    NULL, &new_gid, sizeof (new_gid));
   3776   1.27       chs 			zp->z_gid = new_gid;
   3777   1.27       chs 			if (attrzp) {
   3778   1.27       chs 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
   3779   1.27       chs 				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
   3780   1.27       chs 				    sizeof (new_gid));
   3781   1.27       chs 				attrzp->z_gid = new_gid;
   3782   1.27       chs 			}
   3783   1.27       chs 		}
   3784   1.27       chs 		if (!(mask & AT_MODE)) {
   3785   1.27       chs 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
   3786   1.27       chs 			    NULL, &new_mode, sizeof (new_mode));
   3787   1.27       chs 			new_mode = zp->z_mode;
   3788   1.27       chs 		}
   3789   1.27       chs 		err = zfs_acl_chown_setattr(zp);
   3790   1.27       chs 		ASSERT(err == 0);
   3791   1.27       chs 		if (attrzp) {
   3792   1.27       chs 			err = zfs_acl_chown_setattr(attrzp);
   3793   1.27       chs 			ASSERT(err == 0);
   3794   1.27       chs 		}
   3795   1.27       chs 	}
   3796    1.1      haad 
   3797    1.1      haad 	if (mask & AT_MODE) {
   3798   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
   3799   1.27       chs 		    &new_mode, sizeof (new_mode));
   3800   1.27       chs 		zp->z_mode = new_mode;
   3801   1.27       chs 		ASSERT3U((uintptr_t)aclp, !=, 0);
   3802    1.4      haad 		err = zfs_aclset_common(zp, aclp, cr, tx);
   3803   1.27       chs 		ASSERT0(err);
   3804   1.27       chs 		if (zp->z_acl_cached)
   3805   1.27       chs 			zfs_acl_free(zp->z_acl_cached);
   3806    1.4      haad 		zp->z_acl_cached = aclp;
   3807    1.4      haad 		aclp = NULL;
   3808    1.1      haad 	}
   3809    1.1      haad 
   3810    1.1      haad 
   3811   1.27       chs 	if (mask & AT_ATIME) {
   3812   1.27       chs 		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
   3813   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
   3814   1.27       chs 		    &zp->z_atime, sizeof (zp->z_atime));
   3815    1.1      haad 	}
   3816    1.1      haad 
   3817   1.27       chs 	if (mask & AT_MTIME) {
   3818   1.27       chs 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
   3819   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
   3820   1.27       chs 		    mtime, sizeof (mtime));
   3821    1.1      haad 	}
   3822    1.1      haad 
   3823    1.1      haad 	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
   3824   1.27       chs 	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
   3825   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
   3826   1.27       chs 		    NULL, mtime, sizeof (mtime));
   3827   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
   3828   1.27       chs 		    &ctime, sizeof (ctime));
   3829   1.27       chs 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
   3830   1.27       chs 		    B_TRUE);
   3831   1.27       chs 	} else if (mask != 0) {
   3832   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
   3833   1.27       chs 		    &ctime, sizeof (ctime));
   3834   1.27       chs 		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
   3835   1.27       chs 		    B_TRUE);
   3836   1.27       chs 		if (attrzp) {
   3837   1.27       chs 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
   3838   1.27       chs 			    SA_ZPL_CTIME(zfsvfs), NULL,
   3839   1.27       chs 			    &ctime, sizeof (ctime));
   3840   1.27       chs 			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
   3841   1.27       chs 			    mtime, ctime, B_TRUE);
   3842   1.27       chs 		}
   3843   1.27       chs 	}
   3844    1.1      haad 	/*
   3845    1.1      haad 	 * Do this after setting timestamps to prevent timestamp
   3846    1.1      haad 	 * update from toggling bit
   3847    1.1      haad 	 */
   3848    1.1      haad 
   3849    1.1      haad 	if (xoap && (mask & AT_XVATTR)) {
   3850    1.4      haad 
   3851   1.27       chs 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
   3852   1.27       chs 			xoap->xoa_createtime = vap->va_birthtime;
   3853    1.4      haad 		/*
   3854    1.4      haad 		 * restore trimmed off masks
   3855    1.4      haad 		 * so that return masks can be set for caller.
   3856    1.4      haad 		 */
   3857    1.4      haad 
   3858    1.4      haad 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
   3859    1.4      haad 			XVA_SET_REQ(xvap, XAT_APPENDONLY);
   3860    1.4      haad 		}
   3861    1.4      haad 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
   3862    1.4      haad 			XVA_SET_REQ(xvap, XAT_NOUNLINK);
   3863    1.4      haad 		}
   3864    1.4      haad 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
   3865    1.4      haad 			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
   3866    1.4      haad 		}
   3867    1.4      haad 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
   3868    1.4      haad 			XVA_SET_REQ(xvap, XAT_NODUMP);
   3869    1.4      haad 		}
   3870    1.4      haad 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
   3871    1.4      haad 			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
   3872    1.4      haad 		}
   3873    1.4      haad 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
   3874    1.4      haad 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
   3875    1.4      haad 		}
   3876    1.4      haad 
   3877   1.27       chs 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
   3878    1.1      haad 			ASSERT(vp->v_type == VREG);
   3879    1.1      haad 
   3880   1.27       chs 		zfs_xvattr_set(zp, xvap, tx);
   3881    1.1      haad 	}
   3882    1.1      haad 
   3883    1.4      haad 	if (fuid_dirtied)
   3884    1.4      haad 		zfs_fuid_sync(zfsvfs, tx);
   3885    1.4      haad 
   3886    1.1      haad 	if (mask != 0)
   3887    1.1      haad 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
   3888    1.1      haad 
   3889   1.27       chs 	if (mask & (AT_UID|AT_GID|AT_MODE))
   3890   1.27       chs 		mutex_exit(&zp->z_acl_lock);
   3891    1.1      haad 
   3892   1.27       chs 	if (attrzp) {
   3893   1.27       chs 		if (mask & (AT_UID|AT_GID|AT_MODE))
   3894   1.27       chs 			mutex_exit(&attrzp->z_acl_lock);
   3895   1.27       chs 	}
   3896    1.4      haad out:
   3897   1.27       chs 	if (err == 0 && attrzp) {
   3898   1.27       chs 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
   3899   1.27       chs 		    xattr_count, tx);
   3900   1.27       chs 		ASSERT(err2 == 0);
   3901   1.27       chs 	}
   3902   1.27       chs 
   3903    1.1      haad 	if (attrzp)
   3904   1.27       chs 		vput(ZTOV(attrzp));
   3905    1.1      haad 
   3906    1.4      haad 	if (aclp)
   3907    1.4      haad 		zfs_acl_free(aclp);
   3908    1.4      haad 
   3909    1.4      haad 	if (fuidp) {
   3910    1.4      haad 		zfs_fuid_info_free(fuidp);
   3911    1.4      haad 		fuidp = NULL;
   3912    1.4      haad 	}
   3913    1.4      haad 
   3914   1.27       chs 	if (err) {
   3915    1.4      haad 		dmu_tx_abort(tx);
   3916   1.27       chs 	} else {
   3917   1.27       chs 		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
   3918    1.4      haad 		dmu_tx_commit(tx);
   3919   1.27       chs 	}
   3920    1.4      haad 
   3921   1.27       chs out2:
   3922   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   3923   1.27       chs 		zil_commit(zilog, 0);
   3924    1.1      haad 
   3925    1.1      haad 	ZFS_EXIT(zfsvfs);
   3926    1.1      haad 	return (err);
   3927    1.1      haad }
   3928    1.1      haad 
   3929    1.1      haad /*
   3930   1.27       chs  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
   3931   1.27       chs  * fail to acquire any lock in the path we will drop all held locks,
   3932   1.27       chs  * acquire the new lock in a blocking fashion, and then release it and
   3933   1.27       chs  * restart the rename.  This acquire/release step ensures that we do not
   3934   1.27       chs  * spin on a lock waiting for release.  On error release all vnode locks
   3935   1.27       chs  * and decrement references the way tmpfs_rename() would do.
   3936    1.1      haad  */
   3937   1.27       chs static int
   3938   1.27       chs zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
   3939   1.27       chs     struct vnode *tdvp, struct vnode **tvpp,
   3940   1.27       chs     const struct componentname *scnp, const struct componentname *tcnp)
   3941    1.1      haad {
   3942   1.27       chs 	zfsvfs_t	*zfsvfs;
   3943   1.27       chs 	struct vnode	*nvp, *svp, *tvp;
   3944   1.27       chs 	znode_t		*sdzp, *tdzp, *szp, *tzp;
   3945   1.37   hannken #ifdef __FreeBSD__
   3946   1.27       chs 	const char	*snm = scnp->cn_nameptr;
   3947   1.27       chs 	const char	*tnm = tcnp->cn_nameptr;
   3948   1.37   hannken #endif
   3949   1.37   hannken #ifdef __NetBSD__
   3950   1.37   hannken 	char *snm, *tnm;
   3951   1.37   hannken #endif
   3952   1.27       chs 	int error;
   3953   1.27       chs 
   3954   1.27       chs #ifdef __FreeBSD__
   3955   1.27       chs 	VOP_UNLOCK(tdvp, 0);
   3956   1.27       chs 	if (*tvpp != NULL && *tvpp != tdvp)
   3957   1.27       chs 		VOP_UNLOCK(*tvpp, 0);
   3958   1.27       chs #endif
   3959   1.27       chs 
   3960   1.27       chs relock:
   3961   1.27       chs 	error = vn_lock(sdvp, LK_EXCLUSIVE);
   3962   1.27       chs 	if (error)
   3963   1.27       chs 		goto out;
   3964   1.27       chs 	sdzp = VTOZ(sdvp);
   3965   1.27       chs 
   3966   1.27       chs #ifdef __NetBSD__
   3967   1.27       chs 	if (tdvp == sdvp) {
   3968   1.27       chs 	} else {
   3969   1.27       chs #endif
   3970   1.27       chs 	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
   3971   1.27       chs 	if (error != 0) {
   3972   1.27       chs 		VOP_UNLOCK(sdvp, 0);
   3973   1.27       chs 		if (error != EBUSY)
   3974   1.27       chs 			goto out;
   3975   1.27       chs 		error = vn_lock(tdvp, LK_EXCLUSIVE);
   3976   1.27       chs 		if (error)
   3977   1.27       chs 			goto out;
   3978   1.27       chs 		VOP_UNLOCK(tdvp, 0);
   3979   1.27       chs 		goto relock;
   3980   1.27       chs 	}
   3981   1.27       chs #ifdef __NetBSD__
   3982   1.27       chs 	} /* end if (tdvp == sdvp) */
   3983   1.27       chs #endif
   3984   1.27       chs 
   3985   1.27       chs 	tdzp = VTOZ(tdvp);
   3986   1.27       chs 
   3987   1.27       chs 	/*
   3988   1.27       chs 	 * Before using sdzp and tdzp we must ensure that they are live.
   3989   1.27       chs 	 * As a porting legacy from illumos we have two things to worry
   3990   1.27       chs 	 * about.  One is typical for FreeBSD and it is that the vnode is
   3991   1.27       chs 	 * not reclaimed (doomed).  The other is that the znode is live.
   3992   1.27       chs 	 * The current code can invalidate the znode without acquiring the
   3993   1.27       chs 	 * corresponding vnode lock if the object represented by the znode
   3994   1.27       chs 	 * and vnode is no longer valid after a rollback or receive operation.
   3995   1.27       chs 	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
   3996   1.27       chs 	 * that protects the znodes from the invalidation.
   3997   1.27       chs 	 */
   3998   1.27       chs 	zfsvfs = sdzp->z_zfsvfs;
   3999   1.27       chs 	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
   4000   1.27       chs 	ZFS_ENTER(zfsvfs);
   4001   1.27       chs 
   4002   1.27       chs 	/*
   4003   1.27       chs 	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
   4004   1.27       chs 	 * bypassing the cleanup code in the case of an error.
   4005   1.27       chs 	 */
   4006   1.27       chs 	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
   4007   1.27       chs 		ZFS_EXIT(zfsvfs);
   4008   1.27       chs 		VOP_UNLOCK(sdvp, 0);
   4009   1.27       chs #ifdef __NetBSD__
   4010   1.27       chs 		if (tdvp != sdvp)
   4011   1.27       chs #endif
   4012   1.27       chs 		VOP_UNLOCK(tdvp, 0);
   4013   1.27       chs 		error = SET_ERROR(EIO);
   4014   1.27       chs 		goto out;
   4015   1.27       chs 	}
   4016   1.27       chs 
   4017   1.27       chs 	/*
   4018   1.27       chs 	 * Re-resolve svp to be certain it still exists and fetch the
   4019   1.27       chs 	 * correct vnode.
   4020   1.27       chs 	 */
   4021   1.37   hannken #ifdef __NetBSD__
   4022   1.37   hannken 	/* ZFS wants a null-terminated name. */
   4023   1.37   hannken 	snm = PNBUF_GET();
   4024   1.37   hannken 	strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1);
   4025   1.37   hannken #endif
   4026   1.27       chs 	error = zfs_dirent_lookup(sdzp, snm, &szp, ZEXISTS);
   4027   1.37   hannken #ifdef __NetBSD__
   4028   1.37   hannken 	PNBUF_PUT(snm);
   4029   1.37   hannken #endif
   4030   1.27       chs 	if (error != 0) {
   4031   1.27       chs 		/* Source entry invalid or not there. */
   4032   1.27       chs 		ZFS_EXIT(zfsvfs);
   4033   1.27       chs 		VOP_UNLOCK(sdvp, 0);
   4034   1.27       chs #ifdef __NetBSD__
   4035   1.27       chs 		if (tdvp != sdvp)
   4036   1.27       chs #endif
   4037   1.27       chs 		VOP_UNLOCK(tdvp, 0);
   4038   1.27       chs 		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
   4039   1.27       chs 		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
   4040   1.27       chs 			error = SET_ERROR(EINVAL);
   4041   1.27       chs 		goto out;
   4042   1.27       chs 	}
   4043   1.27       chs 	svp = ZTOV(szp);
   4044    1.1      haad 
   4045   1.27       chs 	/*
   4046   1.27       chs 	 * Re-resolve tvp, if it disappeared we just carry on.
   4047   1.27       chs 	 */
   4048   1.37   hannken #ifdef __NetBSD__
   4049   1.37   hannken 	/* ZFS wants a null-terminated name. */
   4050   1.37   hannken 	tnm = PNBUF_GET();
   4051   1.37   hannken 	strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1);
   4052   1.37   hannken #endif
   4053   1.27       chs 	error = zfs_dirent_lookup(tdzp, tnm, &tzp, 0);
   4054   1.37   hannken #ifdef __NetBSD__
   4055   1.37   hannken 	PNBUF_PUT(tnm);
   4056   1.37   hannken #endif
   4057   1.27       chs 	if (error != 0) {
   4058   1.27       chs 		ZFS_EXIT(zfsvfs);
   4059   1.27       chs 		VOP_UNLOCK(sdvp, 0);
   4060   1.27       chs #ifdef __NetBSD__
   4061   1.27       chs 		if (tdvp != sdvp)
   4062   1.27       chs #endif
   4063   1.27       chs 		VOP_UNLOCK(tdvp, 0);
   4064   1.27       chs 		vrele(svp);
   4065   1.27       chs 		if ((tcnp->cn_flags & ISDOTDOT) != 0)
   4066   1.27       chs 			error = SET_ERROR(EINVAL);
   4067   1.27       chs 		goto out;
   4068    1.1      haad 	}
   4069   1.27       chs 	if (tzp != NULL)
   4070   1.27       chs 		tvp = ZTOV(tzp);
   4071   1.27       chs 	else
   4072   1.27       chs 		tvp = NULL;
   4073    1.1      haad 
   4074   1.27       chs 	/*
   4075   1.27       chs 	 * At present the vnode locks must be acquired before z_teardown_lock,
   4076   1.27       chs 	 * although it would be more logical to use the opposite order.
   4077   1.27       chs 	 */
   4078   1.27       chs 	ZFS_EXIT(zfsvfs);
   4079    1.1      haad 
   4080    1.1      haad 	/*
   4081   1.27       chs 	 * Now try acquire locks on svp and tvp.
   4082    1.1      haad 	 */
   4083   1.27       chs 	nvp = svp;
   4084   1.27       chs 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
   4085   1.27       chs 	if (error != 0) {
   4086   1.27       chs 		VOP_UNLOCK(sdvp, 0);
   4087   1.27       chs #ifdef __NetBSD__
   4088   1.27       chs 		if (tdvp != sdvp)
   4089   1.27       chs #endif
   4090   1.27       chs 		VOP_UNLOCK(tdvp, 0);
   4091   1.27       chs 		if (tvp != NULL)
   4092   1.27       chs 			vrele(tvp);
   4093   1.27       chs 		if (error != EBUSY) {
   4094   1.27       chs 			vrele(nvp);
   4095   1.27       chs 			goto out;
   4096   1.27       chs 		}
   4097   1.27       chs 		error = vn_lock(nvp, LK_EXCLUSIVE);
   4098   1.27       chs 		if (error != 0) {
   4099   1.27       chs 			vrele(nvp);
   4100   1.27       chs 			goto out;
   4101   1.27       chs 		}
   4102   1.27       chs 		VOP_UNLOCK(nvp, 0);
   4103   1.27       chs 		/*
   4104   1.27       chs 		 * Concurrent rename race.
   4105   1.27       chs 		 * XXX ?
   4106   1.27       chs 		 */
   4107   1.27       chs 		if (nvp == tdvp) {
   4108   1.27       chs 			vrele(nvp);
   4109   1.27       chs 			error = SET_ERROR(EINVAL);
   4110   1.27       chs 			goto out;
   4111   1.27       chs 		}
   4112   1.27       chs #ifdef __NetBSD__
   4113   1.27       chs 		if (*svpp != NULL)
   4114   1.27       chs #endif
   4115   1.27       chs 		vrele(*svpp);
   4116   1.27       chs 		*svpp = nvp;
   4117   1.27       chs 		goto relock;
   4118   1.27       chs 	}
   4119   1.27       chs #ifdef __NetBSD__
   4120   1.27       chs 	if (*svpp != NULL)
   4121   1.27       chs #endif
   4122   1.27       chs 	vrele(*svpp);
   4123   1.27       chs 	*svpp = nvp;
   4124   1.27       chs 
   4125   1.27       chs 	if (*tvpp != NULL)
   4126   1.27       chs 		vrele(*tvpp);
   4127   1.27       chs 	*tvpp = NULL;
   4128   1.27       chs 	if (tvp != NULL) {
   4129   1.27       chs 		nvp = tvp;
   4130   1.27       chs 
   4131   1.27       chs #ifdef __NetBSD__
   4132   1.27       chs 		if (tvp == svp || tvp == sdvp) {
   4133   1.27       chs 		} else {
   4134   1.27       chs #endif
   4135   1.27       chs 		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
   4136   1.27       chs 		if (error != 0) {
   4137   1.27       chs 			VOP_UNLOCK(sdvp, 0);
   4138   1.27       chs #ifdef __NetBSD__
   4139   1.27       chs 			if (tdvp != sdvp)
   4140   1.27       chs #endif
   4141   1.27       chs 			VOP_UNLOCK(tdvp, 0);
   4142   1.27       chs #ifdef __NetBSD__
   4143   1.27       chs 			if (*svpp != tdvp)
   4144   1.27       chs #endif
   4145   1.27       chs 			VOP_UNLOCK(*svpp, 0);
   4146   1.27       chs 			if (error != EBUSY) {
   4147   1.27       chs 				vrele(nvp);
   4148   1.27       chs 				goto out;
   4149   1.27       chs 			}
   4150   1.27       chs 			error = vn_lock(nvp, LK_EXCLUSIVE);
   4151   1.27       chs 			if (error != 0) {
   4152   1.27       chs 				vrele(nvp);
   4153   1.27       chs 				goto out;
   4154    1.1      haad 			}
   4155   1.27       chs 			vput(nvp);
   4156   1.27       chs 			goto relock;
   4157    1.1      haad 		}
   4158   1.27       chs #ifdef __NetBSD__
   4159   1.27       chs 		} /* end if (tvp == svp || tvp == sdvp) */
   4160   1.27       chs #endif
   4161   1.27       chs 
   4162   1.27       chs 		*tvpp = nvp;
   4163   1.27       chs 	}
   4164   1.27       chs 
   4165   1.27       chs 	KASSERT(VOP_ISLOCKED(sdvp) == LK_EXCLUSIVE);
   4166   1.27       chs 	KASSERT(VOP_ISLOCKED(*svpp) == LK_EXCLUSIVE);
   4167   1.27       chs 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
   4168   1.27       chs 	KASSERT(*tvpp == NULL || VOP_ISLOCKED(*tvpp) == LK_EXCLUSIVE);
   4169   1.27       chs 
   4170   1.27       chs 	return (0);
   4171   1.27       chs 
   4172   1.27       chs out:
   4173   1.27       chs 	return (error);
   4174   1.27       chs }
   4175    1.1      haad 
   4176   1.27       chs /*
   4177   1.27       chs  * Note that we must use VRELE_ASYNC in this function as it walks
   4178   1.27       chs  * up the directory tree and vrele may need to acquire an exclusive
   4179   1.27       chs  * lock if a last reference to a vnode is dropped.
   4180   1.27       chs  */
   4181   1.27       chs static int
   4182   1.27       chs zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
   4183   1.27       chs {
   4184   1.27       chs 	zfsvfs_t	*zfsvfs;
   4185   1.27       chs 	znode_t		*zp, *zp1;
   4186   1.27       chs 	uint64_t	parent;
   4187   1.27       chs 	int		error;
   4188    1.1      haad 
   4189   1.27       chs 	zfsvfs = tdzp->z_zfsvfs;
   4190   1.27       chs 	if (tdzp == szp)
   4191   1.27       chs 		return (SET_ERROR(EINVAL));
   4192   1.27       chs 	if (tdzp == sdzp)
   4193   1.27       chs 		return (0);
   4194   1.27       chs 	if (tdzp->z_id == zfsvfs->z_root)
   4195   1.27       chs 		return (0);
   4196   1.27       chs 	zp = tdzp;
   4197   1.27       chs 	for (;;) {
   4198   1.27       chs 		ASSERT(!zp->z_unlinked);
   4199   1.27       chs 		if ((error = sa_lookup(zp->z_sa_hdl,
   4200   1.27       chs 		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
   4201   1.27       chs 			break;
   4202    1.1      haad 
   4203   1.27       chs 		if (parent == szp->z_id) {
   4204   1.27       chs 			error = SET_ERROR(EINVAL);
   4205   1.27       chs 			break;
   4206   1.27       chs 		}
   4207   1.27       chs 		if (parent == zfsvfs->z_root)
   4208   1.27       chs 			break;
   4209   1.27       chs 		if (parent == sdzp->z_id)
   4210   1.27       chs 			break;
   4211    1.1      haad 
   4212   1.27       chs 		error = zfs_zget(zfsvfs, parent, &zp1);
   4213   1.27       chs 		if (error != 0)
   4214   1.27       chs 			break;
   4215    1.1      haad 
   4216   1.27       chs 		if (zp != tdzp)
   4217   1.27       chs 			VN_RELE_ASYNC(ZTOV(zp),
   4218   1.27       chs 			    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
   4219   1.27       chs 		zp = zp1;
   4220   1.27       chs 	}
   4221    1.1      haad 
   4222   1.27       chs 	if (error == ENOTDIR)
   4223   1.27       chs 		panic("checkpath: .. not a directory\n");
   4224   1.27       chs 	if (zp != tdzp)
   4225   1.27       chs 		VN_RELE_ASYNC(ZTOV(zp),
   4226   1.27       chs 		    dsl_pool_vnrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
   4227   1.27       chs 	return (error);
   4228    1.1      haad }
   4229    1.1      haad 
   4230    1.1      haad /*
   4231    1.1      haad  * Move an entry from the provided source directory to the target
   4232    1.1      haad  * directory.  Change the entry name as indicated.
   4233    1.1      haad  *
   4234    1.1      haad  *	IN:	sdvp	- Source directory containing the "old entry".
   4235    1.1      haad  *		snm	- Old entry name.
   4236    1.1      haad  *		tdvp	- Target directory to contain the "new entry".
   4237    1.1      haad  *		tnm	- New entry name.
   4238    1.1      haad  *		cr	- credentials of caller.
   4239    1.1      haad  *		ct	- caller context
   4240    1.1      haad  *		flags	- case flags
   4241    1.1      haad  *
   4242   1.27       chs  *	RETURN:	0 on success, error code on failure.
   4243    1.1      haad  *
   4244    1.1      haad  * Timestamps:
   4245    1.1      haad  *	sdvp,tdvp - ctime|mtime updated
   4246    1.1      haad  */
   4247    1.1      haad /*ARGSUSED*/
   4248    1.1      haad static int
   4249   1.27       chs zfs_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
   4250   1.27       chs     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
   4251   1.27       chs     cred_t *cr)
   4252    1.1      haad {
   4253   1.27       chs 	zfsvfs_t	*zfsvfs;
   4254   1.27       chs 	znode_t		*sdzp, *tdzp, *szp, *tzp;
   4255   1.27       chs 	zilog_t		*zilog = NULL;
   4256    1.1      haad 	dmu_tx_t	*tx;
   4257   1.37   hannken #ifdef __FreeBSD__
   4258   1.27       chs 	char		*snm = __UNCONST(scnp->cn_nameptr);
   4259   1.27       chs 	char		*tnm = __UNCONST(tcnp->cn_nameptr);
   4260   1.37   hannken #endif
   4261   1.37   hannken #ifdef __NetBSD__
   4262   1.37   hannken 	char *snm, *tnm;
   4263   1.37   hannken #endif
   4264    1.1      haad 	int		error = 0;
   4265    1.1      haad 
   4266   1.27       chs 	/* Reject renames across filesystems. */
   4267   1.27       chs 	if (((*svpp) != NULL && (*svpp)->v_mount != tdvp->v_mount) ||
   4268   1.27       chs 	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
   4269   1.27       chs 		error = SET_ERROR(EXDEV);
   4270   1.27       chs 		goto out;
   4271   1.27       chs 	}
   4272    1.1      haad 
   4273   1.27       chs 	if (zfsctl_is_node(tdvp)) {
   4274   1.27       chs 		error = SET_ERROR(EXDEV);
   4275   1.27       chs 		goto out;
   4276    1.1      haad 	}
   4277    1.1      haad 
   4278   1.27       chs 	/*
   4279   1.27       chs 	 * Lock all four vnodes to ensure safety and semantics of renaming.
   4280   1.27       chs 	 */
   4281   1.27       chs 	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
   4282   1.27       chs 	if (error != 0) {
   4283   1.27       chs 		/* no vnodes are locked in the case of error here */
   4284   1.27       chs 		return (error);
   4285    1.1      haad 	}
   4286    1.1      haad 
   4287   1.27       chs 	tdzp = VTOZ(tdvp);
   4288   1.27       chs 	sdzp = VTOZ(sdvp);
   4289   1.27       chs 	zfsvfs = tdzp->z_zfsvfs;
   4290   1.27       chs 	zilog = zfsvfs->z_log;
   4291   1.37   hannken #ifdef __NetBSD__
   4292   1.37   hannken 	/* ZFS wants a null-terminated name. */
   4293   1.37   hannken 	snm = PNBUF_GET();
   4294   1.37   hannken 	strlcpy(snm, scnp->cn_nameptr, scnp->cn_namelen + 1);
   4295   1.37   hannken 	tnm = PNBUF_GET();
   4296   1.37   hannken 	strlcpy(tnm, tcnp->cn_nameptr, tcnp->cn_namelen + 1);
   4297   1.37   hannken #endif
   4298    1.1      haad 
   4299    1.1      haad 	/*
   4300   1.27       chs 	 * After we re-enter ZFS_ENTER() we will have to revalidate all
   4301   1.27       chs 	 * znodes involved.
   4302    1.1      haad 	 */
   4303   1.27       chs 	ZFS_ENTER(zfsvfs);
   4304   1.27       chs 
   4305   1.27       chs 	if (zfsvfs->z_utf8 && u8_validate(tnm,
   4306   1.27       chs 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   4307   1.27       chs 		error = SET_ERROR(EILSEQ);
   4308   1.27       chs 		goto unlockout;
   4309    1.1      haad 	}
   4310    1.1      haad 
   4311   1.30   hannken #ifndef __NetBSD__
   4312   1.27       chs 	/* If source and target are the same file, there is nothing to do. */
   4313   1.27       chs 	if ((*svpp) == (*tvpp)) {
   4314   1.27       chs 		error = 0;
   4315   1.27       chs 		goto unlockout;
   4316   1.27       chs 	}
   4317   1.30   hannken #endif
   4318    1.1      haad 
   4319   1.27       chs 	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
   4320   1.27       chs 	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
   4321   1.27       chs 	    (*tvpp)->v_mountedhere != NULL)) {
   4322   1.27       chs 		error = SET_ERROR(EXDEV);
   4323   1.27       chs 		goto unlockout;
   4324    1.1      haad 	}
   4325    1.1      haad 
   4326    1.4      haad 	/*
   4327   1.27       chs 	 * We can not use ZFS_VERIFY_ZP() here because it could directly return
   4328   1.27       chs 	 * bypassing the cleanup code in the case of an error.
   4329    1.4      haad 	 */
   4330   1.27       chs 	if (tdzp->z_sa_hdl == NULL || sdzp->z_sa_hdl == NULL) {
   4331   1.27       chs 		error = SET_ERROR(EIO);
   4332   1.27       chs 		goto unlockout;
   4333    1.4      haad 	}
   4334    1.4      haad 
   4335   1.27       chs 	szp = VTOZ(*svpp);
   4336   1.27       chs 	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
   4337   1.27       chs 	if (szp->z_sa_hdl == NULL || (tzp != NULL && tzp->z_sa_hdl == NULL)) {
   4338   1.27       chs 		error = SET_ERROR(EIO);
   4339   1.27       chs 		goto unlockout;
   4340    1.1      haad 	}
   4341    1.1      haad 
   4342   1.27       chs 	/*
   4343   1.27       chs 	 * This is to prevent the creation of links into attribute space
   4344   1.27       chs 	 * by renaming a linked file into/outof an attribute directory.
   4345   1.27       chs 	 * See the comment in zfs_link() for why this is considered bad.
   4346   1.27       chs 	 */
   4347   1.27       chs 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
   4348   1.27       chs 		error = SET_ERROR(EINVAL);
   4349   1.27       chs 		goto unlockout;
   4350    1.1      haad 	}
   4351    1.1      haad 
   4352    1.1      haad 	/*
   4353    1.1      haad 	 * Must have write access at the source to remove the old entry
   4354    1.1      haad 	 * and write access at the target to create the new entry.
   4355    1.1      haad 	 * Note that if target and source are the same, this can be
   4356    1.1      haad 	 * done in a single check.
   4357    1.1      haad 	 */
   4358   1.27       chs 	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
   4359   1.27       chs 		goto unlockout;
   4360    1.1      haad 
   4361   1.27       chs 	if ((*svpp)->v_type == VDIR) {
   4362   1.27       chs 		/*
   4363   1.27       chs 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
   4364   1.27       chs 		 */
   4365   1.27       chs 		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
   4366   1.27       chs 		    sdzp == szp ||
   4367   1.27       chs 		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
   4368   1.27       chs 			error = SET_ERROR(EINVAL);
   4369   1.27       chs 			goto unlockout;
   4370   1.27       chs 		}
   4371    1.1      haad 
   4372    1.1      haad 		/*
   4373    1.1      haad 		 * Check to make sure rename is valid.
   4374    1.1      haad 		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
   4375    1.1      haad 		 */
   4376   1.27       chs 		if (error = zfs_rename_check(szp, sdzp, tdzp))
   4377   1.27       chs 			goto unlockout;
   4378    1.1      haad 	}
   4379    1.1      haad 
   4380    1.1      haad 	/*
   4381    1.1      haad 	 * Does target exist?
   4382    1.1      haad 	 */
   4383    1.1      haad 	if (tzp) {
   4384    1.1      haad 		/*
   4385    1.1      haad 		 * Source and target must be the same type.
   4386    1.1      haad 		 */
   4387   1.27       chs 		if ((*svpp)->v_type == VDIR) {
   4388   1.27       chs 			if ((*tvpp)->v_type != VDIR) {
   4389   1.27       chs 				error = SET_ERROR(ENOTDIR);
   4390   1.27       chs 				goto unlockout;
   4391   1.27       chs 			} else {
   4392   1.27       chs 				cache_purge(tdvp);
   4393   1.27       chs 				if (sdvp != tdvp)
   4394   1.27       chs 					cache_purge(sdvp);
   4395    1.1      haad 			}
   4396    1.1      haad 		} else {
   4397   1.27       chs 			if ((*tvpp)->v_type == VDIR) {
   4398   1.27       chs 				error = SET_ERROR(EISDIR);
   4399   1.27       chs 				goto unlockout;
   4400    1.1      haad 			}
   4401    1.1      haad 		}
   4402   1.27       chs 
   4403    1.1      haad 		/*
   4404    1.1      haad 		 * POSIX dictates that when the source and target
   4405    1.1      haad 		 * entries refer to the same file object, rename
   4406    1.1      haad 		 * must do nothing and exit without error.
   4407    1.1      haad 		 */
   4408   1.12  riastrad #ifndef __NetBSD__
   4409   1.12  riastrad 		/*
   4410   1.12  riastrad 		 * But on NetBSD we have a different system call to do
   4411   1.12  riastrad 		 * this, posix_rename, which sorta kinda handles this
   4412   1.12  riastrad 		 * case (modulo races), and our tests expect BSD
   4413   1.12  riastrad 		 * semantics for rename, so we'll do that until we can
   4414   1.12  riastrad 		 * push the choice between BSD and POSIX semantics into
   4415   1.12  riastrad 		 * the VOP_RENAME protocol as a flag.
   4416   1.12  riastrad 		 */
   4417    1.1      haad 		if (szp->z_id == tzp->z_id) {
   4418    1.1      haad 			error = 0;
   4419   1.27       chs 			goto unlockout;
   4420    1.1      haad 		}
   4421   1.12  riastrad #endif
   4422    1.1      haad 	}
   4423    1.1      haad 
   4424   1.27       chs 	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
   4425    1.1      haad 	if (tzp)
   4426   1.27       chs 		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
   4427    1.1      haad 
   4428    1.1      haad 	/*
   4429    1.1      haad 	 * notify the target directory if it is not the same
   4430    1.1      haad 	 * as source directory.
   4431    1.1      haad 	 */
   4432    1.1      haad 	if (tdvp != sdvp) {
   4433    1.1      haad 		vnevent_rename_dest_dir(tdvp, ct);
   4434    1.1      haad 	}
   4435    1.1      haad 
   4436    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   4437   1.27       chs 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
   4438   1.27       chs 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
   4439    1.1      haad 	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
   4440    1.1      haad 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
   4441   1.27       chs 	if (sdzp != tdzp) {
   4442   1.27       chs 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
   4443   1.27       chs 		zfs_sa_upgrade_txholds(tx, tdzp);
   4444   1.27       chs 	}
   4445   1.27       chs 	if (tzp) {
   4446   1.27       chs 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
   4447   1.27       chs 		zfs_sa_upgrade_txholds(tx, tzp);
   4448   1.27       chs 	}
   4449   1.27       chs 
   4450   1.27       chs 	zfs_sa_upgrade_txholds(tx, szp);
   4451    1.1      haad 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
   4452   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   4453    1.1      haad 	if (error) {
   4454    1.1      haad 		dmu_tx_abort(tx);
   4455   1.27       chs 		goto unlockout;
   4456    1.1      haad 	}
   4457    1.1      haad 
   4458   1.27       chs 
   4459   1.12  riastrad 	if (tzp && (tzp->z_id != szp->z_id))
   4460   1.12  riastrad 		/* Attempt to remove the existing target */
   4461   1.27       chs 		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
   4462    1.1      haad 
   4463    1.1      haad 	if (error == 0) {
   4464   1.12  riastrad 		if (!tzp || (tzp->z_id != szp->z_id))
   4465   1.27       chs 			error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
   4466    1.1      haad 		if (error == 0) {
   4467   1.27       chs 			szp->z_pflags |= ZFS_AV_MODIFIED;
   4468   1.27       chs 
   4469   1.27       chs 			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
   4470   1.27       chs 			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
   4471   1.27       chs 			ASSERT0(error);
   4472    1.1      haad 
   4473   1.27       chs 			error = zfs_link_destroy(sdzp, snm, szp, tx,
   4474   1.12  riastrad 			    /* Kludge for BSD rename semantics.  */
   4475   1.27       chs 			    tzp && tzp->z_id == szp->z_id ? 0: ZRENAMING, NULL);
   4476   1.27       chs 			if (error == 0) {
   4477   1.27       chs 				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
   4478   1.27       chs 				    snm, tdzp, tnm, szp);
   4479    1.1      haad 
   4480   1.27       chs 				/*
   4481   1.27       chs 				 * Update path information for the target vnode
   4482   1.27       chs 				 */
   4483   1.27       chs 				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
   4484   1.27       chs 			} else {
   4485   1.27       chs 				/*
   4486   1.27       chs 				 * At this point, we have successfully created
   4487   1.27       chs 				 * the target name, but have failed to remove
   4488   1.27       chs 				 * the source name.  Since the create was done
   4489   1.27       chs 				 * with the ZRENAMING flag, there are
   4490   1.27       chs 				 * complications; for one, the link count is
   4491   1.27       chs 				 * wrong.  The easiest way to deal with this
   4492   1.27       chs 				 * is to remove the newly created target, and
   4493   1.27       chs 				 * return the original error.  This must
   4494   1.27       chs 				 * succeed; fortunately, it is very unlikely to
   4495   1.27       chs 				 * fail, since we just created it.
   4496   1.27       chs 				 */
   4497   1.27       chs 				VERIFY3U(zfs_link_destroy(tdzp, tnm, szp, tx,
   4498   1.27       chs 				    ZRENAMING, NULL), ==, 0);
   4499   1.27       chs 			}
   4500    1.1      haad 		}
   4501    1.2      haad 		if (error == 0) {
   4502   1.27       chs 			cache_purge(*svpp);
   4503   1.27       chs 			if (*tvpp != NULL)
   4504   1.27       chs 				cache_purge(*tvpp);
   4505   1.27       chs 			cache_purge_negative(tdvp);
   4506   1.52   hannken #ifdef __NetBSD__
   4507   1.52   hannken 			if (*svpp == *tvpp) {
   4508   1.52   hannken 				VN_KNOTE(sdvp, NOTE_WRITE);
   4509   1.52   hannken 				VN_KNOTE(*svpp, (szp->z_links == 0 ?
   4510   1.52   hannken 				    NOTE_DELETE : NOTE_LINK));
   4511   1.52   hannken 			} else {
   4512   1.52   hannken 				genfs_rename_knote(sdvp, *svpp, tdvp, *tvpp,
   4513   1.76   thorpej 				    tzp != NULL ? tzp->z_links : 0);
   4514   1.52   hannken 			}
   4515   1.52   hannken #endif
   4516    1.2      haad 		}
   4517    1.1      haad 	}
   4518    1.1      haad 
   4519    1.1      haad 	dmu_tx_commit(tx);
   4520    1.1      haad 
   4521   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   4522   1.27       chs 		zil_commit(zilog, 0);
   4523    1.4      haad 
   4524   1.27       chs unlockout:			/* all 4 vnodes are locked, ZFS_ENTER called */
   4525   1.27       chs 	ZFS_EXIT(zfsvfs);
   4526    1.1      haad 
   4527   1.27       chs 	VOP_UNLOCK(*svpp, 0);
   4528   1.27       chs 	VOP_UNLOCK(sdvp, 0);
   4529   1.37   hannken #ifdef __NetBSD__
   4530   1.37   hannken 	PNBUF_PUT(snm);
   4531   1.37   hannken 	PNBUF_PUT(tnm);
   4532   1.37   hannken #endif
   4533    1.1      haad 
   4534   1.27       chs 	if (*tvpp != sdvp && *tvpp != *svpp)
   4535   1.27       chs 	if (*tvpp != NULL)
   4536   1.27       chs 		VOP_UNLOCK(*tvpp, 0);
   4537   1.27       chs 	if (tdvp != sdvp && tdvp != *svpp)
   4538   1.27       chs 	if (tdvp != *tvpp)
   4539   1.27       chs 		VOP_UNLOCK(tdvp, 0);
   4540    1.2      haad 
   4541   1.27       chs out:
   4542    1.1      haad 	return (error);
   4543    1.1      haad }
   4544    1.1      haad 
   4545    1.1      haad /*
   4546    1.1      haad  * Insert the indicated symbolic reference entry into the directory.
   4547    1.1      haad  *
   4548    1.1      haad  *	IN:	dvp	- Directory to contain new symbolic link.
   4549    1.1      haad  *		link	- Name for new symlink entry.
   4550    1.1      haad  *		vap	- Attributes of new entry.
   4551    1.1      haad  *		cr	- credentials of caller.
   4552    1.1      haad  *		ct	- caller context
   4553    1.1      haad  *		flags	- case flags
   4554    1.1      haad  *
   4555   1.27       chs  *	RETURN:	0 on success, error code on failure.
   4556    1.1      haad  *
   4557    1.1      haad  * Timestamps:
   4558    1.1      haad  *	dvp - ctime|mtime updated
   4559    1.1      haad  */
   4560    1.1      haad /*ARGSUSED*/
   4561    1.1      haad static int
   4562    1.2      haad zfs_symlink(vnode_t *dvp, vnode_t **vpp, char *name, vattr_t *vap, char *link,
   4563   1.27       chs     cred_t *cr, kthread_t *td)
   4564    1.1      haad {
   4565    1.1      haad 	znode_t		*zp, *dzp = VTOZ(dvp);
   4566    1.1      haad 	dmu_tx_t	*tx;
   4567    1.1      haad 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
   4568    1.1      haad 	zilog_t		*zilog;
   4569   1.27       chs 	uint64_t	len = strlen(link);
   4570    1.1      haad 	int		error;
   4571    1.4      haad 	zfs_acl_ids_t	acl_ids;
   4572    1.4      haad 	boolean_t	fuid_dirtied;
   4573   1.27       chs 	uint64_t	txtype = TX_SYMLINK;
   4574   1.27       chs 	int		flags = 0;
   4575    1.1      haad 
   4576    1.1      haad 	ASSERT(vap->va_type == VLNK);
   4577    1.1      haad 
   4578    1.1      haad 	ZFS_ENTER(zfsvfs);
   4579    1.1      haad 	ZFS_VERIFY_ZP(dzp);
   4580    1.1      haad 	zilog = zfsvfs->z_log;
   4581    1.1      haad 
   4582    1.1      haad 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
   4583    1.1      haad 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   4584    1.1      haad 		ZFS_EXIT(zfsvfs);
   4585   1.27       chs 		return (SET_ERROR(EILSEQ));
   4586    1.1      haad 	}
   4587   1.27       chs 
   4588   1.27       chs 	if (len > MAXPATHLEN) {
   4589    1.1      haad 		ZFS_EXIT(zfsvfs);
   4590   1.27       chs 		return (SET_ERROR(ENAMETOOLONG));
   4591    1.1      haad 	}
   4592    1.1      haad 
   4593   1.27       chs 	if ((error = zfs_acl_ids_create(dzp, 0,
   4594   1.27       chs 	    vap, cr, NULL, &acl_ids)) != 0) {
   4595    1.1      haad 		ZFS_EXIT(zfsvfs);
   4596   1.27       chs 		return (error);
   4597    1.1      haad 	}
   4598    1.1      haad 
   4599    1.1      haad 	/*
   4600    1.1      haad 	 * Attempt to lock directory; fail if entry already exists.
   4601    1.1      haad 	 */
   4602   1.27       chs 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
   4603    1.1      haad 	if (error) {
   4604   1.27       chs 		zfs_acl_ids_free(&acl_ids);
   4605   1.27       chs 		ZFS_EXIT(zfsvfs);
   4606   1.27       chs 		return (error);
   4607   1.27       chs 	}
   4608   1.27       chs 
   4609   1.27       chs 	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
   4610   1.27       chs 		zfs_acl_ids_free(&acl_ids);
   4611    1.1      haad 		ZFS_EXIT(zfsvfs);
   4612    1.1      haad 		return (error);
   4613    1.1      haad 	}
   4614    1.1      haad 
   4615    1.4      haad 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
   4616    1.4      haad 		zfs_acl_ids_free(&acl_ids);
   4617    1.4      haad 		ZFS_EXIT(zfsvfs);
   4618   1.27       chs 		return (SET_ERROR(EDQUOT));
   4619    1.4      haad 	}
   4620   1.27       chs 
   4621   1.27       chs 	getnewvnode_reserve(1);
   4622    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   4623    1.4      haad 	fuid_dirtied = zfsvfs->z_fuid_dirty;
   4624    1.1      haad 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
   4625    1.1      haad 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
   4626   1.27       chs 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
   4627   1.27       chs 	    ZFS_SA_BASE_ATTR_SIZE + len);
   4628   1.27       chs 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
   4629   1.27       chs 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
   4630   1.27       chs 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
   4631   1.27       chs 		    acl_ids.z_aclp->z_acl_bytes);
   4632   1.27       chs 	}
   4633    1.4      haad 	if (fuid_dirtied)
   4634    1.4      haad 		zfs_fuid_txhold(zfsvfs, tx);
   4635   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   4636    1.1      haad 	if (error) {
   4637    1.4      haad 		zfs_acl_ids_free(&acl_ids);
   4638    1.1      haad 		dmu_tx_abort(tx);
   4639   1.27       chs 		getnewvnode_drop_reserve();
   4640    1.1      haad 		ZFS_EXIT(zfsvfs);
   4641    1.1      haad 		return (error);
   4642    1.1      haad 	}
   4643    1.1      haad 
   4644    1.1      haad 	/*
   4645    1.1      haad 	 * Create a new object for the symlink.
   4646   1.27       chs 	 * for version 4 ZPL datsets the symlink will be an SA attribute
   4647    1.1      haad 	 */
   4648   1.27       chs 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
   4649    1.1      haad 
   4650   1.27       chs 	if (fuid_dirtied)
   4651   1.27       chs 		zfs_fuid_sync(zfsvfs, tx);
   4652    1.4      haad 
   4653   1.27       chs 	if (zp->z_is_sa)
   4654   1.27       chs 		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
   4655   1.27       chs 		    link, len, tx);
   4656   1.27       chs 	else
   4657   1.27       chs 		zfs_sa_symlink(zp, link, len, tx);
   4658    1.1      haad 
   4659   1.27       chs 	zp->z_size = len;
   4660   1.27       chs 	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
   4661   1.27       chs 	    &zp->z_size, sizeof (zp->z_size), tx);
   4662    1.1      haad 	/*
   4663    1.1      haad 	 * Insert the new object into the directory.
   4664    1.1      haad 	 */
   4665   1.27       chs 	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
   4666   1.27       chs 
   4667   1.27       chs 	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
   4668   1.27       chs 	*vpp = ZTOV(zp);
   4669    1.4      haad 
   4670    1.4      haad 	zfs_acl_ids_free(&acl_ids);
   4671    1.1      haad 
   4672    1.1      haad 	dmu_tx_commit(tx);
   4673    1.1      haad 
   4674   1.27       chs 	getnewvnode_drop_reserve();
   4675   1.27       chs 
   4676   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   4677   1.27       chs 		zil_commit(zilog, 0);
   4678    1.1      haad 
   4679    1.1      haad 	ZFS_EXIT(zfsvfs);
   4680    1.1      haad 	return (error);
   4681    1.1      haad }
   4682    1.1      haad 
   4683    1.1      haad /*
   4684    1.1      haad  * Return, in the buffer contained in the provided uio structure,
   4685    1.1      haad  * the symbolic path referred to by vp.
   4686    1.1      haad  *
   4687    1.1      haad  *	IN:	vp	- vnode of symbolic link.
   4688   1.27       chs  *		uio	- structure to contain the link path.
   4689    1.1      haad  *		cr	- credentials of caller.
   4690    1.1      haad  *		ct	- caller context
   4691    1.1      haad  *
   4692   1.27       chs  *	OUT:	uio	- structure containing the link path.
   4693    1.1      haad  *
   4694   1.27       chs  *	RETURN:	0 on success, error code on failure.
   4695    1.1      haad  *
   4696    1.1      haad  * Timestamps:
   4697    1.1      haad  *	vp - atime updated
   4698    1.1      haad  */
   4699    1.1      haad /* ARGSUSED */
   4700    1.1      haad static int
   4701    1.1      haad zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
   4702    1.1      haad {
   4703    1.1      haad 	znode_t		*zp = VTOZ(vp);
   4704    1.1      haad 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   4705    1.1      haad 	int		error;
   4706    1.1      haad 
   4707    1.1      haad 	ZFS_ENTER(zfsvfs);
   4708    1.1      haad 	ZFS_VERIFY_ZP(zp);
   4709    1.1      haad 
   4710   1.27       chs 	if (zp->z_is_sa)
   4711   1.27       chs 		error = sa_lookup_uio(zp->z_sa_hdl,
   4712   1.27       chs 		    SA_ZPL_SYMLINK(zfsvfs), uio);
   4713   1.27       chs 	else
   4714   1.27       chs 		error = zfs_sa_readlink(zp, uio);
   4715    1.1      haad 
   4716    1.1      haad 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
   4717   1.27       chs 
   4718    1.1      haad 	ZFS_EXIT(zfsvfs);
   4719    1.1      haad 	return (error);
   4720    1.1      haad }
   4721    1.1      haad 
   4722    1.1      haad /*
   4723    1.1      haad  * Insert a new entry into directory tdvp referencing svp.
   4724    1.1      haad  *
   4725    1.1      haad  *	IN:	tdvp	- Directory to contain new entry.
   4726    1.1      haad  *		svp	- vnode of new entry.
   4727    1.1      haad  *		name	- name of new entry.
   4728    1.1      haad  *		cr	- credentials of caller.
   4729    1.1      haad  *		ct	- caller context
   4730    1.1      haad  *
   4731   1.27       chs  *	RETURN:	0 on success, error code on failure.
   4732    1.1      haad  *
   4733    1.1      haad  * Timestamps:
   4734    1.1      haad  *	tdvp - ctime|mtime updated
   4735    1.1      haad  *	 svp - ctime updated
   4736    1.1      haad  */
   4737    1.1      haad /* ARGSUSED */
   4738    1.1      haad static int
   4739    1.1      haad zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
   4740    1.1      haad     caller_context_t *ct, int flags)
   4741    1.1      haad {
   4742    1.1      haad 	znode_t		*dzp = VTOZ(tdvp);
   4743    1.1      haad 	znode_t		*tzp, *szp;
   4744    1.1      haad 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
   4745    1.1      haad 	zilog_t		*zilog;
   4746    1.1      haad 	dmu_tx_t	*tx;
   4747    1.1      haad 	int		error;
   4748   1.27       chs 	uint64_t	parent;
   4749    1.1      haad 	uid_t		owner;
   4750    1.1      haad 
   4751    1.1      haad 	ASSERT(tdvp->v_type == VDIR);
   4752    1.1      haad 
   4753    1.1      haad 	ZFS_ENTER(zfsvfs);
   4754    1.1      haad 	ZFS_VERIFY_ZP(dzp);
   4755    1.1      haad 	zilog = zfsvfs->z_log;
   4756    1.1      haad 
   4757   1.27       chs 	/*
   4758   1.27       chs 	 * POSIX dictates that we return EPERM here.
   4759   1.27       chs 	 * Better choices include ENOTSUP or EISDIR.
   4760   1.27       chs 	 */
   4761   1.27       chs 	if (svp->v_type == VDIR) {
   4762    1.1      haad 		ZFS_EXIT(zfsvfs);
   4763   1.27       chs 		return (SET_ERROR(EPERM));
   4764    1.1      haad 	}
   4765   1.27       chs 
   4766    1.1      haad 	szp = VTOZ(svp);
   4767    1.1      haad 	ZFS_VERIFY_ZP(szp);
   4768    1.1      haad 
   4769   1.27       chs 	if (szp->z_pflags & (ZFS_APPENDONLY | ZFS_IMMUTABLE | ZFS_READONLY)) {
   4770   1.27       chs 		ZFS_EXIT(zfsvfs);
   4771   1.27       chs 		return (SET_ERROR(EPERM));
   4772   1.27       chs 	}
   4773   1.27       chs 
   4774   1.27       chs 	/* Prevent links to .zfs/shares files */
   4775   1.27       chs 
   4776   1.27       chs 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
   4777   1.27       chs 	    &parent, sizeof (uint64_t))) != 0) {
   4778   1.27       chs 		ZFS_EXIT(zfsvfs);
   4779   1.27       chs 		return (error);
   4780   1.27       chs 	}
   4781   1.27       chs 	if (parent == zfsvfs->z_shares_dir) {
   4782   1.27       chs 		ZFS_EXIT(zfsvfs);
   4783   1.27       chs 		return (SET_ERROR(EPERM));
   4784   1.27       chs 	}
   4785   1.27       chs 
   4786    1.1      haad 	if (zfsvfs->z_utf8 && u8_validate(name,
   4787    1.1      haad 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
   4788    1.1      haad 		ZFS_EXIT(zfsvfs);
   4789   1.27       chs 		return (SET_ERROR(EILSEQ));
   4790    1.1      haad 	}
   4791    1.1      haad 
   4792    1.1      haad 	/*
   4793    1.1      haad 	 * We do not support links between attributes and non-attributes
   4794    1.1      haad 	 * because of the potential security risk of creating links
   4795    1.1      haad 	 * into "normal" file space in order to circumvent restrictions
   4796    1.1      haad 	 * imposed in attribute space.
   4797    1.1      haad 	 */
   4798   1.27       chs 	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
   4799    1.1      haad 		ZFS_EXIT(zfsvfs);
   4800   1.27       chs 		return (SET_ERROR(EINVAL));
   4801    1.1      haad 	}
   4802    1.1      haad 
   4803    1.1      haad 
   4804   1.27       chs 	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
   4805   1.27       chs 	if (owner != crgetuid(cr) && secpolicy_basic_link(svp, cr) != 0) {
   4806    1.1      haad 		ZFS_EXIT(zfsvfs);
   4807   1.27       chs 		return (SET_ERROR(EPERM));
   4808    1.1      haad 	}
   4809    1.1      haad 
   4810    1.1      haad 	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
   4811    1.1      haad 		ZFS_EXIT(zfsvfs);
   4812    1.1      haad 		return (error);
   4813    1.1      haad 	}
   4814    1.1      haad 
   4815    1.1      haad 	/*
   4816    1.1      haad 	 * Attempt to lock directory; fail if entry already exists.
   4817    1.1      haad 	 */
   4818   1.27       chs 	error = zfs_dirent_lookup(dzp, name, &tzp, ZNEW);
   4819    1.1      haad 	if (error) {
   4820    1.1      haad 		ZFS_EXIT(zfsvfs);
   4821    1.1      haad 		return (error);
   4822    1.1      haad 	}
   4823    1.1      haad 
   4824    1.1      haad 	tx = dmu_tx_create(zfsvfs->z_os);
   4825   1.27       chs 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
   4826    1.1      haad 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
   4827   1.27       chs 	zfs_sa_upgrade_txholds(tx, szp);
   4828   1.27       chs 	zfs_sa_upgrade_txholds(tx, dzp);
   4829   1.27       chs 	error = dmu_tx_assign(tx, TXG_WAIT);
   4830    1.1      haad 	if (error) {
   4831    1.1      haad 		dmu_tx_abort(tx);
   4832    1.1      haad 		ZFS_EXIT(zfsvfs);
   4833    1.1      haad 		return (error);
   4834    1.1      haad 	}
   4835    1.1      haad 
   4836   1.27       chs 	error = zfs_link_create(dzp, name, szp, tx, 0);
   4837    1.1      haad 
   4838    1.1      haad 	if (error == 0) {
   4839    1.1      haad 		uint64_t txtype = TX_LINK;
   4840    1.1      haad 		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
   4841    1.1      haad 	}
   4842    1.1      haad 
   4843    1.1      haad 	dmu_tx_commit(tx);
   4844    1.1      haad 
   4845    1.1      haad 	if (error == 0) {
   4846    1.1      haad 		vnevent_link(svp, ct);
   4847    1.1      haad 	}
   4848    1.1      haad 
   4849   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   4850   1.27       chs 		zil_commit(zilog, 0);
   4851   1.27       chs 
   4852    1.1      haad 	ZFS_EXIT(zfsvfs);
   4853    1.1      haad 	return (error);
   4854    1.1      haad }
   4855    1.1      haad 
   4856   1.27       chs 
   4857   1.97      yamt #if !defined(__NetBSD__)
   4858    1.2      haad /*ARGSUSED*/
   4859   1.27       chs void
   4860   1.27       chs zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
   4861   1.27       chs {
   4862   1.27       chs 	znode_t	*zp = VTOZ(vp);
   4863   1.27       chs 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   4864   1.27       chs 	int error;
   4865   1.27       chs 
   4866   1.27       chs 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
   4867   1.27       chs 	if (zp->z_sa_hdl == NULL) {
   4868   1.27       chs 		/*
   4869   1.27       chs 		 * The fs has been unmounted, or we did a
   4870   1.27       chs 		 * suspend/resume and this file no longer exists.
   4871   1.27       chs 		 */
   4872   1.27       chs 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
   4873   1.27       chs 		vrecycle(vp);
   4874   1.27       chs 		return;
   4875   1.27       chs 	}
   4876   1.27       chs 
   4877   1.27       chs 	if (zp->z_unlinked) {
   4878   1.27       chs 		/*
   4879   1.27       chs 		 * Fast path to recycle a vnode of a removed file.
   4880   1.27       chs 		 */
   4881   1.27       chs 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
   4882   1.27       chs 		vrecycle(vp);
   4883   1.27       chs 		return;
   4884   1.27       chs 	}
   4885   1.27       chs 
   4886   1.27       chs 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
   4887   1.27       chs 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
   4888   1.27       chs 
   4889   1.27       chs 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   4890   1.27       chs 		zfs_sa_upgrade_txholds(tx, zp);
   4891   1.27       chs 		error = dmu_tx_assign(tx, TXG_WAIT);
   4892   1.27       chs 		if (error) {
   4893   1.27       chs 			dmu_tx_abort(tx);
   4894   1.27       chs 		} else {
   4895   1.27       chs 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
   4896   1.27       chs 			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
   4897   1.27       chs 			zp->z_atime_dirty = 0;
   4898   1.27       chs 			dmu_tx_commit(tx);
   4899   1.27       chs 		}
   4900   1.27       chs 	}
   4901   1.27       chs 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
   4902   1.27       chs }
   4903   1.97      yamt #endif /* !defined(__NetBSD__) */
   4904    1.2      haad 
   4905   1.27       chs 
   4906   1.27       chs #ifdef __FreeBSD__
   4907   1.27       chs CTASSERT(sizeof(struct zfid_short) <= sizeof(struct fid));
   4908   1.27       chs CTASSERT(sizeof(struct zfid_long) <= sizeof(struct fid));
   4909   1.27       chs #endif
   4910    1.1      haad 
   4911    1.2      haad /*ARGSUSED*/
   4912    1.1      haad static int
   4913    1.2      haad zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
   4914    1.1      haad {
   4915    1.1      haad 	znode_t		*zp = VTOZ(vp);
   4916    1.1      haad 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   4917    1.2      haad 	uint32_t	gen;
   4918   1.27       chs 	uint64_t	gen64;
   4919    1.2      haad 	uint64_t	object = zp->z_id;
   4920    1.2      haad 	zfid_short_t	*zfid;
   4921   1.27       chs 	int		size, i, error;
   4922    1.2      haad 
   4923    1.2      haad 	ZFS_ENTER(zfsvfs);
   4924    1.2      haad 	ZFS_VERIFY_ZP(zp);
   4925   1.27       chs 
   4926   1.27       chs 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
   4927   1.27       chs 	    &gen64, sizeof (uint64_t))) != 0) {
   4928   1.27       chs 		ZFS_EXIT(zfsvfs);
   4929   1.27       chs 		return (error);
   4930   1.27       chs 	}
   4931   1.27       chs 
   4932   1.27       chs 	gen = (uint32_t)gen64;
   4933    1.2      haad 
   4934    1.2      haad 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
   4935   1.27       chs 
   4936   1.27       chs #ifdef illumos
   4937   1.27       chs 	if (fidp->fid_len < size) {
   4938   1.27       chs 		fidp->fid_len = size;
   4939   1.27       chs 		ZFS_EXIT(zfsvfs);
   4940   1.27       chs 		return (SET_ERROR(ENOSPC));
   4941   1.27       chs 	}
   4942   1.27       chs #else
   4943    1.2      haad 	fidp->fid_len = size;
   4944   1.27       chs #endif
   4945    1.2      haad 
   4946    1.2      haad 	zfid = (zfid_short_t *)fidp;
   4947    1.2      haad 
   4948    1.2      haad 	zfid->zf_len = size;
   4949    1.1      haad 
   4950    1.2      haad 	for (i = 0; i < sizeof (zfid->zf_object); i++)
   4951    1.2      haad 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
   4952    1.1      haad 
   4953    1.2      haad 	/* Must have a non-zero generation number to distinguish from .zfs */
   4954    1.2      haad 	if (gen == 0)
   4955    1.2      haad 		gen = 1;
   4956    1.2      haad 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
   4957    1.2      haad 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
   4958    1.1      haad 
   4959    1.2      haad 	if (size == LONG_FID_LEN) {
   4960    1.2      haad 		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
   4961    1.2      haad 		zfid_long_t	*zlfid;
   4962    1.1      haad 
   4963    1.2      haad 		zlfid = (zfid_long_t *)fidp;
   4964    1.1      haad 
   4965    1.2      haad 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
   4966    1.2      haad 			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
   4967    1.1      haad 
   4968    1.2      haad 		/* XXX - this should be the generation number for the objset */
   4969    1.2      haad 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
   4970    1.2      haad 			zlfid->zf_setgen[i] = 0;
   4971    1.2      haad 	}
   4972    1.2      haad 
   4973    1.2      haad 	ZFS_EXIT(zfsvfs);
   4974    1.2      haad 	return (0);
   4975    1.2      haad }
   4976    1.1      haad 
   4977    1.1      haad static int
   4978   1.27       chs zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
   4979    1.1      haad     caller_context_t *ct)
   4980    1.1      haad {
   4981   1.27       chs 	znode_t		*zp, *xzp;
   4982   1.27       chs 	zfsvfs_t	*zfsvfs;
   4983   1.27       chs 	int		error;
   4984    1.4      haad 
   4985   1.27       chs 	switch (cmd) {
   4986   1.27       chs 	case _PC_LINK_MAX:
   4987   1.27       chs 		*valp = INT_MAX;
   4988   1.27       chs 		return (0);
   4989    1.4      haad 
   4990   1.27       chs 	case _PC_FILESIZEBITS:
   4991   1.27       chs 		*valp = 64;
   4992    1.2      haad 		return (0);
   4993   1.27       chs #ifdef illumos
   4994   1.27       chs 	case _PC_XATTR_EXISTS:
   4995   1.27       chs 		zp = VTOZ(vp);
   4996   1.27       chs 		zfsvfs = zp->z_zfsvfs;
   4997   1.27       chs 		ZFS_ENTER(zfsvfs);
   4998   1.27       chs 		ZFS_VERIFY_ZP(zp);
   4999   1.27       chs 		*valp = 0;
   5000   1.27       chs 		error = zfs_dirent_lookup(zp, "", &xzp,
   5001   1.27       chs 		    ZXATTR | ZEXISTS | ZSHARED);
   5002   1.27       chs 		if (error == 0) {
   5003   1.27       chs 			if (!zfs_dirempty(xzp))
   5004   1.27       chs 				*valp = 1;
   5005   1.27       chs 			vrele(ZTOV(xzp));
   5006   1.27       chs 		} else if (error == ENOENT) {
   5007    1.2      haad 			/*
   5008   1.27       chs 			 * If there aren't extended attributes, it's the
   5009   1.27       chs 			 * same as having zero of them.
   5010    1.2      haad 			 */
   5011   1.27       chs 			error = 0;
   5012    1.2      haad 		}
   5013   1.27       chs 		ZFS_EXIT(zfsvfs);
   5014   1.27       chs 		return (error);
   5015    1.1      haad 
   5016   1.27       chs 	case _PC_SATTR_ENABLED:
   5017   1.27       chs 	case _PC_SATTR_EXISTS:
   5018   1.27       chs 		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
   5019   1.27       chs 		    (vp->v_type == VREG || vp->v_type == VDIR);
   5020   1.27       chs 		return (0);
   5021    1.1      haad 
   5022   1.27       chs 	case _PC_ACCESS_FILTERING:
   5023   1.27       chs 		*valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
   5024   1.27       chs 		    vp->v_type == VDIR;
   5025   1.27       chs 		return (0);
   5026    1.1      haad 
   5027   1.27       chs 	case _PC_ACL_ENABLED:
   5028   1.27       chs 		*valp = _ACL_ACE_ENABLED;
   5029   1.27       chs 		return (0);
   5030   1.27       chs #endif	/* illumos */
   5031   1.27       chs 	case _PC_MIN_HOLE_SIZE:
   5032   1.27       chs 		*valp = (int)SPA_MINBLOCKSIZE;
   5033   1.27       chs 		return (0);
   5034   1.27       chs #ifdef illumos
   5035   1.27       chs 	case _PC_TIMESTAMP_RESOLUTION:
   5036   1.27       chs 		/* nanosecond timestamp resolution */
   5037   1.27       chs 		*valp = 1L;
   5038   1.27       chs 		return (0);
   5039   1.27       chs #endif
   5040   1.27       chs 	case _PC_ACL_EXTENDED:
   5041   1.27       chs 		*valp = 0;
   5042    1.4      haad 		return (0);
   5043    1.4      haad 
   5044   1.27       chs #ifndef __NetBSD__
   5045   1.27       chs 	case _PC_ACL_NFS4:
   5046   1.27       chs 		*valp = 1;
   5047    1.4      haad 		return (0);
   5048    1.4      haad 
   5049   1.27       chs 	case _PC_ACL_PATH_MAX:
   5050   1.27       chs 		*valp = ACL_MAX_ENTRIES;
   5051    1.4      haad 		return (0);
   5052   1.27       chs #endif
   5053    1.4      haad 
   5054   1.27       chs 	default:
   5055   1.80    kardel 		return (EOPNOTSUPP);
   5056    1.4      haad 	}
   5057    1.4      haad }
   5058    1.4      haad 
   5059    1.4      haad /*ARGSUSED*/
   5060    1.4      haad static int
   5061   1.27       chs zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
   5062    1.4      haad     caller_context_t *ct)
   5063    1.4      haad {
   5064    1.4      haad 	znode_t *zp = VTOZ(vp);
   5065    1.4      haad 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   5066   1.27       chs 	int error;
   5067   1.27       chs 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
   5068    1.4      haad 
   5069    1.4      haad 	ZFS_ENTER(zfsvfs);
   5070    1.4      haad 	ZFS_VERIFY_ZP(zp);
   5071   1.27       chs 	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
   5072   1.27       chs 	ZFS_EXIT(zfsvfs);
   5073    1.4      haad 
   5074    1.4      haad 	return (error);
   5075    1.4      haad }
   5076    1.4      haad 
   5077   1.27       chs /*ARGSUSED*/
   5078   1.27       chs int
   5079   1.27       chs zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
   5080    1.4      haad     caller_context_t *ct)
   5081    1.4      haad {
   5082   1.27       chs 	znode_t *zp = VTOZ(vp);
   5083   1.27       chs 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   5084   1.27       chs 	int error;
   5085   1.27       chs 	boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
   5086   1.27       chs 	zilog_t	*zilog = zfsvfs->z_log;
   5087    1.4      haad 
   5088    1.4      haad 	ZFS_ENTER(zfsvfs);
   5089    1.4      haad 	ZFS_VERIFY_ZP(zp);
   5090    1.4      haad 
   5091   1.27       chs 	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
   5092    1.4      haad 
   5093   1.27       chs 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   5094   1.27       chs 		zil_commit(zilog, 0);
   5095    1.4      haad 
   5096    1.4      haad 	ZFS_EXIT(zfsvfs);
   5097    1.4      haad 	return (error);
   5098    1.4      haad }
   5099    1.4      haad 
   5100    1.4      haad static int
   5101   1.27       chs ioflags(int ioflags)
   5102    1.4      haad {
   5103   1.27       chs 	int flags = 0;
   5104    1.4      haad 
   5105   1.27       chs 	if (ioflags & IO_APPEND)
   5106   1.27       chs 		flags |= FAPPEND;
   5107   1.27       chs 	if (ioflags & IO_NDELAY)
   5108   1.27       chs 		flags |= FNONBLOCK;
   5109   1.27       chs 	if (ioflags & IO_SYNC)
   5110   1.27       chs 		flags |= (FSYNC | FDSYNC | FRSYNC);
   5111    1.4      haad 
   5112   1.27       chs 	return (flags);
   5113    1.4      haad }
   5114    1.4      haad 
   5115   1.27       chs #ifdef __NetBSD__
   5116   1.27       chs 
   5117   1.96      yamt static void zfs_netbsd_update_mctime(vnode_t *vp);
   5118   1.96      yamt 
   5119    1.4      haad static int
   5120    1.9  christos zfs_netbsd_open(void *v)
   5121    1.4      haad {
   5122    1.9  christos 	struct vop_open_args *ap = v;
   5123    1.4      haad 
   5124   1.12  riastrad 	return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL));
   5125    1.4      haad }
   5126    1.4      haad 
   5127    1.4      haad static int
   5128    1.9  christos zfs_netbsd_close(void *v)
   5129    1.4      haad {
   5130    1.9  christos 	struct vop_close_args *ap = v;
   5131    1.1      haad 
   5132    1.2      haad 	return (zfs_close(ap->a_vp, ap->a_fflag, 0, 0, ap->a_cred, NULL));
   5133    1.2      haad }
   5134    1.1      haad 
   5135    1.2      haad static int
   5136    1.9  christos zfs_netbsd_ioctl(void *v)
   5137    1.2      haad {
   5138    1.9  christos 	struct vop_ioctl_args *ap = v;
   5139    1.1      haad 
   5140    1.2      haad 	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
   5141    1.2      haad 		ap->a_fflag, ap->a_cred, NULL, NULL));
   5142    1.1      haad }
   5143    1.1      haad 
   5144    1.2      haad 
   5145    1.1      haad static int
   5146    1.9  christos zfs_netbsd_read(void *v)
   5147    1.1      haad {
   5148    1.9  christos 	struct vop_read_args *ap = v;
   5149   1.48   hannken 	vnode_t *vp = ap->a_vp;
   5150   1.48   hannken 	znode_t *zp = VTOZ(vp);
   5151    1.2      haad 
   5152   1.48   hannken 	switch (vp->v_type) {
   5153   1.48   hannken 	case VBLK:
   5154   1.48   hannken 	case VCHR:
   5155   1.48   hannken 		ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp);
   5156   1.48   hannken 		return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
   5157   1.48   hannken 	case VFIFO:
   5158   1.48   hannken 		ZFS_ACCESSTIME_STAMP(zp->z_zfsvfs, zp);
   5159   1.48   hannken 		return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
   5160   1.85      yamt 	case VREG:
   5161   1.85      yamt 		break;
   5162   1.85      yamt 	case VDIR:
   5163   1.85      yamt 		/*
   5164   1.85      yamt 		 * Note: this is normal on NetBSD because it historically
   5165   1.85      yamt 		 * allows read() on a directory.
   5166   1.85      yamt 		 * We simply reject it here though because it doesn't make
   5167   1.85      yamt 		 * sense to allow read() unless we implement a conversion
   5168   1.85      yamt 		 * to the historical version of the UFS dirent structure,
   5169   1.85      yamt 		 * which i (yamt) don't think is worth the effort.
   5170   1.85      yamt 		 */
   5171   1.85      yamt 		return EISDIR;
   5172   1.85      yamt 	default:
   5173   1.85      yamt 		return EINVAL;
   5174   1.48   hannken 	}
   5175   1.48   hannken 
   5176   1.48   hannken 	return (zfs_read(vp, ap->a_uio, ioflags(ap->a_ioflag), ap->a_cred, NULL));
   5177    1.1      haad }
   5178    1.1      haad 
   5179    1.1      haad static int
   5180    1.9  christos zfs_netbsd_write(void *v)
   5181    1.1      haad {
   5182    1.9  christos 	struct vop_write_args *ap = v;
   5183   1.48   hannken 	vnode_t *vp = ap->a_vp;
   5184   1.52   hannken 	znode_t *zp = VTOZ(vp);
   5185   1.52   hannken 	struct uio *uio = ap->a_uio;
   5186   1.52   hannken 	off_t osize = zp->z_size;
   5187   1.52   hannken 	int error, resid;
   5188    1.1      haad 
   5189   1.48   hannken 	switch (vp->v_type) {
   5190   1.48   hannken 	case VBLK:
   5191   1.48   hannken 	case VCHR:
   5192   1.96      yamt 		zfs_netbsd_update_mctime(vp);
   5193   1.48   hannken 		return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
   5194   1.48   hannken 	case VFIFO:
   5195   1.96      yamt 		zfs_netbsd_update_mctime(vp);
   5196   1.48   hannken 		return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
   5197   1.85      yamt 	case VREG:
   5198   1.85      yamt 		break;
   5199   1.85      yamt 	case VDIR:
   5200   1.85      yamt 		/*
   5201   1.85      yamt 		 * Note: this shouldn't happen as NetBSD's vn_openchk
   5202   1.85      yamt 		 * rejects FWRITE on VDIR.
   5203   1.85      yamt 		 */
   5204   1.85      yamt 		return EIO;
   5205   1.85      yamt 	default:
   5206   1.85      yamt 		return EINVAL;
   5207   1.48   hannken 	}
   5208   1.48   hannken 
   5209   1.52   hannken 	resid = uio->uio_resid;
   5210   1.52   hannken 	error = zfs_write(vp, uio, ioflags(ap->a_ioflag), ap->a_cred, NULL);
   5211   1.52   hannken 
   5212   1.52   hannken 	return error;
   5213    1.1      haad }
   5214    1.1      haad 
   5215    1.1      haad static int
   5216    1.9  christos zfs_netbsd_access(void *v)
   5217    1.1      haad {
   5218   1.14  riastrad 	struct vop_access_args /* {
   5219   1.14  riastrad 		struct vnode *a_vp;
   5220   1.67  christos 		accmode_t a_accmode;
   5221   1.14  riastrad 		kauth_cred_t a_cred;
   5222   1.14  riastrad 	} */ *ap = v;
   5223   1.81   hannken 	vnode_t *vp = ap->a_vp;
   5224   1.81   hannken 	znode_t *zp = VTOZ(vp);
   5225   1.81   hannken 	accmode_t accmode;
   5226   1.14  riastrad 	kauth_cred_t cred = ap->a_cred;
   5227   1.81   hannken 	int error = 0;
   5228    1.1      haad 
   5229    1.1      haad 	/*
   5230   1.81   hannken 	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
   5231   1.14  riastrad 	 */
   5232   1.81   hannken 	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
   5233   1.81   hannken 	if (accmode != 0)
   5234   1.81   hannken 		error = zfs_access(vp, accmode, 0, cred, NULL);
   5235    1.1      haad 
   5236   1.81   hannken 	/*
   5237   1.81   hannken 	 * VADMIN has to be handled by kauth_authorize_vnode().
   5238   1.81   hannken 	 */
   5239   1.81   hannken 	if (error == 0) {
   5240   1.81   hannken 		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
   5241   1.81   hannken 		if (accmode != 0) {
   5242   1.81   hannken 			error = kauth_authorize_vnode(cred,
   5243   1.81   hannken 			    KAUTH_ACCESS_ACTION(accmode, vp->v_type,
   5244   1.81   hannken 			    zp->z_mode & ALLPERMS), vp, NULL,
   5245   1.81   hannken 			    genfs_can_access(vp, cred, zp->z_uid,
   5246   1.81   hannken 			    zp->z_gid, zp->z_mode & ALLPERMS, NULL, accmode));
   5247   1.81   hannken 		}
   5248   1.81   hannken 	}
   5249   1.81   hannken 
   5250   1.81   hannken 	/*
   5251   1.81   hannken 	 * For VEXEC, ensure that at least one execute bit is set for
   5252   1.81   hannken 	 * non-directories.
   5253   1.81   hannken 	 */
   5254   1.81   hannken 	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
   5255   1.81   hannken 	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
   5256   1.81   hannken 		error = EACCES;
   5257   1.81   hannken 	}
   5258    1.1      haad 
   5259   1.32   hannken 	/* We expect EACCES as common error. */
   5260   1.32   hannken 	if (error == EPERM)
   5261   1.32   hannken 		error = EACCES;
   5262   1.32   hannken 
   5263   1.81   hannken 	return error;
   5264    1.2      haad }
   5265    1.1      haad 
   5266    1.2      haad static int
   5267    1.9  christos zfs_netbsd_lookup(void *v)
   5268    1.2      haad {
   5269   1.17   hannken 	struct vop_lookup_v2_args /* {
   5270   1.12  riastrad 		struct vnode *a_dvp;
   5271   1.12  riastrad 		struct vnode **a_vpp;
   5272   1.12  riastrad 		struct componentname *a_cnp;
   5273   1.12  riastrad 	} */ *ap = v;
   5274   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5275   1.12  riastrad 	struct vnode **vpp = ap->a_vpp;
   5276    1.2      haad 	struct componentname *cnp = ap->a_cnp;
   5277   1.86      yamt 	znode_t *zdp = VTOZ(dvp);
   5278   1.86      yamt 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
   5279   1.37   hannken 	char *nm, short_nm[31];
   5280   1.12  riastrad 	int error;
   5281   1.12  riastrad 
   5282   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5283   1.12  riastrad 
   5284   1.12  riastrad 	*vpp = NULL;
   5285   1.12  riastrad 
   5286   1.12  riastrad 	/*
   5287   1.12  riastrad 	 * Do an access check before the cache lookup.  zfs_lookup does
   5288   1.12  riastrad 	 * an access check too, but it's too scary to contemplate
   5289   1.12  riastrad 	 * injecting our namecache stuff into zfs internals.
   5290   1.12  riastrad 	 *
   5291   1.12  riastrad 	 * XXX Is this the correct access check?
   5292   1.12  riastrad 	 */
   5293   1.12  riastrad 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0)
   5294   1.12  riastrad 		goto out;
   5295   1.12  riastrad 
   5296   1.12  riastrad 	/*
   5297   1.12  riastrad 	 * Check the namecache before entering zfs_lookup.
   5298   1.12  riastrad 	 * cache_lookup does the locking dance for us.
   5299   1.12  riastrad 	 */
   5300   1.91      yamt 	if (zfsvfs->z_use_namecache) {
   5301   1.91      yamt 		if (cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
   5302   1.91      yamt 		    cnp->cn_nameiop, cnp->cn_flags, NULL, vpp)) {
   5303   1.91      yamt 			return *vpp == NULL ? ENOENT : 0;
   5304   1.91      yamt 		}
   5305   1.27       chs 	}
   5306   1.12  riastrad 
   5307   1.12  riastrad 	/*
   5308   1.12  riastrad 	 * zfs_lookup wants a null-terminated component name, but namei
   5309   1.12  riastrad 	 * gives us a pointer into the full pathname.
   5310   1.12  riastrad 	 */
   5311   1.37   hannken 	ASSERT(cnp->cn_namelen < PATH_MAX - 1);
   5312   1.37   hannken 	if (cnp->cn_namelen + 1 > sizeof(short_nm))
   5313   1.37   hannken 		nm = PNBUF_GET();
   5314   1.37   hannken 	else
   5315   1.37   hannken 		nm = short_nm;
   5316   1.12  riastrad 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5317   1.12  riastrad 
   5318   1.44   hannken 	error = zfs_lookup(dvp, nm, vpp, 0, cnp, cnp->cn_nameiop, cnp->cn_cred);
   5319   1.12  riastrad 
   5320   1.37   hannken 	if (nm != short_nm)
   5321   1.37   hannken 		PNBUF_PUT(nm);
   5322   1.37   hannken 
   5323   1.12  riastrad 	/*
   5324   1.14  riastrad 	 * Translate errors to match our namei insanity.  Also, if the
   5325   1.14  riastrad 	 * caller wants to create an entry here, it's apparently our
   5326   1.14  riastrad 	 * responsibility as lookup to make sure that's permissible.
   5327   1.14  riastrad 	 * Go figure.
   5328   1.12  riastrad 	 */
   5329   1.12  riastrad 	if (cnp->cn_flags & ISLASTCN) {
   5330   1.12  riastrad 		switch (cnp->cn_nameiop) {
   5331   1.12  riastrad 		case CREATE:
   5332   1.12  riastrad 		case RENAME:
   5333   1.12  riastrad 			if (error == ENOENT) {
   5334   1.14  riastrad 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
   5335   1.14  riastrad 				if (error)
   5336   1.14  riastrad 					break;
   5337   1.12  riastrad 				error = EJUSTRETURN;
   5338   1.12  riastrad 				break;
   5339   1.12  riastrad 			}
   5340   1.34   hannken 			break;
   5341   1.12  riastrad 		case DELETE:
   5342   1.34   hannken 			if (error == 0) {
   5343   1.34   hannken 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
   5344   1.34   hannken 				if (error) {
   5345   1.34   hannken 					VN_RELE(*vpp);
   5346   1.34   hannken 					*vpp = NULL;
   5347   1.34   hannken 				}
   5348   1.34   hannken 			}
   5349   1.12  riastrad 			break;
   5350   1.12  riastrad 		}
   5351   1.12  riastrad 	}
   5352   1.12  riastrad 
   5353   1.12  riastrad 	if (error) {
   5354   1.12  riastrad 		KASSERT(*vpp == NULL);
   5355   1.12  riastrad 		goto out;
   5356   1.12  riastrad 	}
   5357   1.27       chs 	KASSERT(*vpp != NULL);
   5358   1.12  riastrad 
   5359   1.12  riastrad 	if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
   5360   1.12  riastrad 		KASSERT(!(cnp->cn_flags & ISDOTDOT));
   5361   1.12  riastrad 		KASSERT(dvp == *vpp);
   5362   1.12  riastrad 	} else if ((cnp->cn_namelen == 2) &&
   5363   1.12  riastrad 	    (cnp->cn_nameptr[0] == '.') &&
   5364   1.12  riastrad 	    (cnp->cn_nameptr[1] == '.')) {
   5365   1.12  riastrad 		KASSERT(cnp->cn_flags & ISDOTDOT);
   5366   1.12  riastrad 	} else {
   5367   1.12  riastrad 		KASSERT(!(cnp->cn_flags & ISDOTDOT));
   5368   1.12  riastrad 	}
   5369   1.12  riastrad 
   5370   1.12  riastrad out:
   5371   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5372   1.12  riastrad 
   5373   1.12  riastrad 	/*
   5374   1.12  riastrad 	 * Insert name into cache if appropriate.
   5375   1.12  riastrad 	 */
   5376   1.12  riastrad 
   5377   1.86      yamt 	if (zfsvfs->z_use_namecache) {
   5378   1.86      yamt 		if (error == 0 ||
   5379   1.86      yamt 		    (error == ENOENT && cnp->cn_nameiop != CREATE))
   5380   1.86      yamt 			cache_enter(dvp, *vpp, cnp->cn_nameptr,
   5381   1.86      yamt 			    cnp->cn_namelen, cnp->cn_flags);
   5382   1.86      yamt 	}
   5383    1.2      haad 
   5384   1.12  riastrad 	return (error);
   5385    1.1      haad }
   5386    1.1      haad 
   5387    1.1      haad static int
   5388    1.9  christos zfs_netbsd_create(void *v)
   5389    1.1      haad {
   5390   1.16   hannken 	struct vop_create_v3_args /* {
   5391   1.12  riastrad 		struct vnode *a_dvp;
   5392   1.12  riastrad 		struct vnode **a_vpp;
   5393   1.12  riastrad 		struct componentname *a_cnp;
   5394   1.12  riastrad 		struct vattr *a_vap;
   5395   1.12  riastrad 	} */ *ap = v;
   5396   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5397   1.12  riastrad 	struct vnode **vpp = ap->a_vpp;
   5398    1.2      haad 	struct componentname *cnp = ap->a_cnp;
   5399   1.12  riastrad 	struct vattr *vap = ap->a_vap;
   5400   1.37   hannken 	char *nm;
   5401    1.2      haad 	int mode;
   5402   1.12  riastrad 	int error;
   5403   1.12  riastrad 
   5404   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5405   1.12  riastrad 
   5406    1.2      haad 	vattr_init_mask(vap);
   5407    1.2      haad 	mode = vap->va_mode & ALLPERMS;
   5408    1.1      haad 
   5409   1.37   hannken 	/* ZFS wants a null-terminated name. */
   5410   1.37   hannken 	nm = PNBUF_GET();
   5411   1.37   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5412   1.37   hannken 
   5413   1.12  riastrad 	/* XXX !EXCL is wrong here...  */
   5414   1.37   hannken 	error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL);
   5415   1.37   hannken 
   5416   1.37   hannken 	PNBUF_PUT(nm);
   5417   1.12  riastrad 
   5418   1.16   hannken 	KASSERT((error == 0) == (*vpp != NULL));
   5419   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5420   1.53      brad 	if (*vpp != NULL)
   5421   1.53      brad 		VOP_UNLOCK(*vpp, 0);
   5422   1.12  riastrad 
   5423   1.12  riastrad 	return (error);
   5424    1.2      haad }
   5425    1.1      haad 
   5426    1.2      haad static int
   5427   1.48   hannken zfs_netbsd_mknod(void *v)
   5428   1.48   hannken {
   5429   1.48   hannken 	struct vop_mknod_v3_args /* {
   5430   1.48   hannken 		struct vnode *a_dvp;
   5431   1.48   hannken 		struct vnode **a_vpp;
   5432   1.48   hannken 		struct componentname *a_cnp;
   5433   1.48   hannken 		struct vattr *a_vap;
   5434   1.48   hannken 	} */ *ap = v;
   5435   1.48   hannken 	struct vnode *dvp = ap->a_dvp;
   5436   1.48   hannken 	struct vnode **vpp = ap->a_vpp;
   5437   1.48   hannken 	struct componentname *cnp = ap->a_cnp;
   5438   1.48   hannken 	struct vattr *vap = ap->a_vap;
   5439   1.48   hannken 	char *nm;
   5440   1.48   hannken 	int mode;
   5441   1.48   hannken 	int error;
   5442   1.48   hannken 
   5443   1.48   hannken 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5444   1.48   hannken 
   5445   1.48   hannken 	vattr_init_mask(vap);
   5446   1.48   hannken 	mode = vap->va_mode & ALLPERMS;
   5447   1.48   hannken 
   5448   1.48   hannken 	/* ZFS wants a null-terminated name. */
   5449   1.48   hannken 	nm = PNBUF_GET();
   5450   1.48   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5451   1.48   hannken 
   5452   1.48   hannken 	/* XXX !EXCL is wrong here...  */
   5453   1.48   hannken 	error = zfs_create(dvp, nm, vap, !EXCL, mode, vpp, cnp->cn_cred, NULL);
   5454   1.48   hannken 
   5455   1.48   hannken 	PNBUF_PUT(nm);
   5456   1.48   hannken 
   5457   1.48   hannken 	KASSERT((error == 0) == (*vpp != NULL));
   5458   1.48   hannken 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5459   1.53      brad 	if (*vpp != NULL)
   5460   1.53      brad 		VOP_UNLOCK(*vpp, 0);
   5461   1.48   hannken 
   5462   1.48   hannken 	return (error);
   5463   1.48   hannken }
   5464   1.48   hannken 
   5465   1.48   hannken static int
   5466    1.9  christos zfs_netbsd_remove(void *v)
   5467    1.2      haad {
   5468   1.76   thorpej 	struct vop_remove_v3_args /* {
   5469   1.12  riastrad 		struct vnode *a_dvp;
   5470   1.12  riastrad 		struct vnode *a_vp;
   5471   1.12  riastrad 		struct componentname *a_cnp;
   5472   1.76   thorpej 		nlink_t ctx_vp_new_nlink;
   5473   1.12  riastrad 	} */ *ap = v;
   5474   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5475   1.12  riastrad 	struct vnode *vp = ap->a_vp;
   5476   1.12  riastrad 	struct componentname *cnp = ap->a_cnp;
   5477   1.37   hannken 	char *nm;
   5478   1.12  riastrad 	int error;
   5479   1.12  riastrad 
   5480   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5481   1.12  riastrad 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
   5482   1.12  riastrad 
   5483   1.37   hannken 	/* ZFS wants a null-terminated name. */
   5484   1.37   hannken 	nm = PNBUF_GET();
   5485   1.37   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5486   1.37   hannken 
   5487   1.37   hannken 	error = zfs_remove(dvp, vp, nm, cnp->cn_cred);
   5488   1.37   hannken 
   5489   1.76   thorpej 	/*
   5490   1.76   thorpej 	 * XXX Should update ctx_vp_new_nlink, but for now the
   5491   1.76   thorpej 	 * XXX the kevent sent on "vp"  matches historical behavior.
   5492   1.76   thorpej 	 */
   5493   1.76   thorpej 
   5494   1.37   hannken 	PNBUF_PUT(nm);
   5495   1.27       chs 	vput(vp);
   5496   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5497   1.12  riastrad 	return (error);
   5498    1.2      haad }
   5499    1.1      haad 
   5500    1.2      haad static int
   5501    1.9  christos zfs_netbsd_mkdir(void *v)
   5502    1.2      haad {
   5503   1.16   hannken 	struct vop_mkdir_v3_args /* {
   5504   1.12  riastrad 		struct vnode *a_dvp;
   5505   1.12  riastrad 		struct vnode **a_vpp;
   5506   1.12  riastrad 		struct componentname *a_cnp;
   5507   1.12  riastrad 		struct vattr *a_vap;
   5508   1.12  riastrad 	} */ *ap = v;
   5509   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5510   1.12  riastrad 	struct vnode **vpp = ap->a_vpp;
   5511   1.12  riastrad 	struct componentname *cnp = ap->a_cnp;
   5512   1.12  riastrad 	struct vattr *vap = ap->a_vap;
   5513   1.37   hannken 	char *nm;
   5514   1.12  riastrad 	int error;
   5515   1.12  riastrad 
   5516   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5517   1.12  riastrad 
   5518    1.2      haad 	vattr_init_mask(vap);
   5519    1.1      haad 
   5520   1.37   hannken 	/* ZFS wants a null-terminated name. */
   5521   1.37   hannken 	nm = PNBUF_GET();
   5522   1.37   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5523   1.37   hannken 
   5524   1.37   hannken 	error = zfs_mkdir(dvp, nm, vap, vpp, cnp->cn_cred);
   5525   1.37   hannken 
   5526   1.37   hannken 	PNBUF_PUT(nm);
   5527   1.12  riastrad 
   5528   1.16   hannken 	KASSERT((error == 0) == (*vpp != NULL));
   5529   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5530   1.53      brad 	if (*vpp != NULL)
   5531   1.53      brad 		VOP_UNLOCK(*vpp, 0);
   5532   1.12  riastrad 
   5533   1.12  riastrad 	return (error);
   5534    1.1      haad }
   5535    1.1      haad 
   5536    1.1      haad static int
   5537    1.9  christos zfs_netbsd_rmdir(void *v)
   5538    1.1      haad {
   5539   1.25  riastrad 	struct vop_rmdir_v2_args /* {
   5540   1.12  riastrad 		struct vnode *a_dvp;
   5541   1.12  riastrad 		struct vnode *a_vp;
   5542   1.12  riastrad 		struct componentname *a_cnp;
   5543   1.12  riastrad 	} */ *ap = v;
   5544   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5545   1.12  riastrad 	struct vnode *vp = ap->a_vp;
   5546    1.2      haad 	struct componentname *cnp = ap->a_cnp;
   5547   1.37   hannken 	char *nm;
   5548   1.12  riastrad 	int error;
   5549   1.12  riastrad 
   5550   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5551   1.12  riastrad 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
   5552   1.12  riastrad 
   5553   1.37   hannken 	/* ZFS wants a null-terminated name. */
   5554   1.37   hannken 	nm = PNBUF_GET();
   5555   1.37   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5556   1.37   hannken 
   5557   1.37   hannken 	error = zfs_rmdir(dvp, vp, nm, cnp->cn_cred);
   5558   1.37   hannken 
   5559   1.37   hannken 	PNBUF_PUT(nm);
   5560   1.27       chs 	vput(vp);
   5561   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5562   1.12  riastrad 	return error;
   5563    1.2      haad }
   5564    1.1      haad 
   5565    1.2      haad static int
   5566    1.9  christos zfs_netbsd_readdir(void *v)
   5567    1.2      haad {
   5568    1.9  christos 	struct vop_readdir_args *ap = v;
   5569    1.1      haad 
   5570    1.2      haad 	return (zfs_readdir(ap->a_vp, ap->a_uio, ap->a_cred, ap->a_eofflag,
   5571   1.27       chs 		ap->a_ncookies, ap->a_cookies));
   5572    1.2      haad }
   5573    1.1      haad 
   5574    1.2      haad static int
   5575    1.9  christos zfs_netbsd_fsync(void *v)
   5576    1.2      haad {
   5577    1.9  christos 	struct vop_fsync_args *ap = v;
   5578   1.93      yamt 	struct vnode *vp = ap->a_vp;
   5579   1.93      yamt 	int flags = ap->a_flags;
   5580   1.93      yamt 	int error;
   5581   1.93      yamt 
   5582   1.93      yamt 	/*
   5583   1.93      yamt 	 * Regardless of whether this is required for standards conformance,
   5584   1.93      yamt 	 * this is the logical behavior when fsync() is called on a file with
   5585   1.93      yamt 	 * dirty pages.  We use async putpages since the ZIL transactions are
   5586   1.93      yamt 	 * already going to be pushed out as part of the zil_commit().
   5587   1.93      yamt 	 */
   5588   1.93      yamt 	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
   5589   1.93      yamt 	error = VOP_PUTPAGES(vp, trunc_page(ap->a_offlo),
   5590   1.93      yamt 	    round_page(ap->a_offhi), PGO_CLEANIT);
   5591   1.93      yamt 	if (error != 0) {
   5592   1.93      yamt 		return error;
   5593   1.93      yamt 	}
   5594    1.1      haad 
   5595   1.92      yamt 	/*
   5596   1.92      yamt 	 * it isn't safe or necessary to call zil_commit when reclaiming
   5597   1.92      yamt 	 * a vnode.
   5598   1.92      yamt 	 *
   5599   1.92      yamt 	 * - it can deadlock by attempting vcache_get on itself.
   5600   1.92      yamt 	 *   (zfs_get_data)
   5601   1.92      yamt 	 *
   5602   1.92      yamt 	 * - for the purpose of vnode reclaim, we only need to push the
   5603   1.92      yamt 	 *   data to the txg. no need to log the intent.
   5604   1.94      yamt 	 *
   5605   1.94      yamt 	 * no need to commit the zil for ioflush either. (FSYNC_LAZY)
   5606   1.92      yamt 	 */
   5607   1.94      yamt 	if ((flags & (FSYNC_RECLAIM|FSYNC_LAZY)) != 0) {
   5608   1.92      yamt 		return (0);
   5609   1.92      yamt 	}
   5610   1.92      yamt 
   5611   1.93      yamt 	return (zfs_fsync(vp, flags, ap->a_cred, NULL));
   5612    1.1      haad }
   5613    1.1      haad 
   5614    1.1      haad static int
   5615   1.61  riastrad zfs_spec_fsync(void *v)
   5616   1.61  riastrad {
   5617   1.61  riastrad 	struct vop_fsync_args *ap = v;
   5618   1.61  riastrad 	int error;
   5619   1.61  riastrad 
   5620   1.61  riastrad 	error = spec_fsync(v);
   5621   1.61  riastrad 	if (error)
   5622   1.61  riastrad 		return error;
   5623   1.61  riastrad 
   5624   1.61  riastrad 	return (zfs_fsync(ap->a_vp, ap->a_flags, ap->a_cred, NULL));
   5625   1.61  riastrad }
   5626   1.61  riastrad 
   5627   1.61  riastrad static int
   5628    1.9  christos zfs_netbsd_getattr(void *v)
   5629    1.1      haad {
   5630    1.9  christos 	struct vop_getattr_args *ap = v;
   5631    1.2      haad 	vattr_t *vap = ap->a_vap;
   5632    1.2      haad 	xvattr_t xvap;
   5633    1.2      haad 	u_long fflags = 0;
   5634    1.2      haad 	int error;
   5635    1.2      haad 
   5636    1.2      haad 	xva_init(&xvap);
   5637    1.2      haad 	xvap.xva_vattr = *vap;
   5638    1.2      haad 	xvap.xva_vattr.va_mask |= AT_XVATTR;
   5639    1.2      haad 
   5640    1.2      haad 	/* Convert chflags into ZFS-type flags. */
   5641    1.2      haad 	/* XXX: what about SF_SETTABLE?. */
   5642    1.2      haad 	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
   5643    1.2      haad 	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
   5644    1.2      haad 	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
   5645    1.2      haad 	XVA_SET_REQ(&xvap, XAT_NODUMP);
   5646    1.2      haad 	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred, NULL);
   5647    1.2      haad 	if (error != 0)
   5648    1.2      haad 		return (error);
   5649    1.1      haad 
   5650    1.2      haad 	/* Convert ZFS xattr into chflags. */
   5651    1.2      haad #define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
   5652    1.2      haad 	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
   5653    1.2      haad 		fflags |= (fflag);					\
   5654    1.2      haad } while (0)
   5655    1.2      haad 	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
   5656    1.2      haad 	    xvap.xva_xoptattrs.xoa_immutable);
   5657    1.2      haad 	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
   5658    1.2      haad 	    xvap.xva_xoptattrs.xoa_appendonly);
   5659    1.2      haad 	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
   5660    1.2      haad 	    xvap.xva_xoptattrs.xoa_nounlink);
   5661    1.2      haad 	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
   5662    1.2      haad 	    xvap.xva_xoptattrs.xoa_nodump);
   5663    1.2      haad #undef	FLAG_CHECK
   5664    1.2      haad 	*vap = xvap.xva_vattr;
   5665    1.2      haad 	vap->va_flags = fflags;
   5666    1.1      haad 	return (0);
   5667    1.1      haad }
   5668    1.1      haad 
   5669    1.1      haad static int
   5670    1.9  christos zfs_netbsd_setattr(void *v)
   5671    1.1      haad {
   5672    1.9  christos 	struct vop_setattr_args *ap = v;
   5673    1.2      haad 	vnode_t *vp = ap->a_vp;
   5674    1.2      haad 	vattr_t *vap = ap->a_vap;
   5675    1.2      haad 	cred_t *cred = ap->a_cred;
   5676   1.33   hannken 	znode_t *zp = VTOZ(vp);
   5677   1.86      yamt 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   5678    1.2      haad 	xvattr_t xvap;
   5679   1.35   hannken 	kauth_action_t action;
   5680   1.35   hannken 	u_long fflags, sfflags = 0;
   5681    1.2      haad 	uint64_t zflags;
   5682   1.33   hannken 	int error, flags = 0;
   5683   1.35   hannken 	bool changing_sysflags;
   5684    1.1      haad 
   5685    1.2      haad 	vattr_init_mask(vap);
   5686    1.2      haad 	vap->va_mask &= ~AT_NOSET;
   5687   1.21  riastrad 	if (ISSET(vap->va_vaflags, VA_UTIMES_NULL))
   5688   1.21  riastrad 		flags |= ATTR_UTIME;
   5689    1.1      haad 
   5690    1.2      haad 	xva_init(&xvap);
   5691    1.2      haad 	xvap.xva_vattr = *vap;
   5692    1.1      haad 
   5693   1.27       chs 	zflags = VTOZ(vp)->z_pflags;
   5694    1.1      haad 
   5695   1.57   hannken 	/* Ignore size changes on device nodes. */
   5696   1.57   hannken 	if (vp->v_type == VBLK || vp->v_type == VCHR)
   5697   1.57   hannken 		xvap.xva_vattr.va_mask &= ~AT_SIZE;
   5698    1.2      haad 	if (vap->va_flags != VNOVAL) {
   5699    1.2      haad 		int error;
   5700    1.1      haad 
   5701    1.2      haad 		fflags = vap->va_flags;
   5702    1.2      haad 		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_NODUMP)) != 0)
   5703    1.2      haad 			return (EOPNOTSUPP);
   5704    1.1      haad 
   5705    1.2      haad #define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
   5706    1.2      haad 	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
   5707    1.2      haad 	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
   5708    1.2      haad 		XVA_SET_REQ(&xvap, (xflag));				\
   5709    1.2      haad 		(xfield) = ((fflags & (fflag)) != 0);			\
   5710   1.35   hannken 		if (((fflag) & SF_SETTABLE) != 0)			\
   5711   1.35   hannken 			sfflags |= (fflag);				\
   5712    1.2      haad 	}								\
   5713    1.2      haad } while (0)
   5714    1.2      haad 		/* Convert chflags into ZFS-type flags. */
   5715    1.2      haad 		/* XXX: what about SF_SETTABLE?. */
   5716    1.2      haad 		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
   5717    1.2      haad 		    xvap.xva_xoptattrs.xoa_immutable);
   5718    1.2      haad 		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
   5719    1.2      haad 		    xvap.xva_xoptattrs.xoa_appendonly);
   5720    1.2      haad 		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
   5721    1.2      haad 		    xvap.xva_xoptattrs.xoa_nounlink);
   5722    1.2      haad 		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
   5723    1.2      haad 		    xvap.xva_xoptattrs.xoa_nodump);
   5724    1.2      haad #undef	FLAG_CHANGE
   5725   1.35   hannken 
   5726   1.35   hannken 		action = KAUTH_VNODE_WRITE_FLAGS;
   5727   1.35   hannken 		changing_sysflags = false;
   5728   1.35   hannken 
   5729   1.35   hannken 		if (zflags & (ZFS_IMMUTABLE|ZFS_APPENDONLY|ZFS_NOUNLINK)) {
   5730   1.35   hannken 			action |= KAUTH_VNODE_HAS_SYSFLAGS;
   5731   1.35   hannken 		}
   5732   1.35   hannken 		if (sfflags != 0) {
   5733   1.35   hannken 			action |= KAUTH_VNODE_WRITE_SYSFLAGS;
   5734   1.35   hannken 			changing_sysflags = true;
   5735   1.35   hannken 		}
   5736   1.35   hannken 
   5737   1.35   hannken 		error = kauth_authorize_vnode(cred, action, vp, NULL,
   5738   1.67  christos 		    genfs_can_chflags(vp, cred, zp->z_uid, changing_sysflags));
   5739   1.35   hannken 		if (error)
   5740   1.35   hannken 			return error;
   5741    1.1      haad 	}
   5742   1.33   hannken 
   5743   1.33   hannken 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
   5744   1.33   hannken 	    vap->va_birthtime.tv_sec != VNOVAL) {
   5745   1.33   hannken 		error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp,
   5746   1.67  christos 		     NULL, genfs_can_chtimes(vp, cred, zp->z_uid,
   5747   1.67  christos 		     vap->va_vaflags));
   5748   1.33   hannken 		if (error)
   5749   1.33   hannken 			return error;
   5750   1.33   hannken 	}
   5751   1.33   hannken 
   5752   1.52   hannken 	error = zfs_setattr(vp, (vattr_t *)&xvap, flags, cred, NULL);
   5753   1.70   hannken 	if (error)
   5754   1.70   hannken 		return error;
   5755   1.70   hannken 
   5756   1.86      yamt 	if (zfsvfs->z_use_namecache)
   5757   1.86      yamt 		cache_enter_id(vp, zp->z_mode, zp->z_uid, zp->z_gid, true);
   5758   1.52   hannken 
   5759   1.52   hannken 	return error;
   5760    1.2      haad }
   5761    1.1      haad 
   5762    1.2      haad static int
   5763    1.9  christos zfs_netbsd_rename(void *v)
   5764    1.9  christos {
   5765   1.76   thorpej 	struct vop_rename_args /* {
   5766    1.2      haad 		struct vnode *a_fdvp;
   5767    1.2      haad 		struct vnode *a_fvp;
   5768    1.2      haad 		struct componentname *a_fcnp;
   5769    1.2      haad 		struct vnode *a_tdvp;
   5770    1.2      haad 		struct vnode *a_tvp;
   5771    1.2      haad 		struct componentname *a_tcnp;
   5772    1.9  christos 	} */ *ap = v;
   5773    1.2      haad 	vnode_t *fdvp = ap->a_fdvp;
   5774    1.2      haad 	vnode_t *fvp = ap->a_fvp;
   5775   1.12  riastrad 	struct componentname *fcnp = ap->a_fcnp;
   5776    1.2      haad 	vnode_t *tdvp = ap->a_tdvp;
   5777    1.2      haad 	vnode_t *tvp = ap->a_tvp;
   5778   1.12  riastrad 	struct componentname *tcnp = ap->a_tcnp;
   5779   1.12  riastrad 	kauth_cred_t cred;
   5780    1.2      haad 	int error;
   5781    1.1      haad 
   5782   1.12  riastrad 	KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE);
   5783   1.27       chs 	KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE);
   5784   1.12  riastrad 	KASSERT(fdvp->v_type == VDIR);
   5785   1.12  riastrad 	KASSERT(tdvp->v_type == VDIR);
   5786   1.12  riastrad 
   5787   1.12  riastrad 	cred = fcnp->cn_cred;
   5788   1.12  riastrad 
   5789   1.12  riastrad 	/*
   5790   1.12  riastrad 	 * XXX Want a better equality test.  `tcnp->cn_cred == cred'
   5791   1.12  riastrad 	 * hoses p2k because puffs transmits the creds separately and
   5792   1.12  riastrad 	 * allocates distinct but equivalent structures for them.
   5793   1.12  riastrad 	 */
   5794   1.12  riastrad 	KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred));
   5795   1.12  riastrad 
   5796   1.12  riastrad 	/*
   5797   1.12  riastrad 	 * Drop the insane locks.
   5798   1.12  riastrad 	 */
   5799   1.27       chs 	VOP_UNLOCK(tdvp, 0);
   5800   1.27       chs 	if (tvp != NULL && tvp != tdvp)
   5801   1.27       chs 		VOP_UNLOCK(tvp, 0);
   5802   1.12  riastrad 
   5803   1.12  riastrad 	/*
   5804   1.12  riastrad 	 * Release the source and target nodes; zfs_rename will look
   5805   1.12  riastrad 	 * them up again once the locking situation is sane.
   5806   1.12  riastrad 	 */
   5807   1.12  riastrad 	VN_RELE(fvp);
   5808   1.12  riastrad 	if (tvp != NULL)
   5809   1.12  riastrad 		VN_RELE(tvp);
   5810   1.27       chs 	fvp = NULL;
   5811   1.27       chs 	tvp = NULL;
   5812    1.1      haad 
   5813   1.12  riastrad 	/*
   5814   1.12  riastrad 	 * Do the rename ZFSly.
   5815   1.12  riastrad 	 */
   5816   1.27       chs 	error = zfs_rename(fdvp, &fvp, fcnp, tdvp, &tvp, tcnp, cred);
   5817   1.12  riastrad 
   5818   1.12  riastrad 	/*
   5819   1.12  riastrad 	 * Release the directories now too, because the VOP_RENAME
   5820   1.12  riastrad 	 * protocol is insane.
   5821   1.12  riastrad 	 */
   5822   1.27       chs 
   5823    1.2      haad 	VN_RELE(fdvp);
   5824   1.12  riastrad 	VN_RELE(tdvp);
   5825   1.31   hannken 	if (fvp != NULL)
   5826   1.31   hannken 		VN_RELE(fvp);
   5827   1.27       chs 	if (tvp != NULL)
   5828   1.27       chs 		VN_RELE(tvp);
   5829    1.1      haad 
   5830    1.1      haad 	return (error);
   5831    1.1      haad }
   5832    1.1      haad 
   5833    1.1      haad static int
   5834    1.9  christos zfs_netbsd_symlink(void *v)
   5835    1.1      haad {
   5836   1.16   hannken 	struct vop_symlink_v3_args /* {
   5837   1.12  riastrad 		struct vnode *a_dvp;
   5838   1.12  riastrad 		struct vnode **a_vpp;
   5839   1.12  riastrad 		struct componentname *a_cnp;
   5840   1.12  riastrad 		struct vattr *a_vap;
   5841   1.12  riastrad 		char *a_target;
   5842   1.12  riastrad 	} */ *ap = v;
   5843   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5844   1.12  riastrad 	struct vnode **vpp = ap->a_vpp;
   5845    1.2      haad 	struct componentname *cnp = ap->a_cnp;
   5846   1.12  riastrad 	struct vattr *vap = ap->a_vap;
   5847   1.12  riastrad 	char *target = ap->a_target;
   5848   1.37   hannken 	char *nm;
   5849   1.12  riastrad 	int error;
   5850   1.12  riastrad 
   5851   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5852   1.12  riastrad 
   5853    1.2      haad 	vap->va_type = VLNK;	/* Netbsd: Syscall only sets va_mode. */
   5854    1.2      haad 	vattr_init_mask(vap);
   5855    1.1      haad 
   5856   1.37   hannken 	/* ZFS wants a null-terminated name. */
   5857   1.37   hannken 	nm = PNBUF_GET();
   5858   1.37   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5859   1.37   hannken 
   5860   1.37   hannken 	error = zfs_symlink(dvp, vpp, nm, vap, target, cnp->cn_cred, 0);
   5861   1.37   hannken 
   5862   1.37   hannken 	PNBUF_PUT(nm);
   5863   1.16   hannken 	KASSERT((error == 0) == (*vpp != NULL));
   5864   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5865   1.53      brad 	if (*vpp != NULL)
   5866   1.53      brad 		VOP_UNLOCK(*vpp, 0);
   5867   1.12  riastrad 
   5868   1.12  riastrad 	return (error);
   5869    1.4      haad }
   5870    1.4      haad 
   5871    1.2      haad static int
   5872    1.9  christos zfs_netbsd_readlink(void *v)
   5873    1.2      haad {
   5874    1.9  christos 	struct vop_readlink_args *ap = v;
   5875    1.1      haad 
   5876    1.2      haad 	return (zfs_readlink(ap->a_vp, ap->a_uio, ap->a_cred, NULL));
   5877    1.2      haad }
   5878    1.1      haad 
   5879    1.2      haad static int
   5880    1.9  christos zfs_netbsd_link(void *v)
   5881    1.2      haad {
   5882   1.23  riastrad 	struct vop_link_v2_args /* {
   5883   1.12  riastrad 		struct vnode *a_dvp;
   5884   1.12  riastrad 		struct vnode *a_vp;
   5885   1.12  riastrad 		struct componentname *a_cnp;
   5886   1.12  riastrad 	} */ *ap = v;
   5887   1.12  riastrad 	struct vnode *dvp = ap->a_dvp;
   5888   1.12  riastrad 	struct vnode *vp = ap->a_vp;
   5889    1.2      haad 	struct componentname *cnp = ap->a_cnp;
   5890   1.37   hannken 	char *nm;
   5891   1.12  riastrad 	int error;
   5892   1.12  riastrad 
   5893   1.12  riastrad 	KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE);
   5894    1.1      haad 
   5895   1.37   hannken 	/* ZFS wants a null-terminated name. */
   5896   1.37   hannken 	nm = PNBUF_GET();
   5897   1.37   hannken 	(void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
   5898   1.37   hannken 
   5899   1.78  christos 	if ((error = vn_lock(vp, LK_EXCLUSIVE)) != 0) {
   5900   1.78  christos 		/* XXX: No ABORTOP? */
   5901   1.78  christos 		PNBUF_PUT(nm);
   5902   1.78  christos 		return error;
   5903   1.78  christos 	}
   5904   1.78  christos 	error = kauth_authorize_vnode(cnp->cn_cred, KAUTH_VNODE_ADD_LINK, vp,
   5905   1.78  christos 	    dvp, 0);
   5906   1.78  christos 	if (error)
   5907   1.78  christos 		goto out;
   5908   1.37   hannken 	error = zfs_link(dvp, vp, nm, cnp->cn_cred,
   5909   1.27       chs 	    NULL, 0);
   5910   1.37   hannken 
   5911   1.78  christos out:
   5912   1.37   hannken 	PNBUF_PUT(nm);
   5913   1.27       chs 	VOP_UNLOCK(vp, 0);
   5914   1.27       chs 	return error;
   5915    1.2      haad }
   5916    1.1      haad 
   5917    1.2      haad static int
   5918    1.9  christos zfs_netbsd_inactive(void *v)
   5919    1.2      haad {
   5920   1.24  riastrad 	struct vop_inactive_v2_args *ap = v;
   5921    1.2      haad 	vnode_t *vp = ap->a_vp;
   5922    1.2      haad 	znode_t	*zp = VTOZ(vp);
   5923   1.97      yamt 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   5924   1.97      yamt 	int error;
   5925   1.97      yamt 
   5926   1.97      yamt 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
   5927   1.97      yamt 	if (zp->z_sa_hdl == NULL) {
   5928   1.97      yamt 		/*
   5929   1.97      yamt 		 * The fs has been unmounted, or we did a
   5930   1.97      yamt 		 * suspend/resume and this file no longer exists.
   5931   1.97      yamt 		 */
   5932   1.97      yamt 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
   5933   1.97      yamt 		*ap->a_recycle = true;
   5934   1.97      yamt 		return (0);
   5935   1.97      yamt 	}
   5936   1.97      yamt 
   5937   1.97      yamt 	if (zp->z_unlinked) {
   5938   1.97      yamt 		/*
   5939   1.97      yamt 		 * Fast path to recycle a vnode of a removed file.
   5940   1.97      yamt 		 */
   5941   1.97      yamt 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
   5942   1.97      yamt 		*ap->a_recycle = true;
   5943   1.97      yamt 		return (0);
   5944   1.97      yamt 	}
   5945   1.97      yamt 
   5946   1.97      yamt 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
   5947   1.97      yamt 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
   5948    1.1      haad 
   5949   1.97      yamt 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   5950   1.97      yamt 		zfs_sa_upgrade_txholds(tx, zp);
   5951   1.97      yamt 		error = dmu_tx_assign(tx, TXG_WAIT);
   5952   1.97      yamt 		if (error) {
   5953   1.97      yamt 			dmu_tx_abort(tx);
   5954   1.97      yamt 		} else {
   5955   1.97      yamt 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
   5956   1.97      yamt 			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
   5957   1.97      yamt 			zp->z_atime_dirty = 0;
   5958   1.97      yamt 			dmu_tx_commit(tx);
   5959   1.97      yamt 		}
   5960   1.97      yamt 	}
   5961   1.97      yamt 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
   5962   1.24  riastrad 
   5963   1.97      yamt 	*ap->a_recycle = false;
   5964    1.1      haad 	return (0);
   5965    1.1      haad }
   5966    1.1      haad 
   5967    1.1      haad static int
   5968    1.9  christos zfs_netbsd_reclaim(void *v)
   5969    1.1      haad {
   5970   1.26  riastrad 	struct vop_reclaim_v2_args /* {
   5971   1.12  riastrad 		struct vnode *a_vp;
   5972   1.12  riastrad 	} */ *ap = v;
   5973   1.12  riastrad 	struct vnode *vp = ap->a_vp;
   5974   1.12  riastrad 	znode_t	*zp;
   5975    1.2      haad 	zfsvfs_t *zfsvfs;
   5976   1.12  riastrad 	int error;
   5977    1.1      haad 
   5978   1.27       chs 	VOP_UNLOCK(vp, 0);
   5979   1.12  riastrad 	zp = VTOZ(vp);
   5980    1.2      haad 	zfsvfs = zp->z_zfsvfs;
   5981    1.1      haad 
   5982   1.27       chs 	KASSERTMSG(!vn_has_cached_data(vp), "vp %p", vp);
   5983    1.3      haad 
   5984   1.12  riastrad 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
   5985   1.27       chs 	if (zp->z_sa_hdl == NULL)
   5986   1.27       chs 		zfs_znode_free(zp);
   5987   1.27       chs 	else
   5988   1.27       chs 		zfs_zinactive(zp);
   5989   1.12  riastrad 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
   5990   1.12  riastrad 	return 0;
   5991    1.2      haad }
   5992    1.1      haad 
   5993    1.2      haad static int
   5994    1.9  christos zfs_netbsd_fid(void *v)
   5995    1.2      haad {
   5996    1.9  christos 	struct vop_fid_args *ap = v;
   5997    1.1      haad 
   5998    1.2      haad 	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
   5999    1.1      haad }
   6000    1.1      haad 
   6001    1.1      haad static int
   6002    1.9  christos zfs_netbsd_pathconf(void *v)
   6003    1.1      haad {
   6004    1.9  christos 	struct vop_pathconf_args *ap = v;
   6005    1.2      haad 	ulong_t val;
   6006    1.1      haad 	int error;
   6007    1.1      haad 
   6008    1.2      haad 	error = zfs_pathconf(ap->a_vp, ap->a_name, &val, curthread->l_cred, NULL);
   6009    1.2      haad 	if (error == 0)
   6010    1.2      haad 		*ap->a_retval = val;
   6011    1.2      haad 	else if (error == EOPNOTSUPP) {
   6012    1.2      haad 		switch (ap->a_name) {
   6013    1.2      haad 		case _PC_NAME_MAX:
   6014    1.2      haad 			*ap->a_retval = NAME_MAX;
   6015    1.2      haad 			return (0);
   6016    1.2      haad 		case _PC_PATH_MAX:
   6017    1.2      haad 			*ap->a_retval = PATH_MAX;
   6018    1.2      haad 			return (0);
   6019    1.2      haad 		case _PC_LINK_MAX:
   6020    1.2      haad 			*ap->a_retval = LINK_MAX;
   6021    1.2      haad 			return (0);
   6022    1.2      haad 		case _PC_MAX_CANON:
   6023    1.2      haad 			*ap->a_retval = MAX_CANON;
   6024    1.2      haad 			return (0);
   6025    1.2      haad 		case _PC_MAX_INPUT:
   6026    1.2      haad 			*ap->a_retval = MAX_INPUT;
   6027    1.2      haad 			return (0);
   6028    1.2      haad 		case _PC_PIPE_BUF:
   6029    1.2      haad 			*ap->a_retval = PIPE_BUF;
   6030    1.2      haad 			return (0);
   6031    1.2      haad 		case _PC_CHOWN_RESTRICTED:
   6032    1.2      haad 			*ap->a_retval = 1;
   6033    1.2      haad 			return (0);
   6034   1.13  riastrad 		case _PC_NO_TRUNC:
   6035   1.13  riastrad 			*ap->a_retval = 1;
   6036   1.13  riastrad 			return (0);
   6037    1.2      haad 		case _PC_VDISABLE:
   6038    1.2      haad 			*ap->a_retval = _POSIX_VDISABLE;
   6039    1.2      haad 			return (0);
   6040    1.2      haad 		default:
   6041    1.2      haad 			return (EINVAL);
   6042    1.2      haad 		}
   6043    1.2      haad 		/* NOTREACHED */
   6044   1.12  riastrad 	}
   6045    1.1      haad 	return (error);
   6046    1.1      haad }
   6047    1.1      haad 
   6048   1.20  riastrad static int
   6049   1.20  riastrad zfs_netbsd_advlock(void *v)
   6050   1.20  riastrad {
   6051   1.20  riastrad 	struct vop_advlock_args /* {
   6052   1.20  riastrad 		struct vnode *a_vp;
   6053   1.20  riastrad 		void *a_id;
   6054   1.20  riastrad 		int a_op;
   6055   1.20  riastrad 		struct flock *a_fl;
   6056   1.20  riastrad 		int a_flags;
   6057   1.20  riastrad 	} */ *ap = v;
   6058   1.20  riastrad 	struct vnode *vp;
   6059   1.20  riastrad 	struct znode *zp;
   6060   1.20  riastrad 	struct zfsvfs *zfsvfs;
   6061   1.20  riastrad 	int error;
   6062   1.20  riastrad 
   6063   1.20  riastrad 	vp = ap->a_vp;
   6064   1.20  riastrad 	zp = VTOZ(vp);
   6065   1.20  riastrad 	zfsvfs = zp->z_zfsvfs;
   6066   1.20  riastrad 
   6067   1.20  riastrad 	ZFS_ENTER(zfsvfs);
   6068   1.20  riastrad 	ZFS_VERIFY_ZP(zp);
   6069   1.27       chs 	error = lf_advlock(ap, &zp->z_lockf, zp->z_size);
   6070   1.20  riastrad 	ZFS_EXIT(zfsvfs);
   6071   1.20  riastrad 
   6072   1.20  riastrad 	return error;
   6073   1.20  riastrad }
   6074   1.20  riastrad 
   6075   1.27       chs static int
   6076    1.2      haad zfs_netbsd_getpages(void *v)
   6077    1.2      haad {
   6078   1.27       chs 	struct vop_getpages_args /* {
   6079   1.27       chs 		struct vnode *a_vp;
   6080   1.27       chs 		voff_t a_offset;
   6081   1.27       chs 		struct vm_page **a_m;
   6082   1.27       chs 		int *a_count;
   6083   1.27       chs 		int a_centeridx;
   6084   1.27       chs 		vm_prot_t a_access_type;
   6085   1.27       chs 		int a_advice;
   6086   1.27       chs 		int a_flags;
   6087   1.27       chs 	} */ * const ap = v;
   6088   1.27       chs 
   6089   1.27       chs 	vnode_t *const vp = ap->a_vp;
   6090   1.27       chs 	const int flags = ap->a_flags;
   6091   1.27       chs 	const bool async = (flags & PGO_SYNCIO) == 0;
   6092   1.27       chs 	const bool memwrite = (ap->a_access_type & VM_PROT_WRITE) != 0;
   6093   1.27       chs 
   6094   1.27       chs 	struct uvm_object * const uobj = &vp->v_uobj;
   6095   1.62        ad 	krwlock_t * const rw = uobj->vmobjlock;
   6096   1.27       chs 	znode_t *zp = VTOZ(vp);
   6097   1.27       chs 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   6098   1.50   hannken 	vfs_t *mp;
   6099   1.27       chs 	struct vm_page *pg;
   6100   1.27       chs 	caddr_t va;
   6101   1.69        ad 	int npages = *ap->a_count, found, err = 0;
   6102   1.27       chs 
   6103   1.27       chs 	if (flags & PGO_LOCKED) {
   6104   1.69        ad  		uvn_findpages(uobj, ap->a_offset, &npages, ap->a_m, NULL,
   6105   1.68        ad 		    UFP_NOWAIT | UFP_NOALLOC | UFP_NOBUSY |
   6106   1.68        ad 		    (memwrite ? UFP_NORDONLY : 0));
   6107   1.69        ad 		KASSERT(npages == *ap->a_count);
   6108   1.68        ad 		if (memwrite) {
   6109   1.68        ad 			KASSERT(rw_write_held(uobj->vmobjlock));
   6110   1.68        ad 			for (int i = 0; i < npages; i++) {
   6111   1.68        ad 				pg = ap->a_m[i];
   6112   1.68        ad 				if (pg == NULL || pg == PGO_DONTCARE) {
   6113   1.68        ad 					continue;
   6114   1.68        ad 				}
   6115   1.68        ad 				if (uvm_pagegetdirty(pg) ==
   6116   1.68        ad 				    UVM_PAGE_STATUS_CLEAN) {
   6117   1.68        ad 					uvm_pagemarkdirty(pg,
   6118   1.68        ad 					    UVM_PAGE_STATUS_UNKNOWN);
   6119   1.68        ad 				}
   6120   1.68        ad 			}
   6121   1.68        ad 		}
   6122   1.68        ad 		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
   6123   1.27       chs 	}
   6124   1.62        ad 	rw_exit(rw);
   6125   1.27       chs 
   6126   1.27       chs 	if (async) {
   6127   1.27       chs 		return 0;
   6128   1.27       chs 	}
   6129   1.27       chs 
   6130   1.50   hannken 	mp = vp->v_mount;
   6131   1.50   hannken 	fstrans_start(mp);
   6132   1.50   hannken 	if (vp->v_mount != mp) {
   6133   1.50   hannken 		fstrans_done(mp);
   6134   1.50   hannken 		return ENOENT;
   6135   1.50   hannken 	}
   6136   1.27       chs 	ZFS_ENTER(zfsvfs);
   6137   1.27       chs 	ZFS_VERIFY_ZP(zp);
   6138   1.27       chs 
   6139   1.62        ad 	rw_enter(rw, RW_WRITER);
   6140   1.69        ad 	if (ap->a_offset + (npages << PAGE_SHIFT) > round_page(vp->v_size)) {
   6141   1.62        ad 		rw_exit(rw);
   6142   1.50   hannken 		ZFS_EXIT(zfsvfs);
   6143   1.50   hannken 		fstrans_done(mp);
   6144   1.50   hannken 		return EINVAL;
   6145   1.50   hannken 	}
   6146   1.69        ad 	uvn_findpages(uobj, ap->a_offset, &npages, ap->a_m, NULL, UFP_ALL);
   6147   1.69        ad 	KASSERT(npages == *ap->a_count);
   6148   1.27       chs 
   6149   1.68        ad 	for (int i = 0; i < npages; i++) {
   6150   1.68        ad 		pg = ap->a_m[i];
   6151   1.68        ad 		if (pg->flags & PG_FAKE) {
   6152   1.69        ad 			voff_t offset = pg->offset;
   6153   1.69        ad 			KASSERT(pg->offset == ap->a_offset + (i << PAGE_SHIFT));
   6154   1.68        ad 			rw_exit(rw);
   6155   1.27       chs 
   6156   1.68        ad 			va = zfs_map_page(pg, S_WRITE);
   6157   1.68        ad 			err = dmu_read(zfsvfs->z_os, zp->z_id, offset,
   6158   1.68        ad 			    PAGE_SIZE, va, DMU_READ_PREFETCH);
   6159   1.68        ad 			zfs_unmap_page(pg, va);
   6160   1.27       chs 
   6161   1.68        ad 			if (err != 0) {
   6162   1.71       chs 				uvm_aio_aiodone_pages(ap->a_m, npages, false, err);
   6163   1.68        ad 				memset(ap->a_m, 0, sizeof(ap->a_m[0]) *
   6164   1.68        ad 				    npages);
   6165   1.68        ad 				break;
   6166   1.68        ad 			}
   6167   1.71       chs 			rw_enter(rw, RW_WRITER);
   6168   1.68        ad 			pg->flags &= ~(PG_FAKE);
   6169   1.68        ad 		}
   6170   1.27       chs 
   6171   1.68        ad 		if (memwrite && uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) {
   6172   1.68        ad 			/* For write faults, start dirtiness tracking. */
   6173   1.68        ad 			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN);
   6174   1.68        ad 		}
   6175   1.27       chs 	}
   6176   1.62        ad 	rw_exit(rw);
   6177   1.27       chs 
   6178   1.27       chs 	ZFS_EXIT(zfsvfs);
   6179   1.50   hannken 	fstrans_done(mp);
   6180   1.27       chs 
   6181   1.27       chs 	return (err);
   6182   1.27       chs }
   6183   1.27       chs 
   6184   1.27       chs static int
   6185   1.27       chs zfs_putapage(vnode_t *vp, page_t **pp, int count, int flags)
   6186   1.27       chs {
   6187   1.27       chs 	znode_t		*zp = VTOZ(vp);
   6188   1.27       chs 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   6189   1.27       chs 	dmu_tx_t	*tx;
   6190   1.27       chs 	voff_t		off, koff;
   6191   1.27       chs 	voff_t		len, klen;
   6192   1.27       chs 	int		err;
   6193   1.27       chs 
   6194   1.27       chs 	bool *cleanedp;
   6195   1.27       chs 	struct uvm_object *uobj = &vp->v_uobj;
   6196   1.62        ad 	krwlock_t *rw = uobj->vmobjlock;
   6197   1.27       chs 
   6198   1.42   hannken 	if (zp->z_sa_hdl == NULL) {
   6199   1.42   hannken 		err = 0;
   6200   1.71       chs 		goto out;
   6201   1.42   hannken 	}
   6202   1.42   hannken 
   6203   1.66       chs 	/*
   6204   1.95      yamt 	 * writing to zfs needs memory allocation, locks, etc,
   6205   1.95      yamt 	 * which are not safe for the page daemon.
   6206   1.95      yamt 	 * ENOMEM to signal a transient error to uvm.
   6207   1.95      yamt 	 * hopefully it can find other pages to free.
   6208   1.95      yamt 	 */
   6209   1.95      yamt 
   6210  1.100       kre 	if (uvm_lwp_is_pagedaemon(curlwp)) {
   6211   1.95      yamt 		err = SET_ERROR(ENOMEM);
   6212   1.95      yamt 		goto out;
   6213   1.95      yamt 	}
   6214   1.95      yamt 
   6215   1.95      yamt 	/*
   6216   1.66       chs 	 * Calculate the length and assert that no whole pages are past EOF.
   6217   1.66       chs 	 * This check is equivalent to "off + len <= round_page(zp->z_size)",
   6218   1.66       chs 	 * with gyrations to avoid signed integer overflow.
   6219   1.66       chs 	 */
   6220   1.66       chs 
   6221   1.27       chs 	off = pp[0]->offset;
   6222   1.27       chs 	len = count * PAGESIZE;
   6223   1.66       chs 	KASSERT(off <= zp->z_size);
   6224   1.66       chs 	KASSERT(len <= round_page(zp->z_size));
   6225   1.66       chs 	KASSERT(off <= round_page(zp->z_size) - len);
   6226   1.66       chs 
   6227   1.66       chs 	/*
   6228   1.66       chs 	 * If EOF is within the last page, reduce len to avoid writing past
   6229   1.66       chs 	 * the file size in the ZFS buffer.  Assert that
   6230   1.66       chs 	 * "off + len <= zp->z_size", again avoiding signed integer overflow.
   6231   1.66       chs 	 */
   6232   1.66       chs 
   6233   1.66       chs 	if (len > zp->z_size - off) {
   6234   1.66       chs 		len = zp->z_size - off;
   6235   1.66       chs 	}
   6236   1.66       chs 	KASSERT(len <= zp->z_size);
   6237   1.66       chs 	KASSERT(off <= zp->z_size - len);
   6238   1.27       chs 
   6239   1.27       chs 	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
   6240   1.27       chs 	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
   6241   1.27       chs 		err = SET_ERROR(EDQUOT);
   6242   1.27       chs 		goto out;
   6243   1.27       chs 	}
   6244   1.27       chs 	tx = dmu_tx_create(zfsvfs->z_os);
   6245   1.27       chs 	dmu_tx_hold_write(tx, zp->z_id, off, len);
   6246   1.27       chs 
   6247   1.27       chs 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   6248   1.27       chs 	zfs_sa_upgrade_txholds(tx, zp);
   6249   1.27       chs 	err = dmu_tx_assign(tx, TXG_WAIT);
   6250   1.27       chs 	if (err != 0) {
   6251   1.27       chs 		dmu_tx_abort(tx);
   6252   1.27       chs 		goto out;
   6253   1.27       chs 	}
   6254   1.27       chs 
   6255   1.27       chs 	if (zp->z_blksz <= PAGESIZE) {
   6256   1.27       chs 		KASSERTMSG(count == 1, "vp %p pp %p count %d", vp, pp, count);
   6257   1.27       chs 		caddr_t va = zfs_map_page(*pp, S_READ);
   6258   1.27       chs 		ASSERT3U(len, <=, PAGESIZE);
   6259   1.27       chs 		dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
   6260   1.27       chs 		zfs_unmap_page(*pp, va);
   6261   1.27       chs 	} else {
   6262   1.27       chs 		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
   6263   1.27       chs 	}
   6264   1.27       chs 	cleanedp = tsd_get(zfs_putpage_key);
   6265   1.27       chs 	*cleanedp = true;
   6266   1.27       chs 
   6267   1.27       chs 	if (err == 0) {
   6268   1.27       chs 		uint64_t mtime[2], ctime[2];
   6269   1.27       chs 		sa_bulk_attr_t bulk[3];
   6270   1.27       chs 		int count = 0;
   6271   1.27       chs 
   6272   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
   6273   1.27       chs 		    &mtime, 16);
   6274   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
   6275   1.27       chs 		    &ctime, 16);
   6276   1.27       chs 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
   6277   1.27       chs 		    &zp->z_pflags, 8);
   6278   1.27       chs 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
   6279   1.27       chs 		    B_TRUE);
   6280   1.27       chs 		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
   6281   1.27       chs 		ASSERT0(err);
   6282   1.99      yamt 		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len,
   6283   1.99      yamt 		    B_FALSE);
   6284   1.27       chs 	}
   6285   1.27       chs 	dmu_tx_commit(tx);
   6286   1.27       chs 
   6287   1.27       chs out:
   6288   1.71       chs 	uvm_aio_aiodone_pages(pp, count, true, err);
   6289   1.27       chs 	return (err);
   6290   1.27       chs }
   6291   1.27       chs 
   6292   1.27       chs static void
   6293   1.96      yamt zfs_netbsd_update_mctime(vnode_t *vp)
   6294   1.27       chs {
   6295   1.27       chs 	znode_t		*zp = VTOZ(vp);
   6296   1.27       chs 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
   6297   1.27       chs 	dmu_tx_t	*tx;
   6298   1.27       chs 	sa_bulk_attr_t	bulk[2];
   6299   1.27       chs 	uint64_t	mtime[2], ctime[2];
   6300   1.27       chs 	int		count = 0, err;
   6301    1.2      haad 
   6302   1.27       chs 	tx = dmu_tx_create(zfsvfs->z_os);
   6303   1.89      yamt 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
   6304   1.89      yamt 	zfs_sa_upgrade_txholds(tx, zp);
   6305   1.27       chs 	err = dmu_tx_assign(tx, TXG_WAIT);
   6306   1.27       chs 	if (err != 0) {
   6307   1.27       chs 		dmu_tx_abort(tx);
   6308   1.27       chs 		return;
   6309   1.27       chs 	}
   6310   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
   6311   1.27       chs 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
   6312   1.27       chs 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
   6313   1.89      yamt 	err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
   6314   1.27       chs 	dmu_tx_commit(tx);
   6315   1.89      yamt 	if (err != 0) {
   6316   1.89      yamt 		printf("%s: sa_bulk_update failed with %d\n", __func__, err);
   6317   1.89      yamt 	}
   6318    1.1      haad }
   6319    1.1      haad 
   6320   1.27       chs static int
   6321    1.2      haad zfs_netbsd_putpages(void *v)
   6322    1.1      haad {
   6323   1.27       chs 	struct vop_putpages_args /* {
   6324   1.27       chs 		struct vnode *a_vp;
   6325   1.27       chs 		voff_t a_offlo;
   6326   1.27       chs 		voff_t a_offhi;
   6327   1.27       chs 		int a_flags;
   6328   1.27       chs 	} */ * const ap = v;
   6329   1.27       chs 
   6330   1.27       chs 	struct vnode *vp = ap->a_vp;
   6331   1.27       chs 	voff_t offlo = ap->a_offlo;
   6332   1.27       chs 	voff_t offhi = ap->a_offhi;
   6333   1.27       chs 	int flags = ap->a_flags;
   6334   1.27       chs 
   6335    1.2      haad 	znode_t *zp = VTOZ(vp);
   6336   1.27       chs 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
   6337   1.27       chs 	rl_t *rl = NULL;
   6338   1.38   hannken 	uint64_t len;
   6339    1.2      haad 	int error;
   6340   1.27       chs 	bool cleaned = false;
   6341   1.27       chs 	bool cleaning = (flags & PGO_CLEANIT) != 0;
   6342   1.27       chs 
   6343   1.27       chs 	if (cleaning) {
   6344  1.100       kre 		bool pagedaemon = uvm_lwp_is_pagedaemon(curlwp);
   6345   1.82      yamt 
   6346   1.38   hannken 		ASSERT((offlo & PAGE_MASK) == 0 && (offhi & PAGE_MASK) == 0);
   6347   1.38   hannken 		ASSERT(offlo < offhi || offhi == 0);
   6348   1.38   hannken 		if (offhi == 0)
   6349   1.38   hannken 			len = UINT64_MAX;
   6350   1.38   hannken 		else
   6351   1.38   hannken 			len = offhi - offlo;
   6352   1.62        ad 		rw_exit(vp->v_uobj.vmobjlock);
   6353   1.82      yamt 		if (pagedaemon) {
   6354   1.41   hannken 			error = fstrans_start_nowait(vp->v_mount);
   6355   1.41   hannken 			if (error)
   6356   1.41   hannken 				return error;
   6357   1.41   hannken 		} else {
   6358   1.41   hannken 			vfs_t *mp = vp->v_mount;
   6359   1.41   hannken 			fstrans_start(mp);
   6360   1.41   hannken 			if (vp->v_mount != mp) {
   6361   1.41   hannken 				fstrans_done(mp);
   6362   1.41   hannken 				ASSERT(!vn_has_cached_data(vp));
   6363   1.41   hannken 				return 0;
   6364   1.41   hannken 			}
   6365   1.41   hannken 		}
   6366   1.42   hannken 		/*
   6367   1.42   hannken 		 * Cannot use ZFS_ENTER() here as it returns with error
   6368   1.42   hannken 		 * if z_unmounted.  The next statement is equivalent.
   6369   1.42   hannken 		 */
   6370   1.42   hannken 		rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
   6371   1.42   hannken 
   6372   1.82      yamt 		if (pagedaemon) {
   6373   1.82      yamt 			rl = zfs_range_lock_try(zp, offlo, len, RL_WRITER);
   6374   1.82      yamt 			if (rl == NULL) {
   6375   1.82      yamt 				error = EBUSY;
   6376   1.82      yamt 				goto fail;
   6377   1.82      yamt 			}
   6378   1.82      yamt 		} else {
   6379   1.82      yamt 			rl = zfs_range_lock(zp, offlo, len, RL_WRITER);
   6380   1.82      yamt 		}
   6381   1.62        ad 		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
   6382   1.27       chs 		tsd_set(zfs_putpage_key, &cleaned);
   6383   1.27       chs 	}
   6384    1.2      haad 	error = genfs_putpages(v);
   6385   1.41   hannken 	if (cleaning) {
   6386   1.27       chs 		tsd_set(zfs_putpage_key, NULL);
   6387   1.27       chs 		zfs_range_unlock(rl);
   6388   1.27       chs 
   6389   1.41   hannken 		/*
   6390   1.41   hannken 		 * Only zil_commit() if we cleaned something.  This avoids
   6391   1.41   hannken 		 * deadlock if we're called from zfs_netbsd_setsize().
   6392   1.92      yamt 		 *
   6393   1.92      yamt 		 * Also, it isn't safe or nessesary to call it for vnode
   6394   1.92      yamt 		 * reclaim. See the comment in zfs_netbsd_fsync.
   6395   1.41   hannken 		 */
   6396   1.27       chs 
   6397   1.92      yamt 		if (cleaned && (flags & PGO_RECLAIM) == 0) {
   6398   1.92      yamt 			if ((flags & PGO_SYNCIO) != 0
   6399   1.92      yamt 			    || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
   6400   1.92      yamt 				zil_commit(zfsvfs->z_log, zp->z_id);
   6401   1.92      yamt 		}
   6402   1.82      yamt fail:
   6403   1.42   hannken 		ZFS_EXIT(zfsvfs);
   6404   1.41   hannken 		fstrans_done(vp->v_mount);
   6405   1.41   hannken 	}
   6406    1.2      haad 	return error;
   6407    1.2      haad }
   6408    1.2      haad 
   6409   1.27       chs /*
   6410   1.27       chs  * Restrict the putpages range to the ZFS block containing the offset.
   6411   1.27       chs  */
   6412   1.27       chs static void
   6413   1.27       chs zfs_netbsd_gop_putrange(struct vnode *vp, off_t off, off_t *lop, off_t *hip)
   6414   1.27       chs {
   6415   1.27       chs 	znode_t *zp = VTOZ(vp);
   6416   1.27       chs 
   6417   1.27       chs 	*lop = trunc_page(rounddown2(off, zp->z_blksz));
   6418   1.27       chs 	*hip = round_page(*lop + zp->z_blksz);
   6419   1.27       chs }
   6420   1.27       chs 
   6421   1.27       chs void
   6422   1.27       chs zfs_netbsd_setsize(vnode_t *vp, off_t size)
   6423   1.27       chs {
   6424   1.27       chs 	struct uvm_object *uobj = &vp->v_uobj;
   6425   1.62        ad 	krwlock_t *rw = uobj->vmobjlock;
   6426   1.27       chs 	page_t *pg;
   6427   1.27       chs 	int count, pgoff;
   6428   1.27       chs 	caddr_t va;
   6429   1.27       chs 	off_t tsize;
   6430   1.27       chs 
   6431   1.27       chs 	uvm_vnp_setsize(vp, size);
   6432   1.27       chs 	if (!vn_has_cached_data(vp))
   6433   1.27       chs 		return;
   6434   1.27       chs 
   6435   1.27       chs 	tsize = trunc_page(size);
   6436   1.27       chs 	if (tsize == size)
   6437   1.27       chs 		return;
   6438   1.27       chs 
   6439   1.27       chs 	/*
   6440   1.27       chs 	 * If there's a partial page, we need to zero the tail.
   6441   1.27       chs 	 */
   6442   1.27       chs 
   6443   1.62        ad 	rw_enter(rw, RW_WRITER);
   6444   1.27       chs 	count = 1;
   6445   1.27       chs 	pg = NULL;
   6446   1.55        ad 	if (uvn_findpages(uobj, tsize, &count, &pg, NULL, UFP_NOALLOC)) {
   6447   1.27       chs 		va = zfs_map_page(pg, S_WRITE);
   6448   1.27       chs 		pgoff = size - tsize;
   6449   1.27       chs 		memset(va + pgoff, 0, PAGESIZE - pgoff);
   6450   1.27       chs 		zfs_unmap_page(pg, va);
   6451   1.27       chs 		uvm_page_unbusy(&pg, 1);
   6452   1.27       chs 	}
   6453   1.27       chs 
   6454   1.62        ad 	rw_exit(rw);
   6455   1.27       chs }
   6456   1.27       chs 
   6457   1.27       chs static int
   6458   1.27       chs zfs_netbsd_print(void *v)
   6459   1.27       chs {
   6460   1.27       chs 	struct vop_print_args /* {
   6461   1.27       chs 		struct vnode	*a_vp;
   6462   1.27       chs 	} */ *ap = v;
   6463   1.27       chs 	vnode_t	*vp;
   6464   1.27       chs 	znode_t	*zp;
   6465   1.27       chs 
   6466   1.27       chs 	vp = ap->a_vp;
   6467   1.27       chs 	zp = VTOZ(vp);
   6468   1.27       chs 
   6469   1.27       chs 	printf("\tino %" PRIu64 " size %" PRIu64 "\n",
   6470   1.27       chs 	       zp->z_id, zp->z_size);
   6471   1.27       chs 	return 0;
   6472   1.27       chs }
   6473   1.27       chs 
   6474   1.27       chs const struct genfs_ops zfs_genfsops = {
   6475   1.88      yamt 	.gop_write = zfs_putapage,
   6476   1.27       chs 	.gop_putrange = zfs_netbsd_gop_putrange,
   6477   1.27       chs };
   6478   1.27       chs 
   6479    1.2      haad int (**zfs_vnodeop_p)(void *);
   6480    1.2      haad const struct vnodeopv_entry_desc zfs_vnodeop_entries[] = {
   6481    1.2      haad 	{ &vop_default_desc,		vn_default_error },
   6482   1.72  dholland 	{ &vop_parsepath_desc,		genfs_parsepath },
   6483    1.2      haad 	{ &vop_lookup_desc,		zfs_netbsd_lookup },
   6484    1.2      haad 	{ &vop_create_desc,		zfs_netbsd_create },
   6485   1.48   hannken 	{ &vop_mknod_desc,		zfs_netbsd_mknod },
   6486    1.2      haad 	{ &vop_open_desc,		zfs_netbsd_open },
   6487    1.2      haad 	{ &vop_close_desc,		zfs_netbsd_close },
   6488    1.2      haad 	{ &vop_access_desc,		zfs_netbsd_access },
   6489   1.67  christos 	{ &vop_accessx_desc,		genfs_accessx },
   6490    1.2      haad 	{ &vop_getattr_desc,		zfs_netbsd_getattr },
   6491    1.2      haad 	{ &vop_setattr_desc,		zfs_netbsd_setattr },
   6492    1.2      haad 	{ &vop_read_desc,		zfs_netbsd_read },
   6493    1.2      haad 	{ &vop_write_desc,		zfs_netbsd_write },
   6494    1.2      haad 	{ &vop_ioctl_desc,		zfs_netbsd_ioctl },
   6495   1.51   hannken 	{ &vop_poll_desc,		genfs_poll },
   6496   1.52   hannken 	{ &vop_kqfilter_desc,		genfs_kqfilter },
   6497   1.58  riastrad 	{ &vop_revoke_desc,		genfs_revoke },
   6498    1.2      haad 	{ &vop_fsync_desc,		zfs_netbsd_fsync },
   6499    1.2      haad 	{ &vop_remove_desc,		zfs_netbsd_remove },
   6500    1.2      haad 	{ &vop_link_desc,		zfs_netbsd_link },
   6501   1.74  dholland 	{ &vop_lock_desc,		genfs_lock },
   6502   1.74  dholland 	{ &vop_unlock_desc,		genfs_unlock },
   6503    1.2      haad 	{ &vop_rename_desc,		zfs_netbsd_rename },
   6504    1.2      haad 	{ &vop_mkdir_desc,		zfs_netbsd_mkdir },
   6505    1.2      haad 	{ &vop_rmdir_desc,		zfs_netbsd_rmdir },
   6506    1.2      haad 	{ &vop_symlink_desc,		zfs_netbsd_symlink },
   6507    1.2      haad 	{ &vop_readdir_desc,		zfs_netbsd_readdir },
   6508    1.2      haad 	{ &vop_readlink_desc,		zfs_netbsd_readlink },
   6509    1.2      haad 	{ &vop_inactive_desc,		zfs_netbsd_inactive },
   6510    1.2      haad 	{ &vop_reclaim_desc,		zfs_netbsd_reclaim },
   6511    1.2      haad 	{ &vop_pathconf_desc,		zfs_netbsd_pathconf },
   6512   1.74  dholland 	{ &vop_seek_desc,		genfs_seek },
   6513    1.2      haad 	{ &vop_getpages_desc,		zfs_netbsd_getpages },
   6514    1.2      haad 	{ &vop_putpages_desc,		zfs_netbsd_putpages },
   6515   1.74  dholland 	{ &vop_mmap_desc,		genfs_mmap },
   6516   1.74  dholland 	{ &vop_islocked_desc,		genfs_islocked },
   6517   1.20  riastrad 	{ &vop_advlock_desc,		zfs_netbsd_advlock },
   6518   1.27       chs 	{ &vop_print_desc,		zfs_netbsd_print },
   6519   1.74  dholland 	{ &vop_fcntl_desc,		genfs_fcntl },
   6520    1.2      haad 	{ NULL, NULL }
   6521    1.1      haad };
   6522    1.1      haad 
   6523    1.2      haad const struct vnodeopv_desc zfs_vnodeop_opv_desc =
   6524    1.2      haad 	{ &zfs_vnodeop_p, zfs_vnodeop_entries };
   6525   1.27       chs 
   6526   1.48   hannken int (**zfs_specop_p)(void *);
                         /*
                          * Operation table associated with zfs_specop_p (the two are paired
                          * by zfs_specop_opv_desc).  Each entry couples a vop_*_desc
                          * descriptor with the function registered for that operation.
                          *
                          *  - vop_default_desc is mapped to vn_default_error.
                          *  - GENFS_SPECOP_ENTRIES contributes further entries; its
                          *    expansion is not visible in this part of the file.
                          *  - The explicit entries that follow register spec_close,
                          *    zfs_spec_fsync, vn_bwrite and the zfs_netbsd_* / genfs_*
                          *    handlers listed below.
                          *  - The table ends with a { NULL, NULL } entry, presumably the
                          *    end-of-table sentinel.
                          *
                          * NOTE(review): entries placed after GENFS_SPECOP_ENTRIES presumably
                          * take precedence over anything the macro supplies for the same
                          * operation, so keep the ordering -- confirm against the code that
                          * builds the vector from this table.
                          */
   6527   1.48   hannken const struct vnodeopv_entry_desc zfs_specop_entries[] = {
   6528   1.48   hannken 	{ &vop_default_desc,		vn_default_error },
   6529   1.73  dholland 	GENFS_SPECOP_ENTRIES,
   6530   1.48   hannken 	{ &vop_close_desc,		spec_close },
   6531   1.48   hannken 	{ &vop_access_desc,		zfs_netbsd_access },
   6532   1.67  christos 	{ &vop_accessx_desc,		genfs_accessx },
   6533   1.48   hannken 	{ &vop_getattr_desc,		zfs_netbsd_getattr },
   6534   1.48   hannken 	{ &vop_setattr_desc,		zfs_netbsd_setattr },
   6535   1.90      yamt 	{ &vop_read_desc,		zfs_netbsd_read },
   6536   1.90      yamt 	{ &vop_write_desc,		zfs_netbsd_write },
   6537   1.61  riastrad 	{ &vop_fsync_desc,		zfs_spec_fsync },	/* differs from the fifo table, which uses zfs_netbsd_fsync */
   6538   1.74  dholland 	{ &vop_lock_desc,		genfs_lock },
   6539   1.74  dholland 	{ &vop_unlock_desc,		genfs_unlock },
   6540   1.48   hannken 	{ &vop_inactive_desc,		zfs_netbsd_inactive },
   6541   1.48   hannken 	{ &vop_reclaim_desc,		zfs_netbsd_reclaim },
   6542   1.74  dholland 	{ &vop_islocked_desc,		genfs_islocked },
   6543   1.74  dholland 	{ &vop_bwrite_desc,		vn_bwrite },
   6544   1.48   hannken 	{ &vop_print_desc,		zfs_netbsd_print },
   6545   1.74  dholland 	{ &vop_fcntl_desc,		genfs_fcntl },
   6546   1.48   hannken 	{ NULL, NULL }
   6547   1.48   hannken };
   6548   1.48   hannken 
   6549   1.48   hannken const struct vnodeopv_desc zfs_specop_opv_desc =	/* pairs &zfs_specop_p with the zfs_specop_entries table */
   6550   1.48   hannken 	{ &zfs_specop_p, zfs_specop_entries };
   6551   1.48   hannken 
   6552   1.48   hannken int (**zfs_fifoop_p)(void *);
                         /*
                          * Operation table associated with zfs_fifoop_p (the two are paired
                          * by zfs_fifoop_opv_desc).  Each entry couples a vop_*_desc
                          * descriptor with the function registered for that operation.
                          *
                          *  - vop_default_desc is mapped to vn_default_error.
                          *  - GENFS_FIFOOP_ENTRIES contributes further entries; its
                          *    expansion is not visible in this part of the file.
                          *  - The explicit entries that follow send close and strategy to
                          *    vn_fifo_bypass, bwrite to vn_bwrite, and the remaining
                          *    operations to the zfs_netbsd_* / genfs_* handlers listed below.
                          *  - The table ends with a { NULL, NULL } entry, presumably the
                          *    end-of-table sentinel.
                          *
                          * NOTE(review): entries placed after GENFS_FIFOOP_ENTRIES presumably
                          * take precedence over anything the macro supplies for the same
                          * operation, so keep the ordering -- confirm against the code that
                          * builds the vector from this table.
                          */
   6553   1.48   hannken const struct vnodeopv_entry_desc zfs_fifoop_entries[] = {
   6554   1.48   hannken 	{ &vop_default_desc,		vn_default_error },
   6555   1.73  dholland 	GENFS_FIFOOP_ENTRIES,
   6556   1.48   hannken 	{ &vop_close_desc,		vn_fifo_bypass },
   6557   1.48   hannken 	{ &vop_access_desc,		zfs_netbsd_access },
   6558   1.67  christos 	{ &vop_accessx_desc,		genfs_accessx },
   6559   1.48   hannken 	{ &vop_getattr_desc,		zfs_netbsd_getattr },
   6560   1.48   hannken 	{ &vop_setattr_desc,		zfs_netbsd_setattr },
   6561   1.90      yamt 	{ &vop_read_desc,		zfs_netbsd_read },
   6562   1.90      yamt 	{ &vop_write_desc,		zfs_netbsd_write },
   6563   1.48   hannken 	{ &vop_fsync_desc,		zfs_netbsd_fsync },	/* differs from the spec table, which uses zfs_spec_fsync */
   6564   1.74  dholland 	{ &vop_lock_desc,		genfs_lock },
   6565   1.74  dholland 	{ &vop_unlock_desc,		genfs_unlock },
   6566   1.48   hannken 	{ &vop_inactive_desc,		zfs_netbsd_inactive },
   6567   1.48   hannken 	{ &vop_reclaim_desc,		zfs_netbsd_reclaim },
   6568   1.74  dholland 	{ &vop_islocked_desc,		genfs_islocked },
   6569   1.73  dholland 	{ &vop_bwrite_desc,		vn_bwrite },
   6570   1.73  dholland 	{ &vop_strategy_desc,		vn_fifo_bypass },
   6571   1.48   hannken 	{ &vop_print_desc,		zfs_netbsd_print },
   6572   1.74  dholland 	{ &vop_fcntl_desc,		genfs_fcntl },
   6573   1.48   hannken 	{ NULL, NULL }
   6574   1.48   hannken };
   6575   1.48   hannken 
   6576   1.48   hannken const struct vnodeopv_desc zfs_fifoop_opv_desc =	/* pairs &zfs_fifoop_p with the zfs_fifoop_entries table */
   6577   1.48   hannken 	{ &zfs_fifoop_p, zfs_fifoop_entries };
   6578   1.48   hannken 
   6579   1.27       chs #endif /* __NetBSD__ */
   6580