Home | History | Annotate | Line # | Download | only in lfs
ulfs_extattr.c revision 1.14
      1 /*	$NetBSD: ulfs_extattr.c,v 1.14 2016/11/09 05:44:42 dholland Exp $	*/
      2 /*  from NetBSD: ulfs_extattr.c,v 1.48 2016/11/09 05:08:35 dholland Exp  */
      3 
      4 /*-
      5  * Copyright (c) 1999-2002 Robert N. M. Watson
      6  * Copyright (c) 2002-2003 Networks Associates Technology, Inc.
      7  * All rights reserved.
      8  *
      9  * This software was developed by Robert Watson for the TrustedBSD Project.
     10  *
     11  * This software was developed for the FreeBSD Project in part by Network
     12  * Associates Laboratories, the Security Research Division of Network
     13  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
     14  * as part of the DARPA CHATS research program.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  * 1. Redistributions of source code must retain the above copyright
     20  *    notice, this list of conditions and the following disclaimer.
     21  * 2. Redistributions in binary form must reproduce the above copyright
     22  *    notice, this list of conditions and the following disclaimer in the
     23  *    documentation and/or other materials provided with the distribution.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     35  * SUCH DAMAGE.
     36  *
     37  */
     38 
     39 /*
     40  * Support for file system extended attributes on the ULFS1 file system.
     41  *
     42  * Extended attributes are defined in the form name=value, where name is
     43  * a nul-terminated string in the style of a file name, and value is a
     44  * binary blob of zero or more bytes.  The ULFS1 extended attribute service
     45  * layers support for extended attributes onto a backing file, in the style
     46  * of the quota implementation, meaning that it requires no underlying format
     47  * changes to the file system.  This design choice exchanges simplicity,
     48  * usability, and easy deployment for performance.
     49  */
     50 
     51 #include <sys/cdefs.h>
     52 __KERNEL_RCSID(0, "$NetBSD: ulfs_extattr.c,v 1.14 2016/11/09 05:44:42 dholland Exp $");
     53 
     54 #ifdef _KERNEL_OPT
     55 #include "opt_lfs.h"
     56 #endif
     57 
     58 #include <sys/param.h>
     59 #include <sys/systm.h>
     60 #include <sys/reboot.h>
     61 #include <sys/kauth.h>
     62 #include <sys/kernel.h>
     63 #include <sys/namei.h>
     64 #include <sys/kmem.h>
     65 #include <sys/fcntl.h>
     66 #include <sys/lwp.h>
     67 #include <sys/vnode.h>
     68 #include <sys/mount.h>
     69 #include <sys/lock.h>
     70 #include <sys/dirent.h>
     71 #include <sys/extattr.h>
     72 #include <sys/sysctl.h>
     73 
     74 #include <ufs/lfs/ulfs_extattr.h>
     75 #include <ufs/lfs/ulfsmount.h>
     76 #include <ufs/lfs/ulfs_inode.h>
     77 #include <ufs/lfs/ulfs_bswap.h>
     78 #include <ufs/lfs/ulfs_extern.h>
     79 
/*
 * Global knobs for the extended attribute code.  Both have external
 * linkage.
 * NOTE(review): ulfs_extattr_sync is not referenced in the visible part of
 * this file; presumably it selects synchronous attribute writes -- confirm.
 */
int ulfs_extattr_sync = 1;
/* Stored in uef_size of the file header when a backing file is autocreated. */
int ulfs_extattr_autocreate = 1024;
     82 
/*
 * Forward declarations of the file-local (static) helpers, so that they
 * can be referenced before their definitions.
 */
static int	ulfs_extattr_valid_attrname(int attrnamespace,
		    const char *attrname);
static int	ulfs_extattr_enable_with_open(struct ulfsmount *ump,
		    struct vnode *vp, int attrnamespace, const char *attrname,
		    struct lwp *l);
static int	ulfs_extattr_enable(struct ulfsmount *ump, int attrnamespace,
		    const char *attrname, struct vnode *backing_vnode,
		    struct lwp *l);
static int	ulfs_extattr_disable(struct ulfsmount *ump, int attrnamespace,
		    const char *attrname, struct lwp *l);
static int	ulfs_extattr_get(struct vnode *vp, int attrnamespace,
		    const char *name, struct uio *uio, size_t *size,
		    kauth_cred_t cred, struct lwp *l);
static int	ulfs_extattr_list(struct vnode *vp, int attrnamespace,
		    struct uio *uio, size_t *size, int flag,
		    kauth_cred_t cred, struct lwp *l);
static int	ulfs_extattr_set(struct vnode *vp, int attrnamespace,
		    const char *name, struct uio *uio, kauth_cred_t cred,
		    struct lwp *l);
static int	ulfs_extattr_rm(struct vnode *vp, int attrnamespace,
		    const char *name, kauth_cred_t cred, struct lwp *l);
static struct ulfs_extattr_list_entry *ulfs_extattr_find_attr(struct ulfsmount *,
		    int, const char *);
static int	ulfs_extattr_get_header(struct vnode *,
		    struct ulfs_extattr_list_entry *,
		    struct ulfs_extattr_header *, off_t *);
    109 
    110 /*
    111  * Convert a FreeBSD extended attribute and namespace to a consistent string
    112  * representation.
    113  *
    114  * The returned value, if not NULL, is guaranteed to be an allocated object
    115  * of its size as returned by strlen() + 1 and must be freed by the caller.
    116  */
    117 static char *
    118 from_freebsd_extattr(int attrnamespace, const char *attrname)
    119 {
    120 	const char *namespace;
    121 	char *attr;
    122 	size_t len;
    123 
    124 	if (attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
    125 		namespace = "system";
    126 	else if (attrnamespace == EXTATTR_NAMESPACE_USER)
    127 		namespace = "user";
    128 	else
    129 		return NULL;
    130 
    131 	/* <namespace>.<attrname>\0 */
    132 	len = strlen(namespace) + 1 + strlen(attrname) + 1;
    133 
    134 	attr = kmem_alloc(len, KM_SLEEP);
    135 
    136 	snprintf(attr, len, "%s.%s", namespace, attrname);
    137 
    138 	return attr;
    139 }
    140 
    141 /*
    142  * Internal wrapper around a conversion-check-free sequence.
    143  */
    144 static int
    145 internal_extattr_check_cred(vnode_t *vp, int attrnamespace, const char *name,
    146     kauth_cred_t cred, int access_mode)
    147 {
    148 	char *attr;
    149 	int error;
    150 
    151 	attr = from_freebsd_extattr(attrnamespace, name);
    152 	if (attr == NULL)
    153 		return EINVAL;
    154 
    155 	error = extattr_check_cred(vp, attr, cred, access_mode);
    156 
    157 	kmem_free(attr, strlen(attr) + 1);
    158 
    159 	return error;
    160 }
    161 
    162 /*
    163  * Per-FS attribute lock protecting attribute operations.
    164  * XXX Right now there is a lot of lock contention due to having a single
    165  * lock per-FS; really, this should be far more fine-grained.
    166  */
    167 static void
    168 ulfs_extattr_uepm_lock(struct ulfsmount *ump)
    169 {
    170 
    171 	/*
    172 	 * XXX This needs to be recursive for the following reasons:
    173 	 *   - it is taken in ulfs_extattr_vnode_inactive
    174 	 *   - which is called from VOP_INACTIVE
    175 	 *   - which can be triggered by any vrele, vput, or vn_close
    176 	 *   - several of these can happen while it's held
    177 	 */
    178 	if (mutex_owned(&ump->um_extattr.uepm_lock)) {
    179 		ump->um_extattr.uepm_lockcnt++;
    180 		return;
    181 	}
    182 	mutex_enter(&ump->um_extattr.uepm_lock);
    183 }
    184 
    185 static void
    186 ulfs_extattr_uepm_unlock(struct ulfsmount *ump)
    187 {
    188 
    189 	if (ump->um_extattr.uepm_lockcnt != 0) {
    190 		KASSERT(mutex_owned(&ump->um_extattr.uepm_lock));
    191 		ump->um_extattr.uepm_lockcnt--;
    192 		return;
    193 	}
    194 	mutex_exit(&ump->um_extattr.uepm_lock);
    195 }
    196 
/*-
 * Decide whether attrname can name an actual attribute.
 *
 * A name is rejected (0 is returned) when it is:
 *	 a NULL pointer
 *	 the empty string (used to retrieve application attribute list)
 * Anything else is accepted (1 is returned).  attrnamespace is not
 * examined.
 */
static int
ulfs_extattr_valid_attrname(int attrnamespace, const char *attrname)
{

	return (attrname != NULL && attrname[0] != '\0');
}
    215 
/*
 * Autocreate an attribute storage
 *
 * Creates the backing file
 *   <mount point>/ULFS_EXTATTR_FSROOTSUBDIR/<system|user subdir>/<attrname>
 * writes a fresh file header into it, enables the attribute and looks up
 * the resulting list entry.
 *
 * vp must be exclusively locked on entry (asserted); it is unlocked while
 * the backing file is created and locked again afterwards.  The uepm lock
 * is likewise dropped and re-taken around the creation, so the caller is
 * evidently expected to hold it.
 *
 * On success returns 0 and stores the list entry in *uelep.  On failure
 * returns an errno value and sets *uelep to NULL.
 */
static int
ulfs_extattr_autocreate_attr(struct vnode *vp, int attrnamespace,
    const char *attrname, struct lwp *l, struct ulfs_extattr_list_entry **uelep)
{
	struct mount *mp = vp->v_mount;
	struct ulfsmount *ump = VFSTOULFS(mp);
	struct vnode *backing_vp;
	struct nameidata nd;
	struct pathbuf *pb;
	char *path;
	struct ulfs_extattr_fileheader uef;
	struct ulfs_extattr_list_entry *uele;
	int error;

	path = PNBUF_GET();

	/*
	 * We only support system and user namespace autocreation
	 */
	switch (attrnamespace) {
	case EXTATTR_NAMESPACE_SYSTEM:
		(void)snprintf(path, PATH_MAX, "%s/%s/%s/%s",
			       mp->mnt_stat.f_mntonname,
			       ULFS_EXTATTR_FSROOTSUBDIR,
			       ULFS_EXTATTR_SUBDIR_SYSTEM,
			       attrname);
		break;
	case EXTATTR_NAMESPACE_USER:
		(void)snprintf(path, PATH_MAX, "%s/%s/%s/%s",
			       mp->mnt_stat.f_mntonname,
			       ULFS_EXTATTR_FSROOTSUBDIR,
			       ULFS_EXTATTR_SUBDIR_USER,
			       attrname);
		break;
	default:
		PNBUF_PUT(path);
		*uelep = NULL;
		return EINVAL;
		break;
	}

	/*
	 * Release extended attribute mount lock, otherwise
	 * we can deadlock with another thread that would lock
	 * vp after we unlock it below, and call
	 * ulfs_extattr_uepm_lock(ump), for instance
	 * in ulfs_getextattr().
	 */
	ulfs_extattr_uepm_unlock(ump);

	/*
	 * XXX unlock/lock should only be done when setting extattr
	 * on backing store or one of its parent directory
	 * including root, but we always do it for now.
	 */
	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	VOP_UNLOCK(vp);

	pb = pathbuf_create(path);
	NDINIT(&nd, CREATE, LOCKPARENT, pb);

	/*
	 * Since we do not hold ulfs_extattr_uepm_lock anymore,
	 * another thread may race with us for backend creation,
	 * but only one can succeed here thanks to O_EXCL
	 */
	error = vn_open(&nd, O_CREAT|O_EXCL|O_RDWR, 0600);

	/*
	 * Reacquire the lock on the vnode
	 */
	KASSERT(VOP_ISLOCKED(vp) == 0);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/* Both locks are re-taken before the vn_open() result is examined. */
	ulfs_extattr_uepm_lock(ump);

	if (error != 0) {
		pathbuf_destroy(pb);
		PNBUF_PUT(path);
		*uelep = NULL;
		return error;
	}

	KASSERT(nd.ni_vp != NULL);
	KASSERT(VOP_ISLOCKED(nd.ni_vp) == LK_EXCLUSIVE);
	KASSERT(VOP_ISLOCKED(nd.ni_dvp) == 0);

	/*
	 * backing_vp is the backing store.
	 */
	backing_vp = nd.ni_vp;
	pathbuf_destroy(pb);
	PNBUF_PUT(path);

	/* Header for the new backing file; uef_size is the autocreate tunable. */
	uef.uef_magic = ULFS_EXTATTR_MAGIC;
	uef.uef_version = ULFS_EXTATTR_VERSION;
	uef.uef_size = ulfs_extattr_autocreate;

	error = vn_rdwr(UIO_WRITE, backing_vp, &uef, sizeof(uef), 0,
		        UIO_SYSSPACE, IO_NODELOCKED|IO_APPEND,
			l->l_cred, NULL, l);

	/* ulfs_extattr_enable() below takes an unlocked backing vnode. */
	VOP_UNLOCK(backing_vp);

	if (error != 0) {
		printf("%s: write uef header failed for %s, error = %d\n",
		       __func__, attrname, error);
		vn_close(backing_vp, FREAD|FWRITE, l->l_cred);
		*uelep = NULL;
		return error;
	}

	/*
	 * Now enable attribute.
	 */
	error = ulfs_extattr_enable(ump,attrnamespace, attrname, backing_vp, l);
	KASSERT(VOP_ISLOCKED(backing_vp) == 0);

	if (error != 0) {
		printf("%s: enable %s failed, error %d\n",
		       __func__, attrname, error);
		vn_close(backing_vp, FREAD|FWRITE, l->l_cred);
		*uelep = NULL;
		return error;
	}

	/* Fetch the list entry that the enable step just inserted. */
	uele = ulfs_extattr_find_attr(ump, attrnamespace, attrname);
	if (uele == NULL) {
		printf("%s: atttribute %s created but not found!\n",
		       __func__, attrname);
		vn_close(backing_vp, FREAD|FWRITE, l->l_cred);
		*uelep = NULL;
		return ESRCH; /* really internal error */
	}

	printf("%s: EA backing store autocreated for %s\n",
	       mp->mnt_stat.f_mntonname, attrname);

	*uelep = uele;
	return 0;
}
    360 
    361 /*
    362  * Locate an attribute given a name and mountpoint.
    363  * Must be holding uepm lock for the mount point.
    364  */
    365 static struct ulfs_extattr_list_entry *
    366 ulfs_extattr_find_attr(struct ulfsmount *ump, int attrnamespace,
    367     const char *attrname)
    368 {
    369 	struct ulfs_extattr_list_entry *search_attribute;
    370 
    371 	for (search_attribute = LIST_FIRST(&ump->um_extattr.uepm_list);
    372 	    search_attribute != NULL;
    373 	    search_attribute = LIST_NEXT(search_attribute, uele_entries)) {
    374 		if (!(strncmp(attrname, search_attribute->uele_attrname,
    375 		    ULFS_EXTATTR_MAXEXTATTRNAME)) &&
    376 		    (attrnamespace == search_attribute->uele_attrnamespace)) {
    377 			return (search_attribute);
    378 		}
    379 	}
    380 
    381 	return (0);
    382 }
    383 
    384 /*
    385  * Initialize per-FS structures supporting extended attributes.  Do not
    386  * start extended attributes yet.
    387  */
    388 void
    389 ulfs_extattr_uepm_init(struct ulfs_extattr_per_mount *uepm)
    390 {
    391 
    392 	uepm->uepm_flags = 0;
    393 	uepm->uepm_lockcnt = 0;
    394 
    395 	LIST_INIT(&uepm->uepm_list);
    396 	mutex_init(&uepm->uepm_lock, MUTEX_DEFAULT, IPL_NONE);
    397 	uepm->uepm_flags |= ULFS_EXTATTR_UEPM_INITIALIZED;
    398 }
    399 
    400 /*
    401  * Destroy per-FS structures supporting extended attributes.  Assumes
    402  * that EAs have already been stopped, and will panic if not.
    403  */
    404 void
    405 ulfs_extattr_uepm_destroy(struct ulfs_extattr_per_mount *uepm)
    406 {
    407 
    408 	if (!(uepm->uepm_flags & ULFS_EXTATTR_UEPM_INITIALIZED))
    409 		panic("ulfs_extattr_uepm_destroy: not initialized");
    410 
    411 	if ((uepm->uepm_flags & ULFS_EXTATTR_UEPM_STARTED))
    412 		panic("ulfs_extattr_uepm_destroy: called while still started");
    413 
    414 	/*
    415 	 * It's not clear that either order for the next three lines is
    416 	 * ideal, and it should never be a problem if this is only called
    417 	 * during unmount, and with vfs_busy().
    418 	 */
    419 	uepm->uepm_flags &= ~ULFS_EXTATTR_UEPM_STARTED;
    420 	uepm->uepm_flags &= ~ULFS_EXTATTR_UEPM_INITIALIZED;
    421 	mutex_destroy(&uepm->uepm_lock);
    422 }
    423 
    424 /*
    425  * Start extended attribute support on an FS.
    426  */
    427 int
    428 ulfs_extattr_start(struct mount *mp, struct lwp *l)
    429 {
    430 	struct ulfsmount *ump;
    431 	int error = 0;
    432 
    433 	ump = VFSTOULFS(mp);
    434 
    435 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_INITIALIZED))
    436 		ulfs_extattr_uepm_init(&ump->um_extattr);
    437 
    438 	ulfs_extattr_uepm_lock(ump);
    439 
    440 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_INITIALIZED)) {
    441 		error = EOPNOTSUPP;
    442 		goto unlock;
    443 	}
    444 	if (ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED) {
    445 		error = EBUSY;
    446 		goto unlock;
    447 	}
    448 
    449 	ump->um_extattr.uepm_flags |= ULFS_EXTATTR_UEPM_STARTED;
    450 
    451 	ump->um_extattr.uepm_ucred = l->l_cred;
    452 	kauth_cred_hold(ump->um_extattr.uepm_ucred);
    453 
    454  unlock:
    455 	ulfs_extattr_uepm_unlock(ump);
    456 
    457 	return (error);
    458 }
    459 
/*
 * Helper routine: given an exclusively locked parent directory and a
 * filename, return the exclusively locked vnode of the inode associated
 * with the name.  The lookup is done with ulfs_lookup() as the last path
 * component (ISLASTCN), so the name is a single component; any type of
 * vnode may be returned.
 *
 * lockparent is OR'ed into the lookup flags.  When it is 0, the parent is
 * unlocked before returning, on failure as well as on success.  When it
 * is non-zero (callers pass LOCKPARENT) this function does not unlock the
 * parent.
 *
 * In the event that the target is the parent (i.e., "."), the parent is
 * not unlocked here even when lockparent is 0: there will be two
 * references and one lock, requiring the caller to possibly special-case.
 *
 * Returns 0 and stores the vnode in *vp, or an errno value, in which case
 * *vp is not written.
 */
static int
ulfs_extattr_lookup(struct vnode *start_dvp, int lockparent, const char *dirname,
    struct vnode **vp, struct lwp *l)
{
	struct vop_lookup_v2_args vargs;
	struct componentname cnp;
	struct vnode *target_vp;
	char *pnbuf;
	int error;

	KASSERT(VOP_ISLOCKED(start_dvp) == LK_EXCLUSIVE);

	pnbuf = PNBUF_GET();

	/* Hand-build the componentname that ulfs_lookup() expects. */
	memset(&cnp, 0, sizeof(cnp));
	cnp.cn_nameiop = LOOKUP;
	cnp.cn_flags = ISLASTCN | lockparent;
	cnp.cn_cred = l->l_cred;
	cnp.cn_nameptr = pnbuf;
	error = copystr(dirname, pnbuf, MAXPATHLEN, &cnp.cn_namelen);
	if (error) {
		if (lockparent == 0) {
			VOP_UNLOCK(start_dvp);
		}
		PNBUF_PUT(pnbuf);
		printf("ulfs_extattr_lookup: copystr failed\n");
		return (error);
	}
	cnp.cn_namelen--;	/* trim nul termination */
	/* Call the lookup implementation directly with a hand-filled arg block. */
	vargs.a_desc = NULL;
	vargs.a_dvp = start_dvp;
	vargs.a_vpp = &target_vp;
	vargs.a_cnp = &cnp;
	error = ulfs_lookup(&vargs);
	PNBUF_PUT(pnbuf);
	if (error) {
		if (lockparent == 0) {
			VOP_UNLOCK(start_dvp);
		}
		return (error);
	}
#if 0
	if (target_vp == start_dvp)
		panic("ulfs_extattr_lookup: target_vp == start_dvp");
#endif

	/*
	 * The vnode returned by the lookup is locked here; only then is
	 * the parent released.  When target and parent are the same vnode
	 * nothing is locked or unlocked.
	 */
	if (target_vp != start_dvp) {
		error = vn_lock(target_vp, LK_EXCLUSIVE);
		if (lockparent == 0)
			VOP_UNLOCK(start_dvp);
		if (error) {
			/* Could not lock the target: drop the lookup's reference. */
			vrele(target_vp);
			return error;
		}
	}

	KASSERT(VOP_ISLOCKED(target_vp) == LK_EXCLUSIVE);
	*vp = target_vp;
	return (0);
}
    528 
    529 /*
    530  * Enable an EA using the passed filesystem, backing vnode, attribute name,
    531  * namespace, and proc.  Will perform a VOP_OPEN() on the vp, so expects vp
    532  * to be locked when passed in.  The vnode will be returned unlocked,
    533  * regardless of success/failure of the function.  As a result, the caller
    534  * will always need to vrele(), but not vput().
    535  */
    536 static int
    537 ulfs_extattr_enable_with_open(struct ulfsmount *ump, struct vnode *vp,
    538     int attrnamespace, const char *attrname, struct lwp *l)
    539 {
    540 	int error;
    541 
    542 	error = VOP_OPEN(vp, FREAD|FWRITE, l->l_cred);
    543 	if (error) {
    544 		printf("ulfs_extattr_enable_with_open.VOP_OPEN(): failed "
    545 		    "with %d\n", error);
    546 		VOP_UNLOCK(vp);
    547 		return (error);
    548 	}
    549 
    550 	mutex_enter(vp->v_interlock);
    551 	vp->v_writecount++;
    552 	mutex_exit(vp->v_interlock);
    553 
    554 	vref(vp);
    555 
    556 	VOP_UNLOCK(vp);
    557 
    558 	error = ulfs_extattr_enable(ump, attrnamespace, attrname, vp, l);
    559 	if (error != 0)
    560 		vn_close(vp, FREAD|FWRITE, l->l_cred);
    561 	return (error);
    562 }
    563 
    564 /*
    565  * Given a locked directory vnode, iterate over the names in the directory
    566  * and use ulfs_extattr_lookup() to retrieve locked vnodes of potential
    567  * attribute files.  Then invoke ulfs_extattr_enable_with_open() on each
    568  * to attempt to start the attribute.  Leaves the directory locked on
    569  * exit.
    570  */
    571 static int
    572 ulfs_extattr_iterate_directory(struct ulfsmount *ump, struct vnode *dvp,
    573     int attrnamespace, struct lwp *l)
    574 {
    575 	struct vop_readdir_args vargs;
    576 	struct statvfs *sbp = &ump->um_mountp->mnt_stat;
    577 	struct dirent *dp, *edp;
    578 	struct vnode *attr_vp;
    579 	struct uio auio;
    580 	struct iovec aiov;
    581 	char *dirbuf;
    582 	int error, eofflag = 0;
    583 
    584 	if (dvp->v_type != VDIR)
    585 		return (ENOTDIR);
    586 
    587 	dirbuf = kmem_alloc(LFS_DIRBLKSIZ, KM_SLEEP);
    588 
    589 	auio.uio_iov = &aiov;
    590 	auio.uio_iovcnt = 1;
    591 	auio.uio_rw = UIO_READ;
    592 	auio.uio_offset = 0;
    593 	UIO_SETUP_SYSSPACE(&auio);
    594 
    595 	vargs.a_desc = NULL;
    596 	vargs.a_vp = dvp;
    597 	vargs.a_uio = &auio;
    598 	vargs.a_cred = l->l_cred;
    599 	vargs.a_eofflag = &eofflag;
    600 	vargs.a_ncookies = NULL;
    601 	vargs.a_cookies = NULL;
    602 
    603 	while (!eofflag) {
    604 		auio.uio_resid = LFS_DIRBLKSIZ;
    605 		aiov.iov_base = dirbuf;
    606 		aiov.iov_len = LFS_DIRBLKSIZ;
    607 		error = ulfs_readdir(&vargs);
    608 		if (error) {
    609 			printf("ulfs_extattr_iterate_directory: ulfs_readdir "
    610 			    "%d\n", error);
    611 			return (error);
    612 		}
    613 
    614 		/*
    615 		 * XXXRW: While in LFS, we always get LFS_DIRBLKSIZ returns from
    616 		 * the directory code on success, on other file systems this
    617 		 * may not be the case.  For portability, we should check the
    618 		 * read length on return from ulfs_readdir().
    619 		 */
    620 		edp = (struct dirent *)&dirbuf[LFS_DIRBLKSIZ];
    621 		for (dp = (struct dirent *)dirbuf; dp < edp; ) {
    622 			if (dp->d_reclen == 0)
    623 				break;
    624 			/* Skip "." and ".." */
    625 			if (dp->d_name[0] == '.' &&
    626 			    (dp->d_name[1] == '\0' ||
    627 			     (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
    628 				goto next;
    629 			error = ulfs_extattr_lookup(dvp, LOCKPARENT,
    630 			    dp->d_name, &attr_vp, l);
    631 			if (error == ENOENT) {
    632 				goto next; /* keep silent */
    633 			} else if (error) {
    634 				printf("ulfs_extattr_iterate_directory: lookup "
    635 				    "%s %d\n", dp->d_name, error);
    636 			} else if (attr_vp == dvp) {
    637 				vrele(attr_vp);
    638 			} else if (attr_vp->v_type != VREG) {
    639 				vput(attr_vp);
    640 			} else {
    641 				error = ulfs_extattr_enable_with_open(ump,
    642 				    attr_vp, attrnamespace, dp->d_name, l);
    643 				vrele(attr_vp);
    644 				if (error) {
    645 					printf("ulfs_extattr_iterate_directory: "
    646 					    "enable %s %d\n", dp->d_name,
    647 					    error);
    648 				} else if (bootverbose) {
    649 					printf("%s: EA %s loaded\n",
    650 					       sbp->f_mntonname, dp->d_name);
    651 				}
    652 			}
    653  next:
    654 			dp = (struct dirent *) ((char *)dp + dp->d_reclen);
    655 			if (dp >= edp)
    656 				break;
    657 		}
    658 	}
    659 	kmem_free(dirbuf, LFS_DIRBLKSIZ);
    660 
    661 	return (0);
    662 }
    663 
/*
 * Auto-start of extended attributes, to be executed (optionally) at
 * mount-time.
 *
 * Looks up ULFS_EXTATTR_FSROOTSUBDIR below the file system root.  If it
 * exists and is a directory, extended attribute support is started and
 * the ULFS_EXTATTR_SUBDIR_SYSTEM and ULFS_EXTATTR_SUBDIR_USER
 * subdirectories are scanned for attribute backing files.
 *
 * Returns the error from VFS_ROOT(), from the lookup of the attribute
 * directory, or from ulfs_extattr_start(); EINVAL if the lookup returned
 * the root itself; 0 otherwise.  Failures inside the two subdirectories
 * are printed but masked.
 *
 * NOTE(review): when ULFS_EXTATTR_FSROOTSUBDIR exists but is not a
 * directory, the function prints a message and returns 0 without having
 * started anything, because error still holds the successful lookup
 * result at that point -- confirm this is intended.
 */
int
ulfs_extattr_autostart(struct mount *mp, struct lwp *l)
{
	struct vnode *rvp, *attr_dvp, *attr_system_dvp, *attr_user_dvp;
	int error;

	/*
	 * Does ULFS_EXTATTR_FSROOTSUBDIR exist off the filesystem root?
	 * If so, automatically start EA's.
	 */
	error = VFS_ROOT(mp, &rvp);
	if (error) {
		printf("ulfs_extattr_autostart.VFS_ROOT() returned %d\n",
		    error);
		return (error);
	}

	KASSERT(VOP_ISLOCKED(rvp) == LK_EXCLUSIVE);

	/* lockparent == 0: the lookup unlocks rvp unless it returns rvp itself. */
	error = ulfs_extattr_lookup(rvp, 0,
	    ULFS_EXTATTR_FSROOTSUBDIR, &attr_dvp, l);
	if (error) {
		/* rvp ref'd but now unlocked */
		KASSERT(VOP_ISLOCKED(rvp) == 0);
		vrele(rvp);
		return (error);
	}
	if (rvp == attr_dvp) {
		/* Should never happen. */
		KASSERT(VOP_ISLOCKED(rvp) == LK_EXCLUSIVE);
		/* Two references, one lock: drop one ref, then unlock and drop. */
		vrele(attr_dvp);
		vput(rvp);
		return (EINVAL);
	}
	KASSERT(VOP_ISLOCKED(rvp) == 0);
	vrele(rvp);

	KASSERT(VOP_ISLOCKED(attr_dvp) == LK_EXCLUSIVE);

	if (attr_dvp->v_type != VDIR) {
		printf("ulfs_extattr_autostart: %s != VDIR\n",
		    ULFS_EXTATTR_FSROOTSUBDIR);
		/* error is 0 here (the lookup succeeded), so 0 is returned. */
		goto return_vput_attr_dvp;
	}

	error = ulfs_extattr_start(mp, l);
	if (error) {
		printf("ulfs_extattr_autostart: ulfs_extattr_start failed (%d)\n",
		    error);
		goto return_vput_attr_dvp;
	}

	/*
	 * Look for two subdirectories: ULFS_EXTATTR_SUBDIR_SYSTEM,
	 * ULFS_EXTATTR_SUBDIR_USER.  For each, iterate over the sub-directory,
	 * and start with appropriate type.  Failures in either don't
	 * result in an over-all failure.  attr_dvp is left locked to
	 * be cleaned up on exit.
	 */
	error = ulfs_extattr_lookup(attr_dvp, LOCKPARENT,
	    ULFS_EXTATTR_SUBDIR_SYSTEM, &attr_system_dvp, l);
	KASSERT(VOP_ISLOCKED(attr_dvp) == LK_EXCLUSIVE);
	if (error == 0) {
		KASSERT(VOP_ISLOCKED(attr_system_dvp) == LK_EXCLUSIVE);
		error = ulfs_extattr_iterate_directory(VFSTOULFS(mp),
		    attr_system_dvp, EXTATTR_NAMESPACE_SYSTEM, l);
		if (error)
			printf("ulfs_extattr_iterate_directory returned %d\n",
			    error);
		KASSERT(VOP_ISLOCKED(attr_system_dvp) == LK_EXCLUSIVE);
		vput(attr_system_dvp);
	}

	/* Same again for the user namespace subdirectory. */
	error = ulfs_extattr_lookup(attr_dvp, LOCKPARENT,
	    ULFS_EXTATTR_SUBDIR_USER, &attr_user_dvp, l);
	KASSERT(VOP_ISLOCKED(attr_dvp) == LK_EXCLUSIVE);
	if (error == 0) {
		KASSERT(VOP_ISLOCKED(attr_user_dvp) == LK_EXCLUSIVE);
		error = ulfs_extattr_iterate_directory(VFSTOULFS(mp),
		    attr_user_dvp, EXTATTR_NAMESPACE_USER, l);
		if (error)
			printf("ulfs_extattr_iterate_directory returned %d\n",
			    error);
		KASSERT(VOP_ISLOCKED(attr_user_dvp) == LK_EXCLUSIVE);
		vput(attr_user_dvp);
	}

	/* Mask startup failures in sub-directories. */
	error = 0;

 return_vput_attr_dvp:
	KASSERT(VOP_ISLOCKED(attr_dvp) == LK_EXCLUSIVE);
	vput(attr_dvp);

	return (error);
}
    764 
    765 /*
    766  * Stop extended attribute support on an FS.
    767  */
    768 void
    769 ulfs_extattr_stop(struct mount *mp, struct lwp *l)
    770 {
    771 	struct ulfs_extattr_list_entry *uele;
    772 	struct ulfsmount *ump = VFSTOULFS(mp);
    773 
    774 	ulfs_extattr_uepm_lock(ump);
    775 
    776 	/*
    777 	 * If we haven't been started, no big deal.  Just short-circuit
    778 	 * the processing work.
    779 	 */
    780 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED)) {
    781 		goto unlock;
    782 	}
    783 
    784 	while (LIST_FIRST(&ump->um_extattr.uepm_list) != NULL) {
    785 		uele = LIST_FIRST(&ump->um_extattr.uepm_list);
    786 		ulfs_extattr_disable(ump, uele->uele_attrnamespace,
    787 		    uele->uele_attrname, l);
    788 	}
    789 
    790 	ump->um_extattr.uepm_flags &= ~ULFS_EXTATTR_UEPM_STARTED;
    791 
    792 	kauth_cred_free(ump->um_extattr.uepm_ucred);
    793 	ump->um_extattr.uepm_ucred = NULL;
    794 
    795  unlock:
    796 	ulfs_extattr_uepm_unlock(ump);
    797 }
    798 
    799 /*
    800  * Enable a named attribute on the specified filesystem; provide an
    801  * unlocked backing vnode to hold the attribute data.
    802  */
    803 static int
    804 ulfs_extattr_enable(struct ulfsmount *ump, int attrnamespace,
    805     const char *attrname, struct vnode *backing_vnode, struct lwp *l)
    806 {
    807 	struct ulfs_extattr_list_entry *attribute;
    808 	struct iovec aiov;
    809 	struct uio auio;
    810 	int error = 0;
    811 
    812 	if (!ulfs_extattr_valid_attrname(attrnamespace, attrname))
    813 		return (EINVAL);
    814 	if (backing_vnode->v_type != VREG)
    815 		return (EINVAL);
    816 
    817 	attribute = kmem_zalloc(sizeof(*attribute), KM_SLEEP);
    818 
    819 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED)) {
    820 		error = EOPNOTSUPP;
    821 		goto free_exit;
    822 	}
    823 
    824 	if (ulfs_extattr_find_attr(ump, attrnamespace, attrname)) {
    825 		error = EEXIST;
    826 		goto free_exit;
    827 	}
    828 
    829 	strncpy(attribute->uele_attrname, attrname,
    830 	    ULFS_EXTATTR_MAXEXTATTRNAME);
    831 	attribute->uele_attrnamespace = attrnamespace;
    832 	memset(&attribute->uele_fileheader, 0,
    833 	    sizeof(struct ulfs_extattr_fileheader));
    834 
    835 	attribute->uele_backing_vnode = backing_vnode;
    836 
    837 	auio.uio_iov = &aiov;
    838 	auio.uio_iovcnt = 1;
    839 	aiov.iov_base = (void *) &attribute->uele_fileheader;
    840 	aiov.iov_len = sizeof(struct ulfs_extattr_fileheader);
    841 	auio.uio_resid = sizeof(struct ulfs_extattr_fileheader);
    842 	auio.uio_offset = (off_t) 0;
    843 	auio.uio_rw = UIO_READ;
    844 	UIO_SETUP_SYSSPACE(&auio);
    845 
    846 	vn_lock(backing_vnode, LK_SHARED | LK_RETRY);
    847 	error = VOP_READ(backing_vnode, &auio, IO_NODELOCKED,
    848 	    ump->um_extattr.uepm_ucred);
    849 
    850 	if (error)
    851 		goto unlock_free_exit;
    852 
    853 	if (auio.uio_resid != 0) {
    854 		printf("ulfs_extattr_enable: malformed attribute header\n");
    855 		error = EINVAL;
    856 		goto unlock_free_exit;
    857 	}
    858 
    859 	/*
    860 	 * Try to determine the byte order of the attribute file.
    861 	 */
    862 	if (attribute->uele_fileheader.uef_magic != ULFS_EXTATTR_MAGIC) {
    863 		attribute->uele_flags |= UELE_F_NEEDSWAP;
    864 		attribute->uele_fileheader.uef_magic =
    865 		    ulfs_rw32(attribute->uele_fileheader.uef_magic,
    866 			     UELE_NEEDSWAP(attribute));
    867 		if (attribute->uele_fileheader.uef_magic != ULFS_EXTATTR_MAGIC) {
    868 			printf("ulfs_extattr_enable: invalid attribute header "
    869 			       "magic\n");
    870 			error = EINVAL;
    871 			goto unlock_free_exit;
    872 		}
    873 	}
    874 	attribute->uele_fileheader.uef_version =
    875 	    ulfs_rw32(attribute->uele_fileheader.uef_version,
    876 		     UELE_NEEDSWAP(attribute));
    877 	attribute->uele_fileheader.uef_size =
    878 	    ulfs_rw32(attribute->uele_fileheader.uef_size,
    879 		     UELE_NEEDSWAP(attribute));
    880 
    881 	if (attribute->uele_fileheader.uef_version != ULFS_EXTATTR_VERSION) {
    882 		printf("ulfs_extattr_enable: incorrect attribute header "
    883 		    "version\n");
    884 		error = EINVAL;
    885 		goto unlock_free_exit;
    886 	}
    887 
    888 	LIST_INSERT_HEAD(&ump->um_extattr.uepm_list, attribute,
    889 	    uele_entries);
    890 
    891 	VOP_UNLOCK(backing_vnode);
    892 	return (0);
    893 
    894  unlock_free_exit:
    895 	VOP_UNLOCK(backing_vnode);
    896 
    897  free_exit:
    898 	kmem_free(attribute, sizeof(*attribute));
    899 	return (error);
    900 }
    901 
    902 /*
    903  * Disable extended attribute support on an FS.
    904  */
    905 static int
    906 ulfs_extattr_disable(struct ulfsmount *ump, int attrnamespace,
    907     const char *attrname, struct lwp *l)
    908 {
    909 	struct ulfs_extattr_list_entry *uele;
    910 	int error = 0;
    911 
    912 	if (!ulfs_extattr_valid_attrname(attrnamespace, attrname))
    913 		return (EINVAL);
    914 
    915 	uele = ulfs_extattr_find_attr(ump, attrnamespace, attrname);
    916 	if (!uele)
    917 		return (ENODATA);
    918 
    919 	LIST_REMOVE(uele, uele_entries);
    920 
    921 	error = vn_close(uele->uele_backing_vnode, FREAD|FWRITE,
    922 	    l->l_cred);
    923 
    924 	kmem_free(uele, sizeof(*uele));
    925 
    926 	return (error);
    927 }
    928 
    929 /*
    930  * VFS call to manage extended attributes in ULFS.  If filename_vp is
    931  * non-NULL, it must be passed in locked, and regardless of errors in
    932  * processing, will be unlocked.
    933  */
    934 int
    935 ulfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
    936     int attrnamespace, const char *attrname)
    937 {
    938 	struct lwp *l = curlwp;
    939 	struct ulfsmount *ump = VFSTOULFS(mp);
    940 	int error;
    941 
    942 	/*
    943 	 * Only privileged processes can configure extended attributes.
    944 	 */
    945 	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_EXTATTR,
    946 	    0, mp, NULL, NULL);
    947 	if (error) {
    948 		if (filename_vp != NULL)
    949 			VOP_UNLOCK(filename_vp);
    950 		return (error);
    951 	}
    952 
    953 	switch(cmd) {
    954 	case ULFS_EXTATTR_CMD_START:
    955 		if (filename_vp != NULL) {
    956 			VOP_UNLOCK(filename_vp);
    957 			return (EINVAL);
    958 		}
    959 		if (attrname != NULL)
    960 			return (EINVAL);
    961 
    962 		error = ulfs_extattr_autostart(mp, l);
    963 		return (error);
    964 
    965 	case ULFS_EXTATTR_CMD_STOP:
    966 		if (filename_vp != NULL) {
    967 			VOP_UNLOCK(filename_vp);
    968 			return (EINVAL);
    969 		}
    970 		if (attrname != NULL)
    971 			return (EINVAL);
    972 
    973 		ulfs_extattr_stop(mp, l);
    974 		return (0);
    975 
    976 	case ULFS_EXTATTR_CMD_ENABLE:
    977 		if (filename_vp == NULL)
    978 			return (EINVAL);
    979 		if (attrname == NULL) {
    980 			VOP_UNLOCK(filename_vp);
    981 			return (EINVAL);
    982 		}
    983 
    984 		/*
    985 		 * ulfs_extattr_enable_with_open() will always unlock the
    986 		 * vnode, regardless of failure.
    987 		 */
    988 		ulfs_extattr_uepm_lock(ump);
    989 		error = ulfs_extattr_enable_with_open(ump, filename_vp,
    990 		    attrnamespace, attrname, l);
    991 		ulfs_extattr_uepm_unlock(ump);
    992 		return (error);
    993 
    994 	case ULFS_EXTATTR_CMD_DISABLE:
    995 		if (filename_vp != NULL) {
    996 			VOP_UNLOCK(filename_vp);
    997 			return (EINVAL);
    998 		}
    999 		if (attrname == NULL)
   1000 			return (EINVAL);
   1001 
   1002 		ulfs_extattr_uepm_lock(ump);
   1003 		error = ulfs_extattr_disable(ump, attrnamespace, attrname, l);
   1004 		ulfs_extattr_uepm_unlock(ump);
   1005 		return (error);
   1006 
   1007 	default:
   1008 		return (EINVAL);
   1009 	}
   1010 }
   1011 
   1012 /*
   1013  * Read extended attribute header for a given vnode and attribute.
   1014  * Backing vnode should be locked and unlocked by caller.
   1015  */
   1016 static int
   1017 ulfs_extattr_get_header(struct vnode *vp, struct ulfs_extattr_list_entry *uele,
   1018     struct ulfs_extattr_header *ueh, off_t *bap)
   1019 {
   1020 	struct mount *mp = vp->v_mount;
   1021 	struct ulfsmount *ump = VFSTOULFS(mp);
   1022 	struct inode *ip = VTOI(vp);
   1023 	off_t base_offset;
   1024 	struct iovec aiov;
   1025 	struct uio aio;
   1026 	int error;
   1027 
   1028 	/*
   1029 	 * Find base offset of header in file based on file header size, and
   1030 	 * data header size + maximum data size, indexed by inode number.
   1031 	 */
   1032 	base_offset = sizeof(struct ulfs_extattr_fileheader) +
   1033 	    ip->i_number * (sizeof(struct ulfs_extattr_header) +
   1034 	    uele->uele_fileheader.uef_size);
   1035 
   1036 	/*
   1037 	 * Read in the data header to see if the data is defined, and if so
   1038 	 * how much.
   1039 	 */
   1040 	memset(ueh, 0, sizeof(struct ulfs_extattr_header));
   1041 	aiov.iov_base = ueh;
   1042 	aiov.iov_len = sizeof(struct ulfs_extattr_header);
   1043 	aio.uio_iov = &aiov;
   1044 	aio.uio_iovcnt = 1;
   1045 	aio.uio_rw = UIO_READ;
   1046 	aio.uio_offset = base_offset;
   1047 	aio.uio_resid = sizeof(struct ulfs_extattr_header);
   1048 	UIO_SETUP_SYSSPACE(&aio);
   1049 
   1050 	error = VOP_READ(uele->uele_backing_vnode, &aio,
   1051 	    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
   1052 	if (error)
   1053 		return error;
   1054 
   1055 	/*
   1056 	 * Attribute headers are kept in file system byte order.
   1057 	 * XXX What about the blob of data?
   1058 	 */
   1059 	ueh->ueh_flags = ulfs_rw32(ueh->ueh_flags, UELE_NEEDSWAP(uele));
   1060 	ueh->ueh_len   = ulfs_rw32(ueh->ueh_len, UELE_NEEDSWAP(uele));
   1061 	ueh->ueh_i_gen = ulfs_rw32(ueh->ueh_i_gen, UELE_NEEDSWAP(uele));
   1062 
   1063 	/* Defined? */
   1064 	if ((ueh->ueh_flags & ULFS_EXTATTR_ATTR_FLAG_INUSE) == 0)
   1065 		return ENODATA;
   1066 
   1067 	/* Valid for the current inode generation? */
   1068 	if (ueh->ueh_i_gen != ip->i_gen) {
   1069 		/*
   1070 		 * The inode itself has a different generation number
   1071 		 * than the uele data.  For now, the best solution
   1072 		 * is to coerce this to undefined, and let it get cleaned
   1073 		 * up by the next write or extattrctl clean.
   1074 		 */
   1075 		printf("%s (%s): inode gen inconsistency (%u, %jd)\n",
   1076 		       __func__,  mp->mnt_stat.f_mntonname, ueh->ueh_i_gen,
   1077 		       (intmax_t)ip->i_gen);
   1078 		return ENODATA;
   1079 	}
   1080 
   1081 	/* Local size consistency check. */
   1082 	if (ueh->ueh_len > uele->uele_fileheader.uef_size)
   1083 		return ENXIO;
   1084 
   1085 	/* Return base offset */
   1086 	if (bap != NULL)
   1087 		*bap = base_offset;
   1088 
   1089 	return 0;
   1090 }
   1091 
   1092 /*
   1093  * Vnode operation to retrieve a named extended attribute.
   1094  */
   1095 int
   1096 ulfs_getextattr(struct vop_getextattr_args *ap)
   1097 /*
   1098 vop_getextattr {
   1099 	IN struct vnode *a_vp;
   1100 	IN int a_attrnamespace;
   1101 	IN const char *a_name;
   1102 	INOUT struct uio *a_uio;
   1103 	OUT size_t *a_size;
   1104 	IN kauth_cred_t a_cred;
   1105 };
   1106 */
   1107 {
   1108 	struct mount *mp = ap->a_vp->v_mount;
   1109 	struct ulfsmount *ump = VFSTOULFS(mp);
   1110 	int error;
   1111 
   1112 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED))
   1113 		return (EOPNOTSUPP);
   1114 
   1115 	ulfs_extattr_uepm_lock(ump);
   1116 
   1117 	error = ulfs_extattr_get(ap->a_vp, ap->a_attrnamespace, ap->a_name,
   1118 	    ap->a_uio, ap->a_size, ap->a_cred, curlwp);
   1119 
   1120 	ulfs_extattr_uepm_unlock(ump);
   1121 
   1122 	return (error);
   1123 }
   1124 
   1125 /*
   1126  * Real work associated with retrieving a named attribute--assumes that
   1127  * the attribute lock has already been grabbed.
   1128  */
   1129 static int
   1130 ulfs_extattr_get(struct vnode *vp, int attrnamespace, const char *name,
   1131     struct uio *uio, size_t *size, kauth_cred_t cred, struct lwp *l)
   1132 {
   1133 	struct ulfs_extattr_list_entry *attribute;
   1134 	struct ulfs_extattr_header ueh;
   1135 	struct mount *mp = vp->v_mount;
   1136 	struct ulfsmount *ump = VFSTOULFS(mp);
   1137 	off_t base_offset;
   1138 	size_t len, old_len;
   1139 	int error = 0;
   1140 
   1141 	if (strlen(name) == 0)
   1142 		return (EINVAL);
   1143 
   1144 	error = internal_extattr_check_cred(vp, attrnamespace, name, cred,
   1145 	    VREAD);
   1146 	if (error)
   1147 		return (error);
   1148 
   1149 	attribute = ulfs_extattr_find_attr(ump, attrnamespace, name);
   1150 	if (!attribute)
   1151 		return (ENODATA);
   1152 
   1153 	/*
   1154 	 * Allow only offsets of zero to encourage the read/replace
   1155 	 * extended attribute semantic.  Otherwise we can't guarantee
   1156 	 * atomicity, as we don't provide locks for extended attributes.
   1157 	 */
   1158 	if (uio != NULL && uio->uio_offset != 0)
   1159 		return (ENXIO);
   1160 
   1161 	/*
   1162 	 * Don't need to get a lock on the backing file if the getattr is
   1163 	 * being applied to the backing file, as the lock is already held.
   1164 	 */
   1165 	if (attribute->uele_backing_vnode != vp)
   1166 		vn_lock(attribute->uele_backing_vnode, LK_SHARED | LK_RETRY);
   1167 
   1168 	error = ulfs_extattr_get_header(vp, attribute, &ueh, &base_offset);
   1169 	if (error)
   1170 		goto vopunlock_exit;
   1171 
   1172 	/* Return full data size if caller requested it. */
   1173 	if (size != NULL)
   1174 		*size = ueh.ueh_len;
   1175 
   1176 	/* Return data if the caller requested it. */
   1177 	if (uio != NULL) {
   1178 		/* Allow for offset into the attribute data. */
   1179 		uio->uio_offset = base_offset + sizeof(struct
   1180 		    ulfs_extattr_header);
   1181 
   1182 		/*
   1183 		 * Figure out maximum to transfer -- use buffer size and
   1184 		 * local data limit.
   1185 		 */
   1186 		len = MIN(uio->uio_resid, ueh.ueh_len);
   1187 		old_len = uio->uio_resid;
   1188 		uio->uio_resid = len;
   1189 
   1190 		error = VOP_READ(attribute->uele_backing_vnode, uio,
   1191 		    IO_NODELOCKED, ump->um_extattr.uepm_ucred);
   1192 		if (error)
   1193 			goto vopunlock_exit;
   1194 
   1195 		uio->uio_resid = old_len - (len - uio->uio_resid);
   1196 	}
   1197 
   1198  vopunlock_exit:
   1199 
   1200 	if (uio != NULL)
   1201 		uio->uio_offset = 0;
   1202 
   1203 	if (attribute->uele_backing_vnode != vp)
   1204 		VOP_UNLOCK(attribute->uele_backing_vnode);
   1205 
   1206 	return (error);
   1207 }
   1208 
   1209 /*
   1210  * Vnode operation to list extended attribute for a vnode
   1211  */
   1212 int
   1213 ulfs_listextattr(struct vop_listextattr_args *ap)
   1214 /*
   1215 vop_listextattr {
   1216 	IN struct vnode *a_vp;
   1217 	IN int a_attrnamespace;
   1218 	INOUT struct uio *a_uio;
   1219 	OUT size_t *a_size;
   1220 	IN int flag;
   1221 	IN kauth_cred_t a_cred;
   1222 	struct proc *a_p;
   1223 };
   1224 */
   1225 {
   1226 	struct mount *mp = ap->a_vp->v_mount;
   1227 	struct ulfsmount *ump = VFSTOULFS(mp);
   1228 	int error;
   1229 
   1230 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED))
   1231 		return (EOPNOTSUPP);
   1232 
   1233 	ulfs_extattr_uepm_lock(ump);
   1234 
   1235 	error = ulfs_extattr_list(ap->a_vp, ap->a_attrnamespace,
   1236 	    ap->a_uio, ap->a_size, ap->a_flag, ap->a_cred, curlwp);
   1237 
   1238 	ulfs_extattr_uepm_unlock(ump);
   1239 
   1240 	return (error);
   1241 }
   1242 
   1243 /*
   1244  * Real work associated with retrieving list of attributes--assumes that
   1245  * the attribute lock has already been grabbed.
   1246  */
   1247 static int
   1248 ulfs_extattr_list(struct vnode *vp, int attrnamespace,
   1249     struct uio *uio, size_t *size, int flag,
   1250     kauth_cred_t cred, struct lwp *l)
   1251 {
   1252 	struct ulfs_extattr_list_entry *uele;
   1253 	struct ulfs_extattr_header ueh;
   1254 	struct mount *mp = vp->v_mount;
   1255 	struct ulfsmount *ump = VFSTOULFS(mp);
   1256 	size_t listsize = 0;
   1257 	int error = 0;
   1258 
   1259 	/*
   1260 	 * XXX: We can move this inside the loop and iterate on individual
   1261 	 *	attributes.
   1262 	 */
   1263 	error = internal_extattr_check_cred(vp, attrnamespace, "", cred,
   1264 	    VREAD);
   1265 	if (error)
   1266 		return (error);
   1267 
   1268 	LIST_FOREACH(uele, &ump->um_extattr.uepm_list, uele_entries) {
   1269 		unsigned char attrnamelen;
   1270 
   1271 		if (uele->uele_attrnamespace != attrnamespace)
   1272 			continue;
   1273 
   1274 		error = ulfs_extattr_get_header(vp, uele, &ueh, NULL);
   1275 		if (error == ENODATA)
   1276 			continue;
   1277 		if (error != 0)
   1278 			return error;
   1279 
   1280 		/*
   1281 		 * Don't need to get a lock on the backing file if
   1282 		 * the listattr is being applied to the backing file,
   1283 		 * as the lock is already held.
   1284 		 */
   1285 		if (uele->uele_backing_vnode != vp)
   1286 			vn_lock(uele->uele_backing_vnode, LK_SHARED | LK_RETRY);
   1287 
   1288 		/*
   1289 		 * +1 for trailing NUL (listxattr flavor)
   1290 		 *  or leading name length (extattr_list_file flavor)
   1291 	 	 */
   1292 		attrnamelen = strlen(uele->uele_attrname);
   1293 		listsize += attrnamelen + 1;
   1294 
   1295 		/* Return data if the caller requested it. */
   1296 		if (uio != NULL) {
   1297 			/*
   1298 			 * We support two flavors. Either NUL-terminated
   1299 			 * strings (a la listxattr), or non NUL-terminated,
   1300 			 * one byte length prefixed strings (for
   1301 			 * extattr_list_file). EXTATTR_LIST_LENPREFIX switches
   1302 		 	 * that second behavior.
   1303 			 */
   1304 			if (flag & EXTATTR_LIST_LENPREFIX) {
   1305 				uint8_t len = (uint8_t)attrnamelen;
   1306 
   1307 				/* Copy leading name length */
   1308 				error = uiomove(&len, sizeof(len), uio);
   1309 				if (error != 0)
   1310 					break;
   1311 			} else {
   1312 				/* Include trailing NULL */
   1313 				attrnamelen++;
   1314 			}
   1315 
   1316 			error = uiomove(uele->uele_attrname,
   1317 					(size_t)attrnamelen, uio);
   1318 			if (error != 0)
   1319 				break;
   1320 		}
   1321 
   1322 		if (uele->uele_backing_vnode != vp)
   1323 			VOP_UNLOCK(uele->uele_backing_vnode);
   1324 
   1325 		if (error != 0)
   1326 			return error;
   1327 	}
   1328 
   1329 	if (uio != NULL)
   1330 		uio->uio_offset = 0;
   1331 
   1332 	/* Return full data size if caller requested it. */
   1333 	if (size != NULL)
   1334 		*size = listsize;
   1335 
   1336 	return 0;
   1337 }
   1338 
   1339 /*
   1340  * Vnode operation to remove a named attribute.
   1341  */
   1342 int
   1343 ulfs_deleteextattr(struct vop_deleteextattr_args *ap)
   1344 /*
   1345 vop_deleteextattr {
   1346 	IN struct vnode *a_vp;
   1347 	IN int a_attrnamespace;
   1348 	IN const char *a_name;
   1349 	IN kauth_cred_t a_cred;
   1350 };
   1351 */
   1352 {
   1353 	struct mount *mp = ap->a_vp->v_mount;
   1354 	struct ulfsmount *ump = VFSTOULFS(mp);
   1355 	int error;
   1356 
   1357 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED))
   1358 		return (EOPNOTSUPP);
   1359 
   1360 	ulfs_extattr_uepm_lock(ump);
   1361 
   1362 	error = ulfs_extattr_rm(ap->a_vp, ap->a_attrnamespace, ap->a_name,
   1363 	    ap->a_cred, curlwp);
   1364 
   1365 	ulfs_extattr_uepm_unlock(ump);
   1366 
   1367 	return (error);
   1368 }
   1369 
   1370 /*
   1371  * Vnode operation to set a named attribute.
   1372  */
   1373 int
   1374 ulfs_setextattr(struct vop_setextattr_args *ap)
   1375 /*
   1376 vop_setextattr {
   1377 	IN struct vnode *a_vp;
   1378 	IN int a_attrnamespace;
   1379 	IN const char *a_name;
   1380 	INOUT struct uio *a_uio;
   1381 	IN kauth_cred_t a_cred;
   1382 };
   1383 */
   1384 {
   1385 	struct mount *mp = ap->a_vp->v_mount;
   1386 	struct ulfsmount *ump = VFSTOULFS(mp);
   1387 	int error;
   1388 
   1389 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED))
   1390 		return (EOPNOTSUPP);
   1391 
   1392 	ulfs_extattr_uepm_lock(ump);
   1393 
   1394 	/*
   1395 	 * XXX: No longer a supported way to delete extended attributes.
   1396 	 */
   1397 	if (ap->a_uio == NULL) {
   1398 		ulfs_extattr_uepm_unlock(ump);
   1399 		return (EINVAL);
   1400 	}
   1401 
   1402 	error = ulfs_extattr_set(ap->a_vp, ap->a_attrnamespace, ap->a_name,
   1403 	    ap->a_uio, ap->a_cred, curlwp);
   1404 
   1405 	ulfs_extattr_uepm_unlock(ump);
   1406 
   1407 	return (error);
   1408 }
   1409 
   1410 /*
   1411  * Real work associated with setting a vnode's extended attributes;
   1412  * assumes that the attribute lock has already been grabbed.
   1413  */
   1414 static int
   1415 ulfs_extattr_set(struct vnode *vp, int attrnamespace, const char *name,
   1416     struct uio *uio, kauth_cred_t cred, struct lwp *l)
   1417 {
   1418 	struct ulfs_extattr_list_entry *attribute;
   1419 	struct ulfs_extattr_header ueh;
   1420 	struct iovec local_aiov;
   1421 	struct uio local_aio;
   1422 	struct mount *mp = vp->v_mount;
   1423 	struct ulfsmount *ump = VFSTOULFS(mp);
   1424 	struct inode *ip = VTOI(vp);
   1425 	off_t base_offset;
   1426 	int error = 0, ioflag;
   1427 
   1428 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1429 		return (EROFS);
   1430 
   1431 	if (!ulfs_extattr_valid_attrname(attrnamespace, name))
   1432 		return (EINVAL);
   1433 
   1434 	error = internal_extattr_check_cred(vp, attrnamespace, name, cred,
   1435 	    VWRITE);
   1436 	if (error)
   1437 		return (error);
   1438 
   1439 	attribute = ulfs_extattr_find_attr(ump, attrnamespace, name);
   1440 	if (!attribute) {
   1441 		error = ulfs_extattr_autocreate_attr(vp, attrnamespace,
   1442 						    name, l, &attribute);
   1443 		if (error == EEXIST) {
   1444 			/* Another thread raced us for backend creation */
   1445 			error = 0;
   1446 			attribute =
   1447 			    ulfs_extattr_find_attr(ump, attrnamespace, name);
   1448 		}
   1449 
   1450 		if (error || !attribute)
   1451 			return ENODATA;
   1452 	}
   1453 
   1454 	/*
   1455 	 * Early rejection of invalid offsets/length.
   1456 	 * Reject: any offset but 0 (replace)
   1457 	 *	 Any size greater than attribute size limit
   1458  	 */
   1459 	if (uio->uio_offset != 0 ||
   1460 	    uio->uio_resid > attribute->uele_fileheader.uef_size)
   1461 		return (ENXIO);
   1462 
   1463 	/*
   1464 	 * Find base offset of header in file based on file header size, and
   1465 	 * data header size + maximum data size, indexed by inode number.
   1466 	 */
   1467 	base_offset = sizeof(struct ulfs_extattr_fileheader) +
   1468 	    ip->i_number * (sizeof(struct ulfs_extattr_header) +
   1469 	    attribute->uele_fileheader.uef_size);
   1470 
   1471 	/*
   1472 	 * Write out a data header for the data.
   1473 	 */
   1474 	ueh.ueh_len = ulfs_rw32((uint32_t) uio->uio_resid,
   1475 	    UELE_NEEDSWAP(attribute));
   1476 	ueh.ueh_flags = ulfs_rw32(ULFS_EXTATTR_ATTR_FLAG_INUSE,
   1477 				 UELE_NEEDSWAP(attribute));
   1478 	ueh.ueh_i_gen = ulfs_rw32(ip->i_gen, UELE_NEEDSWAP(attribute));
   1479 	local_aiov.iov_base = &ueh;
   1480 	local_aiov.iov_len = sizeof(struct ulfs_extattr_header);
   1481 	local_aio.uio_iov = &local_aiov;
   1482 	local_aio.uio_iovcnt = 1;
   1483 	local_aio.uio_rw = UIO_WRITE;
   1484 	local_aio.uio_offset = base_offset;
   1485 	local_aio.uio_resid = sizeof(struct ulfs_extattr_header);
   1486 	UIO_SETUP_SYSSPACE(&local_aio);
   1487 
   1488 	/*
   1489 	 * Don't need to get a lock on the backing file if the setattr is
   1490 	 * being applied to the backing file, as the lock is already held.
   1491 	 */
   1492 	if (attribute->uele_backing_vnode != vp)
   1493 		vn_lock(attribute->uele_backing_vnode,
   1494 		    LK_EXCLUSIVE | LK_RETRY);
   1495 
   1496 	ioflag = IO_NODELOCKED;
   1497 	if (ulfs_extattr_sync)
   1498 		ioflag |= IO_SYNC;
   1499 	error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag,
   1500 	    ump->um_extattr.uepm_ucred);
   1501 	if (error)
   1502 		goto vopunlock_exit;
   1503 
   1504 	if (local_aio.uio_resid != 0) {
   1505 		error = ENXIO;
   1506 		goto vopunlock_exit;
   1507 	}
   1508 
   1509 	/*
   1510 	 * Write out user data.
   1511 	 * XXX NOT ATOMIC WITH RESPECT TO THE HEADER.
   1512 	 */
   1513 	uio->uio_offset = base_offset + sizeof(struct ulfs_extattr_header);
   1514 
   1515 	ioflag = IO_NODELOCKED;
   1516 	if (ulfs_extattr_sync)
   1517 		ioflag |= IO_SYNC;
   1518 	error = VOP_WRITE(attribute->uele_backing_vnode, uio, ioflag,
   1519 	    ump->um_extattr.uepm_ucred);
   1520 
   1521  vopunlock_exit:
   1522 	uio->uio_offset = 0;
   1523 
   1524 	if (attribute->uele_backing_vnode != vp)
   1525 		VOP_UNLOCK(attribute->uele_backing_vnode);
   1526 
   1527 	return (error);
   1528 }
   1529 
   1530 /*
   1531  * Real work associated with removing an extended attribute from a vnode.
   1532  * Assumes the attribute lock has already been grabbed.
   1533  */
   1534 static int
   1535 ulfs_extattr_rm(struct vnode *vp, int attrnamespace, const char *name,
   1536     kauth_cred_t cred, struct lwp *l)
   1537 {
   1538 	struct ulfs_extattr_list_entry *attribute;
   1539 	struct ulfs_extattr_header ueh;
   1540 	struct mount *mp = vp->v_mount;
   1541 	struct ulfsmount *ump = VFSTOULFS(mp);
   1542 	struct iovec local_aiov;
   1543 	struct uio local_aio;
   1544 	off_t base_offset;
   1545 	int error = 0, ioflag;
   1546 
   1547 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
   1548 		return (EROFS);
   1549 
   1550 	if (!ulfs_extattr_valid_attrname(attrnamespace, name))
   1551 		return (EINVAL);
   1552 
   1553 	error = internal_extattr_check_cred(vp, attrnamespace, name, cred,
   1554 	    VWRITE);
   1555 	if (error)
   1556 		return (error);
   1557 
   1558 	attribute = ulfs_extattr_find_attr(ump, attrnamespace, name);
   1559 	if (!attribute)
   1560 		return (ENODATA);
   1561 
   1562 	/*
   1563 	 * Don't need to get a lock on the backing file if the getattr is
   1564 	 * being applied to the backing file, as the lock is already held.
   1565 	 */
   1566 	if (attribute->uele_backing_vnode != vp)
   1567 		vn_lock(attribute->uele_backing_vnode, LK_EXCLUSIVE | LK_RETRY);
   1568 
   1569 	error = ulfs_extattr_get_header(vp, attribute, &ueh, &base_offset);
   1570 	if (error)
   1571 		goto vopunlock_exit;
   1572 
   1573 	/* Flag it as not in use. */
   1574 	ueh.ueh_flags = 0;		/* No need to byte swap 0 */
   1575 	ueh.ueh_len = 0;		/* ...ditto... */
   1576 
   1577 	local_aiov.iov_base = &ueh;
   1578 	local_aiov.iov_len = sizeof(struct ulfs_extattr_header);
   1579 	local_aio.uio_iov = &local_aiov;
   1580 	local_aio.uio_iovcnt = 1;
   1581 	local_aio.uio_rw = UIO_WRITE;
   1582 	local_aio.uio_offset = base_offset;
   1583 	local_aio.uio_resid = sizeof(struct ulfs_extattr_header);
   1584 	UIO_SETUP_SYSSPACE(&local_aio);
   1585 
   1586 	ioflag = IO_NODELOCKED;
   1587 	if (ulfs_extattr_sync)
   1588 		ioflag |= IO_SYNC;
   1589 	error = VOP_WRITE(attribute->uele_backing_vnode, &local_aio, ioflag,
   1590 	    ump->um_extattr.uepm_ucred);
   1591 	if (error)
   1592 		goto vopunlock_exit;
   1593 
   1594 	if (local_aio.uio_resid != 0)
   1595 		error = ENXIO;
   1596 
   1597  vopunlock_exit:
   1598 	VOP_UNLOCK(attribute->uele_backing_vnode);
   1599 
   1600 	return (error);
   1601 }
   1602 
   1603 /*
   1604  * Called by ULFS when an inode is no longer active and should have its
   1605  * attributes stripped.
   1606  */
   1607 void
   1608 ulfs_extattr_vnode_inactive(struct vnode *vp, struct lwp *l)
   1609 {
   1610 	struct ulfs_extattr_list_entry *uele;
   1611 	struct mount *mp = vp->v_mount;
   1612 	struct ulfsmount *ump = VFSTOULFS(mp);
   1613 
   1614 	/*
   1615 	 * In that case, we cannot lock. We should not have any active vnodes
   1616 	 * on the fs if this is not yet initialized but is going to be, so
   1617 	 * this can go unlocked.
   1618 	 */
   1619 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_INITIALIZED))
   1620 		return;
   1621 
   1622 	if (!(ump->um_extattr.uepm_flags & ULFS_EXTATTR_UEPM_STARTED))
   1623 		return;
   1624 
   1625 	ulfs_extattr_uepm_lock(ump);
   1626 
   1627 	LIST_FOREACH(uele, &ump->um_extattr.uepm_list, uele_entries)
   1628 		ulfs_extattr_rm(vp, uele->uele_attrnamespace,
   1629 		    uele->uele_attrname, lwp0.l_cred, l);
   1630 
   1631 	ulfs_extattr_uepm_unlock(ump);
   1632 }
   1633 
/*
 * Initialization hook for the extended attribute code.  Currently a
 * no-op.
 */
void
ulfs_extattr_init(void)
{
	/* Nothing to set up. */
}
   1639 
/*
 * Teardown hook for the extended attribute code.  Currently a no-op.
 */
void
ulfs_extattr_done(void)
{
	/* Nothing to tear down. */
}
   1645