Home | History | Annotate | Line # | Download | only in ffs
ffs_extattr.c revision 1.9
      1 /*	$NetBSD: ffs_extattr.c,v 1.9 2022/11/17 06:40:40 chs Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
      5  *
      6  * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
      7  * All rights reserved.
      8  *
      9  * This software was developed for the FreeBSD Project by Marshall
     10  * Kirk McKusick and Network Associates Laboratories, the Security
     11  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
     12  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
     13  * research program
     14  *
     15  * Redistribution and use in source and binary forms, with or without
     16  * modification, are permitted provided that the following conditions
     17  * are met:
     18  * 1. Redistributions of source code must retain the above copyright
     19  *    notice, this list of conditions and the following disclaimer.
     20  * 2. Redistributions in binary form must reproduce the above copyright
     21  *    notice, this list of conditions and the following disclaimer in the
     22  *    documentation and/or other materials provided with the distribution.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  * Copyright (c) 1982, 1986, 1989, 1993
     37  *	The Regents of the University of California.  All rights reserved.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  *	from: @(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
     64  * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
     65  *	@(#)ffs_vnops.c	8.15 (Berkeley) 5/14/95
     66  */
     67 
     68 #include <sys/cdefs.h>
     69 __KERNEL_RCSID(0, "$NetBSD: ffs_extattr.c,v 1.9 2022/11/17 06:40:40 chs Exp $");
     70 
     71 #if defined(_KERNEL_OPT)
     72 #include "opt_ffs.h"
     73 #include "opt_wapbl.h"
     74 #endif
     75 
     76 #include <sys/param.h>
     77 #include <sys/systm.h>
     78 #include <sys/resourcevar.h>
     79 #include <sys/kernel.h>
     80 #include <sys/file.h>
     81 #include <sys/stat.h>
     82 #include <sys/buf.h>
     83 #include <sys/event.h>
     84 #include <sys/extattr.h>
     85 #include <sys/kauth.h>
     86 #include <sys/proc.h>
     87 #include <sys/mount.h>
     88 #include <sys/vnode.h>
     89 #include <sys/malloc.h>
     90 #include <sys/pool.h>
     91 #include <sys/signalvar.h>
     92 #include <sys/kauth.h>
     93 #include <sys/wapbl.h>
     94 
     95 #include <miscfs/fifofs/fifo.h>
     96 #include <miscfs/genfs/genfs.h>
     97 #include <miscfs/specfs/specdev.h>
     98 
     99 #include <ufs/ufs/inode.h>
    100 #include <ufs/ufs/dir.h>
    101 #include <ufs/ufs/ufs_extern.h>
    102 #include <ufs/ufs/ufsmount.h>
    103 #include <ufs/ufs/ufs_wapbl.h>
    104 
    105 #include <ufs/ffs/fs.h>
    106 #include <ufs/ffs/ffs_extern.h>
    107 
    108 #define ALIGNED_TO(ptr, s)  \
    109     (((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)
    110 #define uoff_t uintmax_t
    111 #define ITOFS(ip) (ip)->i_fs
    112 #define i_din2 i_din.ffs2_din
    113 #define VI_LOCK(vp)		mutex_enter((vp)->v_interlock)
    114 #define VI_UNLOCK(vp)		mutex_exit((vp)->v_interlock)
    115 #define UFS_INODE_SET_FLAG(ip, f)	((ip)->i_flag |= (f))
    116 #define ASSERT_VOP_ELOCKED(vp, m)	KASSERT(VOP_ISLOCKED(vp))
    117 #define I_IS_UFS2(ip)		(ITOFS(ip)->fs_magic == FS_UFS2_MAGIC)
    118 #define	lblktosize(fs, o)	ffs_lblktosize(fs, o)
    119 #define	lblkno(fs, o)		ffs_lblkno(fs, o)
    120 #define	blkoff(fs, o)		ffs_blkoff(fs, o)
    121 #define	sblksize(fs, o, lbn)	ffs_sblksize(fs, o, lbn)
    122 typedef daddr_t ufs_lbn_t;
    123 #define msleep(chan, mtx, pri, wmesg, timeo) \
    124     mtsleep((chan), (pri), (wmesg), (timeo), *(mtx))
    125 #define vm_page_count_severe()		0
    126 #define buf_dirty_count_severe()	0
    127 #define BA_CLRBUF B_CLRBUF
    128 #define IO_ASYNC 0
    129 #define vfs_bio_brelse(bp, ioflag) 	brelse(bp, 0)
    130 #define vfs_bio_clrbuf(bp) 		clrbuf(bp)
    131 #define vfs_bio_set_flags(bp, ioflag) 	__nothing
    132 
    133 /*
    134  * Extended attribute area reading.
    135  */
    136 static int
    137 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
    138 {
    139 	struct inode *ip;
    140 	struct ufs2_dinode *dp;
    141 	struct fs *fs;
    142 	struct buf *bp;
    143 	ufs_lbn_t lbn, nextlbn;
    144 	off_t bytesinfile;
    145 	long size, xfersize, blkoffset;
    146 	ssize_t orig_resid;
    147 	int error;
    148 
    149 	ip = VTOI(vp);
    150 	fs = ITOFS(ip);
    151 	dp = ip->i_din2;
    152 
    153 #ifdef INVARIANTS
    154 	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
    155 		panic("ffs_extread: mode");
    156 
    157 #endif
    158 	orig_resid = uio->uio_resid;
    159 	KASSERT(orig_resid >= 0);
    160 	if (orig_resid == 0)
    161 		return (0);
    162 	KASSERT(uio->uio_offset >= 0);
    163 
    164 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
    165 		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
    166 			break;
    167 		lbn = lblkno(fs, uio->uio_offset);
    168 		nextlbn = lbn + 1;
    169 
    170 		/*
    171 		 * size of buffer.  The buffer representing the
    172 		 * end of the file is rounded up to the size of
    173 		 * the block type ( fragment or full block,
    174 		 * depending ).
    175 		 */
    176 		size = sblksize(fs, dp->di_extsize, lbn);
    177 		blkoffset = blkoff(fs, uio->uio_offset);
    178 
    179 		/*
    180 		 * The amount we want to transfer in this iteration is
    181 		 * one FS block less the amount of the data before
    182 		 * our startpoint (duh!)
    183 		 */
    184 		xfersize = fs->fs_bsize - blkoffset;
    185 
    186 		/*
    187 		 * But if we actually want less than the block,
    188 		 * or the file doesn't have a whole block more of data,
    189 		 * then use the lesser number.
    190 		 */
    191 		if (uio->uio_resid < xfersize)
    192 			xfersize = uio->uio_resid;
    193 		if (bytesinfile < xfersize)
    194 			xfersize = bytesinfile;
    195 
    196 		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
    197 			/*
    198 			 * Don't do readahead if this is the end of the info.
    199 			 */
    200 			error = bread(vp, -1 - lbn, size, 0, &bp);
    201 		} else {
    202 			/*
    203 			 * If we have a second block, then
    204 			 * fire off a request for a readahead
    205 			 * as well as a read. Note that the 4th and 5th
    206 			 * arguments point to arrays of the size specified in
    207 			 * the 6th argument.
    208 			 */
    209 			u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
    210 
    211 			nextlbn = -1 - nextlbn;
    212 			error = breadn(vp, -1 - lbn,
    213 			    size, &nextlbn, &nextsize, 1, 0, &bp);
    214 		}
    215 		if (error) {
    216 			brelse(bp, 0);
    217 			bp = NULL;
    218 			break;
    219 		}
    220 
    221 		/*
    222 		 * We should only get non-zero b_resid when an I/O error
    223 		 * has occurred, which should cause us to break above.
    224 		 * However, if the short read did not cause an error,
    225 		 * then we want to ensure that we do not uiomove bad
    226 		 * or uninitialized data.
    227 		 */
    228 		size -= bp->b_resid;
    229 		if (size < xfersize) {
    230 			if (size == 0)
    231 				break;
    232 			xfersize = size;
    233 		}
    234 
    235 		error = uiomove((char *)bp->b_data + blkoffset,
    236 					(int)xfersize, uio);
    237 		if (error)
    238 			break;
    239 		vfs_bio_brelse(bp, ioflag);
    240 	}
    241 
    242 	/*
    243 	 * This can only happen in the case of an error
    244 	 * because the loop above resets bp to NULL on each iteration
    245 	 * and on normal completion has not set a new value into it.
    246 	 * so it must have come from a 'break' statement
    247 	 */
    248 	if (bp != NULL)
    249 		vfs_bio_brelse(bp, ioflag);
    250 	return (error);
    251 }
    252 /*
    253  * Extended attribute area writing.
    254  */
    255 static int
    256 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t ucred)
    257 {
    258 	struct inode *ip;
    259 	struct ufs2_dinode *dp;
    260 	struct fs *fs;
    261 	struct buf *bp;
    262 	ufs_lbn_t lbn;
    263 	off_t osize;
    264 	ssize_t resid;
    265 	int blkoffset, error, flags, size, xfersize;
    266 
    267 	ip = VTOI(vp);
    268 	fs = ITOFS(ip);
    269 	dp = ip->i_din2;
    270 
    271 #ifdef INVARIANTS
    272 	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
    273 		panic("ffs_extwrite: mode");
    274 #endif
    275 
    276 	if (ioflag & IO_APPEND)
    277 		uio->uio_offset = dp->di_extsize;
    278 	KASSERT(uio->uio_offset >= 0);
    279 	if ((uoff_t)uio->uio_offset + uio->uio_resid >
    280 	    UFS_NXADDR * fs->fs_bsize)
    281 		return (EFBIG);
    282 
    283 	resid = uio->uio_resid;
    284 	osize = dp->di_extsize;
    285 	flags = IO_EXT;
    286 	if (ioflag & IO_SYNC)
    287 		flags |= IO_SYNC;
    288 
    289 	if ((error = UFS_WAPBL_BEGIN(vp->v_mount)) != 0)
    290 		return error;
    291 
    292 	for (error = 0; uio->uio_resid > 0;) {
    293 		lbn = lblkno(fs, uio->uio_offset);
    294 		blkoffset = blkoff(fs, uio->uio_offset);
    295 		xfersize = fs->fs_bsize - blkoffset;
    296 		if (uio->uio_resid < xfersize)
    297 			xfersize = uio->uio_resid;
    298 
    299 		/*
    300 		 * We must perform a read-before-write if the transfer size
    301 		 * does not cover the entire buffer.
    302 		 */
    303 		if (fs->fs_bsize > xfersize)
    304 			flags |= BA_CLRBUF;
    305 		else
    306 			flags &= ~BA_CLRBUF;
    307 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
    308 		    ucred, flags, &bp);
    309 		if (error != 0)
    310 			break;
    311 		/*
    312 		 * If the buffer is not valid we have to clear out any
    313 		 * garbage data from the pages instantiated for the buffer.
    314 		 * If we do not, a failed uiomove() during a write can leave
    315 		 * the prior contents of the pages exposed to a userland
    316 		 * mmap().  XXX deal with uiomove() errors a better way.
    317 		 */
    318 		if ((bp->b_flags & BC_NOCACHE) && fs->fs_bsize <= xfersize)
    319 			vfs_bio_clrbuf(bp);
    320 
    321 		if (uio->uio_offset + xfersize > dp->di_extsize)
    322 			dp->di_extsize = uio->uio_offset + xfersize;
    323 
    324 		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
    325 		if (size < xfersize)
    326 			xfersize = size;
    327 
    328 		error =
    329 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
    330 
    331 		vfs_bio_set_flags(bp, ioflag);
    332 
    333 		/*
    334 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
    335 		 * if we have a severe page deficiency write the buffer
    336 		 * asynchronously.  Otherwise try to cluster, and if that
    337 		 * doesn't do it then either do an async write (if O_DIRECT),
    338 		 * or a delayed write (if not).
    339 		 */
    340 		if (ioflag & IO_SYNC) {
    341 			(void)bwrite(bp);
    342 		} else if (vm_page_count_severe() ||
    343 			    buf_dirty_count_severe() ||
    344 			    xfersize + blkoffset == fs->fs_bsize ||
    345 			    (ioflag & (IO_ASYNC | IO_DIRECT)))
    346 			bawrite(bp);
    347 		else
    348 			bdwrite(bp);
    349 		if (error || xfersize == 0)
    350 			break;
    351 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
    352 	}
    353 	/*
    354 	 * If we successfully wrote any data, and we are not the superuser
    355 	 * we clear the setuid and setgid bits as a precaution against
    356 	 * tampering.
    357 	 */
    358 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
    359 		ip->i_mode &= ~(ISUID | ISGID);
    360 		dp->di_mode = ip->i_mode;
    361 	}
    362 	if (error) {
    363 		if (ioflag & IO_UNIT) {
    364 			(void)ffs_truncate(vp, osize,
    365 			    IO_EXT | (ioflag&IO_SYNC), ucred);
    366 			uio->uio_offset -= resid - uio->uio_resid;
    367 			uio->uio_resid = resid;
    368 		}
    369 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
    370 		error = ffs_update(vp, NULL, NULL, UPDATE_WAIT);
    371 	UFS_WAPBL_END(vp->v_mount);
    372 	return (error);
    373 }
    374 
    375 /*
    376  * Vnode operating to retrieve a named extended attribute.
    377  *
    378  * Locate a particular EA (nspace:name) in the area (ptr:length), and return
    379  * the length of the EA, and possibly the pointer to the entry and to the data.
    380  */
    381 static int
    382 ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name,
    383     struct extattr **eapp, u_char **eac)
    384 {
    385 	struct extattr *eap, *eaend;
    386 	size_t nlen;
    387 
    388 	nlen = strlen(name);
    389 	KASSERT(ALIGNED_TO(ptr, struct extattr));
    390 	eap = (struct extattr *)ptr;
    391 	eaend = (struct extattr *)(ptr + length);
    392 	for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
    393 		/* make sure this entry is complete */
    394 		if (EXTATTR_NEXT(eap) > eaend)
    395 			break;
    396 		if (eap->ea_namespace != nspace || eap->ea_namelength != nlen
    397 		    || memcmp(eap->ea_name, name, nlen) != 0)
    398 			continue;
    399 		if (eapp != NULL)
    400 			*eapp = eap;
    401 		if (eac != NULL)
    402 			*eac = EXTATTR_CONTENT(eap);
    403 		return (EXTATTR_CONTENT_SIZE(eap));
    404 	}
    405 	return (-1);
    406 }
    407 
    408 static int
    409 ffs_rdextattr(u_char **p, struct vnode *vp, int extra)
    410 {
    411 	struct inode *ip;
    412 	struct ufs2_dinode *dp;
    413 	struct fs *fs;
    414 	struct uio luio;
    415 	struct iovec liovec;
    416 	u_int easize;
    417 	int error;
    418 	u_char *eae;
    419 
    420 	ip = VTOI(vp);
    421 	fs = ITOFS(ip);
    422 	dp = ip->i_din2;
    423 	easize = dp->di_extsize;
    424 	if ((uoff_t)easize + extra > UFS_NXADDR * fs->fs_bsize)
    425 		return (EFBIG);
    426 
    427 	eae = malloc(easize + extra, M_TEMP, M_WAITOK);
    428 
    429 	liovec.iov_base = eae;
    430 	liovec.iov_len = easize;
    431 	luio.uio_iov = &liovec;
    432 	luio.uio_iovcnt = 1;
    433 	luio.uio_offset = 0;
    434 	luio.uio_resid = easize;
    435 	luio.uio_vmspace = vmspace_kernel();
    436 	luio.uio_rw = UIO_READ;
    437 
    438 	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
    439 	if (error) {
    440 		free(eae, M_TEMP);
    441 		return(error);
    442 	}
    443 	*p = eae;
    444 	return (0);
    445 }
    446 
/*
 * Serialize access to the cached extended attribute state of vp by
 * write-locking its genfs node.  Paired with ffs_unlock_ea().
 */
static void
ffs_lock_ea(struct vnode *vp)
{

	genfs_node_wrlock(vp);
}
    452 
/*
 * Release the genfs node lock taken by ffs_lock_ea().
 */
static void
ffs_unlock_ea(struct vnode *vp)
{

	genfs_node_unlock(vp);
}
    458 
    459 static int
    460 ffs_open_ea(struct vnode *vp, kauth_cred_t cred)
    461 {
    462 	struct inode *ip;
    463 	struct ufs2_dinode *dp;
    464 	int error;
    465 
    466 	ip = VTOI(vp);
    467 	if ((ip->i_ump->um_flags & UFS_EA) == 0) {
    468 		return EOPNOTSUPP;
    469 	}
    470 
    471 	ffs_lock_ea(vp);
    472 	if (ip->i_ea_area != NULL) {
    473 		ip->i_ea_refs++;
    474 		ffs_unlock_ea(vp);
    475 		return (0);
    476 	}
    477 	dp = ip->i_din2;
    478 	error = ffs_rdextattr(&ip->i_ea_area, vp, 0);
    479 	if (error) {
    480 		ffs_unlock_ea(vp);
    481 		return (error);
    482 	}
    483 	ip->i_ea_len = dp->di_extsize;
    484 	ip->i_ea_error = 0;
    485 	ip->i_ea_refs++;
    486 	ffs_unlock_ea(vp);
    487 	return (0);
    488 }
    489 
    490 /*
    491  * Vnode extattr transaction commit/abort
    492  */
    493 static int
    494 ffs_close_ea(struct vnode *vp, int commit, kauth_cred_t cred)
    495 {
    496 	struct inode *ip;
    497 	struct uio luio;
    498 	struct iovec liovec;
    499 	int error;
    500 	struct ufs2_dinode *dp;
    501 
    502 	ip = VTOI(vp);
    503 	KASSERT((ip->i_ump->um_flags & UFS_EA) != 0);
    504 
    505 	if (commit)
    506 		KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
    507 	else
    508 		KASSERT(VOP_ISLOCKED(vp));
    509 	ffs_lock_ea(vp);
    510 	if (ip->i_ea_area == NULL) {
    511 		ffs_unlock_ea(vp);
    512 		return (EINVAL);
    513 	}
    514 	dp = ip->i_din2;
    515 	error = ip->i_ea_error;
    516 	if (commit && error == 0) {
    517 		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
    518 		if (cred == NOCRED)
    519 			cred =  lwp0.l_cred;
    520 		liovec.iov_base = ip->i_ea_area;
    521 		liovec.iov_len = ip->i_ea_len;
    522 		luio.uio_iov = &liovec;
    523 		luio.uio_iovcnt = 1;
    524 		luio.uio_offset = 0;
    525 		luio.uio_resid = ip->i_ea_len;
    526 		luio.uio_vmspace = vmspace_kernel();
    527 		luio.uio_rw = UIO_WRITE;
    528 
    529 		/* XXX: I'm not happy about truncating to zero size */
    530 		if (ip->i_ea_len < dp->di_extsize) {
    531 			if ((error = UFS_WAPBL_BEGIN(vp->v_mount)) != 0) {
    532 				ffs_unlock_ea(vp);
    533 				return error;
    534 			}
    535 			error = ffs_truncate(vp, 0, IO_EXT, cred);
    536 			UFS_WAPBL_END(vp->v_mount);
    537 		}
    538 		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
    539 	}
    540 	if (--ip->i_ea_refs == 0) {
    541 		free(ip->i_ea_area, M_TEMP);
    542 		ip->i_ea_area = NULL;
    543 		ip->i_ea_len = 0;
    544 		ip->i_ea_error = 0;
    545 	}
    546 	ffs_unlock_ea(vp);
    547 	return (error);
    548 }
    549 
    550 /*
    551  * Vnode extattr strategy routine for fifos.
    552  *
    553  * We need to check for a read or write of the external attributes.
    554  * Otherwise we just fall through and do the usual thing.
    555  */
    556 int
    557 ffsext_strategy(void *v)
    558 {
    559 	struct vop_strategy_args /* {
    560 		struct vnodeop_desc *a_desc;
    561 		struct vnode *a_vp;
    562 		struct buf *a_bp;
    563 	} */ *ap = v;
    564 	struct vnode *vp;
    565 	daddr_t lbn;
    566 
    567 	vp = ap->a_vp;
    568 	lbn = ap->a_bp->b_lblkno;
    569 	if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
    570 		return ufs_strategy(ap);
    571 	if (vp->v_type == VFIFO)
    572 		return vn_fifo_bypass(ap);
    573 	panic("spec nodes went here");
    574 }
    575 
    576 /*
    577  * Vnode extattr transaction commit/abort
    578  */
    579 int
    580 ffs_openextattr(void *v)
    581 {
    582 	struct vop_openextattr_args /* {
    583 		struct vnode *a_vp;
    584 		kauth_cred_t a_cred;
    585 		struct proc *a_p;
    586 	} */ *ap = v;
    587 	struct inode *ip = VTOI(ap->a_vp);
    588 	struct fs *fs = ip->i_fs;
    589 
    590 	/* Not supported for UFS1 file systems. */
    591 	if (fs->fs_magic == FS_UFS1_MAGIC)
    592 		return (EOPNOTSUPP);
    593 
    594 #ifdef __FreeBSD__
    595 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
    596 		return (EOPNOTSUPP);
    597 #endif
    598 
    599 	return (ffs_open_ea(ap->a_vp, ap->a_cred));
    600 }
    601 
    602 /*
    603  * Vnode extattr transaction commit/abort
    604  */
    605 int
    606 ffs_closeextattr(void *v)
    607 {
    608 	struct vop_closeextattr_args /* {
    609 		struct vnode *a_vp;
    610 		int a_commit;
    611 		kauth_cred_t a_cred;
    612 		struct proc *a_p;
    613 	} */ *ap = v;
    614 	struct inode *ip = VTOI(ap->a_vp);
    615 	struct fs *fs = ip->i_fs;
    616 
    617 	/* Not supported for UFS1 file systems. */
    618 	if (fs->fs_magic == FS_UFS1_MAGIC)
    619 		return (EOPNOTSUPP);
    620 
    621 #ifdef __FreeBSD__
    622 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
    623 		return (EOPNOTSUPP);
    624 #endif
    625 
    626 	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
    627 		return (EROFS);
    628 
    629 	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred));
    630 }
    631 
    632 /*
    633  * Vnode operation to retrieve a named extended attribute.
    634  */
    635 int
    636 ffs_getextattr(void *v)
    637 {
    638 	struct vop_getextattr_args /* {
    639 		struct vnode *a_vp;
    640 		int a_attrnamespace;
    641 		const char *a_name;
    642 		struct uio *a_uio;
    643 		size_t *a_size;
    644 		kauth_cred_t a_cred;
    645 		struct proc *a_p;
    646 	} */ *ap = v;
    647 	struct vnode *vp = ap->a_vp;
    648 	struct inode *ip = VTOI(vp);
    649 	struct fs *fs = ip->i_fs;
    650 
    651 	KASSERT(VOP_ISLOCKED(vp));
    652 	if (fs->fs_magic == FS_UFS1_MAGIC) {
    653 		return ufs_getextattr(ap);
    654 	}
    655 
    656 	u_char *eae, *p;
    657 	unsigned easize;
    658 	int error, ealen;
    659 
    660 #ifdef __FreeBSD__
    661 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
    662 		return (EOPNOTSUPP);
    663 #endif
    664 
    665 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
    666 	    ap->a_cred, VREAD);
    667 	if (error)
    668 		return (error);
    669 
    670 	error = ffs_open_ea(ap->a_vp, ap->a_cred);
    671 	if (error)
    672 		return (error);
    673 
    674 	eae = ip->i_ea_area;
    675 	easize = ip->i_ea_len;
    676 
    677 	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
    678 	    NULL, &p);
    679 	if (ealen >= 0) {
    680 		error = 0;
    681 		if (ap->a_size != NULL)
    682 			*ap->a_size = ealen;
    683 		else if (ap->a_uio != NULL)
    684 			error = uiomove(p, ealen, ap->a_uio);
    685 	} else
    686 		error = ENOATTR;
    687 
    688 	ffs_close_ea(ap->a_vp, 0, ap->a_cred);
    689 	return (error);
    690 }
    691 
    692 /*
    693  * Vnode operation to set a named attribute.
    694  */
    695 int
    696 ffs_setextattr(void *v)
    697 {
    698 	struct vop_setextattr_args /* {
    699 		struct vnode *a_vp;
    700 		int a_attrnamespace;
    701 		const char *a_name;
    702 		struct uio *a_uio;
    703 		kauth_cred_t a_cred;
    704 		struct proc *a_p;
    705 	} */ *ap = v;
    706 	struct vnode *vp = ap->a_vp;
    707 	struct inode *ip = VTOI(vp);
    708 	struct fs *fs = ip->i_fs;
    709 
    710 	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
    711 	if (fs->fs_magic == FS_UFS1_MAGIC) {
    712 		return ufs_setextattr(ap);
    713 	}
    714 
    715 	struct extattr *eap;
    716 	uint32_t ealength, ul;
    717 	ssize_t ealen;
    718 	int olen, eapad1, eapad2, error, i, easize;
    719 	u_char *eae;
    720 	void *tmp;
    721 
    722 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
    723 		return (EOPNOTSUPP);
    724 
    725 	if (strlen(ap->a_name) == 0)
    726 		return (EINVAL);
    727 
    728 	/* XXX Now unsupported API to delete EAs using NULL uio. */
    729 	if (ap->a_uio == NULL)
    730 		return (EOPNOTSUPP);
    731 
    732 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
    733 		return (EROFS);
    734 
    735 	ealen = ap->a_uio->uio_resid;
    736 	if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
    737 		return (EINVAL);
    738 
    739 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
    740 	    ap->a_cred, VWRITE);
    741 	if (error) {
    742 
    743 		/*
    744 		 * ffs_lock_ea is not needed there, because the vnode
    745 		 * must be exclusively locked.
    746 		 */
    747 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
    748 			ip->i_ea_error = error;
    749 		return (error);
    750 	}
    751 
    752 	error = ffs_open_ea(ap->a_vp, ap->a_cred);
    753 	if (error)
    754 		return (error);
    755 
    756 	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
    757 	eapad1 = roundup2(ealength, 8) - ealength;
    758 	eapad2 = roundup2(ealen, 8) - ealen;
    759 	ealength += eapad1 + ealen + eapad2;
    760 
    761 	/*
    762 	 * CEM: rewrites of the same size or smaller could be done in-place
    763 	 * instead.  (We don't acquire any fine-grained locks in here either,
    764 	 * so we could also do bigger writes in-place.)
    765 	 */
    766 	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
    767 	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
    768 	easize = ip->i_ea_len;
    769 
    770 	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
    771 	    &eap, NULL);
    772         if (olen == -1) {
    773 		/* new, append at end */
    774 		KASSERT(ALIGNED_TO(eae + easize, struct extattr));
    775 		eap = (struct extattr *)(eae + easize);
    776 		easize += ealength;
    777 	} else {
    778 		ul = eap->ea_length;
    779 		i = (u_char *)EXTATTR_NEXT(eap) - eae;
    780 		if (ul != ealength) {
    781 			bcopy(EXTATTR_NEXT(eap), (u_char *)eap + ealength,
    782 			    easize - i);
    783 			easize += (ealength - ul);
    784 		}
    785 	}
    786 	if (easize > lblktosize(fs, UFS_NXADDR)) {
    787 		free(eae, M_TEMP);
    788 		ffs_close_ea(ap->a_vp, 0, ap->a_cred);
    789 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
    790 			ip->i_ea_error = ENOSPC;
    791 		return (ENOSPC);
    792 	}
    793 	eap->ea_length = ealength;
    794 	eap->ea_namespace = ap->a_attrnamespace;
    795 	eap->ea_contentpadlen = eapad2;
    796 	eap->ea_namelength = strlen(ap->a_name);
    797 	memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
    798 	bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
    799 	error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
    800 	if (error) {
    801 		free(eae, M_TEMP);
    802 		ffs_close_ea(ap->a_vp, 0, ap->a_cred);
    803 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
    804 			ip->i_ea_error = error;
    805 		return (error);
    806 	}
    807 	bzero((u_char *)EXTATTR_CONTENT(eap) + ealen, eapad2);
    808 
    809 	tmp = ip->i_ea_area;
    810 	ip->i_ea_area = eae;
    811 	ip->i_ea_len = easize;
    812 	free(tmp, M_TEMP);
    813 	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred);
    814 	return (error);
    815 }
    816 
    817 /*
    818  * Vnode operation to retrieve extended attributes on a vnode.
    819  */
    820 int
    821 ffs_listextattr(void *v)
    822 {
    823 	struct vop_listextattr_args /* {
    824 		struct vnode *a_vp;
    825 		int a_attrnamespace;
    826 		struct uio *a_uio;
    827 		size_t *a_size;
    828 		kauth_cred_t a_cred;
    829 		struct proc *a_p;
    830 	} */ *ap = v;
    831 	struct inode *ip = VTOI(ap->a_vp);
    832 	struct fs *fs = ip->i_fs;
    833 
    834 	if (fs->fs_magic == FS_UFS1_MAGIC) {
    835 		return ufs_listextattr(ap);
    836 	}
    837 
    838 	struct extattr *eap, *eaend;
    839 	int error, ealen;
    840 
    841 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
    842 		return (EOPNOTSUPP);
    843 
    844 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
    845 	    ap->a_cred, VREAD);
    846 	if (error)
    847 		return (error);
    848 
    849 	error = ffs_open_ea(ap->a_vp, ap->a_cred);
    850 	if (error)
    851 		return (error);
    852 
    853 	error = 0;
    854 	if (ap->a_size != NULL)
    855 		*ap->a_size = 0;
    856 
    857 	KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr));
    858 	eap = (struct extattr *)ip->i_ea_area;
    859 	eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
    860 	for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
    861 		/* make sure this entry is complete */
    862 		if (EXTATTR_NEXT(eap) > eaend)
    863 			break;
    864 		if (eap->ea_namespace != ap->a_attrnamespace)
    865 			continue;
    866 
    867 		ealen = eap->ea_namelength;
    868 		if (ap->a_size != NULL)
    869 			*ap->a_size += ealen + 1;
    870 		else if (ap->a_uio != NULL)
    871 			error = uiomove(&eap->ea_namelength, ealen + 1,
    872 			    ap->a_uio);
    873 	}
    874 
    875 	ffs_close_ea(ap->a_vp, 0, ap->a_cred);
    876 	return (error);
    877 }
    878 
    879 /*
    880  * Vnode operation to remove a named attribute.
    881  */
    882 int
    883 ffs_deleteextattr(void *v)
    884 {
    885 	struct vop_deleteextattr_args /* {
    886 		struct vnode *a_vp;
    887 		int a_attrnamespace;
    888 		kauth_cred_t a_cred;
    889 		struct proc *a_p;
    890 	} */ *ap = v;
    891 	struct vnode *vp = ap->a_vp;
    892 	struct inode *ip = VTOI(vp);
    893 	struct fs *fs = ip->i_fs;
    894 
    895 	if (fs->fs_magic == FS_UFS1_MAGIC) {
    896 		return ufs_deleteextattr(ap);
    897 	}
    898 
    899 	struct extattr *eap;
    900 	uint32_t ul;
    901 	int olen, error, i, easize;
    902 	u_char *eae;
    903 	void *tmp;
    904 
    905 #ifdef __FreeBSD__
    906 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
    907 		return (EOPNOTSUPP);
    908 #endif
    909 
    910 	if (strlen(ap->a_name) == 0)
    911 		return (EINVAL);
    912 
    913 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
    914 		return (EROFS);
    915 
    916 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
    917 	    ap->a_cred, VWRITE);
    918 	if (error) {
    919 		/*
    920 		 * ffs_lock_ea is not needed there, because the vnode
    921 		 * must be exclusively locked.
    922 		 */
    923 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
    924 			ip->i_ea_error = error;
    925 		return (error);
    926 	}
    927 
    928 	error = ffs_open_ea(ap->a_vp, ap->a_cred);
    929 	if (error)
    930 		return (error);
    931 
    932 	/* CEM: delete could be done in-place instead */
    933 	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
    934 	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
    935 	easize = ip->i_ea_len;
    936 
    937 	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
    938 	    &eap, NULL);
    939 	if (olen == -1) {
    940 		/* delete but nonexistent */
    941 		free(eae, M_TEMP);
    942 		ffs_close_ea(ap->a_vp, 0, ap->a_cred);
    943 		return (ENOATTR);
    944 	}
    945 	ul = eap->ea_length;
    946 	i = (u_char *)EXTATTR_NEXT(eap) - eae;
    947 	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
    948 	easize -= ul;
    949 
    950 	tmp = ip->i_ea_area;
    951 	ip->i_ea_area = eae;
    952 	ip->i_ea_len = easize;
    953 	free(tmp, M_TEMP);
    954 	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred);
    955 	return error;
    956 }
    957