      1 /*	$NetBSD: ufs_inode.c,v 1.112 2020/09/05 16:30:13 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  * (c) UNIX System Laboratories, Inc.
      7  * All or some portions of this file are derived from material licensed
      8  * to the University of California by American Telephone and Telegraph
      9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     10  * the permission of UNIX System Laboratories, Inc.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	@(#)ufs_inode.c	8.9 (Berkeley) 5/14/95
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.112 2020/09/05 16:30:13 riastradh Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_ffs.h"
     44 #include "opt_quota.h"
     45 #include "opt_wapbl.h"
     46 #include "opt_uvmhist.h"
     47 #endif
     48 
     49 #include <sys/param.h>
     50 #include <sys/systm.h>
     51 #include <sys/proc.h>
     52 #include <sys/vnode.h>
     53 #include <sys/mount.h>
     54 #include <sys/kernel.h>
     55 #include <sys/namei.h>
     56 #include <sys/kauth.h>
     57 #include <sys/wapbl.h>
     58 #include <sys/kmem.h>
     59 
     60 #include <ufs/ufs/inode.h>
     61 #include <ufs/ufs/ufsmount.h>
     62 #include <ufs/ufs/ufs_extern.h>
     63 #include <ufs/ufs/ufs_wapbl.h>
     64 #ifdef UFS_DIRHASH
     65 #include <ufs/ufs/dirhash.h>
     66 #endif
     67 #ifdef UFS_EXTATTR
     68 #include <ufs/ufs/extattr.h>
     69 #endif
     70 
     71 #ifdef UVMHIST
     72 #include <uvm/uvm.h>
     73 #endif
     74 #include <uvm/uvm_page.h>
     75 #include <uvm/uvm_stat.h>
     76 
     77 /*
     78  * Last reference to an inode.  If necessary, write or delete it.
     79  */
/*
 * ufs_inactive: the last reference to an inode has been dropped.
 *
 * If the link count has reached zero on a writable filesystem, release
 * all file storage now and mark the inode dead (i_mode = 0); the final
 * on-disk inode free is deferred to ufs_reclaim().  Otherwise just push
 * any pending timestamp/metadata updates to disk.
 *
 * v points to a struct vop_inactive_v2_args.  Returns 0 on success or
 * the first error encountered while starting/using the journal.
 * On return, *ap->a_recycle tells the caller whether the vnode can be
 * reclaimed immediately.
 */
int
ufs_inactive(void *v)
{
	struct vop_inactive_v2_args /* {
		struct vnode *a_vp;
		struct bool *a_recycle;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct mount *mp = vp->v_mount;
	mode_t mode;
	int allerror = 0, error;
	bool wapbl_locked = false;	/* true while we hold the journal */

	/* Caller must not already hold the WAPBL journal lock. */
	UFS_WAPBL_JUNLOCK_ASSERT(mp);

	/*
	 * Ignore inodes related to stale file handles.
	 */
	if (ip->i_mode == 0)
		goto out;

	/* Unlinked and writable: the file's storage must be freed now. */
	if (ip->i_nlink <= 0 && (mp->mnt_flag & MNT_RDONLY) == 0) {
#ifdef UFS_EXTATTR
		ufs_extattr_vnode_inactive(vp, curlwp);
#endif
		/*
		 * All file blocks must be freed before we can let the vnode
		 * be reclaimed, so can't postpone full truncating any further.
		 */
		ufs_truncate_all(vp);

#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Drop the quota charge for the inode itself.  Best-effort:
		 * if the journal cannot be started, record the error and
		 * skip the quota update rather than fail the inactivation.
		 */
		error = UFS_WAPBL_BEGIN(mp);
		if (error) {
			allerror = error;
		} else {
			wapbl_locked = true;
			(void)chkiq(ip, -1, NOCRED, 0);
		}
#endif
		/*
		 * Mark the inode dead.  i_mode == 0 is the "free inode"
		 * marker tested above and by the recycle decision below;
		 * the old mode is preserved in i_omode (presumably for
		 * consumers that need the type after death — confirm).
		 */
		DIP_ASSIGN(ip, rdev, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_omode = mode;
		DIP_ASSIGN(ip, mode, 0);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		/*
		 * Defer final inode free and update to ufs_reclaim().
		 */
	}

	/* Push any pending inode metadata changes, under the journal. */
	if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) {
		if (! wapbl_locked) {
			error = UFS_WAPBL_BEGIN(mp);
			if (error) {
				allerror = error;
				goto out;
			}
			wapbl_locked = true;
		}
		UFS_UPDATE(vp, NULL, NULL, 0);
	}
out:
	if (wapbl_locked)
		UFS_WAPBL_END(mp);
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	*ap->a_recycle = (ip->i_mode == 0);

	/*
	 * Sanity check: a dead inode must not still own any blocks or
	 * bytes; if it does the filesystem is inconsistent.
	 */
	if (ip->i_mode == 0 && (DIP(ip, size) != 0 || DIP(ip, blocks) != 0)) {
		printf("%s: unlinked ino %" PRId64 " on \"%s\" has"
		    " non zero size %" PRIx64 " or blocks %" PRIx64
		    " with allerror %d\n",
		    __func__, ip->i_number, mp->mnt_stat.f_mntonname,
		    DIP(ip, size), DIP(ip, blocks), allerror);
		panic("%s: dirty filesystem?", __func__);
	}

	return (allerror);
}
    163 
    164 /*
    165  * Reclaim an inode so that it can be used for other purposes.
    166  */
/*
 * ufs_reclaim: detach a UFS inode from its vnode so the vnode can be
 * reused.  Flushes the final inode update, drops the reference on the
 * device vnode, and frees quota and dirhash state.  Always returns 0.
 *
 * NOTE(review): UFS_UPDATE runs a second time unconditionally, outside
 * the journal, even when the journalled update above succeeded —
 * presumably a deliberate belt-and-braces flush (and the only flush when
 * UFS_WAPBL_BEGIN fails); confirm against ffs_update()/wapbl semantics.
 */
int
ufs_reclaim(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);

	/* Journalled final update, if the journal can be started. */
	if (!UFS_WAPBL_BEGIN(vp->v_mount)) {
		UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);
		UFS_WAPBL_END(vp->v_mount);
	}
	UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE);

	/*
	 * Drop our hold on the device vnode — presumably taken when the
	 * inode was loaded; confirm against the vget/loadvnode path.
	 */
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#if defined(QUOTA) || defined(QUOTA2)
	/* Release in-core quota bookkeeping for this inode. */
	ufsquota_free(ip);
#endif
#ifdef UFS_DIRHASH
	/* Free the directory hash, if this inode ever built one. */
	if (ip->i_dirhash != NULL)
		ufsdirhash_free(ip);
#endif
	return (0);
}
    191 
    192 /*
    193  * allocate a range of blocks in a file.
    194  * after this function returns, any page entirely contained within the range
    195  * will map to invalid data and thus must be overwritten before it is made
    196  * accessible to others.
    197  */
    198 
/*
 * ufs_balloc_range: allocate backing store for the byte range
 * [off, off+len) of vp, growing the file if the range extends past EOF.
 *
 * The pages covering the range are brought in (or created) and held
 * busy across the block allocation, so no other thread can observe the
 * stale previous contents of newly allocated blocks.  Any page entirely
 * inside the range maps to invalid data afterwards and must be
 * overwritten before being exposed.
 *
 * cred is used for the allocation accounting; flags are passed through
 * to GOP_ALLOC.  Returns 0 on success or an errno.
 */
int
ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred,
    int flags)
{
	off_t neweof;	/* file size after the operation */
	off_t neweob;	/* offset next to the last block after the operation */
	off_t pagestart; /* starting offset of range covered by pgs */
	off_t eob;	/* offset next to allocated blocks */
	struct uvm_object *uobj;
	int i, delta, error, npages;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
	int ppb = MAX(bsize >> PAGE_SHIFT, 1);	/* pages per fs block */
	struct vm_page **pgs;
	size_t pgssize;
	UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx len 0x%jx u_size 0x%jx",
		    (uintptr_t)vp, off, len, vp->v_size);

	neweof = MAX(vp->v_size, off + len);
	GOP_SIZE(vp, neweof, &neweob, 0);

	error = 0;
	uobj = &vp->v_uobj;

	/*
	 * read or create pages covering the range of the allocation and
	 * keep them locked until the new block is allocated, so there
	 * will be no window where the old contents of the new block are
	 * visible to racing threads.
	 */

	/* Block-aligned start of the page run; at most one fs block of pages. */
	pagestart = trunc_page(off) & ~(bsize - 1);
	npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
	pgssize = npages * sizeof(struct vm_page *);
	pgs = kmem_zalloc(pgssize, KM_SLEEP);

	/*
	 * adjust off to be block-aligned.
	 */

	delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	/*
	 * Get the pages busy.  PGO_NOBLOCKALLOC: don't allocate backing
	 * blocks here (GOP_ALLOC below does that); PGO_PASTEOF: allow
	 * pages beyond current EOF.  NOTE(review): vmobjlock appears to
	 * be released by VOP_GETPAGES on both success and error — it is
	 * re-entered below and not dropped on the error path; confirm
	 * against the genfs_getpages contract.
	 */
	genfs_node_wrlock(vp);
	rw_enter(uobj->vmobjlock, RW_WRITER);
	error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
	    VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC |
	    PGO_NOTIMESTAMP | PGO_GLOCKHELD);
	if (error) {
		genfs_node_unlock(vp);
		goto out;
	}

	/*
	 * now allocate the range.
	 */

	error = GOP_ALLOC(vp, off, len, flags, cred);
	genfs_node_unlock(vp);

	/*
	 * if the allocation succeeded, mark all the pages dirty
	 * and clear PG_RDONLY on any pages that are now fully backed
	 * by disk blocks.  if the allocation failed, we do not invalidate
	 * the pages since they might have already existed and been dirty,
	 * in which case we need to keep them around.  if we created the pages,
	 * they will be clean and read-only, and leaving such pages
	 * in the cache won't cause any problems.
	 */

	GOP_SIZE(vp, off + len, &eob, 0);
	rw_enter(uobj->vmobjlock, RW_WRITER);
	for (i = 0; i < npages; i++) {
		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
		if (!error) {
			/* Page fully inside the allocated span: writable. */
			if (off <= pagestart + (i << PAGE_SHIFT) &&
			    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
				pgs[i]->flags &= ~PG_RDONLY;
			}
			uvm_pagemarkdirty(pgs[i], UVM_PAGE_STATUS_DIRTY);
		}
		uvm_pagelock(pgs[i]);
		uvm_pageactivate(pgs[i]);
		uvm_pageunlock(pgs[i]);
	}
	/* Release the busy pages acquired by VOP_GETPAGES. */
	uvm_page_unbusy(pgs, npages);
	rw_exit(uobj->vmobjlock);

 out:
 	kmem_free(pgs, pgssize);
	return error;
}
    293 
    294 int
    295 ufs_truncate_retry(struct vnode *vp, int ioflag, uint64_t newsize,
    296     kauth_cred_t cred)
    297 {
    298 	struct inode *ip = VTOI(vp);
    299 	struct mount *mp = vp->v_mount;
    300 	int error = 0;
    301 
    302 	UFS_WAPBL_JUNLOCK_ASSERT(mp);
    303 
    304 	/*
    305 	 * Truncate might temporarily fail, loop until done.
    306 	 */
    307 	do {
    308 		error = UFS_WAPBL_BEGIN(mp);
    309 		if (error)
    310 			goto out;
    311 
    312 		error = UFS_TRUNCATE(vp, newsize, ioflag, cred);
    313 		UFS_WAPBL_END(mp);
    314 
    315 		if (error != 0 && error != EAGAIN)
    316 			goto out;
    317 	} while (ip->i_size != newsize);
    318 
    319   out:
    320 	return error;
    321 }
    322 
    323 /* truncate all the data of the inode including extended attributes */
    324 int
    325 ufs_truncate_all(struct vnode *vp)
    326 {
    327 	struct inode *ip = VTOI(vp);
    328 	off_t isize = ip->i_size;
    329 
    330 	if (ip->i_ump->um_fstype == UFS2)
    331 		isize += ip->i_ffs2_extsize;
    332 
    333 	if (isize == 0)
    334 		return 0;
    335 	return ufs_truncate_retry(vp, IO_NORMAL | IO_EXT, 0, NOCRED);
    336 }
    337