1 /* $NetBSD: ufs_inode.c,v 1.112 2020/09/05 16:30:13 riastradh Exp $ */ 2 3 /* 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_inode.c 8.9 (Berkeley) 5/14/95 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.112 2020/09/05 16:30:13 riastradh Exp $"); 41 42 #if defined(_KERNEL_OPT) 43 #include "opt_ffs.h" 44 #include "opt_quota.h" 45 #include "opt_wapbl.h" 46 #include "opt_uvmhist.h" 47 #endif 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/proc.h> 52 #include <sys/vnode.h> 53 #include <sys/mount.h> 54 #include <sys/kernel.h> 55 #include <sys/namei.h> 56 #include <sys/kauth.h> 57 #include <sys/wapbl.h> 58 #include <sys/kmem.h> 59 60 #include <ufs/ufs/inode.h> 61 #include <ufs/ufs/ufsmount.h> 62 #include <ufs/ufs/ufs_extern.h> 63 #include <ufs/ufs/ufs_wapbl.h> 64 #ifdef UFS_DIRHASH 65 #include <ufs/ufs/dirhash.h> 66 #endif 67 #ifdef UFS_EXTATTR 68 #include <ufs/ufs/extattr.h> 69 #endif 70 71 #ifdef UVMHIST 72 #include <uvm/uvm.h> 73 #endif 74 #include <uvm/uvm_page.h> 75 #include <uvm/uvm_stat.h> 76 77 /* 78 * Last reference to an inode. If necessary, write or delete it. 79 */ 80 int 81 ufs_inactive(void *v) 82 { 83 struct vop_inactive_v2_args /* { 84 struct vnode *a_vp; 85 struct bool *a_recycle; 86 } */ *ap = v; 87 struct vnode *vp = ap->a_vp; 88 struct inode *ip = VTOI(vp); 89 struct mount *mp = vp->v_mount; 90 mode_t mode; 91 int allerror = 0, error; 92 bool wapbl_locked = false; 93 94 UFS_WAPBL_JUNLOCK_ASSERT(mp); 95 96 /* 97 * Ignore inodes related to stale file handles. 98 */ 99 if (ip->i_mode == 0) 100 goto out; 101 102 if (ip->i_nlink <= 0 && (mp->mnt_flag & MNT_RDONLY) == 0) { 103 #ifdef UFS_EXTATTR 104 ufs_extattr_vnode_inactive(vp, curlwp); 105 #endif 106 /* 107 * All file blocks must be freed before we can let the vnode 108 * be reclaimed, so can't postpone full truncating any further. 109 */ 110 ufs_truncate_all(vp); 111 112 #if defined(QUOTA) || defined(QUOTA2) 113 error = UFS_WAPBL_BEGIN(mp); 114 if (error) { 115 allerror = error; 116 } else { 117 wapbl_locked = true; 118 (void)chkiq(ip, -1, NOCRED, 0); 119 } 120 #endif 121 DIP_ASSIGN(ip, rdev, 0); 122 mode = ip->i_mode; 123 ip->i_mode = 0; 124 ip->i_omode = mode; 125 DIP_ASSIGN(ip, mode, 0); 126 ip->i_flag |= IN_CHANGE | IN_UPDATE; 127 /* 128 * Defer final inode free and update to ufs_reclaim(). 129 */ 130 } 131 132 if (ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) { 133 if (! wapbl_locked) { 134 error = UFS_WAPBL_BEGIN(mp); 135 if (error) { 136 allerror = error; 137 goto out; 138 } 139 wapbl_locked = true; 140 } 141 UFS_UPDATE(vp, NULL, NULL, 0); 142 } 143 out: 144 if (wapbl_locked) 145 UFS_WAPBL_END(mp); 146 /* 147 * If we are done with the inode, reclaim it 148 * so that it can be reused immediately. 149 */ 150 *ap->a_recycle = (ip->i_mode == 0); 151 152 if (ip->i_mode == 0 && (DIP(ip, size) != 0 || DIP(ip, blocks) != 0)) { 153 printf("%s: unlinked ino %" PRId64 " on \"%s\" has" 154 " non zero size %" PRIx64 " or blocks %" PRIx64 155 " with allerror %d\n", 156 __func__, ip->i_number, mp->mnt_stat.f_mntonname, 157 DIP(ip, size), DIP(ip, blocks), allerror); 158 panic("%s: dirty filesystem?", __func__); 159 } 160 161 return (allerror); 162 } 163 164 /* 165 * Reclaim an inode so that it can be used for other purposes. 166 */ 167 int 168 ufs_reclaim(struct vnode *vp) 169 { 170 struct inode *ip = VTOI(vp); 171 172 if (!UFS_WAPBL_BEGIN(vp->v_mount)) { 173 UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); 174 UFS_WAPBL_END(vp->v_mount); 175 } 176 UFS_UPDATE(vp, NULL, NULL, UPDATE_CLOSE); 177 178 if (ip->i_devvp) { 179 vrele(ip->i_devvp); 180 ip->i_devvp = 0; 181 } 182 #if defined(QUOTA) || defined(QUOTA2) 183 ufsquota_free(ip); 184 #endif 185 #ifdef UFS_DIRHASH 186 if (ip->i_dirhash != NULL) 187 ufsdirhash_free(ip); 188 #endif 189 return (0); 190 } 191 192 /* 193 * allocate a range of blocks in a file. 194 * after this function returns, any page entirely contained within the range 195 * will map to invalid data and thus must be overwritten before it is made 196 * accessible to others. 197 */ 198 199 int 200 ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred, 201 int flags) 202 { 203 off_t neweof; /* file size after the operation */ 204 off_t neweob; /* offset next to the last block after the operation */ 205 off_t pagestart; /* starting offset of range covered by pgs */ 206 off_t eob; /* offset next to allocated blocks */ 207 struct uvm_object *uobj; 208 int i, delta, error, npages; 209 int bshift = vp->v_mount->mnt_fs_bshift; 210 int bsize = 1 << bshift; 211 int ppb = MAX(bsize >> PAGE_SHIFT, 1); 212 struct vm_page **pgs; 213 size_t pgssize; 214 UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist); 215 UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx len 0x%jx u_size 0x%jx", 216 (uintptr_t)vp, off, len, vp->v_size); 217 218 neweof = MAX(vp->v_size, off + len); 219 GOP_SIZE(vp, neweof, &neweob, 0); 220 221 error = 0; 222 uobj = &vp->v_uobj; 223 224 /* 225 * read or create pages covering the range of the allocation and 226 * keep them locked until the new block is allocated, so there 227 * will be no window where the old contents of the new block are 228 * visible to racing threads. 229 */ 230 231 pagestart = trunc_page(off) & ~(bsize - 1); 232 npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT); 233 pgssize = npages * sizeof(struct vm_page *); 234 pgs = kmem_zalloc(pgssize, KM_SLEEP); 235 236 /* 237 * adjust off to be block-aligned. 238 */ 239 240 delta = off & (bsize - 1); 241 off -= delta; 242 len += delta; 243 244 genfs_node_wrlock(vp); 245 rw_enter(uobj->vmobjlock, RW_WRITER); 246 error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, 247 VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC | 248 PGO_NOTIMESTAMP | PGO_GLOCKHELD); 249 if (error) { 250 genfs_node_unlock(vp); 251 goto out; 252 } 253 254 /* 255 * now allocate the range. 256 */ 257 258 error = GOP_ALLOC(vp, off, len, flags, cred); 259 genfs_node_unlock(vp); 260 261 /* 262 * if the allocation succeeded, mark all the pages dirty 263 * and clear PG_RDONLY on any pages that are now fully backed 264 * by disk blocks. if the allocation failed, we do not invalidate 265 * the pages since they might have already existed and been dirty, 266 * in which case we need to keep them around. if we created the pages, 267 * they will be clean and read-only, and leaving such pages 268 * in the cache won't cause any problems. 269 */ 270 271 GOP_SIZE(vp, off + len, &eob, 0); 272 rw_enter(uobj->vmobjlock, RW_WRITER); 273 for (i = 0; i < npages; i++) { 274 KASSERT((pgs[i]->flags & PG_RELEASED) == 0); 275 if (!error) { 276 if (off <= pagestart + (i << PAGE_SHIFT) && 277 pagestart + ((i + 1) << PAGE_SHIFT) <= eob) { 278 pgs[i]->flags &= ~PG_RDONLY; 279 } 280 uvm_pagemarkdirty(pgs[i], UVM_PAGE_STATUS_DIRTY); 281 } 282 uvm_pagelock(pgs[i]); 283 uvm_pageactivate(pgs[i]); 284 uvm_pageunlock(pgs[i]); 285 } 286 uvm_page_unbusy(pgs, npages); 287 rw_exit(uobj->vmobjlock); 288 289 out: 290 kmem_free(pgs, pgssize); 291 return error; 292 } 293 294 int 295 ufs_truncate_retry(struct vnode *vp, int ioflag, uint64_t newsize, 296 kauth_cred_t cred) 297 { 298 struct inode *ip = VTOI(vp); 299 struct mount *mp = vp->v_mount; 300 int error = 0; 301 302 UFS_WAPBL_JUNLOCK_ASSERT(mp); 303 304 /* 305 * Truncate might temporarily fail, loop until done. 306 */ 307 do { 308 error = UFS_WAPBL_BEGIN(mp); 309 if (error) 310 goto out; 311 312 error = UFS_TRUNCATE(vp, newsize, ioflag, cred); 313 UFS_WAPBL_END(mp); 314 315 if (error != 0 && error != EAGAIN) 316 goto out; 317 } while (ip->i_size != newsize); 318 319 out: 320 return error; 321 } 322 323 /* truncate all the data of the inode including extended attributes */ 324 int 325 ufs_truncate_all(struct vnode *vp) 326 { 327 struct inode *ip = VTOI(vp); 328 off_t isize = ip->i_size; 329 330 if (ip->i_ump->um_fstype == UFS2) 331 isize += ip->i_ffs2_extsize; 332 333 if (isize == 0) 334 return 0; 335 return ufs_truncate_retry(vp, IO_NORMAL | IO_EXT, 0, NOCRED); 336 } 337