/*	$NetBSD: ufs_bmap.c,v 1.54 2022/11/17 06:40:40 chs Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_bmap.c	8.8 (Berkeley) 8/11/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_bmap.c,v 1.54 2022/11/17 06:40:40 chs Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
#include <sys/trace.h>

#include <miscfs/specfs/specdev.h>

#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

static bool
ufs_issequential(const struct ufsmount *ump, daddr_t daddr0, daddr_t daddr1)
{

	/* for ufs, blocks in a hole are not 'contiguous'. */
	if (daddr0 == 0)
		return false;

	return (daddr0 + ump->um_seqinc == daddr1);
}

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk.  The conversion is done by using the logical block
 * number to index into the array of block pointers described by the dinode.
 */
int
ufs_bmap(void *v)
{
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap = v;
	int error;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
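	/*
	 * a_vpp, when non-NULL, returns the device vnode that the block
	 * number refers to.  If a_bnp is NULL the caller only wants that
	 * vnode, so skip the translation entirely.
	 */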
	if (ap->a_vpp != NULL)
		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
	if (ap->a_bnp == NULL)
		return (0);

	error = ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL,
	    ap->a_runp, ufs_issequential);
	return error;
}

/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ufs_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 */

int
ufs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap,
    int *nump, int *runp, ufs_issequential_callback_t is_sequential)
{
	struct inode *ip;
	struct buf *bp, *cbp;
	struct ufsmount *ump;
	struct mount *mp;
	struct indir a[UFS_NIADDR + 1], *xap;
	daddr_t daddr;
	daddr_t metalbn;
	int error, maxrun = 0, num;

	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = ip->i_ump;
	KASSERTMSG(((ap == NULL) == (nump == NULL)),
	    "ufs_bmaparray: invalid arguments: ap = %p, nump = %p", ap, nump);

	if (runp) {
		/*
		 * XXX
		 * If MAXBSIZE is the largest transfer the disks can handle,
		 * we probably want maxrun to be 1 block less so that we
		 * don't create a block larger than the device can handle.
		 */
		*runp = 0;
		maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
	}

	if (bn >= 0 && bn < UFS_NDADDR) {
		if (nump != NULL)
			*nump = 0;
		if (ump->um_fstype == UFS1)
			daddr = ufs_rw32(ip->i_ffs1_db[bn],
			    UFS_MPNEEDSWAP(ump));
		else
			daddr = ufs_rw64(ip->i_ffs2_db[bn],
			    UFS_MPNEEDSWAP(ump));
		*bnp = blkptrtodb(ump, daddr);
		/*
		 * Since this is FFS independent code, we are out of
		 * scope for the definitions of BLK_NOCOPY and
		 * BLK_SNAP, but we do know that they will fall in
		 * the range 1..um_seqinc, so we use that test and
		 * return a request for a zeroed out buffer if attempts
		 * are made to read a BLK_NOCOPY or BLK_SNAP block.
		 */
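		/*
		 * Below, a hole (daddr == 0) in a snapshot file is not
		 * reported as a hole: logical block bn of a snapshot
		 * corresponds to block bn of the file system, and a zero
		 * pointer means the block has not been copied yet, so the
		 * data is read from that block of the device itself.
		 */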
		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
		    && daddr > 0 &&
		    daddr < ump->um_seqinc) {
			*bnp = -1;
		} else if (*bnp == 0) {
			if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
			    == SF_SNAPSHOT) {
				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
			} else {
				*bnp = -1;
			}
		} else if (runp) {
			if (ump->um_fstype == UFS1) {
				for (++bn; bn < UFS_NDADDR && *runp < maxrun &&
				    is_sequential(ump,
					ufs_rw32(ip->i_ffs1_db[bn - 1],
					    UFS_MPNEEDSWAP(ump)),
					ufs_rw32(ip->i_ffs1_db[bn],
					    UFS_MPNEEDSWAP(ump)));
				    ++bn, ++*runp);
			} else {
				for (++bn; bn < UFS_NDADDR && *runp < maxrun &&
				    is_sequential(ump,
					ufs_rw64(ip->i_ffs2_db[bn - 1],
					    UFS_MPNEEDSWAP(ump)),
					ufs_rw64(ip->i_ffs2_db[bn],
					    UFS_MPNEEDSWAP(ump)));
				    ++bn, ++*runp);
			}
		}
		return (0);
	} else if (bn < 0 && bn >= -UFS_NXADDR) {
		KASSERT(ump->um_fstype == UFS2 &&
		    (ump->um_flags & UFS_EA) != 0);
		daddr = ufs_rw64(ip->i_ffs2_extb[-1 - bn],
		    UFS_MPNEEDSWAP(ump));
		*bnp = blkptrtodb(ump, daddr);
		if (*bnp == 0)
			*bnp = -1;
		return 0;
	}

	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
		return (error);

	num = *nump;

	/* Get disk address out of indirect block array */
	if (ump->um_fstype == UFS1)
		daddr = ufs_rw32(ip->i_ffs1_ib[xap->in_off],
		    UFS_MPNEEDSWAP(ump));
	else
		daddr = ufs_rw64(ip->i_ffs2_ib[xap->in_off],
		    UFS_MPNEEDSWAP(ump));

	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = xap->in_lbn;
		if (metalbn == bn)
			break;
		if (daddr == 0) {
			mutex_enter(&bufcache_lock);
			cbp = incore(vp, metalbn);
			mutex_exit(&bufcache_lock);
			if (cbp == NULL)
				break;
		}

		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it; go fetch it.
		 */
		if (bp)
			brelse(bp, 0);

		xap->in_exists = 1;
		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
		if (bp == NULL) {

			/*
			 * getblk() above returns NULL only if we are
			 * the pagedaemon.  See the implementation of
			 * getblk for details.
			 */
			return (ENOMEM);
		}
		if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
			trace(TR_BREADHIT, pack(vp, size), metalbn);
		} else {
			KASSERTMSG((daddr != 0),
			    "ufs_bmaparray: indirect block not in cache");
			trace(TR_BREADMISS, pack(vp, size), metalbn);
			bp->b_blkno = blkptrtodb(ump, daddr);
			bp->b_flags |= B_READ;
			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
			VOP_STRATEGY(vp, bp);
			curlwp->l_ru.ru_inblock++;	/* XXX */
			if ((error = biowait(bp)) != 0) {
				brelse(bp, 0);
				return (error);
			}
		}
		if (ump->um_fstype == UFS1) {
			daddr = ufs_rw32(((u_int32_t *)bp->b_data)[xap->in_off],
			    UFS_MPNEEDSWAP(ump));
			if (num == 1 && daddr && runp) {
				for (bn = xap->in_off + 1;
				    bn < MNINDIR(ump) && *runp < maxrun &&
				    is_sequential(ump,
					ufs_rw32(((int32_t *)bp->b_data)[bn-1],
					    UFS_MPNEEDSWAP(ump)),
					ufs_rw32(((int32_t *)bp->b_data)[bn],
					    UFS_MPNEEDSWAP(ump)));
				    ++bn, ++*runp);
			}
		} else {
			daddr = ufs_rw64(((u_int64_t *)bp->b_data)[xap->in_off],
			    UFS_MPNEEDSWAP(ump));
			if (num == 1 && daddr && runp) {
				for (bn = xap->in_off + 1;
				    bn < MNINDIR(ump) && *runp < maxrun &&
				    is_sequential(ump,
					ufs_rw64(((int64_t *)bp->b_data)[bn-1],
					    UFS_MPNEEDSWAP(ump)),
					ufs_rw64(((int64_t *)bp->b_data)[bn],
					    UFS_MPNEEDSWAP(ump)));
				    ++bn, ++*runp);
			}
		}
	}
	if (bp)
		brelse(bp, 0);

	/*
	 * Since this is FFS independent code, we are out of scope for the
	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
	 * will fall in the range 1..um_seqinc, so we use that test and
	 * return a request for a zeroed out buffer if attempts are made
	 * to read a BLK_NOCOPY or BLK_SNAP block.
	 */
	if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
	    && daddr > 0 && daddr < ump->um_seqinc) {
		*bnp = -1;
		return (0);
	}
	*bnp = blkptrtodb(ump, daddr);
	if (*bnp == 0) {
		if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
		    == SF_SNAPSHOT) {
			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
		} else {
			*bnp = -1;
		}
	}
	return (0);
}

/*
 * Create an array of logical block number/offset pairs which represent the
 * path of indirect blocks required to access a data block.  The first "pair"
 * contains the logical block number of the appropriate single, double or
 * triple indirect block and the offset into the inode indirect block array.
 * Note, the logical block number of the inode single/double/triple indirect
 * block appears twice in the array, once with the offset into the i_ffs1_ib
 * and once with the offset into the page itself.
 */
int
ufs_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
{
	daddr_t metalbn, realbn;
	struct ufsmount *ump;
	int64_t blockcnt;
	int lbc;
	int i, numlevels, off;

	ump = VFSTOUFS(vp->v_mount);
	if (nump)
		*nump = 0;
	numlevels = 0;
	realbn = bn;
	if (bn < 0)
		bn = -bn;
	KASSERT(bn >= UFS_NDADDR);

	/*
	 * Determine the number of levels of indirection.  After this loop
	 * is done, blockcnt indicates the number of data blocks possible
	 * at the given level of indirection, and UFS_NIADDR - i is the number
	 * of levels of indirection needed to locate the requested block.
	 */
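	/*
	 * For example, if each indirect block holds 2048 block pointers
	 * (um_lognindir == 11), a single indirect block spans 2048 data
	 * blocks, a double indirect block 2048^2, and a triple indirect
	 * block 2048^3; the loop below stops at the first level whose span
	 * covers the remaining bn, subtracting the span of each level it
	 * skips along the way.
	 */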
	bn -= UFS_NDADDR;
	for (lbc = 0, i = UFS_NIADDR;; i--, bn -= blockcnt) {
		if (i == 0)
			return (EFBIG);

		lbc += ump->um_lognindir;
		blockcnt = (int64_t)1 << lbc;

		if (bn < blockcnt)
			break;
	}

	/* Calculate the address of the first meta-block. */
	metalbn = -((realbn >= 0 ? realbn : -realbn) - bn + UFS_NIADDR - i);

	/*
	 * At each iteration, off is the offset into the bap array which is
	 * an array of disk addresses at the current level of indirection.
	 * The logical block number and the offset in that block are stored
	 * into the argument array.
	 */
	ap->in_lbn = metalbn;
	ap->in_off = off = UFS_NIADDR - i;
	ap->in_exists = 0;
	ap++;
	for (++numlevels; i <= UFS_NIADDR; i++) {
		/* If searching for a meta-data block, quit when found. */
		if (metalbn == realbn)
			break;

		lbc -= ump->um_lognindir;
		off = (bn >> lbc) & (MNINDIR(ump) - 1);

		++numlevels;
		ap->in_lbn = metalbn;
		ap->in_off = off;
		ap->in_exists = 0;
		++ap;

		metalbn -= -1 + ((int64_t)off << lbc);
	}
	if (nump)
		*nump = numlevels;
	return (0);
}