ulfs_bmap.c revision 1.3 1 /* $NetBSD: ulfs_bmap.c,v 1.3 2013/06/06 00:48:04 dholland Exp $ */
2 /* from NetBSD: ufs_bmap.c,v 1.50 2013/01/22 09:39:18 dholland Exp */
3
4 /*
5 * Copyright (c) 1989, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)ufs_bmap.c 8.8 (Berkeley) 8/11/95
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ulfs_bmap.c,v 1.3 2013/06/06 00:48:04 dholland Exp $");
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/resourcevar.h>
50 #include <sys/trace.h>
51 #include <sys/fstrans.h>
52
53 #include <miscfs/specfs/specdev.h>
54
55 #include <ufs/lfs/ulfs_inode.h>
56 #include <ufs/lfs/ulfsmount.h>
57 #include <ufs/lfs/ulfs_extern.h>
58 #include <ufs/lfs/ulfs_bswap.h>
59
60 static bool
61 ulfs_issequential(const struct ulfsmount *ump, daddr_t daddr0, daddr_t daddr1)
62 {
63
64 /* for ulfs, blocks in a hole is not 'contiguous'. */
65 if (daddr0 == 0)
66 return false;
67
68 return (daddr0 + ump->um_seqinc == daddr1);
69 }
70
71 /*
72 * Bmap converts the logical block number of a file to its physical block
73 * number on the disk. The conversion is done by using the logical block
74 * number to index into the array of block pointers described by the dinode.
75 */
76 int
77 ulfs_bmap(void *v)
78 {
79 struct vop_bmap_args /* {
80 struct vnode *a_vp;
81 daddr_t a_bn;
82 struct vnode **a_vpp;
83 daddr_t *a_bnp;
84 int *a_runp;
85 } */ *ap = v;
86 int error;
87
88 /*
89 * Check for underlying vnode requests and ensure that logical
90 * to physical mapping is requested.
91 */
92 if (ap->a_vpp != NULL)
93 *ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
94 if (ap->a_bnp == NULL)
95 return (0);
96
97 fstrans_start(ap->a_vp->v_mount, FSTRANS_SHARED);
98 error = ulfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL,
99 ap->a_runp, ulfs_issequential);
100 fstrans_done(ap->a_vp->v_mount);
101 return error;
102 }
103
104 /*
105 * Indirect blocks are now on the vnode for the file. They are given negative
106 * logical block numbers. Indirect blocks are addressed by the negative
107 * address of the first data block to which they point. Double indirect blocks
108 * are addressed by one less than the address of the first indirect block to
109 * which they point. Triple indirect blocks are addressed by one less than
110 * the address of the first double indirect block to which they point.
111 *
112 * ulfs_bmaparray does the bmap conversion, and if requested returns the
113 * array of logical blocks which must be traversed to get to a block.
114 * Each entry contains the offset into that block that gets you to the
115 * next block and the disk address of the block (if it is assigned).
116 */
117
118 int
119 ulfs_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, struct indir *ap,
120 int *nump, int *runp, ulfs_issequential_callback_t is_sequential)
121 {
122 struct inode *ip;
123 struct buf *bp, *cbp;
124 struct ulfsmount *ump;
125 struct mount *mp;
126 struct indir a[ULFS_NIADDR + 1], *xap;
127 daddr_t daddr;
128 daddr_t metalbn;
129 int error, maxrun = 0, num;
130
131 ip = VTOI(vp);
132 mp = vp->v_mount;
133 ump = ip->i_ump;
134 #ifdef DIAGNOSTIC
135 if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
136 panic("ulfs_bmaparray: invalid arguments");
137 #endif
138
139 if (runp) {
140 /*
141 * XXX
142 * If MAXBSIZE is the largest transfer the disks can handle,
143 * we probably want maxrun to be 1 block less so that we
144 * don't create a block larger than the device can handle.
145 */
146 *runp = 0;
147 maxrun = MAXPHYS / mp->mnt_stat.f_iosize - 1;
148 }
149
150 if (bn >= 0 && bn < ULFS_NDADDR) {
151 if (nump != NULL)
152 *nump = 0;
153 if (ump->um_fstype == ULFS1)
154 daddr = ulfs_rw32(ip->i_ffs1_db[bn],
155 ULFS_MPNEEDSWAP(ump));
156 else
157 daddr = ulfs_rw64(ip->i_ffs2_db[bn],
158 ULFS_MPNEEDSWAP(ump));
159 *bnp = blkptrtodb(ump, daddr);
160 /*
161 * Since this is FFS independent code, we are out of
162 * scope for the definitions of BLK_NOCOPY and
163 * BLK_SNAP, but we do know that they will fall in
164 * the range 1..um_seqinc, so we use that test and
165 * return a request for a zeroed out buffer if attempts
166 * are made to read a BLK_NOCOPY or BLK_SNAP block.
167 */
168 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
169 && daddr > 0 &&
170 daddr < ump->um_seqinc) {
171 *bnp = -1;
172 } else if (*bnp == 0) {
173 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
174 == SF_SNAPSHOT) {
175 *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
176 } else {
177 *bnp = -1;
178 }
179 } else if (runp) {
180 if (ump->um_fstype == ULFS1) {
181 for (++bn; bn < ULFS_NDADDR && *runp < maxrun &&
182 is_sequential(ump,
183 ulfs_rw32(ip->i_ffs1_db[bn - 1],
184 ULFS_MPNEEDSWAP(ump)),
185 ulfs_rw32(ip->i_ffs1_db[bn],
186 ULFS_MPNEEDSWAP(ump)));
187 ++bn, ++*runp);
188 } else {
189 for (++bn; bn < ULFS_NDADDR && *runp < maxrun &&
190 is_sequential(ump,
191 ulfs_rw64(ip->i_ffs2_db[bn - 1],
192 ULFS_MPNEEDSWAP(ump)),
193 ulfs_rw64(ip->i_ffs2_db[bn],
194 ULFS_MPNEEDSWAP(ump)));
195 ++bn, ++*runp);
196 }
197 }
198 return (0);
199 }
200
201 xap = ap == NULL ? a : ap;
202 if (!nump)
203 nump = #
204 if ((error = ulfs_getlbns(vp, bn, xap, nump)) != 0)
205 return (error);
206
207 num = *nump;
208
209 /* Get disk address out of indirect block array */
210 if (ump->um_fstype == ULFS1)
211 daddr = ulfs_rw32(ip->i_ffs1_ib[xap->in_off],
212 ULFS_MPNEEDSWAP(ump));
213 else
214 daddr = ulfs_rw64(ip->i_ffs2_ib[xap->in_off],
215 ULFS_MPNEEDSWAP(ump));
216
217 for (bp = NULL, ++xap; --num; ++xap) {
218 /*
219 * Exit the loop if there is no disk address assigned yet and
220 * the indirect block isn't in the cache, or if we were
221 * looking for an indirect block and we've found it.
222 */
223
224 metalbn = xap->in_lbn;
225 if (metalbn == bn)
226 break;
227 if (daddr == 0) {
228 mutex_enter(&bufcache_lock);
229 cbp = incore(vp, metalbn);
230 mutex_exit(&bufcache_lock);
231 if (cbp == NULL)
232 break;
233 }
234
235 /*
236 * If we get here, we've either got the block in the cache
237 * or we have a disk address for it, go fetch it.
238 */
239 if (bp)
240 brelse(bp, 0);
241
242 xap->in_exists = 1;
243 bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
244 if (bp == NULL) {
245
246 /*
247 * getblk() above returns NULL only iff we are
248 * pagedaemon. See the implementation of getblk
249 * for detail.
250 */
251
252 return (ENOMEM);
253 }
254 if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
255 trace(TR_BREADHIT, pack(vp, size), metalbn);
256 }
257 #ifdef DIAGNOSTIC
258 else if (!daddr)
259 panic("ulfs_bmaparray: indirect block not in cache");
260 #endif
261 else {
262 trace(TR_BREADMISS, pack(vp, size), metalbn);
263 bp->b_blkno = blkptrtodb(ump, daddr);
264 bp->b_flags |= B_READ;
265 BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);
266 VOP_STRATEGY(vp, bp);
267 curlwp->l_ru.ru_inblock++; /* XXX */
268 if ((error = biowait(bp)) != 0) {
269 brelse(bp, 0);
270 return (error);
271 }
272 }
273 if (ump->um_fstype == ULFS1) {
274 daddr = ulfs_rw32(((u_int32_t *)bp->b_data)[xap->in_off],
275 ULFS_MPNEEDSWAP(ump));
276 if (num == 1 && daddr && runp) {
277 for (bn = xap->in_off + 1;
278 bn < MNINDIR(ump) && *runp < maxrun &&
279 is_sequential(ump,
280 ulfs_rw32(((int32_t *)bp->b_data)[bn-1],
281 ULFS_MPNEEDSWAP(ump)),
282 ulfs_rw32(((int32_t *)bp->b_data)[bn],
283 ULFS_MPNEEDSWAP(ump)));
284 ++bn, ++*runp);
285 }
286 } else {
287 daddr = ulfs_rw64(((u_int64_t *)bp->b_data)[xap->in_off],
288 ULFS_MPNEEDSWAP(ump));
289 if (num == 1 && daddr && runp) {
290 for (bn = xap->in_off + 1;
291 bn < MNINDIR(ump) && *runp < maxrun &&
292 is_sequential(ump,
293 ulfs_rw64(((int64_t *)bp->b_data)[bn-1],
294 ULFS_MPNEEDSWAP(ump)),
295 ulfs_rw64(((int64_t *)bp->b_data)[bn],
296 ULFS_MPNEEDSWAP(ump)));
297 ++bn, ++*runp);
298 }
299 }
300 }
301 if (bp)
302 brelse(bp, 0);
303
304 /*
305 * Since this is FFS independent code, we are out of scope for the
306 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
307 * will fall in the range 1..um_seqinc, so we use that test and
308 * return a request for a zeroed out buffer if attempts are made
309 * to read a BLK_NOCOPY or BLK_SNAP block.
310 */
311 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT
312 && daddr > 0 && daddr < ump->um_seqinc) {
313 *bnp = -1;
314 return (0);
315 }
316 *bnp = blkptrtodb(ump, daddr);
317 if (*bnp == 0) {
318 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))
319 == SF_SNAPSHOT) {
320 *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
321 } else {
322 *bnp = -1;
323 }
324 }
325 return (0);
326 }
327
328 /*
329 * Create an array of logical block number/offset pairs which represent the
330 * path of indirect blocks required to access a data block. The first "pair"
331 * contains the logical block number of the appropriate single, double or
332 * triple indirect block and the offset into the inode indirect block array.
333 * Note, the logical block number of the inode single/double/triple indirect
334 * block appears twice in the array, once with the offset into the i_ffs1_ib and
335 * once with the offset into the page itself.
336 */
337 int
338 ulfs_getlbns(struct vnode *vp, daddr_t bn, struct indir *ap, int *nump)
339 {
340 daddr_t metalbn, realbn;
341 struct ulfsmount *ump;
342 int64_t blockcnt;
343 int lbc;
344 int i, numlevels, off;
345
346 ump = VFSTOULFS(vp->v_mount);
347 if (nump)
348 *nump = 0;
349 numlevels = 0;
350 realbn = bn;
351 if (bn < 0)
352 bn = -bn;
353 KASSERT(bn >= ULFS_NDADDR);
354
355 /*
356 * Determine the number of levels of indirection. After this loop
357 * is done, blockcnt indicates the number of data blocks possible
358 * at the given level of indirection, and ULFS_NIADDR - i is the number
359 * of levels of indirection needed to locate the requested block.
360 */
361
362 bn -= ULFS_NDADDR;
363 for (lbc = 0, i = ULFS_NIADDR;; i--, bn -= blockcnt) {
364 if (i == 0)
365 return (EFBIG);
366
367 lbc += ump->um_lognindir;
368 blockcnt = (int64_t)1 << lbc;
369
370 if (bn < blockcnt)
371 break;
372 }
373
374 /* Calculate the address of the first meta-block. */
375 metalbn = -((realbn >= 0 ? realbn : -realbn) - bn + ULFS_NIADDR - i);
376
377 /*
378 * At each iteration, off is the offset into the bap array which is
379 * an array of disk addresses at the current level of indirection.
380 * The logical block number and the offset in that block are stored
381 * into the argument array.
382 */
383 ap->in_lbn = metalbn;
384 ap->in_off = off = ULFS_NIADDR - i;
385 ap->in_exists = 0;
386 ap++;
387 for (++numlevels; i <= ULFS_NIADDR; i++) {
388 /* If searching for a meta-data block, quit when found. */
389 if (metalbn == realbn)
390 break;
391
392 lbc -= ump->um_lognindir;
393 off = (bn >> lbc) & (MNINDIR(ump) - 1);
394
395 ++numlevels;
396 ap->in_lbn = metalbn;
397 ap->in_off = off;
398 ap->in_exists = 0;
399 ++ap;
400
401 metalbn -= -1 + ((int64_t)off << lbc);
402 }
403 if (nump)
404 *nump = numlevels;
405 return (0);
406 }
407