ffs_balloc.c revision 1.28 1 /* $NetBSD: ffs_balloc.c,v 1.28 2001/10/30 01:11:53 lukem Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.28 2001/10/30 01:11:53 lukem Exp $");
40
41 #if defined(_KERNEL_OPT)
42 #include "opt_quota.h"
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/proc.h>
49 #include <sys/file.h>
50 #include <sys/mount.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53
54 #include <ufs/ufs/quota.h>
55 #include <ufs/ufs/ufsmount.h>
56 #include <ufs/ufs/inode.h>
57 #include <ufs/ufs/ufs_extern.h>
58 #include <ufs/ufs/ufs_bswap.h>
59
60 #include <ufs/ffs/fs.h>
61 #include <ufs/ffs/ffs_extern.h>
62
63 #include <uvm/uvm.h>
64
65 /*
66 * Balloc defines the structure of file system storage
67 * by allocating the physical blocks on a device given
68 * the inode and the logical block number in a file.
69 */
70 int
71 ffs_balloc(v)
72 void *v;
73 {
74 struct vop_balloc_args /* {
75 struct vnode *a_vp;
76 off_t a_startoffset;
77 int a_size;
78 struct ucred *a_cred;
79 int a_flags;
80 struct buf **a_bpp;
81 } */ *ap = v;
82 ufs_daddr_t lbn;
83 int size;
84 struct ucred *cred;
85 int flags;
86 ufs_daddr_t nb;
87 struct buf *bp, *nbp;
88 struct vnode *vp = ap->a_vp;
89 struct inode *ip = VTOI(vp);
90 struct fs *fs = ip->i_fs;
91 struct indir indirs[NIADDR + 2];
92 ufs_daddr_t newb, *bap, pref;
93 int deallocated, osize, nsize, num, i, error;
94 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
95 int unwindidx = -1;
96 struct buf **bpp = ap->a_bpp;
97 off_t off;
98 #ifdef FFS_EI
99 const int needswap = UFS_FSNEEDSWAP(fs);
100 #endif
101 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
102
103 lbn = lblkno(fs, ap->a_startoffset);
104 size = blkoff(fs, ap->a_startoffset) + ap->a_size;
105 if (size > fs->fs_bsize)
106 panic("ffs_balloc: blk too big");
107 if (bpp != NULL) {
108 *bpp = NULL;
109 }
110 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
111
112 KASSERT(size <= fs->fs_bsize);
113 if (lbn < 0)
114 return (EFBIG);
115 cred = ap->a_cred;
116 flags = ap->a_flags;
117
118 /*
119 * If the next write will extend the file into a new block,
120 * and the file is currently composed of a fragment
121 * this fragment has to be extended to be a full block.
122 */
123
124 nb = lblkno(fs, ip->i_ffs_size);
125 if (nb < NDADDR && nb < lbn) {
126 osize = blksize(fs, ip, nb);
127 if (osize < fs->fs_bsize && osize > 0) {
128 error = ffs_realloccg(ip, nb,
129 ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
130 osize, (int)fs->fs_bsize, cred, bpp, &newb);
131 if (error)
132 return (error);
133 if (DOINGSOFTDEP(vp))
134 softdep_setup_allocdirect(ip, nb, newb,
135 ufs_rw32(ip->i_ffs_db[nb], needswap),
136 fs->fs_bsize, osize, bpp ? *bpp : NULL);
137 ip->i_ffs_size = lblktosize(fs, nb + 1);
138 uvm_vnp_setsize(vp, ip->i_ffs_size);
139 ip->i_ffs_db[nb] = ufs_rw32(newb, needswap);
140 ip->i_flag |= IN_CHANGE | IN_UPDATE;
141 if (bpp) {
142 if (flags & B_SYNC)
143 bwrite(*bpp);
144 else
145 bawrite(*bpp);
146 }
147 }
148 }
149
150 /*
151 * The first NDADDR blocks are direct blocks
152 */
153
154 if (lbn < NDADDR) {
155 nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
156 if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {
157
158 /*
159 * The block is an already-allocated direct block
160 * and the file already extends past this block,
161 * thus this must be a whole block.
162 * Just read the block (if requested).
163 */
164
165 if (bpp != NULL) {
166 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
167 bpp);
168 if (error) {
169 brelse(*bpp);
170 return (error);
171 }
172 }
173 return (0);
174 }
175 if (nb != 0) {
176
177 /*
178 * Consider need to reallocate a fragment.
179 */
180
181 osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
182 nsize = fragroundup(fs, size);
183 if (nsize <= osize) {
184
185 /*
186 * The existing block is already
187 * at least as big as we want.
188 * Just read the block (if requested).
189 */
190
191 if (bpp != NULL) {
192 error = bread(vp, lbn, osize, NOCRED,
193 bpp);
194 if (error) {
195 brelse(*bpp);
196 return (error);
197 }
198 }
199 return 0;
200 } else {
201
202 /*
203 * The existing block is smaller than we want,
204 * grow it.
205 */
206
207 error = ffs_realloccg(ip, lbn,
208 ffs_blkpref(ip, lbn, (int)lbn,
209 &ip->i_ffs_db[0]), osize, nsize, cred,
210 bpp, &newb);
211 if (error)
212 return (error);
213 if (DOINGSOFTDEP(vp))
214 softdep_setup_allocdirect(ip, lbn,
215 newb, nb, nsize, osize,
216 bpp ? *bpp : NULL);
217 }
218 } else {
219
220 /*
221 * the block was not previously allocated,
222 * allocate a new block or fragment.
223 */
224
225 if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
226 nsize = fragroundup(fs, size);
227 else
228 nsize = fs->fs_bsize;
229 error = ffs_alloc(ip, lbn,
230 ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
231 nsize, cred, &newb);
232 if (error)
233 return (error);
234 if (bpp != NULL) {
235 bp = getblk(vp, lbn, nsize, 0, 0);
236 bp->b_blkno = fsbtodb(fs, newb);
237 if (flags & B_CLRBUF)
238 clrbuf(bp);
239 *bpp = bp;
240 }
241 if (DOINGSOFTDEP(vp)) {
242 softdep_setup_allocdirect(ip, lbn, newb, 0,
243 nsize, 0, bpp ? *bpp : NULL);
244 }
245 }
246 ip->i_ffs_db[lbn] = ufs_rw32(newb, needswap);
247 ip->i_flag |= IN_CHANGE | IN_UPDATE;
248 return (0);
249 }
250 /*
251 * Determine the number of levels of indirection.
252 */
253 pref = 0;
254 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
255 return(error);
256
257 #ifdef DIAGNOSTIC
258 if (num < 1)
259 panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
260 #endif
261 /*
262 * Fetch the first indirect block allocating if necessary.
263 */
264 --num;
265 nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
266 allocib = NULL;
267 allocblk = allociblk;
268 if (nb == 0) {
269 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
270 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
271 &newb);
272 if (error)
273 goto fail;
274 nb = newb;
275 *allocblk++ = nb;
276 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
277 bp->b_blkno = fsbtodb(fs, nb);
278 clrbuf(bp);
279 if (DOINGSOFTDEP(vp)) {
280 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
281 newb, 0, fs->fs_bsize, 0, bp);
282 bdwrite(bp);
283 } else {
284 /*
285 * Write synchronously so that indirect blocks
286 * never point at garbage.
287 */
288 if ((error = bwrite(bp)) != 0)
289 goto fail;
290 }
291 unwindidx = 0;
292 allocib = &ip->i_ffs_ib[indirs[0].in_off];
293 *allocib = ufs_rw32(nb, needswap);
294 ip->i_flag |= IN_CHANGE | IN_UPDATE;
295 }
296 /*
297 * Fetch through the indirect blocks, allocating as necessary.
298 */
299 for (i = 1;;) {
300 error = bread(vp,
301 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
302 if (error) {
303 brelse(bp);
304 goto fail;
305 }
306 bap = (ufs_daddr_t *)bp->b_data;
307 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
308 if (i == num)
309 break;
310 i++;
311 if (nb != 0) {
312 brelse(bp);
313 continue;
314 }
315 if (pref == 0)
316 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
317 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
318 &newb);
319 if (error) {
320 brelse(bp);
321 goto fail;
322 }
323 nb = newb;
324 *allocblk++ = nb;
325 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
326 nbp->b_blkno = fsbtodb(fs, nb);
327 clrbuf(nbp);
328 if (DOINGSOFTDEP(vp)) {
329 softdep_setup_allocindir_meta(nbp, ip, bp,
330 indirs[i - 1].in_off, nb);
331 bdwrite(nbp);
332 } else {
333 /*
334 * Write synchronously so that indirect blocks
335 * never point at garbage.
336 */
337 if ((error = bwrite(nbp)) != 0) {
338 brelse(bp);
339 goto fail;
340 }
341 }
342 if (unwindidx < 0)
343 unwindidx = i - 1;
344 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
345 /*
346 * If required, write synchronously, otherwise use
347 * delayed write.
348 */
349 if (flags & B_SYNC) {
350 bwrite(bp);
351 } else {
352 bdwrite(bp);
353 }
354 }
355 /*
356 * Get the data block, allocating if necessary.
357 */
358 if (nb == 0) {
359 pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
360 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
361 &newb);
362 if (error) {
363 brelse(bp);
364 goto fail;
365 }
366 nb = newb;
367 *allocblk++ = nb;
368 if (bpp != NULL) {
369 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
370 nbp->b_blkno = fsbtodb(fs, nb);
371 if (flags & B_CLRBUF)
372 clrbuf(nbp);
373 *bpp = nbp;
374 }
375 if (DOINGSOFTDEP(vp))
376 softdep_setup_allocindir_page(ip, lbn, bp,
377 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
378 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
379 if (allocib == NULL && unwindidx < 0) {
380 unwindidx = i - 1;
381 }
382 /*
383 * If required, write synchronously, otherwise use
384 * delayed write.
385 */
386 if (flags & B_SYNC) {
387 bwrite(bp);
388 } else {
389 bdwrite(bp);
390 }
391 return (0);
392 }
393 brelse(bp);
394 if (bpp != NULL) {
395 if (flags & B_CLRBUF) {
396 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
397 if (error) {
398 brelse(nbp);
399 goto fail;
400 }
401 } else {
402 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
403 nbp->b_blkno = fsbtodb(fs, nb);
404 clrbuf(nbp);
405 }
406 *bpp = nbp;
407 }
408 return (0);
409
410 fail:
411
412 /*
413 * Restore the UVM state to what the rest of the FFS code is
414 * expecting. Unbusy any pages that we allocated and left busy up in
415 * ufs_balloc_range(). the following VOP_FSYNC() will try to busy
416 * those pages again, which would deadlock if they are still busy
417 * from before. After this we're back to a state where we can undo
418 * any partial allocation.
419 */
420
421 simple_lock(&vp->v_uobj.vmobjlock);
422 for (off = ap->a_startoffset; off < ap->a_startoffset + fs->fs_bsize;
423 off += PAGE_SIZE) {
424 struct vm_page *pg;
425
426 pg = uvm_pagelookup(&vp->v_uobj, off);
427 if (pg == NULL) {
428 break;
429 }
430 uvm_pageactivate(pg);
431 KASSERT((pg->flags & PG_FAKE) == 0);
432 pg->flags &= ~(PG_BUSY);
433 UVM_PAGE_OWN(pg, NULL);
434 }
435 simple_unlock(&vp->v_uobj.vmobjlock);
436
437 /*
438 * If we have failed part way through block allocation, we
439 * have to deallocate any indirect blocks that we have allocated.
440 * We have to fsync the file before we start to get rid of all
441 * of its dependencies so that we do not leave them dangling.
442 * We have to sync it at the end so that the soft updates code
443 * does not find any untracked changes. Although this is really
444 * slow, running out of disk space is not expected to be a common
445 * occurence. The error return from fsync is ignored as we already
446 * have an error to return to the user.
447 */
448
449 (void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc);
450 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
451 ffs_blkfree(ip, *blkp, fs->fs_bsize);
452 deallocated += fs->fs_bsize;
453 }
454 if (unwindidx >= 0) {
455 if (unwindidx == 0) {
456 *allocib = 0;
457 } else {
458 int r;
459
460 r = bread(vp, indirs[unwindidx].in_lbn,
461 (int)fs->fs_bsize, NOCRED, &bp);
462 if (r) {
463 panic("Could not unwind indirect block, error %d", r);
464 brelse(bp);
465 } else {
466 bap = (ufs_daddr_t *)bp->b_data;
467 bap[indirs[unwindidx].in_off] = 0;
468 if (flags & B_SYNC)
469 bwrite(bp);
470 else
471 bdwrite(bp);
472 }
473 }
474 for (i = unwindidx + 1; i <= num; i++) {
475 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
476 0);
477 bp->b_flags |= B_INVAL;
478 brelse(bp);
479 }
480 }
481 if (deallocated) {
482 #ifdef QUOTA
483 /*
484 * Restore user's disk quota because allocation failed.
485 */
486 (void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
487 #endif
488 ip->i_ffs_blocks -= btodb(deallocated);
489 ip->i_flag |= IN_CHANGE | IN_UPDATE;
490 }
491 (void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc);
492 return (error);
493 }
494
495
496 int
497 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
498 struct ucred *cred)
499 {
500 struct inode *ip = VTOI(vp);
501 struct fs *fs = ip->i_fs;
502 int error, delta, bshift, bsize;
503 UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
504
505 error = 0;
506 bshift = fs->fs_bshift;
507 bsize = 1 << bshift;
508
509 delta = off & (bsize - 1);
510 off -= delta;
511 len += delta;
512
513 while (len > 0) {
514 bsize = MIN(bsize, len);
515
516 error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
517 if (error) {
518 goto out;
519 }
520
521 /*
522 * increase file size now, VOP_BALLOC() requires that
523 * EOF be up-to-date before each call.
524 */
525
526 if (ip->i_ffs_size < off + bsize) {
527 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
528 vp, ip->i_ffs_size, off + bsize, 0);
529 ip->i_ffs_size = off + bsize;
530 }
531
532 off += bsize;
533 len -= bsize;
534 }
535
536 out:
537 return error;
538 }
539