ffs_balloc.c revision 1.35 1 /* $NetBSD: ffs_balloc.c,v 1.35 2004/05/25 14:54:59 hannken Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.35 2004/05/25 14:54:59 hannken Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(void *);
70 static int ffs_balloc_ufs2(void *);
71
72 /*
73 * Balloc defines the structure of file system storage
74 * by allocating the physical blocks on a device given
75 * the inode and the logical block number in a file.
76 */
77
78 int
79 ffs_balloc(v)
80 void *v;
81 {
82 struct vop_balloc_args *ap = v;
83
84 if (VTOI(ap->a_vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
85 return ffs_balloc_ufs2(v);
86 else
87 return ffs_balloc_ufs1(v);
88 }
89
90 static int
91 ffs_balloc_ufs1(v)
92 void *v;
93 {
94 struct vop_balloc_args /* {
95 struct vnode *a_vp;
96 off_t a_startoffset;
97 int a_size;
98 struct ucred *a_cred;
99 int a_flags;
100 struct buf **a_bpp;
101 } */ *ap = v;
102 daddr_t lbn, lastlbn;
103 int size;
104 struct ucred *cred;
105 int flags;
106 int32_t nb;
107 struct buf *bp, *nbp;
108 struct vnode *vp = ap->a_vp;
109 struct inode *ip = VTOI(vp);
110 struct fs *fs = ip->i_fs;
111 struct indir indirs[NIADDR + 2];
112 daddr_t newb, pref;
113 int32_t *bap; /* XXX ondisk32 */
114 int deallocated, osize, nsize, num, i, error;
115 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
116 int32_t *allocib;
117 int unwindidx = -1;
118 struct buf **bpp = ap->a_bpp;
119 #ifdef FFS_EI
120 const int needswap = UFS_FSNEEDSWAP(fs);
121 #endif
122 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
123
124 lbn = lblkno(fs, ap->a_startoffset);
125 size = blkoff(fs, ap->a_startoffset) + ap->a_size;
126 if (size > fs->fs_bsize)
127 panic("ffs_balloc: blk too big");
128 if (bpp != NULL) {
129 *bpp = NULL;
130 }
131 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
132
133 KASSERT(size <= fs->fs_bsize);
134 if (lbn < 0)
135 return (EFBIG);
136 cred = ap->a_cred;
137 flags = ap->a_flags;
138
139 /*
140 * If the next write will extend the file into a new block,
141 * and the file is currently composed of a fragment
142 * this fragment has to be extended to be a full block.
143 */
144
145 lastlbn = lblkno(fs, ip->i_size);
146 if (lastlbn < NDADDR && lastlbn < lbn) {
147 nb = lastlbn;
148 osize = blksize(fs, ip, nb);
149 if (osize < fs->fs_bsize && osize > 0) {
150 error = ffs_realloccg(ip, nb,
151 ffs_blkpref_ufs1(ip, lastlbn, nb,
152 &ip->i_ffs1_db[0]),
153 osize, (int)fs->fs_bsize, cred, bpp, &newb);
154 if (error)
155 return (error);
156 if (DOINGSOFTDEP(vp))
157 softdep_setup_allocdirect(ip, nb, newb,
158 ufs_rw32(ip->i_ffs1_db[nb], needswap),
159 fs->fs_bsize, osize, bpp ? *bpp : NULL);
160 ip->i_size = lblktosize(fs, nb + 1);
161 ip->i_ffs1_size = ip->i_size;
162 uvm_vnp_setsize(vp, ip->i_ffs1_size);
163 ip->i_ffs1_db[nb] = ufs_rw32((int32_t)newb, needswap);
164 ip->i_flag |= IN_CHANGE | IN_UPDATE;
165 if (bpp) {
166 if (flags & B_SYNC)
167 bwrite(*bpp);
168 else
169 bawrite(*bpp);
170 }
171 }
172 }
173
174 /*
175 * The first NDADDR blocks are direct blocks
176 */
177
178 if (lbn < NDADDR) {
179 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
180 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
181
182 /*
183 * The block is an already-allocated direct block
184 * and the file already extends past this block,
185 * thus this must be a whole block.
186 * Just read the block (if requested).
187 */
188
189 if (bpp != NULL) {
190 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
191 bpp);
192 if (error) {
193 brelse(*bpp);
194 return (error);
195 }
196 }
197 return (0);
198 }
199 if (nb != 0) {
200
201 /*
202 * Consider need to reallocate a fragment.
203 */
204
205 osize = fragroundup(fs, blkoff(fs, ip->i_size));
206 nsize = fragroundup(fs, size);
207 if (nsize <= osize) {
208
209 /*
210 * The existing block is already
211 * at least as big as we want.
212 * Just read the block (if requested).
213 */
214
215 if (bpp != NULL) {
216 error = bread(vp, lbn, osize, NOCRED,
217 bpp);
218 if (error) {
219 brelse(*bpp);
220 return (error);
221 }
222 }
223 return 0;
224 } else {
225
226 /*
227 * The existing block is smaller than we want,
228 * grow it.
229 */
230
231 error = ffs_realloccg(ip, lbn,
232 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
233 &ip->i_ffs1_db[0]), osize, nsize, cred,
234 bpp, &newb);
235 if (error)
236 return (error);
237 if (DOINGSOFTDEP(vp))
238 softdep_setup_allocdirect(ip, lbn,
239 newb, nb, nsize, osize,
240 bpp ? *bpp : NULL);
241 }
242 } else {
243
244 /*
245 * the block was not previously allocated,
246 * allocate a new block or fragment.
247 */
248
249 if (ip->i_size < lblktosize(fs, lbn + 1))
250 nsize = fragroundup(fs, size);
251 else
252 nsize = fs->fs_bsize;
253 error = ffs_alloc(ip, lbn,
254 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
255 &ip->i_ffs1_db[0]),
256 nsize, cred, &newb);
257 if (error)
258 return (error);
259 if (bpp != NULL) {
260 bp = getblk(vp, lbn, nsize, 0, 0);
261 bp->b_blkno = fsbtodb(fs, newb);
262 if (flags & B_CLRBUF)
263 clrbuf(bp);
264 *bpp = bp;
265 }
266 if (DOINGSOFTDEP(vp)) {
267 softdep_setup_allocdirect(ip, lbn, newb, 0,
268 nsize, 0, bpp ? *bpp : NULL);
269 }
270 }
271 ip->i_ffs1_db[lbn] = ufs_rw32((int32_t)newb, needswap);
272 ip->i_flag |= IN_CHANGE | IN_UPDATE;
273 return (0);
274 }
275
276 /*
277 * Determine the number of levels of indirection.
278 */
279
280 pref = 0;
281 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
282 return (error);
283
284 /*
285 * Fetch the first indirect block allocating if necessary.
286 */
287
288 --num;
289 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
290 allocib = NULL;
291 allocblk = allociblk;
292 if (nb == 0) {
293 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
294 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
295 &newb);
296 if (error)
297 goto fail;
298 nb = newb;
299 *allocblk++ = nb;
300 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
301 bp->b_blkno = fsbtodb(fs, nb);
302 clrbuf(bp);
303 if (DOINGSOFTDEP(vp)) {
304 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
305 newb, 0, fs->fs_bsize, 0, bp);
306 bdwrite(bp);
307 } else {
308
309 /*
310 * Write synchronously so that indirect blocks
311 * never point at garbage.
312 */
313
314 if ((error = bwrite(bp)) != 0)
315 goto fail;
316 }
317 unwindidx = 0;
318 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
319 *allocib = ufs_rw32(nb, needswap);
320 ip->i_flag |= IN_CHANGE | IN_UPDATE;
321 }
322
323 /*
324 * Fetch through the indirect blocks, allocating as necessary.
325 */
326
327 for (i = 1;;) {
328 error = bread(vp,
329 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
330 if (error) {
331 brelse(bp);
332 goto fail;
333 }
334 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
335 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
336 if (i == num)
337 break;
338 i++;
339 if (nb != 0) {
340 brelse(bp);
341 continue;
342 }
343 if (pref == 0)
344 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
345 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
346 &newb);
347 if (error) {
348 brelse(bp);
349 goto fail;
350 }
351 nb = newb;
352 *allocblk++ = nb;
353 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
354 nbp->b_blkno = fsbtodb(fs, nb);
355 clrbuf(nbp);
356 if (DOINGSOFTDEP(vp)) {
357 softdep_setup_allocindir_meta(nbp, ip, bp,
358 indirs[i - 1].in_off, nb);
359 bdwrite(nbp);
360 } else {
361
362 /*
363 * Write synchronously so that indirect blocks
364 * never point at garbage.
365 */
366
367 if ((error = bwrite(nbp)) != 0) {
368 brelse(bp);
369 goto fail;
370 }
371 }
372 if (unwindidx < 0)
373 unwindidx = i - 1;
374 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
375
376 /*
377 * If required, write synchronously, otherwise use
378 * delayed write.
379 */
380
381 if (flags & B_SYNC) {
382 bwrite(bp);
383 } else {
384 bdwrite(bp);
385 }
386 }
387
388 if (flags & B_METAONLY) {
389 *bpp = bp;
390 return (0);
391 }
392
393 /*
394 * Get the data block, allocating if necessary.
395 */
396
397 if (nb == 0) {
398 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
399 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
400 &newb);
401 if (error) {
402 brelse(bp);
403 goto fail;
404 }
405 nb = newb;
406 *allocblk++ = nb;
407 if (bpp != NULL) {
408 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
409 nbp->b_blkno = fsbtodb(fs, nb);
410 if (flags & B_CLRBUF)
411 clrbuf(nbp);
412 *bpp = nbp;
413 }
414 if (DOINGSOFTDEP(vp))
415 softdep_setup_allocindir_page(ip, lbn, bp,
416 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
417 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
418 if (allocib == NULL && unwindidx < 0) {
419 unwindidx = i - 1;
420 }
421
422 /*
423 * If required, write synchronously, otherwise use
424 * delayed write.
425 */
426
427 if (flags & B_SYNC) {
428 bwrite(bp);
429 } else {
430 bdwrite(bp);
431 }
432 return (0);
433 }
434 brelse(bp);
435 if (bpp != NULL) {
436 if (flags & B_CLRBUF) {
437 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
438 if (error) {
439 brelse(nbp);
440 goto fail;
441 }
442 } else {
443 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
444 nbp->b_blkno = fsbtodb(fs, nb);
445 clrbuf(nbp);
446 }
447 *bpp = nbp;
448 }
449 return (0);
450
451 fail:
452 /*
453 * If we have failed part way through block allocation, we
454 * have to deallocate any indirect blocks that we have allocated.
455 */
456
457 if (unwindidx >= 0) {
458
459 /*
460 * First write out any buffers we've created to resolve their
461 * softdeps. This must be done in reverse order of creation
462 * so that we resolve the dependencies in one pass.
463 * Write the cylinder group buffers for these buffers too.
464 */
465
466 for (i = num; i >= unwindidx; i--) {
467 if (i == 0) {
468 break;
469 }
470 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
471 0);
472 if (bp->b_flags & B_DELWRI) {
473 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
474 dbtofsb(fs, bp->b_blkno))));
475 bwrite(bp);
476 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
477 0, 0);
478 if (bp->b_flags & B_DELWRI) {
479 bwrite(bp);
480 } else {
481 bp->b_flags |= B_INVAL;
482 brelse(bp);
483 }
484 } else {
485 bp->b_flags |= B_INVAL;
486 brelse(bp);
487 }
488 }
489 if (unwindidx == 0) {
490 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
491 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
492 }
493
494 /*
495 * Now that any dependencies that we created have been
496 * resolved, we can undo the partial allocation.
497 */
498
499 if (unwindidx == 0) {
500 *allocib = 0;
501 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
502 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
503 } else {
504 int r;
505
506 r = bread(vp, indirs[unwindidx].in_lbn,
507 (int)fs->fs_bsize, NOCRED, &bp);
508 if (r) {
509 panic("Could not unwind indirect block, error %d", r);
510 brelse(bp);
511 } else {
512 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
513 bap[indirs[unwindidx].in_off] = 0;
514 bwrite(bp);
515 }
516 }
517 for (i = unwindidx + 1; i <= num; i++) {
518 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
519 0);
520 bp->b_flags |= B_INVAL;
521 brelse(bp);
522 }
523 }
524 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
525 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
526 deallocated += fs->fs_bsize;
527 }
528 if (deallocated) {
529 #ifdef QUOTA
530 /*
531 * Restore user's disk quota because allocation failed.
532 */
533 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
534 #endif
535 ip->i_ffs1_blocks -= btodb(deallocated);
536 ip->i_flag |= IN_CHANGE | IN_UPDATE;
537 }
538 return (error);
539 }
540
541 static int
542 ffs_balloc_ufs2(v)
543 void *v;
544 {
545 struct vop_balloc_args /* {
546 struct vnode *a_vp;
547 off_t a_startoffset;
548 int a_size;
549 struct ucred *a_cred;
550 int a_flags;
551 struct buf **a_bpp;
552 } */ *ap = v;
553 daddr_t lbn, lastlbn;
554 int size;
555 struct ucred *cred;
556 int flags;
557 struct buf *bp, *nbp;
558 struct vnode *vp = ap->a_vp;
559 struct inode *ip = VTOI(vp);
560 struct fs *fs = ip->i_fs;
561 struct indir indirs[NIADDR + 2];
562 daddr_t newb, pref, nb;
563 int64_t *bap;
564 int deallocated, osize, nsize, num, i, error;
565 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
566 int64_t *allocib;
567 int unwindidx = -1;
568 struct buf **bpp = ap->a_bpp;
569 #ifdef FFS_EI
570 const int needswap = UFS_FSNEEDSWAP(fs);
571 #endif
572 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
573
574 lbn = lblkno(fs, ap->a_startoffset);
575 size = blkoff(fs, ap->a_startoffset) + ap->a_size;
576 if (size > fs->fs_bsize)
577 panic("ffs_balloc: blk too big");
578 if (bpp != NULL) {
579 *bpp = NULL;
580 }
581 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
582
583 KASSERT(size <= fs->fs_bsize);
584 if (lbn < 0)
585 return (EFBIG);
586 cred = ap->a_cred;
587 flags = ap->a_flags;
588
589 #ifdef notyet
590 /*
591 * Check for allocating external data.
592 */
593 if (flags & IO_EXT) {
594 if (lbn >= NXADDR)
595 return (EFBIG);
596 /*
597 * If the next write will extend the data into a new block,
598 * and the data is currently composed of a fragment
599 * this fragment has to be extended to be a full block.
600 */
601 lastlbn = lblkno(fs, dp->di_extsize);
602 if (lastlbn < lbn) {
603 nb = lastlbn;
604 osize = sblksize(fs, dp->di_extsize, nb);
605 if (osize < fs->fs_bsize && osize > 0) {
606 error = ffs_realloccg(ip, -1 - nb,
607 dp->di_extb[nb],
608 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
609 &dp->di_extb[0]), osize,
610 (int)fs->fs_bsize, cred, &bp);
611 if (error)
612 return (error);
613 if (DOINGSOFTDEP(vp))
614 softdep_setup_allocext(ip, nb,
615 dbtofsb(fs, bp->b_blkno),
616 dp->di_extb[nb],
617 fs->fs_bsize, osize, bp);
618 dp->di_extsize = smalllblktosize(fs, nb + 1);
619 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
620 bp->b_xflags |= BX_ALTDATA;
621 ip->i_flag |= IN_CHANGE | IN_UPDATE;
622 if (flags & IO_SYNC)
623 bwrite(bp);
624 else
625 bawrite(bp);
626 }
627 }
628 /*
629 * All blocks are direct blocks
630 */
631 if (flags & BA_METAONLY)
632 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
633 nb = dp->di_extb[lbn];
634 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
635 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
636 if (error) {
637 brelse(bp);
638 return (error);
639 }
640 bp->b_blkno = fsbtodb(fs, nb);
641 bp->b_xflags |= BX_ALTDATA;
642 *bpp = bp;
643 return (0);
644 }
645 if (nb != 0) {
646 /*
647 * Consider need to reallocate a fragment.
648 */
649 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
650 nsize = fragroundup(fs, size);
651 if (nsize <= osize) {
652 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
653 if (error) {
654 brelse(bp);
655 return (error);
656 }
657 bp->b_blkno = fsbtodb(fs, nb);
658 bp->b_xflags |= BX_ALTDATA;
659 } else {
660 error = ffs_realloccg(ip, -1 - lbn,
661 dp->di_extb[lbn],
662 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
663 &dp->di_extb[0]), osize, nsize, cred, &bp);
664 if (error)
665 return (error);
666 bp->b_xflags |= BX_ALTDATA;
667 if (DOINGSOFTDEP(vp))
668 softdep_setup_allocext(ip, lbn,
669 dbtofsb(fs, bp->b_blkno), nb,
670 nsize, osize, bp);
671 }
672 } else {
673 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
674 nsize = fragroundup(fs, size);
675 else
676 nsize = fs->fs_bsize;
677 error = ffs_alloc(ip, lbn,
678 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
679 nsize, cred, &newb);
680 if (error)
681 return (error);
682 bp = getblk(vp, -1 - lbn, nsize, 0, 0);
683 bp->b_blkno = fsbtodb(fs, newb);
684 bp->b_xflags |= BX_ALTDATA;
685 if (flags & BA_CLRBUF)
686 vfs_bio_clrbuf(bp);
687 if (DOINGSOFTDEP(vp))
688 softdep_setup_allocext(ip, lbn, newb, 0,
689 nsize, 0, bp);
690 }
691 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
692 ip->i_flag |= IN_CHANGE | IN_UPDATE;
693 *bpp = bp;
694 return (0);
695 }
696 #endif
697 /*
698 * If the next write will extend the file into a new block,
699 * and the file is currently composed of a fragment
700 * this fragment has to be extended to be a full block.
701 */
702
703 lastlbn = lblkno(fs, ip->i_size);
704 if (lastlbn < NDADDR && lastlbn < lbn) {
705 nb = lastlbn;
706 osize = blksize(fs, ip, nb);
707 if (osize < fs->fs_bsize && osize > 0) {
708 error = ffs_realloccg(ip, nb,
709 ffs_blkpref_ufs2(ip, lastlbn, nb,
710 &ip->i_ffs2_db[0]),
711 osize, (int)fs->fs_bsize, cred, bpp, &newb);
712 if (error)
713 return (error);
714 if (DOINGSOFTDEP(vp))
715 softdep_setup_allocdirect(ip, nb, newb,
716 ufs_rw64(ip->i_ffs2_db[nb], needswap),
717 fs->fs_bsize, osize, bpp ? *bpp : NULL);
718 ip->i_size = lblktosize(fs, nb + 1);
719 ip->i_ffs2_size = ip->i_size;
720 uvm_vnp_setsize(vp, ip->i_size);
721 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
722 ip->i_flag |= IN_CHANGE | IN_UPDATE;
723 if (bpp) {
724 if (flags & B_SYNC)
725 bwrite(*bpp);
726 else
727 bawrite(*bpp);
728 }
729 }
730 }
731
732 /*
733 * The first NDADDR blocks are direct blocks
734 */
735
736 if (lbn < NDADDR) {
737 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
738 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
739
740 /*
741 * The block is an already-allocated direct block
742 * and the file already extends past this block,
743 * thus this must be a whole block.
744 * Just read the block (if requested).
745 */
746
747 if (bpp != NULL) {
748 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
749 bpp);
750 if (error) {
751 brelse(*bpp);
752 return (error);
753 }
754 }
755 return (0);
756 }
757 if (nb != 0) {
758
759 /*
760 * Consider need to reallocate a fragment.
761 */
762
763 osize = fragroundup(fs, blkoff(fs, ip->i_size));
764 nsize = fragroundup(fs, size);
765 if (nsize <= osize) {
766
767 /*
768 * The existing block is already
769 * at least as big as we want.
770 * Just read the block (if requested).
771 */
772
773 if (bpp != NULL) {
774 error = bread(vp, lbn, osize, NOCRED,
775 bpp);
776 if (error) {
777 brelse(*bpp);
778 return (error);
779 }
780 }
781 return 0;
782 } else {
783
784 /*
785 * The existing block is smaller than we want,
786 * grow it.
787 */
788
789 error = ffs_realloccg(ip, lbn,
790 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
791 &ip->i_ffs2_db[0]), osize, nsize, cred,
792 bpp, &newb);
793 if (error)
794 return (error);
795 if (DOINGSOFTDEP(vp))
796 softdep_setup_allocdirect(ip, lbn,
797 newb, nb, nsize, osize,
798 bpp ? *bpp : NULL);
799 }
800 } else {
801
802 /*
803 * the block was not previously allocated,
804 * allocate a new block or fragment.
805 */
806
807 if (ip->i_size < lblktosize(fs, lbn + 1))
808 nsize = fragroundup(fs, size);
809 else
810 nsize = fs->fs_bsize;
811 error = ffs_alloc(ip, lbn,
812 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
813 &ip->i_ffs2_db[0]), nsize, cred, &newb);
814 if (error)
815 return (error);
816 if (bpp != NULL) {
817 bp = getblk(vp, lbn, nsize, 0, 0);
818 bp->b_blkno = fsbtodb(fs, newb);
819 if (flags & B_CLRBUF)
820 clrbuf(bp);
821 *bpp = bp;
822 }
823 if (DOINGSOFTDEP(vp)) {
824 softdep_setup_allocdirect(ip, lbn, newb, 0,
825 nsize, 0, bpp ? *bpp : NULL);
826 }
827 }
828 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
829 ip->i_flag |= IN_CHANGE | IN_UPDATE;
830 return (0);
831 }
832
833 /*
834 * Determine the number of levels of indirection.
835 */
836
837 pref = 0;
838 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
839 return (error);
840
841 /*
842 * Fetch the first indirect block allocating if necessary.
843 */
844
845 --num;
846 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
847 allocib = NULL;
848 allocblk = allociblk;
849 if (nb == 0) {
850 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
851 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
852 &newb);
853 if (error)
854 goto fail;
855 nb = newb;
856 *allocblk++ = nb;
857 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
858 bp->b_blkno = fsbtodb(fs, nb);
859 clrbuf(bp);
860 if (DOINGSOFTDEP(vp)) {
861 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
862 newb, 0, fs->fs_bsize, 0, bp);
863 bdwrite(bp);
864 } else {
865
866 /*
867 * Write synchronously so that indirect blocks
868 * never point at garbage.
869 */
870
871 if ((error = bwrite(bp)) != 0)
872 goto fail;
873 }
874 unwindidx = 0;
875 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
876 *allocib = ufs_rw64(nb, needswap);
877 ip->i_flag |= IN_CHANGE | IN_UPDATE;
878 }
879
880 /*
881 * Fetch through the indirect blocks, allocating as necessary.
882 */
883
884 for (i = 1;;) {
885 error = bread(vp,
886 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
887 if (error) {
888 brelse(bp);
889 goto fail;
890 }
891 bap = (int64_t *)bp->b_data;
892 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
893 if (i == num)
894 break;
895 i++;
896 if (nb != 0) {
897 brelse(bp);
898 continue;
899 }
900 if (pref == 0)
901 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
902 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
903 &newb);
904 if (error) {
905 brelse(bp);
906 goto fail;
907 }
908 nb = newb;
909 *allocblk++ = nb;
910 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
911 nbp->b_blkno = fsbtodb(fs, nb);
912 clrbuf(nbp);
913 if (DOINGSOFTDEP(vp)) {
914 softdep_setup_allocindir_meta(nbp, ip, bp,
915 indirs[i - 1].in_off, nb);
916 bdwrite(nbp);
917 } else {
918
919 /*
920 * Write synchronously so that indirect blocks
921 * never point at garbage.
922 */
923
924 if ((error = bwrite(nbp)) != 0) {
925 brelse(bp);
926 goto fail;
927 }
928 }
929 if (unwindidx < 0)
930 unwindidx = i - 1;
931 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
932
933 /*
934 * If required, write synchronously, otherwise use
935 * delayed write.
936 */
937
938 if (flags & B_SYNC) {
939 bwrite(bp);
940 } else {
941 bdwrite(bp);
942 }
943 }
944
945 if (flags & B_METAONLY) {
946 *bpp = bp;
947 return (0);
948 }
949
950 /*
951 * Get the data block, allocating if necessary.
952 */
953
954 if (nb == 0) {
955 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
956 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
957 &newb);
958 if (error) {
959 brelse(bp);
960 goto fail;
961 }
962 nb = newb;
963 *allocblk++ = nb;
964 if (bpp != NULL) {
965 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
966 nbp->b_blkno = fsbtodb(fs, nb);
967 if (flags & B_CLRBUF)
968 clrbuf(nbp);
969 *bpp = nbp;
970 }
971 if (DOINGSOFTDEP(vp))
972 softdep_setup_allocindir_page(ip, lbn, bp,
973 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
974 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
975 if (allocib == NULL && unwindidx < 0) {
976 unwindidx = i - 1;
977 }
978
979 /*
980 * If required, write synchronously, otherwise use
981 * delayed write.
982 */
983
984 if (flags & B_SYNC) {
985 bwrite(bp);
986 } else {
987 bdwrite(bp);
988 }
989 return (0);
990 }
991 brelse(bp);
992 if (bpp != NULL) {
993 if (flags & B_CLRBUF) {
994 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
995 if (error) {
996 brelse(nbp);
997 goto fail;
998 }
999 } else {
1000 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
1001 nbp->b_blkno = fsbtodb(fs, nb);
1002 clrbuf(nbp);
1003 }
1004 *bpp = nbp;
1005 }
1006 return (0);
1007
1008 fail:
1009 /*
1010 * If we have failed part way through block allocation, we
1011 * have to deallocate any indirect blocks that we have allocated.
1012 */
1013
1014 if (unwindidx >= 0) {
1015
1016 /*
1017 * First write out any buffers we've created to resolve their
1018 * softdeps. This must be done in reverse order of creation
1019 * so that we resolve the dependencies in one pass.
1020 * Write the cylinder group buffers for these buffers too.
1021 */
1022
1023 for (i = num; i >= unwindidx; i--) {
1024 if (i == 0) {
1025 break;
1026 }
1027 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1028 0);
1029 if (bp->b_flags & B_DELWRI) {
1030 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
1031 dbtofsb(fs, bp->b_blkno))));
1032 bwrite(bp);
1033 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
1034 0, 0);
1035 if (bp->b_flags & B_DELWRI) {
1036 bwrite(bp);
1037 } else {
1038 bp->b_flags |= B_INVAL;
1039 brelse(bp);
1040 }
1041 } else {
1042 bp->b_flags |= B_INVAL;
1043 brelse(bp);
1044 }
1045 }
1046 if (unwindidx == 0) {
1047 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
1048 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
1049 }
1050
1051 /*
1052 * Now that any dependencies that we created have been
1053 * resolved, we can undo the partial allocation.
1054 */
1055
1056 if (unwindidx == 0) {
1057 *allocib = 0;
1058 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
1059 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
1060 } else {
1061 int r;
1062
1063 r = bread(vp, indirs[unwindidx].in_lbn,
1064 (int)fs->fs_bsize, NOCRED, &bp);
1065 if (r) {
1066 panic("Could not unwind indirect block, error %d", r);
1067 brelse(bp);
1068 } else {
1069 bap = (int64_t *)bp->b_data;
1070 bap[indirs[unwindidx].in_off] = 0;
1071 bwrite(bp);
1072 }
1073 }
1074 for (i = unwindidx + 1; i <= num; i++) {
1075 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1076 0);
1077 bp->b_flags |= B_INVAL;
1078 brelse(bp);
1079 }
1080 }
1081 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1082 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1083 deallocated += fs->fs_bsize;
1084 }
1085 if (deallocated) {
1086 #ifdef QUOTA
1087 /*
1088 * Restore user's disk quota because allocation failed.
1089 */
1090 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1091 #endif
1092 ip->i_ffs2_blocks -= btodb(deallocated);
1093 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1094 }
1095 return (error);
1096 }
1097