/*	$NetBSD: ffs_balloc.c,v 1.44.6.4 2007/08/24 23:28:43 ad Exp $	*/

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.44.6.4 2007/08/24 23:28:43 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
70 struct buf **);
71 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73
74 /*
75 * Balloc defines the structure of file system storage
76 * by allocating the physical blocks on a device given
77 * the inode and the logical block number in a file.
78 */
79
80 int
81 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
82 struct buf **bpp)
83 {
84
85 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
86 return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
87 else
88 return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
89 }
90
/*
 * UFS1 flavour of ffs_balloc().
 *
 * Allocate the on-disk storage underlying logical offset "off" of the
 * file behind "vp".  "size" is the number of bytes being written at
 * that offset; blkoff(off) + size must fit in one file system block.
 * Grows a trailing fragment to a full block when the file is being
 * extended, allocates direct blocks, and walks/creates up to NIADDR
 * levels of indirect blocks for larger files.
 *
 * If "bpp" is non-NULL, on success *bpp holds a buffer for the data
 * block (with B_METAONLY: the last indirect block instead).  "flags"
 * understands B_SYNC (write metadata synchronously), B_CLRBUF (zero a
 * newly allocated buffer) and B_METAONLY.  Returns 0 or an errno; on
 * failure all blocks allocated by this call are unwound and freed.
 *
 * NOTE(review): mutex_enter(&ump->um_lock) is taken before each call
 * into ffs_alloc()/ffs_realloccg() and never released here, so those
 * allocators are presumably responsible for dropping it — confirm
 * against ffs_alloc.c.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk[] records every block allocated by this call so that
	 * the "fail:" path can free them all; allocblk points past the
	 * last recorded entry. */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	/* allocib points at the inode's indirect-block slot if we filled
	 * it in, so failure can clear it again. */
	int32_t *allocib;
	/* Level at which this call first linked a new indirect block into
	 * the tree; -1 means nothing to undo. */
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);		/* logical block containing "off" */
	size = blkoff(fs, off) + size;	/* bytes of that block in use after the write */
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs1(ip, lastlbn, nb,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			/* The file now ends exactly at the full block. */
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]), osize, nsize, cred,
					bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			/* Only the last block of a file may be a fragment. */
			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				&ip->i_ffs1_db[0]),
				nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		/* Remember the shallowest level we linked into, for unwind. */
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the last indirect block, not the data block. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			/* Read so the valid part of a partial block survives. */
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_oflags & BO_DELWRI) {
				/* Flush the indirect block, then its cg block. */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}
		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			/* We installed the first-level pointer in the inode. */
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
				/* NB: not reached -- panic() does not return. */
				brelse(bp, 0);
			} else {
				bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate buffers for the deeper, now-orphaned levels. */
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			brelse(bp, BC_INVAL);
		}
	}
	/* Return every block this call allocated to the free list. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
529
/*
 * UFS2 flavour of ffs_balloc().
 *
 * Identical in structure to ffs_balloc_ufs1() but operating on the
 * 64-bit on-disk block pointers (i_ffs2_db/i_ffs2_ib, ufs_rw64):
 * allocate the on-disk storage underlying logical offset "off" of the
 * file behind "vp", growing a trailing fragment, allocating direct
 * blocks, and walking/creating up to NIADDR levels of indirect blocks.
 *
 * If "bpp" is non-NULL, on success *bpp holds a buffer for the data
 * block (with B_METAONLY: the last indirect block instead).  "flags"
 * understands B_SYNC, B_CLRBUF and B_METAONLY.  Returns 0 or an errno;
 * on failure all blocks allocated by this call are unwound and freed.
 *
 * NOTE(review): as in the UFS1 version, ump->um_lock is entered before
 * each ffs_alloc()/ffs_realloccg() call and never released here —
 * presumably the allocators drop it; confirm against ffs_alloc.c.
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* allociblk[] records every block allocated by this call so that
	 * the "fail:" path can free them all; allocblk points past the
	 * last recorded entry. */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	/* allocib points at the inode's indirect-block slot if we filled
	 * it in, so failure can clear it again. */
	int64_t *allocib;
	/* Level at which this call first linked a new indirect block into
	 * the tree; -1 means nothing to undo. */
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);		/* logical block containing "off" */
	size = blkoff(fs, off) + size;	/* bytes of that block in use after the write */
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

/*
 * NOTE(review): the block below is unported FreeBSD extended-attribute
 * ("external data") allocation; it references identifiers (dp, NXADDR,
 * IO_EXT, BA_* flags, b_xflags) that this file does not define, which
 * is presumably why it is under "notyet".
 */
#ifdef notyet
	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs2(ip, lastlbn, nb,
					&ip->i_ffs2_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			/* The file now ends exactly at the full block. */
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&ip->i_ffs2_db[0]), osize, nsize, cred,
					bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			/* Only the last block of a file may be a fragment. */
			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		/* Remember the shallowest level we linked into, for unwind. */
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the last indirect block, not the data block. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			/* Read so the valid part of a partial block survives. */
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_oflags & BO_DELWRI) {
				/* Flush the indirect block, then its cg block. */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}
		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			/* We installed the first-level pointer in the inode. */
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
				/* NB: not reached -- panic() does not return. */
				brelse(bp, 0);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate buffers for the deeper, now-orphaned levels. */
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			brelse(bp, BC_INVAL);
		}
	}
	/* Return every block this call allocated to the free list. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
1082