/*	$NetBSD: ffs_balloc.c,v 1.44.6.1 2007/04/13 15:47:03 ad Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.44.6.1 2007/04/13 15:47:03 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
70 struct buf **);
71 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73
74 /*
75 * Balloc defines the structure of file system storage
76 * by allocating the physical blocks on a device given
77 * the inode and the logical block number in a file.
78 */
79
80 int
81 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
82 struct buf **bpp)
83 {
84
85 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
86 return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
87 else
88 return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
89 }
90
/*
 * ffs_balloc_ufs1:
 *
 *	Allocate the physical block underlying the logical block that
 *	contains byte offset "off" of the UFS1 file "vp", growing a
 *	trailing fragment and allocating intermediate indirect blocks
 *	as needed.  On success, if bpp != NULL, *bpp is a buffer for
 *	the data block (or, with B_METAONLY, for the last indirect
 *	block on the path).  On failure every block allocated here is
 *	released again via the "fail:" unwind code below.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated in this call so the
	 * fail: path can free them; allocblk points past the last entry. */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int32_t *allocib;
	/* Index of the first indirect-chain entry we created (-1 = none);
	 * tells the fail: path where to start undoing pointer updates. */
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* "size" becomes the in-block extent: offset within block + length. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	KASSERT(size <= fs->fs_bsize);
	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): no matching mutex_exit in this
			 * function; um_lock appears to be released inside
			 * ffs_realloccg/ffs_alloc -- confirm against
			 * ffs_alloc.c.  Same applies to every
			 * mutex_enter(&ump->um_lock) below.
			 */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs1(ip, lastlbn, nb,
				&ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				&ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		/* Remember the block for the fail: rollback path. */
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		/* Publish the new first-level indirect block in the inode. */
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Only now link the child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* B_METAONLY: caller wants the indirect block itself, not data. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_flags & B_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_flags & B_DELWRI) {
					bwrite(bp);
				} else {
					bp->b_flags |= B_INVAL;
					brelse(bp);
				}
			} else {
				bp->b_flags |= B_INVAL;
				brelse(bp);
			}
		}
		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				/* panic does not return; brelse is dead code
				 * kept for symmetry with the else branch. */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp);
			} else {
				bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	/* Free every block this call allocated and charge back the quota. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
533
/*
 * ffs_balloc_ufs2:
 *
 *	UFS2 counterpart of ffs_balloc_ufs1: allocate the physical
 *	block backing the logical block containing byte offset "off"
 *	of "vp", using 64-bit on-disk block pointers (i_ffs2_db/_ib,
 *	ufs_rw64).  Structure and failure unwinding mirror the UFS1
 *	version above.
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated in this call so the
	 * fail: path can free them; allocblk points past the last entry. */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int64_t *allocib;
	/* Index of the first indirect-chain entry we created (-1 = none). */
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* "size" becomes the in-block extent: offset within block + length. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	KASSERT(size <= fs->fs_bsize);
	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 *
	 * NOTE(review): this disabled section references "dp", which is
	 * not declared in this function, and FreeBSD-style flags
	 * (IO_EXT/BA_*); it will not compile if "notyet" is ever
	 * enabled without further porting work.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					&dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): no matching mutex_exit in this
			 * function; um_lock appears to be released inside
			 * ffs_realloccg/ffs_alloc -- confirm against
			 * ffs_alloc.c.  Same applies to every
			 * mutex_enter(&ump->um_lock) below.
			 */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs2(ip, lastlbn, nb,
				&ip->i_ffs2_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		/* Remember the block for the fail: rollback path. */
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		/* Publish the new first-level indirect block in the inode. */
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Only now link the child into its parent indirect block. */
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* B_METAONLY: caller wants the indirect block itself, not data. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_flags & B_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_flags & B_DELWRI) {
					bwrite(bp);
				} else {
					bp->b_flags |= B_INVAL;
					brelse(bp);
				}
			} else {
				bp->b_flags |= B_INVAL;
				brelse(bp);
			}
		}
		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				/* panic does not return; brelse is dead code
				 * kept for symmetry with the else branch. */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	/* Free every block this call allocated and charge back the quota. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
1086