/*	$NetBSD: ffs_balloc.c,v 1.46 2007/10/08 18:01:28 ad Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.46 2007/10/08 18:01:28 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
70 struct buf **);
71 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73
74 /*
75 * Balloc defines the structure of file system storage
76 * by allocating the physical blocks on a device given
77 * the inode and the logical block number in a file.
78 */
79
80 int
81 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
82 struct buf **bpp)
83 {
84
85 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
86 return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
87 else
88 return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
89 }
90
/*
 * UFS1 flavour of ffs_balloc().
 *
 * Map and, if necessary, allocate the file system block backing byte
 * offset "off" of vnode "vp", extending a trailing fragment to a full
 * block and allocating any intervening indirect blocks on the way.
 * On success the on-disk block pointer has been installed and, when
 * "bpp" is non-NULL, *bpp holds a buffer for the data block (or, with
 * B_METAONLY, for the indirect block containing its address).
 *
 * flags:
 *	B_CLRBUF   - zero (or read in) the returned data buffer.
 *	B_SYNC     - write updated metadata synchronously.
 *	B_METAONLY - return the indirect block, not the data block.
 *
 * On failure, all blocks allocated during this call are unwound and
 * freed again (see the "fail:" label).
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* Record of every block allocated here, for unwinding on error. */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* "size" becomes the in-block extent covered by this request. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * um_lock is taken here and presumably released
			 * inside ffs_realloccg() -- confirm against
			 * ffs_alloc.c.
			 */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs1(ip, lastlbn, nb,
				&ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			/*
			 * Only the final direct block of the file may be
			 * a fragment; anything earlier gets a full block.
			 */
			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				&ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* unwindidx == 0 means: clear *allocib on failure. */
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* Loop exits holding "bp", the last-level indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the (still locked) indirect block itself. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already existed; release the indirect block. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_flags & B_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_flags & B_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}
		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				/*
				 * NOTE(review): panic() does not return,
				 * so this brelse() is unreachable.
				 */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			brelse(bp, BC_INVAL);
		}
	}
	/* Free every block this call allocated and fix up the accounting. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
529
/*
 * UFS2 flavour of ffs_balloc().
 *
 * Structurally identical to ffs_balloc_ufs1() but operating on the
 * 64-bit on-disk block pointers (i_ffs2_db/i_ffs2_ib, ufs_rw64).
 * Map and, if necessary, allocate the file system block backing byte
 * offset "off" of vnode "vp", extending a trailing fragment to a full
 * block and allocating any intervening indirect blocks on the way.
 *
 * flags:
 *	B_CLRBUF   - zero (or read in) the returned data buffer.
 *	B_SYNC     - write updated metadata synchronously.
 *	B_METAONLY - return the indirect block, not the data block.
 *
 * On failure, all blocks allocated during this call are unwound and
 * freed again (see the "fail:" label).
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* Record of every block allocated here, for unwinding on error. */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int64_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* "size" becomes the in-block extent covered by this request. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 *
	 * NOTE(review): this whole section is compiled out.  It was
	 * brought over from FreeBSD and still uses FreeBSD-only names
	 * ("dp", IO_EXT/IO_SYNC, BA_METAONLY/BA_CLRBUF, b_xflags);
	 * it would not compile as-is if "notyet" were defined.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					&dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * um_lock is taken here and presumably released
			 * inside ffs_realloccg() -- confirm against
			 * ffs_alloc.c.
			 */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs2(ip, lastlbn, nb,
				&ip->i_ffs2_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
					      bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
						      bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			/*
			 * Only the final direct block of the file may be
			 * a fragment; anything earlier gets a full block.
			 */
			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		/* unwindidx == 0 means: clear *allocib on failure. */
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		/* Loop exits holding "bp", the last-level indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the (still locked) indirect block itself. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already existed; release the indirect block. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_flags & B_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_flags & B_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}
		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				/*
				 * NOTE(review): panic() does not return,
				 * so this brelse() is unreachable.
				 */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			brelse(bp, BC_INVAL);
		}
	}
	/* Free every block this call allocated and fix up the accounting. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
1080