1 /* $NetBSD: ffs_balloc.c,v 1.44.6.5 2007/09/16 19:02:45 ad Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.44.6.5 2007/09/16 19:02:45 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
70 struct buf **);
71 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73
74 /*
75 * Balloc defines the structure of file system storage
76 * by allocating the physical blocks on a device given
77 * the inode and the logical block number in a file.
78 */
79
/*
 * Dispatch to the on-disk-format-specific balloc implementation,
 * chosen by the superblock magic number: UFS2 file systems go to
 * ffs_balloc_ufs2(), everything else to ffs_balloc_ufs1().
 * Arguments and return value are passed through unchanged.
 */
80 int
81 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
82 struct buf **bpp)
83 {
84
85 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
86 return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
87 else
88 return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
89 }
90
/*
 * UFS1 block allocation.
 *
 * Allocate the physical storage backing the "size" bytes at byte offset
 * "off" in vnode "vp": grow a trailing fragment to a full block if the
 * write extends the file, allocate direct blocks, or walk/allocate up to
 * NIADDR levels of indirect blocks.  On success, if "bpp" is non-NULL a
 * buffer for the data block (or, with B_METAONLY, the indirect block) is
 * returned in *bpp.  Returns 0 on success or an errno value.
 *
 * NOTE(review): ump->um_lock is entered before each ffs_alloc()/
 * ffs_realloccg() call but never exited here — presumably the allocator
 * releases it; confirm against ffs_alloc().
 */
91 static int
92 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
93 int flags, struct buf **bpp)
94 {
95 daddr_t lbn, lastlbn;
96 struct buf *bp, *nbp;
97 struct inode *ip = VTOI(vp);
98 struct fs *fs = ip->i_fs;
99 struct ufsmount *ump = ip->i_ump;
100 struct indir indirs[NIADDR + 2];
101 daddr_t newb, pref, nb;
102 int32_t *bap; /* XXX ondisk32 */
103 int deallocated, osize, nsize, num, i, error;
104 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
105 int32_t *allocib;
106 int unwindidx = -1;
107 #ifdef FFS_EI
108 const int needswap = UFS_FSNEEDSWAP(fs);
109 #endif
110 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
111
112 lbn = lblkno(fs, off);
113 size = blkoff(fs, off) + size;
114 if (size > fs->fs_bsize)
115 panic("ffs_balloc: blk too big");
116 if (bpp != NULL) {
117 *bpp = NULL;
118 }
119 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
120
121 if (lbn < 0)
122 return (EFBIG);
123
124 /*
125 * If the next write will extend the file into a new block,
126 * and the file is currently composed of a fragment
127 * this fragment has to be extended to be a full block.
128 */
129
130 lastlbn = lblkno(fs, ip->i_size);
131 if (lastlbn < NDADDR && lastlbn < lbn) {
132 nb = lastlbn;
133 osize = blksize(fs, ip, nb);
134 if (osize < fs->fs_bsize && osize > 0) {
135 mutex_enter(&ump->um_lock);
136 error = ffs_realloccg(ip, nb,
137 ffs_blkpref_ufs1(ip, lastlbn, nb,
138 &ip->i_ffs1_db[0]),
139 osize, (int)fs->fs_bsize, cred, bpp, &newb);
140 if (error)
141 return (error);
142 if (DOINGSOFTDEP(vp))
143 softdep_setup_allocdirect(ip, nb, newb,
144 ufs_rw32(ip->i_ffs1_db[nb], needswap),
145 fs->fs_bsize, osize, bpp ? *bpp : NULL);
146 ip->i_size = lblktosize(fs, nb + 1);
147 ip->i_ffs1_size = ip->i_size;
148 uvm_vnp_setsize(vp, ip->i_ffs1_size);
149 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
150 ip->i_flag |= IN_CHANGE | IN_UPDATE;
151 if (bpp && *bpp) {
152 if (flags & B_SYNC)
153 bwrite(*bpp);
154 else
155 bawrite(*bpp);
156 }
157 }
158 }
159
160 /*
161 * The first NDADDR blocks are direct blocks
162 */
163
164 if (lbn < NDADDR) {
165 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
166 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
167
168 /*
169 * The block is an already-allocated direct block
170 * and the file already extends past this block,
171 * thus this must be a whole block.
172 * Just read the block (if requested).
173 */
174
175 if (bpp != NULL) {
176 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
177 bpp);
178 if (error) {
179 brelse(*bpp, 0);
180 return (error);
181 }
182 }
183 return (0);
184 }
185 if (nb != 0) {
186
187 /*
188 * Consider need to reallocate a fragment.
189 */
190
191 osize = fragroundup(fs, blkoff(fs, ip->i_size));
192 nsize = fragroundup(fs, size);
193 if (nsize <= osize) {
194
195 /*
196 * The existing block is already
197 * at least as big as we want.
198 * Just read the block (if requested).
199 */
200
201 if (bpp != NULL) {
202 error = bread(vp, lbn, osize, NOCRED,
203 bpp);
204 if (error) {
205 brelse(*bpp, 0);
206 return (error);
207 }
208 }
209 return 0;
210 } else {
211
212 /*
213 * The existing block is smaller than we want,
214 * grow it.
215 */
216 mutex_enter(&ump->um_lock);
217 error = ffs_realloccg(ip, lbn,
218 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
219 &ip->i_ffs1_db[0]), osize, nsize, cred,
220 bpp, &newb);
221 if (error)
222 return (error);
223 if (DOINGSOFTDEP(vp))
224 softdep_setup_allocdirect(ip, lbn,
225 newb, nb, nsize, osize,
226 bpp ? *bpp : NULL);
227 }
228 } else {
229
230 /*
231 * the block was not previously allocated,
232 * allocate a new block or fragment.
233 */
234
235 if (ip->i_size < lblktosize(fs, lbn + 1))
236 nsize = fragroundup(fs, size);
237 else
238 nsize = fs->fs_bsize;
239 mutex_enter(&ump->um_lock);
240 error = ffs_alloc(ip, lbn,
241 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
242 &ip->i_ffs1_db[0]),
243 nsize, cred, &newb);
244 if (error)
245 return (error);
246 if (bpp != NULL) {
247 bp = getblk(vp, lbn, nsize, 0, 0);
248 bp->b_blkno = fsbtodb(fs, newb);
249 if (flags & B_CLRBUF)
250 clrbuf(bp);
251 *bpp = bp;
252 }
253 if (DOINGSOFTDEP(vp)) {
254 softdep_setup_allocdirect(ip, lbn, newb, 0,
255 nsize, 0, bpp ? *bpp : NULL);
256 }
257 }
258 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
259 ip->i_flag |= IN_CHANGE | IN_UPDATE;
260 return (0);
261 }
262
263 /*
264 * Determine the number of levels of indirection.
265 */
266
267 pref = 0;
268 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
269 return (error);
270
271 /*
272 * Fetch the first indirect block allocating if necessary.
273 */
274
275 --num;
276 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
/*
 * allociblk[] records every block allocated from here on so that the
 * "fail:" path below can free them all if allocation errors out part
 * way through; unwindidx marks the first indirect level to unwind.
 */
277 allocib = NULL;
278 allocblk = allociblk;
279 if (nb == 0) {
280 mutex_enter(&ump->um_lock);
281 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
282 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
283 &newb);
284 if (error)
285 goto fail;
286 nb = newb;
287 *allocblk++ = nb;
288 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
289 bp->b_blkno = fsbtodb(fs, nb);
290 clrbuf(bp);
291 if (DOINGSOFTDEP(vp)) {
292 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
293 newb, 0, fs->fs_bsize, 0, bp);
294 bdwrite(bp);
295 } else {
296
297 /*
298 * Write synchronously so that indirect blocks
299 * never point at garbage.
300 */
301
302 if ((error = bwrite(bp)) != 0)
303 goto fail;
304 }
305 unwindidx = 0;
306 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
307 *allocib = ufs_rw32(nb, needswap);
308 ip->i_flag |= IN_CHANGE | IN_UPDATE;
309 }
310
311 /*
312 * Fetch through the indirect blocks, allocating as necessary.
313 */
314
315 for (i = 1;;) {
316 error = bread(vp,
317 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
318 if (error) {
319 brelse(bp, 0);
320 goto fail;
321 }
322 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
323 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
324 if (i == num)
325 break;
326 i++;
327 if (nb != 0) {
328 brelse(bp, 0);
329 continue;
330 }
331 mutex_enter(&ump->um_lock);
332 if (pref == 0)
333 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
334 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
335 &newb);
336 if (error) {
337 brelse(bp, 0);
338 goto fail;
339 }
340 nb = newb;
341 *allocblk++ = nb;
342 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
343 nbp->b_blkno = fsbtodb(fs, nb);
344 clrbuf(nbp);
345 if (DOINGSOFTDEP(vp)) {
346 softdep_setup_allocindir_meta(nbp, ip, bp,
347 indirs[i - 1].in_off, nb);
348 bdwrite(nbp);
349 } else {
350
351 /*
352 * Write synchronously so that indirect blocks
353 * never point at garbage.
354 */
355
356 if ((error = bwrite(nbp)) != 0) {
357 brelse(bp, 0);
358 goto fail;
359 }
360 }
361 if (unwindidx < 0)
362 unwindidx = i - 1;
363 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
364
365 /*
366 * If required, write synchronously, otherwise use
367 * delayed write.
368 */
369
370 if (flags & B_SYNC) {
371 bwrite(bp);
372 } else {
373 bdwrite(bp);
374 }
375 }
376
377 if (flags & B_METAONLY) {
378 KASSERT(bpp != NULL);
379 *bpp = bp;
380 return (0);
381 }
382
383 /*
384 * Get the data block, allocating if necessary.
385 */
386
387 if (nb == 0) {
388 mutex_enter(&ump->um_lock);
389 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
390 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
391 &newb);
392 if (error) {
393 brelse(bp, 0);
394 goto fail;
395 }
396 nb = newb;
397 *allocblk++ = nb;
398 if (bpp != NULL) {
399 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
400 nbp->b_blkno = fsbtodb(fs, nb);
401 if (flags & B_CLRBUF)
402 clrbuf(nbp);
403 *bpp = nbp;
404 }
405 if (DOINGSOFTDEP(vp))
406 softdep_setup_allocindir_page(ip, lbn, bp,
407 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
408 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
409 if (allocib == NULL && unwindidx < 0) {
410 unwindidx = i - 1;
411 }
412
413 /*
414 * If required, write synchronously, otherwise use
415 * delayed write.
416 */
417
418 if (flags & B_SYNC) {
419 bwrite(bp);
420 } else {
421 bdwrite(bp);
422 }
423 return (0);
424 }
425 brelse(bp, 0);
426 if (bpp != NULL) {
427 if (flags & B_CLRBUF) {
428 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
429 if (error) {
430 brelse(nbp, 0);
431 goto fail;
432 }
433 } else {
434 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
435 nbp->b_blkno = fsbtodb(fs, nb);
436 clrbuf(nbp);
437 }
438 *bpp = nbp;
439 }
440 return (0);
441
442 fail:
443 /*
444 * If we have failed part way through block allocation, we
445 * have to deallocate any indirect blocks that we have allocated.
446 */
447
448 if (unwindidx >= 0) {
449
450 /*
451 * First write out any buffers we've created to resolve their
452 * softdeps. This must be done in reverse order of creation
453 * so that we resolve the dependencies in one pass.
454 * Write the cylinder group buffers for these buffers too.
455 */
456
457 for (i = num; i >= unwindidx; i--) {
458 if (i == 0) {
459 break;
460 }
461 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
462 0);
463 if (bp->b_oflags & BO_DELWRI) {
464 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
465 dbtofsb(fs, bp->b_blkno))));
466 bwrite(bp);
467 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
468 0, 0);
469 if (bp->b_oflags & BO_DELWRI) {
470 bwrite(bp);
471 } else {
472 brelse(bp, BC_INVAL);
473 }
474 } else {
475 brelse(bp, BC_INVAL);
476 }
477 }
478
479 /* Now flush all dependencies to disk. */
480 /* XXXAD pages locked */
481 (void)softdep_sync_metadata(vp);
482
483 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
484 ip->i_flag |= IN_CHANGE | IN_UPDATE;
485 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
486 }
487
488 /*
489 * Now that any dependencies that we created have been
490 * resolved, we can undo the partial allocation.
491 */
492
493 if (unwindidx == 0) {
494 *allocib = 0;
495 ip->i_flag |= IN_CHANGE | IN_UPDATE;
496 if (DOINGSOFTDEP(vp))
497 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
498 } else {
499 int r;
500
501 r = bread(vp, indirs[unwindidx].in_lbn,
502 (int)fs->fs_bsize, NOCRED, &bp);
503 if (r) {
504 panic("Could not unwind indirect block, error %d", r);
505 brelse(bp, 0);
506 } else {
507 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
508 bap[indirs[unwindidx].in_off] = 0;
509 bwrite(bp);
510 }
511 }
512 for (i = unwindidx + 1; i <= num; i++) {
513 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
514 0);
515 brelse(bp, BC_INVAL);
516 }
517 }
/* Free every block recorded in allociblk[] and give back the quota. */
518 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
519 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
520 deallocated += fs->fs_bsize;
521 }
522 if (deallocated) {
523 #ifdef QUOTA
524 /*
525 * Restore user's disk quota because allocation failed.
526 */
527 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
528 #endif
529 ip->i_ffs1_blocks -= btodb(deallocated);
530 ip->i_flag |= IN_CHANGE | IN_UPDATE;
531 }
532 /*
533 * Flush all dependencies again so that the soft updates code
534 * doesn't find any untracked changes.
535 */
536 /* XXXAD pages locked */
537 (void)softdep_sync_metadata(vp);
538 return (error);
539 }
540
/*
 * UFS2 block allocation — same structure as ffs_balloc_ufs1() but with
 * 64-bit on-disk block pointers (i_ffs2_db/i_ffs2_ib, ufs_rw64) and the
 * UFS2 preference function.  The leading "#ifdef notyet" section is
 * unfinished extended-attribute (external data) allocation imported from
 * FreeBSD and is not compiled.
 *
 * NOTE(review): ump->um_lock is entered before each allocator call but
 * never exited here — presumably released by ffs_alloc()/ffs_realloccg();
 * confirm against those functions.
 */
541 static int
542 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
543 int flags, struct buf **bpp)
544 {
545 daddr_t lbn, lastlbn;
546 struct buf *bp, *nbp;
547 struct inode *ip = VTOI(vp);
548 struct fs *fs = ip->i_fs;
549 struct ufsmount *ump = ip->i_ump;
550 struct indir indirs[NIADDR + 2];
551 daddr_t newb, pref, nb;
552 int64_t *bap;
553 int deallocated, osize, nsize, num, i, error;
554 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
555 int64_t *allocib;
556 int unwindidx = -1;
557 #ifdef FFS_EI
558 const int needswap = UFS_FSNEEDSWAP(fs);
559 #endif
560 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
561
562 lbn = lblkno(fs, off);
563 size = blkoff(fs, off) + size;
564 if (size > fs->fs_bsize)
565 panic("ffs_balloc: blk too big");
566 if (bpp != NULL) {
567 *bpp = NULL;
568 }
569 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
570
571 if (lbn < 0)
572 return (EFBIG);
573
574 #ifdef notyet
575 /*
576 * Check for allocating external data.
577 */
578 if (flags & IO_EXT) {
579 if (lbn >= NXADDR)
580 return (EFBIG);
581 /*
582 * If the next write will extend the data into a new block,
583 * and the data is currently composed of a fragment
584 * this fragment has to be extended to be a full block.
585 */
586 lastlbn = lblkno(fs, dp->di_extsize);
587 if (lastlbn < lbn) {
588 nb = lastlbn;
589 osize = sblksize(fs, dp->di_extsize, nb);
590 if (osize < fs->fs_bsize && osize > 0) {
591 mutex_enter(&ump->um_lock);
592 error = ffs_realloccg(ip, -1 - nb,
593 dp->di_extb[nb],
594 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
595 &dp->di_extb[0]), osize,
596 (int)fs->fs_bsize, cred, &bp);
597 if (error)
598 return (error);
599 if (DOINGSOFTDEP(vp))
600 softdep_setup_allocext(ip, nb,
601 dbtofsb(fs, bp->b_blkno),
602 dp->di_extb[nb],
603 fs->fs_bsize, osize, bp);
604 dp->di_extsize = smalllblktosize(fs, nb + 1);
605 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
606 bp->b_xflags |= BX_ALTDATA;
607 ip->i_flag |= IN_CHANGE | IN_UPDATE;
608 if (flags & IO_SYNC)
609 bwrite(bp);
610 else
611 bawrite(bp);
612 }
613 }
614 /*
615 * All blocks are direct blocks
616 */
617 if (flags & BA_METAONLY)
618 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
619 nb = dp->di_extb[lbn];
620 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
621 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
622 if (error) {
623 brelse(bp, 0);
624 return (error);
625 }
626 mutex_enter(&bp->b_interlock);
627 bp->b_blkno = fsbtodb(fs, nb);
628 bp->b_xflags |= BX_ALTDATA;
629 mutex_exit(&bp->b_interlock);
630 *bpp = bp;
631 return (0);
632 }
633 if (nb != 0) {
634 /*
635 * Consider need to reallocate a fragment.
636 */
637 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
638 nsize = fragroundup(fs, size);
639 if (nsize <= osize) {
640 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
641 if (error) {
642 brelse(bp, 0);
643 return (error);
644 }
645 mutex_enter(&bp->b_interlock);
646 bp->b_blkno = fsbtodb(fs, nb);
647 bp->b_xflags |= BX_ALTDATA;
648 mutex_exit(&bp->b_interlock);
649 } else {
650 mutex_enter(&ump->um_lock);
651 error = ffs_realloccg(ip, -1 - lbn,
652 dp->di_extb[lbn],
653 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
654 &dp->di_extb[0]), osize, nsize, cred, &bp);
655 if (error)
656 return (error);
657 bp->b_xflags |= BX_ALTDATA;
658 if (DOINGSOFTDEP(vp))
659 softdep_setup_allocext(ip, lbn,
660 dbtofsb(fs, bp->b_blkno), nb,
661 nsize, osize, bp);
662 }
663 } else {
664 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
665 nsize = fragroundup(fs, size);
666 else
667 nsize = fs->fs_bsize;
668 mutex_enter(&ump->um_lock);
669 error = ffs_alloc(ip, lbn,
670 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
671 nsize, cred, &newb);
672 if (error)
673 return (error);
674 bp = getblk(vp, -1 - lbn, nsize, 0, 0);
675 bp->b_blkno = fsbtodb(fs, newb);
676 bp->b_xflags |= BX_ALTDATA;
677 if (flags & BA_CLRBUF)
678 vfs_bio_clrbuf(bp);
679 if (DOINGSOFTDEP(vp))
680 softdep_setup_allocext(ip, lbn, newb, 0,
681 nsize, 0, bp);
682 }
683 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
684 ip->i_flag |= IN_CHANGE | IN_UPDATE;
685 *bpp = bp;
686 return (0);
687 }
688 #endif
689 /*
690 * If the next write will extend the file into a new block,
691 * and the file is currently composed of a fragment
692 * this fragment has to be extended to be a full block.
693 */
694
695 lastlbn = lblkno(fs, ip->i_size);
696 if (lastlbn < NDADDR && lastlbn < lbn) {
697 nb = lastlbn;
698 osize = blksize(fs, ip, nb);
699 if (osize < fs->fs_bsize && osize > 0) {
700 mutex_enter(&ump->um_lock);
701 error = ffs_realloccg(ip, nb,
702 ffs_blkpref_ufs2(ip, lastlbn, nb,
703 &ip->i_ffs2_db[0]),
704 osize, (int)fs->fs_bsize, cred, bpp, &newb);
705 if (error)
706 return (error);
707 if (DOINGSOFTDEP(vp))
708 softdep_setup_allocdirect(ip, nb, newb,
709 ufs_rw64(ip->i_ffs2_db[nb], needswap),
710 fs->fs_bsize, osize, bpp ? *bpp : NULL);
711 ip->i_size = lblktosize(fs, nb + 1);
712 ip->i_ffs2_size = ip->i_size;
713 uvm_vnp_setsize(vp, ip->i_size);
714 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
715 ip->i_flag |= IN_CHANGE | IN_UPDATE;
716 if (bpp) {
717 if (flags & B_SYNC)
718 bwrite(*bpp);
719 else
720 bawrite(*bpp);
721 }
722 }
723 }
724
725 /*
726 * The first NDADDR blocks are direct blocks
727 */
728
729 if (lbn < NDADDR) {
730 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
731 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
732
733 /*
734 * The block is an already-allocated direct block
735 * and the file already extends past this block,
736 * thus this must be a whole block.
737 * Just read the block (if requested).
738 */
739
740 if (bpp != NULL) {
741 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
742 bpp);
743 if (error) {
744 brelse(*bpp, 0);
745 return (error);
746 }
747 }
748 return (0);
749 }
750 if (nb != 0) {
751
752 /*
753 * Consider need to reallocate a fragment.
754 */
755
756 osize = fragroundup(fs, blkoff(fs, ip->i_size));
757 nsize = fragroundup(fs, size);
758 if (nsize <= osize) {
759
760 /*
761 * The existing block is already
762 * at least as big as we want.
763 * Just read the block (if requested).
764 */
765
766 if (bpp != NULL) {
767 error = bread(vp, lbn, osize, NOCRED,
768 bpp);
769 if (error) {
770 brelse(*bpp, 0);
771 return (error);
772 }
773 }
774 return 0;
775 } else {
776
777 /*
778 * The existing block is smaller than we want,
779 * grow it.
780 */
781 mutex_enter(&ump->um_lock);
782 error = ffs_realloccg(ip, lbn,
783 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
784 &ip->i_ffs2_db[0]), osize, nsize, cred,
785 bpp, &newb);
786 if (error)
787 return (error);
788 if (DOINGSOFTDEP(vp))
789 softdep_setup_allocdirect(ip, lbn,
790 newb, nb, nsize, osize,
791 bpp ? *bpp : NULL);
792 }
793 } else {
794
795 /*
796 * the block was not previously allocated,
797 * allocate a new block or fragment.
798 */
799
800 if (ip->i_size < lblktosize(fs, lbn + 1))
801 nsize = fragroundup(fs, size);
802 else
803 nsize = fs->fs_bsize;
804 mutex_enter(&ump->um_lock);
805 error = ffs_alloc(ip, lbn,
806 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
807 &ip->i_ffs2_db[0]), nsize, cred, &newb);
808 if (error)
809 return (error);
810 if (bpp != NULL) {
811 bp = getblk(vp, lbn, nsize, 0, 0);
812 bp->b_blkno = fsbtodb(fs, newb);
813 if (flags & B_CLRBUF)
814 clrbuf(bp);
815 *bpp = bp;
816 }
817 if (DOINGSOFTDEP(vp)) {
818 softdep_setup_allocdirect(ip, lbn, newb, 0,
819 nsize, 0, bpp ? *bpp : NULL);
820 }
821 }
822 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
823 ip->i_flag |= IN_CHANGE | IN_UPDATE;
824 return (0);
825 }
826
827 /*
828 * Determine the number of levels of indirection.
829 */
830
831 pref = 0;
832 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
833 return (error);
834
835 /*
836 * Fetch the first indirect block allocating if necessary.
837 */
838
839 --num;
840 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
/*
 * allociblk[] records each block allocated below so the "fail:" path
 * can free them on error; unwindidx marks the first level to unwind.
 */
841 allocib = NULL;
842 allocblk = allociblk;
843 if (nb == 0) {
844 mutex_enter(&ump->um_lock);
845 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
846 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
847 &newb);
848 if (error)
849 goto fail;
850 nb = newb;
851 *allocblk++ = nb;
852 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
853 bp->b_blkno = fsbtodb(fs, nb);
854 clrbuf(bp);
855 if (DOINGSOFTDEP(vp)) {
856 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
857 newb, 0, fs->fs_bsize, 0, bp);
858 bdwrite(bp);
859 } else {
860
861 /*
862 * Write synchronously so that indirect blocks
863 * never point at garbage.
864 */
865
866 if ((error = bwrite(bp)) != 0)
867 goto fail;
868 }
869 unwindidx = 0;
870 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
871 *allocib = ufs_rw64(nb, needswap);
872 ip->i_flag |= IN_CHANGE | IN_UPDATE;
873 }
874
875 /*
876 * Fetch through the indirect blocks, allocating as necessary.
877 */
878
879 for (i = 1;;) {
880 error = bread(vp,
881 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
882 if (error) {
883 brelse(bp, 0);
884 goto fail;
885 }
886 bap = (int64_t *)bp->b_data;
887 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
888 if (i == num)
889 break;
890 i++;
891 if (nb != 0) {
892 brelse(bp, 0);
893 continue;
894 }
895 mutex_enter(&ump->um_lock);
896 if (pref == 0)
897 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
898 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
899 &newb);
900 if (error) {
901 brelse(bp, 0);
902 goto fail;
903 }
904 nb = newb;
905 *allocblk++ = nb;
906 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
907 nbp->b_blkno = fsbtodb(fs, nb);
908 clrbuf(nbp);
909 if (DOINGSOFTDEP(vp)) {
910 softdep_setup_allocindir_meta(nbp, ip, bp,
911 indirs[i - 1].in_off, nb);
912 bdwrite(nbp);
913 } else {
914
915 /*
916 * Write synchronously so that indirect blocks
917 * never point at garbage.
918 */
919
920 if ((error = bwrite(nbp)) != 0) {
921 brelse(bp, 0);
922 goto fail;
923 }
924 }
925 if (unwindidx < 0)
926 unwindidx = i - 1;
927 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
928
929 /*
930 * If required, write synchronously, otherwise use
931 * delayed write.
932 */
933
934 if (flags & B_SYNC) {
935 bwrite(bp);
936 } else {
937 bdwrite(bp);
938 }
939 }
940
941 if (flags & B_METAONLY) {
942 KASSERT(bpp != NULL);
943 *bpp = bp;
944 return (0);
945 }
946
947 /*
948 * Get the data block, allocating if necessary.
949 */
950
951 if (nb == 0) {
952 mutex_enter(&ump->um_lock);
953 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
954 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
955 &newb);
956 if (error) {
957 brelse(bp, 0);
958 goto fail;
959 }
960 nb = newb;
961 *allocblk++ = nb;
962 if (bpp != NULL) {
963 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
964 nbp->b_blkno = fsbtodb(fs, nb);
965 if (flags & B_CLRBUF)
966 clrbuf(nbp);
967 *bpp = nbp;
968 }
969 if (DOINGSOFTDEP(vp))
970 softdep_setup_allocindir_page(ip, lbn, bp,
971 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
972 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
973 if (allocib == NULL && unwindidx < 0) {
974 unwindidx = i - 1;
975 }
976
977 /*
978 * If required, write synchronously, otherwise use
979 * delayed write.
980 */
981
982 if (flags & B_SYNC) {
983 bwrite(bp);
984 } else {
985 bdwrite(bp);
986 }
987 return (0);
988 }
989 brelse(bp, 0);
990 if (bpp != NULL) {
991 if (flags & B_CLRBUF) {
992 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
993 if (error) {
994 brelse(nbp, 0);
995 goto fail;
996 }
997 } else {
998 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
999 nbp->b_blkno = fsbtodb(fs, nb);
1000 clrbuf(nbp);
1001 }
1002 *bpp = nbp;
1003 }
1004 return (0);
1005
1006 fail:
1007 /*
1008 * If we have failed part way through block allocation, we
1009 * have to deallocate any indirect blocks that we have allocated.
1010 */
1011
1012 if (unwindidx >= 0) {
1013
1014 /*
1015 * First write out any buffers we've created to resolve their
1016 * softdeps. This must be done in reverse order of creation
1017 * so that we resolve the dependencies in one pass.
1018 * Write the cylinder group buffers for these buffers too.
1019 */
1020
1021 for (i = num; i >= unwindidx; i--) {
1022 if (i == 0) {
1023 break;
1024 }
1025 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1026 0);
1027 if (bp->b_oflags & BO_DELWRI) {
1028 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
1029 dbtofsb(fs, bp->b_blkno))));
1030 bwrite(bp);
1031 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
1032 0, 0);
1033 if (bp->b_oflags & BO_DELWRI) {
1034 bwrite(bp);
1035 } else {
1036 brelse(bp, BC_INVAL);
1037 }
1038 } else {
1039 brelse(bp, BC_INVAL);
1040 }
1041 }
1042
1043 /* Now flush the dependencies to disk. */
1044 /* XXXAD pages locked */
1045 (void)softdep_sync_metadata(vp);
1046
1047 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
1048 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1049 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1050 }
1051
1052 /*
1053 * Now that any dependencies that we created have been
1054 * resolved, we can undo the partial allocation.
1055 */
1056
1057 if (unwindidx == 0) {
1058 *allocib = 0;
1059 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1060 if (DOINGSOFTDEP(vp))
1061 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1062 } else {
1063 int r;
1064
1065 r = bread(vp, indirs[unwindidx].in_lbn,
1066 (int)fs->fs_bsize, NOCRED, &bp);
1067 if (r) {
1068 panic("Could not unwind indirect block, error %d", r);
1069 brelse(bp, 0);
1070 } else {
1071 bap = (int64_t *)bp->b_data;
1072 bap[indirs[unwindidx].in_off] = 0;
1073 bwrite(bp);
1074 }
1075 }
1076 for (i = unwindidx + 1; i <= num; i++) {
1077 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1078 0);
1079 brelse(bp, BC_INVAL);
1080 }
1081 }
/* Free every block recorded in allociblk[] and give back the quota. */
1082 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1083 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1084 deallocated += fs->fs_bsize;
1085 }
1086 if (deallocated) {
1087 #ifdef QUOTA
1088 /*
1089 * Restore user's disk quota because allocation failed.
1090 */
1091 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1092 #endif
1093 ip->i_ffs2_blocks -= btodb(deallocated);
1094 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1095 }
1096
1097 /*
1098 * Flush all dependencies again so that the soft updates code
1099 * doesn't find any untracked changes.
1100 */
1101 /* XXXAD pages locked */
1102 (void)softdep_sync_metadata(vp);
1103 return (error);
1104 }
1105