/*	$NetBSD: ffs_balloc.c,v 1.44.6.6 2007/10/24 15:06:23 ad Exp $	*/
2
/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.44.6.6 2007/10/24 15:06:23 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
70 struct buf **);
71 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73
74 /*
75 * Balloc defines the structure of file system storage
76 * by allocating the physical blocks on a device given
77 * the inode and the logical block number in a file.
78 */
79
80 int
81 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
82 struct buf **bpp)
83 {
84
85 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
86 return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
87 else
88 return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
89 }
90
91 static int
92 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
93 int flags, struct buf **bpp)
94 {
95 daddr_t lbn, lastlbn;
96 struct buf *bp, *nbp;
97 struct inode *ip = VTOI(vp);
98 struct fs *fs = ip->i_fs;
99 struct ufsmount *ump = ip->i_ump;
100 struct indir indirs[NIADDR + 2];
101 daddr_t newb, pref, nb;
102 int32_t *bap; /* XXX ondisk32 */
103 int deallocated, osize, nsize, num, i, error;
104 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
105 int32_t *allocib;
106 int unwindidx = -1;
107 #ifdef FFS_EI
108 const int needswap = UFS_FSNEEDSWAP(fs);
109 #endif
110 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
111
112 lbn = lblkno(fs, off);
113 size = blkoff(fs, off) + size;
114 if (size > fs->fs_bsize)
115 panic("ffs_balloc: blk too big");
116 if (bpp != NULL) {
117 *bpp = NULL;
118 }
119 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
120
121 if (lbn < 0)
122 return (EFBIG);
123
124 /*
125 * If the next write will extend the file into a new block,
126 * and the file is currently composed of a fragment
127 * this fragment has to be extended to be a full block.
128 */
129
130 lastlbn = lblkno(fs, ip->i_size);
131 if (lastlbn < NDADDR && lastlbn < lbn) {
132 nb = lastlbn;
133 osize = blksize(fs, ip, nb);
134 if (osize < fs->fs_bsize && osize > 0) {
135 mutex_enter(&ump->um_lock);
136 error = ffs_realloccg(ip, nb,
137 ffs_blkpref_ufs1(ip, lastlbn, nb,
138 &ip->i_ffs1_db[0]),
139 osize, (int)fs->fs_bsize, cred, bpp, &newb);
140 if (error)
141 return (error);
142 if (DOINGSOFTDEP(vp))
143 softdep_setup_allocdirect(ip, nb, newb,
144 ufs_rw32(ip->i_ffs1_db[nb], needswap),
145 fs->fs_bsize, osize, bpp ? *bpp : NULL);
146 ip->i_size = lblktosize(fs, nb + 1);
147 ip->i_ffs1_size = ip->i_size;
148 uvm_vnp_setsize(vp, ip->i_ffs1_size);
149 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
150 ip->i_flag |= IN_CHANGE | IN_UPDATE;
151 if (bpp && *bpp) {
152 if (flags & B_SYNC)
153 bwrite(*bpp);
154 else
155 bawrite(*bpp);
156 }
157 }
158 }
159
160 /*
161 * The first NDADDR blocks are direct blocks
162 */
163
164 if (lbn < NDADDR) {
165 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
166 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
167
168 /*
169 * The block is an already-allocated direct block
170 * and the file already extends past this block,
171 * thus this must be a whole block.
172 * Just read the block (if requested).
173 */
174
175 if (bpp != NULL) {
176 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
177 bpp);
178 if (error) {
179 brelse(*bpp, 0);
180 return (error);
181 }
182 }
183 return (0);
184 }
185 if (nb != 0) {
186
187 /*
188 * Consider need to reallocate a fragment.
189 */
190
191 osize = fragroundup(fs, blkoff(fs, ip->i_size));
192 nsize = fragroundup(fs, size);
193 if (nsize <= osize) {
194
195 /*
196 * The existing block is already
197 * at least as big as we want.
198 * Just read the block (if requested).
199 */
200
201 if (bpp != NULL) {
202 error = bread(vp, lbn, osize, NOCRED,
203 bpp);
204 if (error) {
205 brelse(*bpp, 0);
206 return (error);
207 }
208 }
209 return 0;
210 } else {
211
212 /*
213 * The existing block is smaller than we want,
214 * grow it.
215 */
216 mutex_enter(&ump->um_lock);
217 error = ffs_realloccg(ip, lbn,
218 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
219 &ip->i_ffs1_db[0]), osize, nsize, cred,
220 bpp, &newb);
221 if (error)
222 return (error);
223 if (DOINGSOFTDEP(vp))
224 softdep_setup_allocdirect(ip, lbn,
225 newb, nb, nsize, osize,
226 bpp ? *bpp : NULL);
227 }
228 } else {
229
230 /*
231 * the block was not previously allocated,
232 * allocate a new block or fragment.
233 */
234
235 if (ip->i_size < lblktosize(fs, lbn + 1))
236 nsize = fragroundup(fs, size);
237 else
238 nsize = fs->fs_bsize;
239 mutex_enter(&ump->um_lock);
240 error = ffs_alloc(ip, lbn,
241 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
242 &ip->i_ffs1_db[0]),
243 nsize, cred, &newb);
244 if (error)
245 return (error);
246 if (bpp != NULL) {
247 bp = getblk(vp, lbn, nsize, 0, 0);
248 bp->b_blkno = fsbtodb(fs, newb);
249 if (flags & B_CLRBUF)
250 clrbuf(bp);
251 *bpp = bp;
252 }
253 if (DOINGSOFTDEP(vp)) {
254 softdep_setup_allocdirect(ip, lbn, newb, 0,
255 nsize, 0, bpp ? *bpp : NULL);
256 }
257 }
258 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
259 ip->i_flag |= IN_CHANGE | IN_UPDATE;
260 return (0);
261 }
262
263 /*
264 * Determine the number of levels of indirection.
265 */
266
267 pref = 0;
268 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
269 return (error);
270
271 /*
272 * Fetch the first indirect block allocating if necessary.
273 */
274
275 --num;
276 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
277 allocib = NULL;
278 allocblk = allociblk;
279 if (nb == 0) {
280 mutex_enter(&ump->um_lock);
281 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
282 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
283 &newb);
284 if (error)
285 goto fail;
286 nb = newb;
287 *allocblk++ = nb;
288 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
289 bp->b_blkno = fsbtodb(fs, nb);
290 clrbuf(bp);
291 if (DOINGSOFTDEP(vp)) {
292 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
293 newb, 0, fs->fs_bsize, 0, bp);
294 bdwrite(bp);
295 } else {
296
297 /*
298 * Write synchronously so that indirect blocks
299 * never point at garbage.
300 */
301
302 if ((error = bwrite(bp)) != 0)
303 goto fail;
304 }
305 unwindidx = 0;
306 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
307 *allocib = ufs_rw32(nb, needswap);
308 ip->i_flag |= IN_CHANGE | IN_UPDATE;
309 }
310
311 /*
312 * Fetch through the indirect blocks, allocating as necessary.
313 */
314
315 for (i = 1;;) {
316 error = bread(vp,
317 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
318 if (error) {
319 brelse(bp, 0);
320 goto fail;
321 }
322 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
323 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
324 if (i == num)
325 break;
326 i++;
327 if (nb != 0) {
328 brelse(bp, 0);
329 continue;
330 }
331 mutex_enter(&ump->um_lock);
332 if (pref == 0)
333 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
334 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
335 &newb);
336 if (error) {
337 brelse(bp, 0);
338 goto fail;
339 }
340 nb = newb;
341 *allocblk++ = nb;
342 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
343 nbp->b_blkno = fsbtodb(fs, nb);
344 clrbuf(nbp);
345 if (DOINGSOFTDEP(vp)) {
346 softdep_setup_allocindir_meta(nbp, ip, bp,
347 indirs[i - 1].in_off, nb);
348 bdwrite(nbp);
349 } else {
350
351 /*
352 * Write synchronously so that indirect blocks
353 * never point at garbage.
354 */
355
356 if ((error = bwrite(nbp)) != 0) {
357 brelse(bp, 0);
358 goto fail;
359 }
360 }
361 if (unwindidx < 0)
362 unwindidx = i - 1;
363 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
364
365 /*
366 * If required, write synchronously, otherwise use
367 * delayed write.
368 */
369
370 if (flags & B_SYNC) {
371 bwrite(bp);
372 } else {
373 bdwrite(bp);
374 }
375 }
376
377 if (flags & B_METAONLY) {
378 KASSERT(bpp != NULL);
379 *bpp = bp;
380 return (0);
381 }
382
383 /*
384 * Get the data block, allocating if necessary.
385 */
386
387 if (nb == 0) {
388 mutex_enter(&ump->um_lock);
389 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
390 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
391 &newb);
392 if (error) {
393 brelse(bp, 0);
394 goto fail;
395 }
396 nb = newb;
397 *allocblk++ = nb;
398 if (bpp != NULL) {
399 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
400 nbp->b_blkno = fsbtodb(fs, nb);
401 if (flags & B_CLRBUF)
402 clrbuf(nbp);
403 *bpp = nbp;
404 }
405 if (DOINGSOFTDEP(vp))
406 softdep_setup_allocindir_page(ip, lbn, bp,
407 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
408 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
409 if (allocib == NULL && unwindidx < 0) {
410 unwindidx = i - 1;
411 }
412
413 /*
414 * If required, write synchronously, otherwise use
415 * delayed write.
416 */
417
418 if (flags & B_SYNC) {
419 bwrite(bp);
420 } else {
421 bdwrite(bp);
422 }
423 return (0);
424 }
425 brelse(bp, 0);
426 if (bpp != NULL) {
427 if (flags & B_CLRBUF) {
428 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
429 if (error) {
430 brelse(nbp, 0);
431 goto fail;
432 }
433 } else {
434 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
435 nbp->b_blkno = fsbtodb(fs, nb);
436 clrbuf(nbp);
437 }
438 *bpp = nbp;
439 }
440 return (0);
441
442 fail:
443 /*
444 * If we have failed part way through block allocation, we
445 * have to deallocate any indirect blocks that we have allocated.
446 */
447
448 if (unwindidx >= 0) {
449
450 /*
451 * First write out any buffers we've created to resolve their
452 * softdeps. This must be done in reverse order of creation
453 * so that we resolve the dependencies in one pass.
454 * Write the cylinder group buffers for these buffers too.
455 */
456
457 for (i = num; i >= unwindidx; i--) {
458 if (i == 0) {
459 break;
460 }
461 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
462 0);
463 if (bp->b_oflags & BO_DELWRI) {
464 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
465 dbtofsb(fs, bp->b_blkno))));
466 bwrite(bp);
467 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
468 0, 0);
469 if (bp->b_oflags & BO_DELWRI) {
470 bwrite(bp);
471 } else {
472 brelse(bp, BC_INVAL);
473 }
474 } else {
475 brelse(bp, BC_INVAL);
476 }
477 }
478
479 /* Now flush all dependencies to disk. */
480 #ifdef notyet
481 /* XXX pages locked */
482 (void)softdep_sync_metadata(vp);
483 #endif
484
485 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
486 ip->i_flag |= IN_CHANGE | IN_UPDATE;
487 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
488 }
489
490 /*
491 * Now that any dependencies that we created have been
492 * resolved, we can undo the partial allocation.
493 */
494
495 if (unwindidx == 0) {
496 *allocib = 0;
497 ip->i_flag |= IN_CHANGE | IN_UPDATE;
498 if (DOINGSOFTDEP(vp))
499 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
500 } else {
501 int r;
502
503 r = bread(vp, indirs[unwindidx].in_lbn,
504 (int)fs->fs_bsize, NOCRED, &bp);
505 if (r) {
506 panic("Could not unwind indirect block, error %d", r);
507 brelse(bp, 0);
508 } else {
509 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
510 bap[indirs[unwindidx].in_off] = 0;
511 bwrite(bp);
512 }
513 }
514 for (i = unwindidx + 1; i <= num; i++) {
515 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
516 0);
517 brelse(bp, BC_INVAL);
518 }
519 }
520 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
521 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
522 deallocated += fs->fs_bsize;
523 }
524 if (deallocated) {
525 #ifdef QUOTA
526 /*
527 * Restore user's disk quota because allocation failed.
528 */
529 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
530 #endif
531 ip->i_ffs1_blocks -= btodb(deallocated);
532 ip->i_flag |= IN_CHANGE | IN_UPDATE;
533 }
534 /*
535 * Flush all dependencies again so that the soft updates code
536 * doesn't find any untracked changes.
537 */
538 #ifdef notyet
539 /* XXX pages locked */
540 (void)softdep_sync_metadata(vp);
541 #endif
542 return (error);
543 }
544
545 static int
546 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
547 int flags, struct buf **bpp)
548 {
549 daddr_t lbn, lastlbn;
550 struct buf *bp, *nbp;
551 struct inode *ip = VTOI(vp);
552 struct fs *fs = ip->i_fs;
553 struct ufsmount *ump = ip->i_ump;
554 struct indir indirs[NIADDR + 2];
555 daddr_t newb, pref, nb;
556 int64_t *bap;
557 int deallocated, osize, nsize, num, i, error;
558 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
559 int64_t *allocib;
560 int unwindidx = -1;
561 #ifdef FFS_EI
562 const int needswap = UFS_FSNEEDSWAP(fs);
563 #endif
564 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
565
566 lbn = lblkno(fs, off);
567 size = blkoff(fs, off) + size;
568 if (size > fs->fs_bsize)
569 panic("ffs_balloc: blk too big");
570 if (bpp != NULL) {
571 *bpp = NULL;
572 }
573 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
574
575 if (lbn < 0)
576 return (EFBIG);
577
578 #ifdef notyet
579 /*
580 * Check for allocating external data.
581 */
582 if (flags & IO_EXT) {
583 if (lbn >= NXADDR)
584 return (EFBIG);
585 /*
586 * If the next write will extend the data into a new block,
587 * and the data is currently composed of a fragment
588 * this fragment has to be extended to be a full block.
589 */
590 lastlbn = lblkno(fs, dp->di_extsize);
591 if (lastlbn < lbn) {
592 nb = lastlbn;
593 osize = sblksize(fs, dp->di_extsize, nb);
594 if (osize < fs->fs_bsize && osize > 0) {
595 mutex_enter(&ump->um_lock);
596 error = ffs_realloccg(ip, -1 - nb,
597 dp->di_extb[nb],
598 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
599 &dp->di_extb[0]), osize,
600 (int)fs->fs_bsize, cred, &bp);
601 if (error)
602 return (error);
603 if (DOINGSOFTDEP(vp))
604 softdep_setup_allocext(ip, nb,
605 dbtofsb(fs, bp->b_blkno),
606 dp->di_extb[nb],
607 fs->fs_bsize, osize, bp);
608 dp->di_extsize = smalllblktosize(fs, nb + 1);
609 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
610 bp->b_xflags |= BX_ALTDATA;
611 ip->i_flag |= IN_CHANGE | IN_UPDATE;
612 if (flags & IO_SYNC)
613 bwrite(bp);
614 else
615 bawrite(bp);
616 }
617 }
618 /*
619 * All blocks are direct blocks
620 */
621 if (flags & BA_METAONLY)
622 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
623 nb = dp->di_extb[lbn];
624 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
625 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
626 if (error) {
627 brelse(bp, 0);
628 return (error);
629 }
630 mutex_enter(&bp->b_interlock);
631 bp->b_blkno = fsbtodb(fs, nb);
632 bp->b_xflags |= BX_ALTDATA;
633 mutex_exit(&bp->b_interlock);
634 *bpp = bp;
635 return (0);
636 }
637 if (nb != 0) {
638 /*
639 * Consider need to reallocate a fragment.
640 */
641 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
642 nsize = fragroundup(fs, size);
643 if (nsize <= osize) {
644 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
645 if (error) {
646 brelse(bp, 0);
647 return (error);
648 }
649 mutex_enter(&bp->b_interlock);
650 bp->b_blkno = fsbtodb(fs, nb);
651 bp->b_xflags |= BX_ALTDATA;
652 mutex_exit(&bp->b_interlock);
653 } else {
654 mutex_enter(&ump->um_lock);
655 error = ffs_realloccg(ip, -1 - lbn,
656 dp->di_extb[lbn],
657 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
658 &dp->di_extb[0]), osize, nsize, cred, &bp);
659 if (error)
660 return (error);
661 bp->b_xflags |= BX_ALTDATA;
662 if (DOINGSOFTDEP(vp))
663 softdep_setup_allocext(ip, lbn,
664 dbtofsb(fs, bp->b_blkno), nb,
665 nsize, osize, bp);
666 }
667 } else {
668 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
669 nsize = fragroundup(fs, size);
670 else
671 nsize = fs->fs_bsize;
672 mutex_enter(&ump->um_lock);
673 error = ffs_alloc(ip, lbn,
674 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
675 nsize, cred, &newb);
676 if (error)
677 return (error);
678 bp = getblk(vp, -1 - lbn, nsize, 0, 0);
679 bp->b_blkno = fsbtodb(fs, newb);
680 bp->b_xflags |= BX_ALTDATA;
681 if (flags & BA_CLRBUF)
682 vfs_bio_clrbuf(bp);
683 if (DOINGSOFTDEP(vp))
684 softdep_setup_allocext(ip, lbn, newb, 0,
685 nsize, 0, bp);
686 }
687 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
688 ip->i_flag |= IN_CHANGE | IN_UPDATE;
689 *bpp = bp;
690 return (0);
691 }
692 #endif
693 /*
694 * If the next write will extend the file into a new block,
695 * and the file is currently composed of a fragment
696 * this fragment has to be extended to be a full block.
697 */
698
699 lastlbn = lblkno(fs, ip->i_size);
700 if (lastlbn < NDADDR && lastlbn < lbn) {
701 nb = lastlbn;
702 osize = blksize(fs, ip, nb);
703 if (osize < fs->fs_bsize && osize > 0) {
704 mutex_enter(&ump->um_lock);
705 error = ffs_realloccg(ip, nb,
706 ffs_blkpref_ufs2(ip, lastlbn, nb,
707 &ip->i_ffs2_db[0]),
708 osize, (int)fs->fs_bsize, cred, bpp, &newb);
709 if (error)
710 return (error);
711 if (DOINGSOFTDEP(vp))
712 softdep_setup_allocdirect(ip, nb, newb,
713 ufs_rw64(ip->i_ffs2_db[nb], needswap),
714 fs->fs_bsize, osize, bpp ? *bpp : NULL);
715 ip->i_size = lblktosize(fs, nb + 1);
716 ip->i_ffs2_size = ip->i_size;
717 uvm_vnp_setsize(vp, ip->i_size);
718 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
719 ip->i_flag |= IN_CHANGE | IN_UPDATE;
720 if (bpp) {
721 if (flags & B_SYNC)
722 bwrite(*bpp);
723 else
724 bawrite(*bpp);
725 }
726 }
727 }
728
729 /*
730 * The first NDADDR blocks are direct blocks
731 */
732
733 if (lbn < NDADDR) {
734 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
735 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
736
737 /*
738 * The block is an already-allocated direct block
739 * and the file already extends past this block,
740 * thus this must be a whole block.
741 * Just read the block (if requested).
742 */
743
744 if (bpp != NULL) {
745 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
746 bpp);
747 if (error) {
748 brelse(*bpp, 0);
749 return (error);
750 }
751 }
752 return (0);
753 }
754 if (nb != 0) {
755
756 /*
757 * Consider need to reallocate a fragment.
758 */
759
760 osize = fragroundup(fs, blkoff(fs, ip->i_size));
761 nsize = fragroundup(fs, size);
762 if (nsize <= osize) {
763
764 /*
765 * The existing block is already
766 * at least as big as we want.
767 * Just read the block (if requested).
768 */
769
770 if (bpp != NULL) {
771 error = bread(vp, lbn, osize, NOCRED,
772 bpp);
773 if (error) {
774 brelse(*bpp, 0);
775 return (error);
776 }
777 }
778 return 0;
779 } else {
780
781 /*
782 * The existing block is smaller than we want,
783 * grow it.
784 */
785 mutex_enter(&ump->um_lock);
786 error = ffs_realloccg(ip, lbn,
787 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
788 &ip->i_ffs2_db[0]), osize, nsize, cred,
789 bpp, &newb);
790 if (error)
791 return (error);
792 if (DOINGSOFTDEP(vp))
793 softdep_setup_allocdirect(ip, lbn,
794 newb, nb, nsize, osize,
795 bpp ? *bpp : NULL);
796 }
797 } else {
798
799 /*
800 * the block was not previously allocated,
801 * allocate a new block or fragment.
802 */
803
804 if (ip->i_size < lblktosize(fs, lbn + 1))
805 nsize = fragroundup(fs, size);
806 else
807 nsize = fs->fs_bsize;
808 mutex_enter(&ump->um_lock);
809 error = ffs_alloc(ip, lbn,
810 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
811 &ip->i_ffs2_db[0]), nsize, cred, &newb);
812 if (error)
813 return (error);
814 if (bpp != NULL) {
815 bp = getblk(vp, lbn, nsize, 0, 0);
816 bp->b_blkno = fsbtodb(fs, newb);
817 if (flags & B_CLRBUF)
818 clrbuf(bp);
819 *bpp = bp;
820 }
821 if (DOINGSOFTDEP(vp)) {
822 softdep_setup_allocdirect(ip, lbn, newb, 0,
823 nsize, 0, bpp ? *bpp : NULL);
824 }
825 }
826 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
827 ip->i_flag |= IN_CHANGE | IN_UPDATE;
828 return (0);
829 }
830
831 /*
832 * Determine the number of levels of indirection.
833 */
834
835 pref = 0;
836 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
837 return (error);
838
839 /*
840 * Fetch the first indirect block allocating if necessary.
841 */
842
843 --num;
844 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
845 allocib = NULL;
846 allocblk = allociblk;
847 if (nb == 0) {
848 mutex_enter(&ump->um_lock);
849 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
850 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
851 &newb);
852 if (error)
853 goto fail;
854 nb = newb;
855 *allocblk++ = nb;
856 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
857 bp->b_blkno = fsbtodb(fs, nb);
858 clrbuf(bp);
859 if (DOINGSOFTDEP(vp)) {
860 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
861 newb, 0, fs->fs_bsize, 0, bp);
862 bdwrite(bp);
863 } else {
864
865 /*
866 * Write synchronously so that indirect blocks
867 * never point at garbage.
868 */
869
870 if ((error = bwrite(bp)) != 0)
871 goto fail;
872 }
873 unwindidx = 0;
874 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
875 *allocib = ufs_rw64(nb, needswap);
876 ip->i_flag |= IN_CHANGE | IN_UPDATE;
877 }
878
879 /*
880 * Fetch through the indirect blocks, allocating as necessary.
881 */
882
883 for (i = 1;;) {
884 error = bread(vp,
885 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
886 if (error) {
887 brelse(bp, 0);
888 goto fail;
889 }
890 bap = (int64_t *)bp->b_data;
891 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
892 if (i == num)
893 break;
894 i++;
895 if (nb != 0) {
896 brelse(bp, 0);
897 continue;
898 }
899 mutex_enter(&ump->um_lock);
900 if (pref == 0)
901 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
902 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
903 &newb);
904 if (error) {
905 brelse(bp, 0);
906 goto fail;
907 }
908 nb = newb;
909 *allocblk++ = nb;
910 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
911 nbp->b_blkno = fsbtodb(fs, nb);
912 clrbuf(nbp);
913 if (DOINGSOFTDEP(vp)) {
914 softdep_setup_allocindir_meta(nbp, ip, bp,
915 indirs[i - 1].in_off, nb);
916 bdwrite(nbp);
917 } else {
918
919 /*
920 * Write synchronously so that indirect blocks
921 * never point at garbage.
922 */
923
924 if ((error = bwrite(nbp)) != 0) {
925 brelse(bp, 0);
926 goto fail;
927 }
928 }
929 if (unwindidx < 0)
930 unwindidx = i - 1;
931 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
932
933 /*
934 * If required, write synchronously, otherwise use
935 * delayed write.
936 */
937
938 if (flags & B_SYNC) {
939 bwrite(bp);
940 } else {
941 bdwrite(bp);
942 }
943 }
944
945 if (flags & B_METAONLY) {
946 KASSERT(bpp != NULL);
947 *bpp = bp;
948 return (0);
949 }
950
951 /*
952 * Get the data block, allocating if necessary.
953 */
954
955 if (nb == 0) {
956 mutex_enter(&ump->um_lock);
957 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
958 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
959 &newb);
960 if (error) {
961 brelse(bp, 0);
962 goto fail;
963 }
964 nb = newb;
965 *allocblk++ = nb;
966 if (bpp != NULL) {
967 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
968 nbp->b_blkno = fsbtodb(fs, nb);
969 if (flags & B_CLRBUF)
970 clrbuf(nbp);
971 *bpp = nbp;
972 }
973 if (DOINGSOFTDEP(vp))
974 softdep_setup_allocindir_page(ip, lbn, bp,
975 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
976 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
977 if (allocib == NULL && unwindidx < 0) {
978 unwindidx = i - 1;
979 }
980
981 /*
982 * If required, write synchronously, otherwise use
983 * delayed write.
984 */
985
986 if (flags & B_SYNC) {
987 bwrite(bp);
988 } else {
989 bdwrite(bp);
990 }
991 return (0);
992 }
993 brelse(bp, 0);
994 if (bpp != NULL) {
995 if (flags & B_CLRBUF) {
996 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
997 if (error) {
998 brelse(nbp, 0);
999 goto fail;
1000 }
1001 } else {
1002 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
1003 nbp->b_blkno = fsbtodb(fs, nb);
1004 clrbuf(nbp);
1005 }
1006 *bpp = nbp;
1007 }
1008 return (0);
1009
1010 fail:
1011 /*
1012 * If we have failed part way through block allocation, we
1013 * have to deallocate any indirect blocks that we have allocated.
1014 */
1015
1016 if (unwindidx >= 0) {
1017
1018 /*
1019 * First write out any buffers we've created to resolve their
1020 * softdeps. This must be done in reverse order of creation
1021 * so that we resolve the dependencies in one pass.
1022 * Write the cylinder group buffers for these buffers too.
1023 */
1024
1025 for (i = num; i >= unwindidx; i--) {
1026 if (i == 0) {
1027 break;
1028 }
1029 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1030 0);
1031 if (bp->b_oflags & BO_DELWRI) {
1032 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
1033 dbtofsb(fs, bp->b_blkno))));
1034 bwrite(bp);
1035 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
1036 0, 0);
1037 if (bp->b_oflags & BO_DELWRI) {
1038 bwrite(bp);
1039 } else {
1040 brelse(bp, BC_INVAL);
1041 }
1042 } else {
1043 brelse(bp, BC_INVAL);
1044 }
1045 }
1046
1047 /* Now flush the dependencies to disk. */
1048 #ifdef notyet
1049 /* XXX pages locked */
1050 (void)softdep_sync_metadata(vp);
1051 #endif
1052
1053 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
1054 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1055 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1056 }
1057
1058 /*
1059 * Now that any dependencies that we created have been
1060 * resolved, we can undo the partial allocation.
1061 */
1062
1063 if (unwindidx == 0) {
1064 *allocib = 0;
1065 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1066 if (DOINGSOFTDEP(vp))
1067 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1068 } else {
1069 int r;
1070
1071 r = bread(vp, indirs[unwindidx].in_lbn,
1072 (int)fs->fs_bsize, NOCRED, &bp);
1073 if (r) {
1074 panic("Could not unwind indirect block, error %d", r);
1075 brelse(bp, 0);
1076 } else {
1077 bap = (int64_t *)bp->b_data;
1078 bap[indirs[unwindidx].in_off] = 0;
1079 bwrite(bp);
1080 }
1081 }
1082 for (i = unwindidx + 1; i <= num; i++) {
1083 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1084 0);
1085 brelse(bp, BC_INVAL);
1086 }
1087 }
1088 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1089 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1090 deallocated += fs->fs_bsize;
1091 }
1092 if (deallocated) {
1093 #ifdef QUOTA
1094 /*
1095 * Restore user's disk quota because allocation failed.
1096 */
1097 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1098 #endif
1099 ip->i_ffs2_blocks -= btodb(deallocated);
1100 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1101 }
1102
1103 /*
1104 * Flush all dependencies again so that the soft updates code
1105 * doesn't find any untracked changes.
1106 */
1107 #ifdef notyet
1108 /* XXX pages locked */
1109 (void)softdep_sync_metadata(vp);
1110 #endif
1111 return (error);
1112 }
1113