ffs_balloc.c revision 1.47 1 /* $NetBSD: ffs_balloc.c,v 1.47 2007/12/08 15:21:19 ad Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.47 2007/12/08 15:21:19 ad Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57
58 #include <ufs/ufs/quota.h>
59 #include <ufs/ufs/ufsmount.h>
60 #include <ufs/ufs/inode.h>
61 #include <ufs/ufs/ufs_extern.h>
62 #include <ufs/ufs/ufs_bswap.h>
63
64 #include <ufs/ffs/fs.h>
65 #include <ufs/ffs/ffs_extern.h>
66
67 #include <uvm/uvm.h>
68
69 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
70 struct buf **);
71 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73
74 /*
75 * Balloc defines the structure of file system storage
76 * by allocating the physical blocks on a device given
77 * the inode and the logical block number in a file.
78 */
79
80 int
81 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
82 struct buf **bpp)
83 {
84
85 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
86 return ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
87 else
88 return ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
89 }
90
/*
 * ffs_balloc_ufs1: UFS1 implementation of ffs_balloc().
 *
 * Makes sure the file block that contains byte offset `off' has backing
 * store, allocating a direct block/fragment, indirect blocks and the
 * data block as needed.  Block pointers are handled as 32-bit values
 * (int32_t, ufs_rw32) throughout this variant.
 *
 *	vp	vnode of the file; the inode, fs and ufsmount come from it.
 *	off	byte offset in the file; selects the logical block (lbn).
 *	size	byte count starting at off.  It is rebased to an end
 *		offset inside the block and the function panics if that
 *		exceeds fs_bsize.
 *	cred	credentials handed to the allocator (and chkdq on unwind).
 *	flags	B_SYNC, B_CLRBUF and B_METAONLY are examined here.
 *	bpp	if non-NULL, set to NULL early on and, on success, set to
 *		the buffer for the block (with B_METAONLY: the buffer of
 *		the last indirect block instead).
 *
 * Returns 0 on success, EFBIG if the logical block number is negative,
 * otherwise the error returned by the call that failed.  When a failure
 * occurs on the indirect-block path, the blocks allocated during this
 * call are released again under the `fail' label.
 *
 * NOTE(review): ump->um_lock is entered before every ffs_alloc() and
 * ffs_realloccg() call and is never exited in this function; presumably
 * the callee releases it on all paths -- confirm against ffs_alloc.c.
 */
91 static int
92 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
93 int flags, struct buf **bpp)
94 {
95 daddr_t lbn, lastlbn;
96 struct buf *bp, *nbp;
97 struct inode *ip = VTOI(vp);
98 struct fs *fs = ip->i_fs;
99 struct ufsmount *ump = ip->i_ump;
100 struct indir indirs[NIADDR + 2];
101 daddr_t newb, pref, nb;
102 int32_t *bap; /* XXX ondisk32 */
103 int deallocated, osize, nsize, num, i, error;
104 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
105 int32_t *allocib;
106 int unwindidx = -1;
107 #ifdef FFS_EI
108 const int needswap = UFS_FSNEEDSWAP(fs);
109 #endif
110 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
111
/* From here on `size' is the end offset of the request within its block. */
112 lbn = lblkno(fs, off);
113 size = blkoff(fs, off) + size;
114 if (size > fs->fs_bsize)
115 panic("ffs_balloc: blk too big");
116 if (bpp != NULL) {
117 *bpp = NULL;
118 }
119 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
120
121 if (lbn < 0)
122 return (EFBIG);
123
124 /*
125 * If the next write will extend the file into a new block,
126 * and the file is currently composed of a fragment
127 * this fragment has to be extended to be a full block.
128 */
129
130 lastlbn = lblkno(fs, ip->i_size);
131 if (lastlbn < NDADDR && lastlbn < lbn) {
132 nb = lastlbn;
133 osize = blksize(fs, ip, nb);
134 if (osize < fs->fs_bsize && osize > 0) {
135 mutex_enter(&ump->um_lock);
136 error = ffs_realloccg(ip, nb,
137 ffs_blkpref_ufs1(ip, lastlbn, nb,
138 &ip->i_ffs1_db[0]),
139 osize, (int)fs->fs_bsize, cred, bpp, &newb);
140 if (error)
141 return (error);
142 if (DOINGSOFTDEP(vp))
143 softdep_setup_allocdirect(ip, nb, newb,
144 ufs_rw32(ip->i_ffs1_db[nb], needswap),
145 fs->fs_bsize, osize, bpp ? *bpp : NULL);
/* The file now ends exactly at the end of block nb. */
146 ip->i_size = lblktosize(fs, nb + 1);
147 ip->i_ffs1_size = ip->i_size;
148 uvm_vnp_setsize(vp, ip->i_ffs1_size);
149 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
150 ip->i_flag |= IN_CHANGE | IN_UPDATE;
/* Here bpp is only a temporary for the grown block; write it out now. */
151 if (bpp && *bpp) {
152 if (flags & B_SYNC)
153 bwrite(*bpp);
154 else
155 bawrite(*bpp);
156 }
157 }
158 }
159
160 /*
161 * The first NDADDR blocks are direct blocks
162 */
163
164 if (lbn < NDADDR) {
165 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
166 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
167
168 /*
169 * The block is an already-allocated direct block
170 * and the file already extends past this block,
171 * thus this must be a whole block.
172 * Just read the block (if requested).
173 */
174
175 if (bpp != NULL) {
176 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
177 bpp);
178 if (error) {
/* NOTE(review): *bpp still refers to the released buffer on this return. */
179 brelse(*bpp, 0);
180 return (error);
181 }
182 }
183 return (0);
184 }
185 if (nb != 0) {
186
187 /*
188 * Consider need to reallocate a fragment.
189 */
190
191 osize = fragroundup(fs, blkoff(fs, ip->i_size));
192 nsize = fragroundup(fs, size);
193 if (nsize <= osize) {
194
195 /*
196 * The existing block is already
197 * at least as big as we want.
198 * Just read the block (if requested).
199 */
200
201 if (bpp != NULL) {
202 error = bread(vp, lbn, osize, NOCRED,
203 bpp);
204 if (error) {
205 brelse(*bpp, 0);
206 return (error);
207 }
208 }
209 return 0;
210 } else {
211
212 /*
213 * The existing block is smaller than we want,
214 * grow it.
215 */
216 mutex_enter(&ump->um_lock);
217 error = ffs_realloccg(ip, lbn,
218 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
219 &ip->i_ffs1_db[0]), osize, nsize, cred,
220 bpp, &newb);
221 if (error)
222 return (error);
223 if (DOINGSOFTDEP(vp))
224 softdep_setup_allocdirect(ip, lbn,
225 newb, nb, nsize, osize,
226 bpp ? *bpp : NULL);
227 }
228 } else {
229
230 /*
231 * the block was not previously allocated,
232 * allocate a new block or fragment.
233 */
234
235 if (ip->i_size < lblktosize(fs, lbn + 1))
236 nsize = fragroundup(fs, size);
237 else
238 nsize = fs->fs_bsize;
239 mutex_enter(&ump->um_lock);
240 error = ffs_alloc(ip, lbn,
241 ffs_blkpref_ufs1(ip, lbn, (int)lbn,
242 &ip->i_ffs1_db[0]),
243 nsize, cred, &newb);
244 if (error)
245 return (error);
246 if (bpp != NULL) {
247 bp = getblk(vp, lbn, nsize, 0, 0);
248 bp->b_blkno = fsbtodb(fs, newb);
249 if (flags & B_CLRBUF)
250 clrbuf(bp);
251 *bpp = bp;
252 }
253 if (DOINGSOFTDEP(vp)) {
254 softdep_setup_allocdirect(ip, lbn, newb, 0,
255 nsize, 0, bpp ? *bpp : NULL);
256 }
257 }
/* Reached by the grow and new-allocation cases: record the block pointer. */
258 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
259 ip->i_flag |= IN_CHANGE | IN_UPDATE;
260 return (0);
261 }
262
263 /*
264 * Determine the number of levels of indirection.
265 */
266
267 pref = 0;
268 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
269 return (error);
270
271 /*
272 * Fetch the first indirect block allocating if necessary.
273 */
274
/*
 * After the decrement, indirs[num] is the entry whose in_off locates
 * the data block pointer (see its use further down).
 */
275 --num;
276 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
277 allocib = NULL;
278 allocblk = allociblk;
279 if (nb == 0) {
280 mutex_enter(&ump->um_lock);
281 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
282 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
283 &newb);
284 if (error)
285 goto fail;
286 nb = newb;
/* Record the new block so that the fail path can free it. */
287 *allocblk++ = nb;
288 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
289 bp->b_blkno = fsbtodb(fs, nb);
290 clrbuf(bp);
291 if (DOINGSOFTDEP(vp)) {
292 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
293 newb, 0, fs->fs_bsize, 0, bp);
294 bdwrite(bp);
295 } else {
296
297 /*
298 * Write synchronously so that indirect blocks
299 * never point at garbage.
300 */
301
302 if ((error = bwrite(bp)) != 0)
303 goto fail;
304 }
/* unwindidx == 0 tells the fail path to clear the inode pointer *allocib. */
305 unwindidx = 0;
306 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
307 *allocib = ufs_rw32(nb, needswap);
308 ip->i_flag |= IN_CHANGE | IN_UPDATE;
309 }
310
311 /*
312 * Fetch through the indirect blocks, allocating as necessary.
313 */
314
/*
 * Each pass reads indirect level i into bp and fetches the pointer at
 * indirs[i].in_off into nb.  The loop is left with bp still held when
 * i == num; nb is then the data block pointer.
 */
315 for (i = 1;;) {
316 error = bread(vp,
317 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
318 if (error) {
319 brelse(bp, 0);
320 goto fail;
321 }
322 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
323 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
324 if (i == num)
325 break;
/*
 * i is advanced before the allocation code below, so indirs[i] names
 * the next level and indirs[i - 1] the level currently held in bp.
 */
326 i++;
327 if (nb != 0) {
328 brelse(bp, 0);
329 continue;
330 }
331 mutex_enter(&ump->um_lock);
332 if (pref == 0)
333 pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
334 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
335 &newb);
336 if (error) {
337 brelse(bp, 0);
338 goto fail;
339 }
340 nb = newb;
341 *allocblk++ = nb;
342 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
343 nbp->b_blkno = fsbtodb(fs, nb);
344 clrbuf(nbp);
345 if (DOINGSOFTDEP(vp)) {
346 softdep_setup_allocindir_meta(nbp, ip, bp,
347 indirs[i - 1].in_off, nb);
348 bdwrite(nbp);
349 } else {
350
351 /*
352 * Write synchronously so that indirect blocks
353 * never point at garbage.
354 */
355
356 if ((error = bwrite(nbp)) != 0) {
357 brelse(bp, 0);
358 goto fail;
359 }
360 }
/* Remember the first level whose pointer was changed during this call. */
361 if (unwindidx < 0)
362 unwindidx = i - 1;
363 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
364
365 /*
366 * If required, write synchronously, otherwise use
367 * delayed write.
368 */
369
370 if (flags & B_SYNC) {
371 bwrite(bp);
372 } else {
373 bdwrite(bp);
374 }
375 }
376
/* With B_METAONLY the caller receives the last indirect block, not the data. */
377 if (flags & B_METAONLY) {
378 KASSERT(bpp != NULL);
379 *bpp = bp;
380 return (0);
381 }
382
383 /*
384 * Get the data block, allocating if necessary.
385 */
386
387 if (nb == 0) {
388 mutex_enter(&ump->um_lock);
389 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
390 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
391 &newb);
392 if (error) {
393 brelse(bp, 0);
394 goto fail;
395 }
396 nb = newb;
397 *allocblk++ = nb;
398 if (bpp != NULL) {
399 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
400 nbp->b_blkno = fsbtodb(fs, nb);
401 if (flags & B_CLRBUF)
402 clrbuf(nbp);
403 *bpp = nbp;
404 }
405 if (DOINGSOFTDEP(vp))
406 softdep_setup_allocindir_page(ip, lbn, bp,
407 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
408 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
/* NOTE(review): unwindidx is not read again before the return below. */
409 if (allocib == NULL && unwindidx < 0) {
410 unwindidx = i - 1;
411 }
412
413 /*
414 * If required, write synchronously, otherwise use
415 * delayed write.
416 */
417
418 if (flags & B_SYNC) {
419 bwrite(bp);
420 } else {
421 bdwrite(bp);
422 }
423 return (0);
424 }
/* The data block already exists: drop the indirect block and fetch it. */
425 brelse(bp, 0);
426 if (bpp != NULL) {
427 if (flags & B_CLRBUF) {
428 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
429 if (error) {
430 brelse(nbp, 0);
431 goto fail;
432 }
433 } else {
434 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
435 nbp->b_blkno = fsbtodb(fs, nb);
436 clrbuf(nbp);
437 }
438 *bpp = nbp;
439 }
440 return (0);
441
442 fail:
443 /*
444 * If we have failed part way through block allocation, we
445 * have to deallocate any indirect blocks that we have allocated.
446 */
447
448 if (unwindidx >= 0) {
449
450 /*
451 * First write out any buffers we've created to resolve their
452 * softdeps. This must be done in reverse order of creation
453 * so that we resolve the dependencies in one pass.
454 * Write the cylinder group buffers for these buffers too.
455 */
456
457 for (i = num; i >= unwindidx; i--) {
458 if (i == 0) {
459 break;
460 }
461 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
462 0);
463 if (bp->b_flags & B_DELWRI) {
/* nb is reused here for the device address of the cylinder group block. */
464 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
465 dbtofsb(fs, bp->b_blkno))));
466 bwrite(bp);
467 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
468 0, 0);
469 if (bp->b_flags & B_DELWRI) {
470 bwrite(bp);
471 } else {
472 brelse(bp, BC_INVAL);
473 }
474 } else {
475 brelse(bp, BC_INVAL);
476 }
477 }
478
479 /* Now flush all dependencies to disk. */
480 #ifdef notyet
481 /* XXX pages locked */
482 (void)softdep_sync_metadata(vp);
483 #endif
484
485 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
486 ip->i_flag |= IN_CHANGE | IN_UPDATE;
487 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
488 }
489
490 /*
491 * Now that any dependencies that we created have been
492 * resolved, we can undo the partial allocation.
493 */
494
495 if (unwindidx == 0) {
496 *allocib = 0;
497 ip->i_flag |= IN_CHANGE | IN_UPDATE;
498 if (DOINGSOFTDEP(vp))
499 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
500 } else {
501 int r;
502
503 r = bread(vp, indirs[unwindidx].in_lbn,
504 (int)fs->fs_bsize, NOCRED, &bp);
505 if (r) {
/*
 * NOTE(review): the brelse() below follows panic(); it is dead code
 * if panic() never returns -- confirm.
 */
506 panic("Could not unwind indirect block, error %d", r);
507 brelse(bp, 0);
508 } else {
509 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
510 bap[indirs[unwindidx].in_off] = 0;
511 bwrite(bp);
512 }
513 }
/* Invalidate the cached buffers of the levels below the unwind point. */
514 for (i = unwindidx + 1; i <= num; i++) {
515 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
516 0);
517 brelse(bp, BC_INVAL);
518 }
519 }
/* Free every block recorded in allociblk[] and total the bytes released. */
520 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
521 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
522 deallocated += fs->fs_bsize;
523 }
524 if (deallocated) {
525 #ifdef QUOTA
526 /*
527 * Restore user's disk quota because allocation failed.
528 */
529 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
530 #endif
531 ip->i_ffs1_blocks -= btodb(deallocated);
532 ip->i_flag |= IN_CHANGE | IN_UPDATE;
533 }
534 /*
535 * Flush all dependencies again so that the soft updates code
536 * doesn't find any untracked changes.
537 */
538 #ifdef notyet
539 /* XXX pages locked */
540 (void)softdep_sync_metadata(vp);
541 #endif
542 return (error);
543 }
544
/*
 * ffs_balloc_ufs2: UFS2 implementation of ffs_balloc().
 *
 * Same structure as the UFS1 variant in this file, but block pointers
 * are handled as 64-bit values (int64_t, ufs_rw64) and the i_ffs2_*
 * inode fields are used.  Makes sure the file block that contains byte
 * offset `off' has backing store, allocating a direct block/fragment,
 * indirect blocks and the data block as needed.
 *
 *	vp	vnode of the file; the inode, fs and ufsmount come from it.
 *	off	byte offset in the file; selects the logical block (lbn).
 *	size	byte count starting at off.  It is rebased to an end
 *		offset inside the block and the function panics if that
 *		exceeds fs_bsize.
 *	cred	credentials handed to the allocator (and chkdq on unwind).
 *	flags	B_SYNC, B_CLRBUF and B_METAONLY are examined in the code
 *		that is compiled in.
 *	bpp	if non-NULL, set to NULL early on and, on success, set to
 *		the buffer for the block (with B_METAONLY: the buffer of
 *		the last indirect block instead).
 *
 * Returns 0 on success, EFBIG if the logical block number is negative,
 * otherwise the error returned by the call that failed.  When a failure
 * occurs on the indirect-block path, the blocks allocated during this
 * call are released again under the `fail' label.
 *
 * NOTE(review): ump->um_lock is entered before every ffs_alloc() and
 * ffs_realloccg() call and is never exited in this function; presumably
 * the callee releases it on all paths -- confirm against ffs_alloc.c.
 */
545 static int
546 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
547 int flags, struct buf **bpp)
548 {
549 daddr_t lbn, lastlbn;
550 struct buf *bp, *nbp;
551 struct inode *ip = VTOI(vp);
552 struct fs *fs = ip->i_fs;
553 struct ufsmount *ump = ip->i_ump;
554 struct indir indirs[NIADDR + 2];
555 daddr_t newb, pref, nb;
556 int64_t *bap;
557 int deallocated, osize, nsize, num, i, error;
558 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
559 int64_t *allocib;
560 int unwindidx = -1;
561 #ifdef FFS_EI
562 const int needswap = UFS_FSNEEDSWAP(fs);
563 #endif
564 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
565
/* From here on `size' is the end offset of the request within its block. */
566 lbn = lblkno(fs, off);
567 size = blkoff(fs, off) + size;
568 if (size > fs->fs_bsize)
569 panic("ffs_balloc: blk too big");
570 if (bpp != NULL) {
571 *bpp = NULL;
572 }
573 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
574
575 if (lbn < 0)
576 return (EFBIG);
577
/*
 * The section below is compiled only when `notyet' is defined.
 * NOTE(review): it refers to `dp', for which no declaration is visible
 * in this function, and passes ffs_realloccg() a different argument
 * list than the compiled-in calls do.
 */
578 #ifdef notyet
579 /*
580 * Check for allocating external data.
581 */
582 if (flags & IO_EXT) {
583 if (lbn >= NXADDR)
584 return (EFBIG);
585 /*
586 * If the next write will extend the data into a new block,
587 * and the data is currently composed of a fragment
588 * this fragment has to be extended to be a full block.
589 */
590 lastlbn = lblkno(fs, dp->di_extsize);
591 if (lastlbn < lbn) {
592 nb = lastlbn;
593 osize = sblksize(fs, dp->di_extsize, nb);
594 if (osize < fs->fs_bsize && osize > 0) {
595 mutex_enter(&ump->um_lock);
596 error = ffs_realloccg(ip, -1 - nb,
597 dp->di_extb[nb],
598 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
599 &dp->di_extb[0]), osize,
600 (int)fs->fs_bsize, cred, &bp);
601 if (error)
602 return (error);
603 if (DOINGSOFTDEP(vp))
604 softdep_setup_allocext(ip, nb,
605 dbtofsb(fs, bp->b_blkno),
606 dp->di_extb[nb],
607 fs->fs_bsize, osize, bp);
608 dp->di_extsize = smalllblktosize(fs, nb + 1);
609 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
610 bp->b_xflags |= BX_ALTDATA;
611 ip->i_flag |= IN_CHANGE | IN_UPDATE;
612 if (flags & IO_SYNC)
613 bwrite(bp);
614 else
615 bawrite(bp);
616 }
617 }
618 /*
619 * All blocks are direct blocks
620 */
621 if (flags & BA_METAONLY)
622 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
623 nb = dp->di_extb[lbn];
624 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
625 error = bread(vp, -1 - lbn, fs->fs_bsize, NOCRED, &bp);
626 if (error) {
627 brelse(bp, 0);
628 return (error);
629 }
630 bp->b_blkno = fsbtodb(fs, nb);
631 bp->b_xflags |= BX_ALTDATA;
632 *bpp = bp;
633 return (0);
634 }
635 if (nb != 0) {
636 /*
637 * Consider need to reallocate a fragment.
638 */
639 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
640 nsize = fragroundup(fs, size);
641 if (nsize <= osize) {
642 error = bread(vp, -1 - lbn, osize, NOCRED, &bp);
643 if (error) {
644 brelse(bp, 0);
645 return (error);
646 }
647 mutex_enter(&bp->b_interlock);
648 bp->b_blkno = fsbtodb(fs, nb);
649 bp->b_xflags |= BX_ALTDATA;
650 mutex_exit(&bp->b_interlock);
651 } else {
652 mutex_enter(&ump->um_lock);
653 error = ffs_realloccg(ip, -1 - lbn,
654 dp->di_extb[lbn],
655 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
656 &dp->di_extb[0]), osize, nsize, cred, &bp);
657 if (error)
658 return (error);
659 bp->b_xflags |= BX_ALTDATA;
660 if (DOINGSOFTDEP(vp))
661 softdep_setup_allocext(ip, lbn,
662 dbtofsb(fs, bp->b_blkno), nb,
663 nsize, osize, bp);
664 }
665 } else {
666 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
667 nsize = fragroundup(fs, size);
668 else
669 nsize = fs->fs_bsize;
670 mutex_enter(&ump->um_lock);
671 error = ffs_alloc(ip, lbn,
672 ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
673 nsize, cred, &newb);
674 if (error)
675 return (error);
676 bp = getblk(vp, -1 - lbn, nsize, 0, 0);
677 bp->b_blkno = fsbtodb(fs, newb);
678 bp->b_xflags |= BX_ALTDATA;
679 if (flags & BA_CLRBUF)
680 vfs_bio_clrbuf(bp);
681 if (DOINGSOFTDEP(vp))
682 softdep_setup_allocext(ip, lbn, newb, 0,
683 nsize, 0, bp);
684 }
685 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
686 ip->i_flag |= IN_CHANGE | IN_UPDATE;
687 *bpp = bp;
688 return (0);
689 }
690 #endif
691 /*
692 * If the next write will extend the file into a new block,
693 * and the file is currently composed of a fragment
694 * this fragment has to be extended to be a full block.
695 */
696
697 lastlbn = lblkno(fs, ip->i_size);
698 if (lastlbn < NDADDR && lastlbn < lbn) {
699 nb = lastlbn;
700 osize = blksize(fs, ip, nb);
701 if (osize < fs->fs_bsize && osize > 0) {
702 mutex_enter(&ump->um_lock);
703 error = ffs_realloccg(ip, nb,
704 ffs_blkpref_ufs2(ip, lastlbn, nb,
705 &ip->i_ffs2_db[0]),
706 osize, (int)fs->fs_bsize, cred, bpp, &newb);
707 if (error)
708 return (error);
709 if (DOINGSOFTDEP(vp))
710 softdep_setup_allocdirect(ip, nb, newb,
711 ufs_rw64(ip->i_ffs2_db[nb], needswap),
712 fs->fs_bsize, osize, bpp ? *bpp : NULL);
/* The file now ends exactly at the end of block nb. */
713 ip->i_size = lblktosize(fs, nb + 1);
714 ip->i_ffs2_size = ip->i_size;
715 uvm_vnp_setsize(vp, ip->i_size);
716 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
717 ip->i_flag |= IN_CHANGE | IN_UPDATE;
/*
 * NOTE(review): the UFS1 variant tests `bpp && *bpp' at this point;
 * here *bpp is passed to bwrite()/bawrite() without a NULL check.
 */
718 if (bpp) {
719 if (flags & B_SYNC)
720 bwrite(*bpp);
721 else
722 bawrite(*bpp);
723 }
724 }
725 }
726
727 /*
728 * The first NDADDR blocks are direct blocks
729 */
730
731 if (lbn < NDADDR) {
732 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
733 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
734
735 /*
736 * The block is an already-allocated direct block
737 * and the file already extends past this block,
738 * thus this must be a whole block.
739 * Just read the block (if requested).
740 */
741
742 if (bpp != NULL) {
743 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
744 bpp);
745 if (error) {
/* NOTE(review): *bpp still refers to the released buffer on this return. */
746 brelse(*bpp, 0);
747 return (error);
748 }
749 }
750 return (0);
751 }
752 if (nb != 0) {
753
754 /*
755 * Consider need to reallocate a fragment.
756 */
757
758 osize = fragroundup(fs, blkoff(fs, ip->i_size));
759 nsize = fragroundup(fs, size);
760 if (nsize <= osize) {
761
762 /*
763 * The existing block is already
764 * at least as big as we want.
765 * Just read the block (if requested).
766 */
767
768 if (bpp != NULL) {
769 error = bread(vp, lbn, osize, NOCRED,
770 bpp);
771 if (error) {
772 brelse(*bpp, 0);
773 return (error);
774 }
775 }
776 return 0;
777 } else {
778
779 /*
780 * The existing block is smaller than we want,
781 * grow it.
782 */
783 mutex_enter(&ump->um_lock);
784 error = ffs_realloccg(ip, lbn,
785 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
786 &ip->i_ffs2_db[0]), osize, nsize, cred,
787 bpp, &newb);
788 if (error)
789 return (error);
790 if (DOINGSOFTDEP(vp))
791 softdep_setup_allocdirect(ip, lbn,
792 newb, nb, nsize, osize,
793 bpp ? *bpp : NULL);
794 }
795 } else {
796
797 /*
798 * the block was not previously allocated,
799 * allocate a new block or fragment.
800 */
801
802 if (ip->i_size < lblktosize(fs, lbn + 1))
803 nsize = fragroundup(fs, size);
804 else
805 nsize = fs->fs_bsize;
806 mutex_enter(&ump->um_lock);
807 error = ffs_alloc(ip, lbn,
808 ffs_blkpref_ufs2(ip, lbn, (int)lbn,
809 &ip->i_ffs2_db[0]), nsize, cred, &newb);
810 if (error)
811 return (error);
812 if (bpp != NULL) {
813 bp = getblk(vp, lbn, nsize, 0, 0);
814 bp->b_blkno = fsbtodb(fs, newb);
815 if (flags & B_CLRBUF)
816 clrbuf(bp);
817 *bpp = bp;
818 }
819 if (DOINGSOFTDEP(vp)) {
820 softdep_setup_allocdirect(ip, lbn, newb, 0,
821 nsize, 0, bpp ? *bpp : NULL);
822 }
823 }
/* Reached by the grow and new-allocation cases: record the block pointer. */
824 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
825 ip->i_flag |= IN_CHANGE | IN_UPDATE;
826 return (0);
827 }
828
829 /*
830 * Determine the number of levels of indirection.
831 */
832
833 pref = 0;
834 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
835 return (error);
836
837 /*
838 * Fetch the first indirect block allocating if necessary.
839 */
840
/*
 * After the decrement, indirs[num] is the entry whose in_off locates
 * the data block pointer (see its use further down).
 */
841 --num;
842 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
843 allocib = NULL;
844 allocblk = allociblk;
845 if (nb == 0) {
846 mutex_enter(&ump->um_lock);
847 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
848 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
849 &newb);
850 if (error)
851 goto fail;
852 nb = newb;
/* Record the new block so that the fail path can free it. */
853 *allocblk++ = nb;
854 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
855 bp->b_blkno = fsbtodb(fs, nb);
856 clrbuf(bp);
857 if (DOINGSOFTDEP(vp)) {
858 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
859 newb, 0, fs->fs_bsize, 0, bp);
860 bdwrite(bp);
861 } else {
862
863 /*
864 * Write synchronously so that indirect blocks
865 * never point at garbage.
866 */
867
868 if ((error = bwrite(bp)) != 0)
869 goto fail;
870 }
/* unwindidx == 0 tells the fail path to clear the inode pointer *allocib. */
871 unwindidx = 0;
872 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
873 *allocib = ufs_rw64(nb, needswap);
874 ip->i_flag |= IN_CHANGE | IN_UPDATE;
875 }
876
877 /*
878 * Fetch through the indirect blocks, allocating as necessary.
879 */
880
/*
 * Each pass reads indirect level i into bp and fetches the pointer at
 * indirs[i].in_off into nb.  The loop is left with bp still held when
 * i == num; nb is then the data block pointer.
 */
881 for (i = 1;;) {
882 error = bread(vp,
883 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
884 if (error) {
885 brelse(bp, 0);
886 goto fail;
887 }
888 bap = (int64_t *)bp->b_data;
889 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
890 if (i == num)
891 break;
/*
 * i is advanced before the allocation code below, so indirs[i] names
 * the next level and indirs[i - 1] the level currently held in bp.
 */
892 i++;
893 if (nb != 0) {
894 brelse(bp, 0);
895 continue;
896 }
897 mutex_enter(&ump->um_lock);
898 if (pref == 0)
899 pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
900 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
901 &newb);
902 if (error) {
903 brelse(bp, 0);
904 goto fail;
905 }
906 nb = newb;
907 *allocblk++ = nb;
908 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
909 nbp->b_blkno = fsbtodb(fs, nb);
910 clrbuf(nbp);
911 if (DOINGSOFTDEP(vp)) {
912 softdep_setup_allocindir_meta(nbp, ip, bp,
913 indirs[i - 1].in_off, nb);
914 bdwrite(nbp);
915 } else {
916
917 /*
918 * Write synchronously so that indirect blocks
919 * never point at garbage.
920 */
921
922 if ((error = bwrite(nbp)) != 0) {
923 brelse(bp, 0);
924 goto fail;
925 }
926 }
/* Remember the first level whose pointer was changed during this call. */
927 if (unwindidx < 0)
928 unwindidx = i - 1;
929 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
930
931 /*
932 * If required, write synchronously, otherwise use
933 * delayed write.
934 */
935
936 if (flags & B_SYNC) {
937 bwrite(bp);
938 } else {
939 bdwrite(bp);
940 }
941 }
942
/* With B_METAONLY the caller receives the last indirect block, not the data. */
943 if (flags & B_METAONLY) {
944 KASSERT(bpp != NULL);
945 *bpp = bp;
946 return (0);
947 }
948
949 /*
950 * Get the data block, allocating if necessary.
951 */
952
953 if (nb == 0) {
954 mutex_enter(&ump->um_lock);
955 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
956 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
957 &newb);
958 if (error) {
959 brelse(bp, 0);
960 goto fail;
961 }
962 nb = newb;
963 *allocblk++ = nb;
964 if (bpp != NULL) {
965 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
966 nbp->b_blkno = fsbtodb(fs, nb);
967 if (flags & B_CLRBUF)
968 clrbuf(nbp);
969 *bpp = nbp;
970 }
971 if (DOINGSOFTDEP(vp))
972 softdep_setup_allocindir_page(ip, lbn, bp,
973 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
974 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
/* NOTE(review): unwindidx is not read again before the return below. */
975 if (allocib == NULL && unwindidx < 0) {
976 unwindidx = i - 1;
977 }
978
979 /*
980 * If required, write synchronously, otherwise use
981 * delayed write.
982 */
983
984 if (flags & B_SYNC) {
985 bwrite(bp);
986 } else {
987 bdwrite(bp);
988 }
989 return (0);
990 }
/* The data block already exists: drop the indirect block and fetch it. */
991 brelse(bp, 0);
992 if (bpp != NULL) {
993 if (flags & B_CLRBUF) {
994 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
995 if (error) {
996 brelse(nbp, 0);
997 goto fail;
998 }
999 } else {
1000 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
1001 nbp->b_blkno = fsbtodb(fs, nb);
1002 clrbuf(nbp);
1003 }
1004 *bpp = nbp;
1005 }
1006 return (0);
1007
1008 fail:
1009 /*
1010 * If we have failed part way through block allocation, we
1011 * have to deallocate any indirect blocks that we have allocated.
1012 */
1013
1014 if (unwindidx >= 0) {
1015
1016 /*
1017 * First write out any buffers we've created to resolve their
1018 * softdeps. This must be done in reverse order of creation
1019 * so that we resolve the dependencies in one pass.
1020 * Write the cylinder group buffers for these buffers too.
1021 */
1022
1023 for (i = num; i >= unwindidx; i--) {
1024 if (i == 0) {
1025 break;
1026 }
1027 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1028 0);
1029 if (bp->b_flags & B_DELWRI) {
/* nb is reused here for the device address of the cylinder group block. */
1030 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
1031 dbtofsb(fs, bp->b_blkno))));
1032 bwrite(bp);
1033 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
1034 0, 0);
1035 if (bp->b_flags & B_DELWRI) {
1036 bwrite(bp);
1037 } else {
1038 brelse(bp, BC_INVAL);
1039 }
1040 } else {
1041 brelse(bp, BC_INVAL);
1042 }
1043 }
1044
1045 /* Now flush the dependencies to disk. */
1046 #ifdef notyet
1047 /* XXX pages locked */
1048 (void)softdep_sync_metadata(vp);
1049 #endif
1050
1051 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
1052 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1053 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1054 }
1055
1056 /*
1057 * Now that any dependencies that we created have been
1058 * resolved, we can undo the partial allocation.
1059 */
1060
1061 if (unwindidx == 0) {
1062 *allocib = 0;
1063 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1064 if (DOINGSOFTDEP(vp))
1065 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1066 } else {
1067 int r;
1068
1069 r = bread(vp, indirs[unwindidx].in_lbn,
1070 (int)fs->fs_bsize, NOCRED, &bp);
1071 if (r) {
/*
 * NOTE(review): the brelse() below follows panic(); it is dead code
 * if panic() never returns -- confirm.
 */
1072 panic("Could not unwind indirect block, error %d", r);
1073 brelse(bp, 0);
1074 } else {
1075 bap = (int64_t *)bp->b_data;
1076 bap[indirs[unwindidx].in_off] = 0;
1077 bwrite(bp);
1078 }
1079 }
/* Invalidate the cached buffers of the levels below the unwind point. */
1080 for (i = unwindidx + 1; i <= num; i++) {
1081 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
1082 0);
1083 brelse(bp, BC_INVAL);
1084 }
1085 }
/* Free every block recorded in allociblk[] and total the bytes released. */
1086 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1087 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1088 deallocated += fs->fs_bsize;
1089 }
1090 if (deallocated) {
1091 #ifdef QUOTA
1092 /*
1093 * Restore user's disk quota because allocation failed.
1094 */
1095 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1096 #endif
1097 ip->i_ffs2_blocks -= btodb(deallocated);
1098 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1099 }
1100
1101 /*
1102 * Flush all dependencies again so that the soft updates code
1103 * doesn't find any untracked changes.
1104 */
1105 #ifdef notyet
1106 /* XXX pages locked */
1107 (void)softdep_sync_metadata(vp);
1108 #endif
1109 return (error);
1110 }
1111