/*	$NetBSD: ffs_balloc.c,v 1.48.12.1 2008/06/23 04:32:05 wrstuden Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48.12.1 2008/06/23 04:32:05 wrstuden Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67
68 #include <uvm/uvm.h>
69
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
74
75 /*
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
79 */
80
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
84 {
85 int error;
86
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
94
95 return error;
96 }
97
/*
 * UFS1 flavour of ffs_balloc(): allocate the physical block backing the
 * logical block that contains byte offset "off" of the file, allocating
 * any indirect blocks needed to reach it.
 *
 * vp	 - vnode of the file being extended/written
 * off	 - byte offset within the file
 * size	 - number of bytes needed; blkoff(off) + size must fit in a block
 * cred	 - credentials charged for the allocation (quota)
 * flags - B_SYNC (write metadata synchronously), B_CLRBUF (zero the data
 *	   buffer), B_METAONLY (return the indirect block, not the data block)
 * bpp	 - if non-NULL, on success holds a buffer for the allocated block
 *
 * Returns 0 or an errno.  On failure, any indirect blocks allocated along
 * the way are unwound and freed so the on-disk tree stays consistent.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here, for the fail path */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs1(ip, lastlbn, nb,
				&ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				&ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		/* Record the new block in the inode's direct block array. */
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* Loop exits holding "bp", the last-level indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		/* Remember the shallowest level we modified, for unwinding. */
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the indirect block itself, still held. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/* Now flush all dependencies to disk. */
#ifdef notyet
		/* XXX pages locked */
		(void)softdep_sync_metadata(vp);
#endif

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
				/*
				 * NOTE(review): panic() does not return, so
				 * this brelse() is unreachable dead code.
				 */
				brelse(bp, 0);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Free every block we allocated above and give back the quota. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Flush all dependencies again so that the soft updates code
	 * doesn't find any untracked changes.
	 */
#ifdef notyet
	/* XXX pages locked */
	(void)softdep_sync_metadata(vp);
#endif
	return (error);
}
567
/*
 * UFS2 flavour of ffs_balloc(): allocate the physical block backing the
 * logical block that contains byte offset "off" of the file, allocating
 * any indirect blocks needed to reach it.  Same structure and contract
 * as ffs_balloc_ufs1(), but using the 64-bit on-disk block pointers.
 *
 * vp	 - vnode of the file being extended/written
 * off	 - byte offset within the file
 * size	 - number of bytes needed; blkoff(off) + size must fit in a block
 * cred	 - credentials charged for the allocation (quota)
 * flags - B_SYNC, B_CLRBUF, B_METAONLY (as for ffs_balloc_ufs1)
 * bpp	 - if non-NULL, on success holds a buffer for the allocated block
 *
 * Returns 0 or an errno; on failure the partial allocation is unwound.
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here, for the fail path */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	int64_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 *
	 * NOTE(review): this whole section is compiled out and references
	 * "dp", which is not declared in this function — it would need to
	 * be ported (FreeBSD uses dp = ip->i_din2) before being enabled.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					&dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    NOCRED, 0, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    NOCRED, 0, &bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
			    nsize, (flags & BA_CLRBUF) != 0, &bp);
			if (error)
				return error;
			bp->b_xflags |= BX_ALTDATA;
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs2(ip, lastlbn, nb,
				&ip->i_ffs2_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			/*
			 * NOTE(review): unlike the UFS1 path this only tests
			 * "bpp", not "bpp && *bpp", before dereferencing —
			 * presumably ffs_realloccg() always sets *bpp here;
			 * confirm before relying on it.
			 */
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		/* Record the new block in the inode's direct block array. */
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		/* Loop exits holding "bp", the last-level indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		/* Remember the shallowest level we modified, for unwinding. */
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		/* Caller wants the indirect block itself, still held. */
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/* Now flush the dependencies to disk. */
#ifdef notyet
		/* XXX pages locked */
		(void)softdep_sync_metadata(vp);
#endif

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
				/*
				 * NOTE(review): panic() does not return, so
				 * this brelse() is unreachable dead code.
				 */
				brelse(bp, 0);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Free every block we allocated above and give back the quota. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Flush all dependencies again so that the soft updates code
	 * doesn't find any untracked changes.
	 */
#ifdef notyet
	/* XXX pages locked */
	(void)softdep_sync_metadata(vp);
#endif
	return (error);
}
1154