/*	$NetBSD: ffs_balloc.c,v 1.51.4.1 2011/06/18 17:00:25 bouyer Exp $	*/

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.51.4.1 2011/06/18 17:00:25 bouyer Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <uvm/uvm.h>

70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
74
75 /*
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
79 */
80
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
84 {
85 int error;
86
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
94
95 return error;
96 }
97
98 static int
99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
101 {
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
111 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 #ifdef FFS_EI
115 const int needswap = UFS_FSNEEDSWAP(fs);
116 #endif
117 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
118
119 lbn = lblkno(fs, off);
120 size = blkoff(fs, off) + size;
121 if (size > fs->fs_bsize)
122 panic("ffs_balloc: blk too big");
123 if (bpp != NULL) {
124 *bpp = NULL;
125 }
126 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
127
128 if (lbn < 0)
129 return (EFBIG);
130
131 /*
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
135 */
136
137 lastlbn = lblkno(fs, ip->i_size);
138 if (lastlbn < NDADDR && lastlbn < lbn) {
139 nb = lastlbn;
140 osize = blksize(fs, ip, nb);
141 if (osize < fs->fs_bsize && osize > 0) {
142 mutex_enter(&ump->um_lock);
143 error = ffs_realloccg(ip, nb,
144 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
145 &ip->i_ffs1_db[0]),
146 osize, (int)fs->fs_bsize, cred, bpp, &newb);
147 if (error)
148 return (error);
149 if (DOINGSOFTDEP(vp))
150 softdep_setup_allocdirect(ip, nb, newb,
151 ufs_rw32(ip->i_ffs1_db[nb], needswap),
152 fs->fs_bsize, osize, bpp ? *bpp : NULL);
153 ip->i_size = lblktosize(fs, nb + 1);
154 ip->i_ffs1_size = ip->i_size;
155 uvm_vnp_setsize(vp, ip->i_ffs1_size);
156 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
157 ip->i_flag |= IN_CHANGE | IN_UPDATE;
158 if (bpp && *bpp) {
159 if (flags & B_SYNC)
160 bwrite(*bpp);
161 else
162 bawrite(*bpp);
163 }
164 }
165 }
166
167 /*
168 * The first NDADDR blocks are direct blocks
169 */
170
171 if (lbn < NDADDR) {
172 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
173 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
174
175 /*
176 * The block is an already-allocated direct block
177 * and the file already extends past this block,
178 * thus this must be a whole block.
179 * Just read the block (if requested).
180 */
181
182 if (bpp != NULL) {
183 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
184 B_MODIFY, bpp);
185 if (error) {
186 brelse(*bpp, 0);
187 return (error);
188 }
189 }
190 return (0);
191 }
192 if (nb != 0) {
193
194 /*
195 * Consider need to reallocate a fragment.
196 */
197
198 osize = fragroundup(fs, blkoff(fs, ip->i_size));
199 nsize = fragroundup(fs, size);
200 if (nsize <= osize) {
201
202 /*
203 * The existing block is already
204 * at least as big as we want.
205 * Just read the block (if requested).
206 */
207
208 if (bpp != NULL) {
209 error = bread(vp, lbn, osize, NOCRED,
210 B_MODIFY, bpp);
211 if (error) {
212 brelse(*bpp, 0);
213 return (error);
214 }
215 }
216 return 0;
217 } else {
218
219 /*
220 * The existing block is smaller than we want,
221 * grow it.
222 */
223 mutex_enter(&ump->um_lock);
224 error = ffs_realloccg(ip, lbn,
225 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
226 &ip->i_ffs1_db[0]),
227 osize, nsize, cred, bpp, &newb);
228 if (error)
229 return (error);
230 if (DOINGSOFTDEP(vp))
231 softdep_setup_allocdirect(ip, lbn,
232 newb, nb, nsize, osize,
233 bpp ? *bpp : NULL);
234 }
235 } else {
236
237 /*
238 * the block was not previously allocated,
239 * allocate a new block or fragment.
240 */
241
242 if (ip->i_size < lblktosize(fs, lbn + 1))
243 nsize = fragroundup(fs, size);
244 else
245 nsize = fs->fs_bsize;
246 mutex_enter(&ump->um_lock);
247 error = ffs_alloc(ip, lbn,
248 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
249 &ip->i_ffs1_db[0]),
250 nsize, flags, cred, &newb);
251 if (error)
252 return (error);
253 if (bpp != NULL) {
254 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
255 nsize, (flags & B_CLRBUF) != 0, bpp);
256 if (error)
257 return error;
258 }
259 if (DOINGSOFTDEP(vp)) {
260 softdep_setup_allocdirect(ip, lbn, newb, 0,
261 nsize, 0, bpp ? *bpp : NULL);
262 }
263 }
264 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
265 ip->i_flag |= IN_CHANGE | IN_UPDATE;
266 return (0);
267 }
268
269 /*
270 * Determine the number of levels of indirection.
271 */
272
273 pref = 0;
274 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
275 return (error);
276
277 /*
278 * Fetch the first indirect block allocating if necessary.
279 */
280
281 --num;
282 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
283 allocib = NULL;
284 allocblk = allociblk;
285 if (nb == 0) {
286 mutex_enter(&ump->um_lock);
287 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
288 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
289 flags | B_METAONLY, cred, &newb);
290 if (error)
291 goto fail;
292 nb = newb;
293 *allocblk++ = nb;
294 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
295 fs->fs_bsize, true, &bp);
296 if (error)
297 goto fail;
298 if (DOINGSOFTDEP(vp)) {
299 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
300 newb, 0, fs->fs_bsize, 0, bp);
301 bdwrite(bp);
302 } else {
303
304 /*
305 * Write synchronously so that indirect blocks
306 * never point at garbage.
307 */
308
309 if ((error = bwrite(bp)) != 0)
310 goto fail;
311 }
312 unwindidx = 0;
313 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
314 *allocib = ufs_rw32(nb, needswap);
315 ip->i_flag |= IN_CHANGE | IN_UPDATE;
316 }
317
318 /*
319 * Fetch through the indirect blocks, allocating as necessary.
320 */
321
322 for (i = 1;;) {
323 error = bread(vp,
324 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
325 if (error) {
326 brelse(bp, 0);
327 goto fail;
328 }
329 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
330 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
331 if (i == num)
332 break;
333 i++;
334 if (nb != 0) {
335 brelse(bp, 0);
336 continue;
337 }
338 if (fscow_run(bp, true) != 0) {
339 brelse(bp, 0);
340 goto fail;
341 }
342 mutex_enter(&ump->um_lock);
343 /* Try to keep snapshot indirect blocks contiguous. */
344 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
345 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
346 flags | B_METAONLY, &bap[0]);
347 if (pref == 0)
348 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
349 NULL);
350 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
351 flags | B_METAONLY, cred, &newb);
352 if (error) {
353 brelse(bp, 0);
354 goto fail;
355 }
356 nb = newb;
357 *allocblk++ = nb;
358 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
359 fs->fs_bsize, true, &nbp);
360 if (error) {
361 brelse(bp, 0);
362 goto fail;
363 }
364 if (DOINGSOFTDEP(vp)) {
365 softdep_setup_allocindir_meta(nbp, ip, bp,
366 indirs[i - 1].in_off, nb);
367 bdwrite(nbp);
368 } else {
369
370 /*
371 * Write synchronously so that indirect blocks
372 * never point at garbage.
373 */
374
375 if ((error = bwrite(nbp)) != 0) {
376 brelse(bp, 0);
377 goto fail;
378 }
379 }
380 if (unwindidx < 0)
381 unwindidx = i - 1;
382 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
383
384 /*
385 * If required, write synchronously, otherwise use
386 * delayed write.
387 */
388
389 if (flags & B_SYNC) {
390 bwrite(bp);
391 } else {
392 bdwrite(bp);
393 }
394 }
395
396 if (flags & B_METAONLY) {
397 KASSERT(bpp != NULL);
398 *bpp = bp;
399 return (0);
400 }
401
402 /*
403 * Get the data block, allocating if necessary.
404 */
405
406 if (nb == 0) {
407 if (fscow_run(bp, true) != 0) {
408 brelse(bp, 0);
409 goto fail;
410 }
411 mutex_enter(&ump->um_lock);
412 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
413 &bap[0]);
414 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
415 &newb);
416 if (error) {
417 brelse(bp, 0);
418 goto fail;
419 }
420 nb = newb;
421 *allocblk++ = nb;
422 if (bpp != NULL) {
423 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
424 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
425 if (error) {
426 brelse(bp, 0);
427 goto fail;
428 }
429 }
430 if (DOINGSOFTDEP(vp))
431 softdep_setup_allocindir_page(ip, lbn, bp,
432 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
433 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
434 if (allocib == NULL && unwindidx < 0) {
435 unwindidx = i - 1;
436 }
437
438 /*
439 * If required, write synchronously, otherwise use
440 * delayed write.
441 */
442
443 if (flags & B_SYNC) {
444 bwrite(bp);
445 } else {
446 bdwrite(bp);
447 }
448 return (0);
449 }
450 brelse(bp, 0);
451 if (bpp != NULL) {
452 if (flags & B_CLRBUF) {
453 error = bread(vp, lbn, (int)fs->fs_bsize,
454 NOCRED, B_MODIFY, &nbp);
455 if (error) {
456 brelse(nbp, 0);
457 goto fail;
458 }
459 } else {
460 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
461 fs->fs_bsize, true, &nbp);
462 if (error)
463 goto fail;
464 }
465 *bpp = nbp;
466 }
467 return (0);
468
469 fail:
470 /*
471 * If we have failed part way through block allocation, we
472 * have to deallocate any indirect blocks that we have allocated.
473 */
474
475 if (unwindidx >= 0) {
476
477 /*
478 * First write out any buffers we've created to resolve their
479 * softdeps. This must be done in reverse order of creation
480 * so that we resolve the dependencies in one pass.
481 * Write the cylinder group buffers for these buffers too.
482 */
483
484 for (i = num; i >= unwindidx; i--) {
485 if (i == 0) {
486 break;
487 }
488 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
489 fs->fs_bsize, false, &bp) != 0)
490 continue;
491 if (bp->b_oflags & BO_DELWRI) {
492 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
493 dbtofsb(fs, bp->b_blkno))));
494 bwrite(bp);
495 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
496 fs->fs_cgsize, false, &bp) != 0)
497 continue;
498 if (bp->b_oflags & BO_DELWRI) {
499 bwrite(bp);
500 } else {
501 brelse(bp, BC_INVAL);
502 }
503 } else {
504 brelse(bp, BC_INVAL);
505 }
506 }
507
508 /* Now flush all dependencies to disk. */
509 #ifdef notyet
510 /* XXX pages locked */
511 (void)softdep_sync_metadata(vp);
512 #endif
513
514 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
515 ip->i_flag |= IN_CHANGE | IN_UPDATE;
516 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
517 }
518
519 /*
520 * Now that any dependencies that we created have been
521 * resolved, we can undo the partial allocation.
522 */
523
524 if (unwindidx == 0) {
525 *allocib = 0;
526 ip->i_flag |= IN_CHANGE | IN_UPDATE;
527 if (DOINGSOFTDEP(vp))
528 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
529 } else {
530 int r;
531
532 r = bread(vp, indirs[unwindidx].in_lbn,
533 (int)fs->fs_bsize, NOCRED, 0, &bp);
534 if (r) {
535 panic("Could not unwind indirect block, error %d", r);
536 brelse(bp, 0);
537 } else {
538 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
539 bap[indirs[unwindidx].in_off] = 0;
540 bwrite(bp);
541 }
542 }
543 for (i = unwindidx + 1; i <= num; i++) {
544 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
545 fs->fs_bsize, false, &bp) == 0)
546 brelse(bp, BC_INVAL);
547 }
548 }
549 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
550 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
551 deallocated += fs->fs_bsize;
552 }
553 if (deallocated) {
554 #ifdef QUOTA
555 /*
556 * Restore user's disk quota because allocation failed.
557 */
558 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
559 #endif
560 ip->i_ffs1_blocks -= btodb(deallocated);
561 ip->i_flag |= IN_CHANGE | IN_UPDATE;
562 }
563 /*
564 * Flush all dependencies again so that the soft updates code
565 * doesn't find any untracked changes.
566 */
567 #ifdef notyet
568 /* XXX pages locked */
569 (void)softdep_sync_metadata(vp);
570 #endif
571 return (error);
572 }
573
574 static int
575 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
576 int flags, struct buf **bpp)
577 {
578 daddr_t lbn, lastlbn;
579 struct buf *bp, *nbp;
580 struct inode *ip = VTOI(vp);
581 struct fs *fs = ip->i_fs;
582 struct ufsmount *ump = ip->i_ump;
583 struct indir indirs[NIADDR + 2];
584 daddr_t newb, pref, nb;
585 int64_t *bap;
586 int deallocated, osize, nsize, num, i, error;
587 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
588 int64_t *allocib;
589 int unwindidx = -1;
590 #ifdef FFS_EI
591 const int needswap = UFS_FSNEEDSWAP(fs);
592 #endif
593 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
594
595 lbn = lblkno(fs, off);
596 size = blkoff(fs, off) + size;
597 if (size > fs->fs_bsize)
598 panic("ffs_balloc: blk too big");
599 if (bpp != NULL) {
600 *bpp = NULL;
601 }
602 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
603
604 if (lbn < 0)
605 return (EFBIG);
606
607 #ifdef notyet
608 /*
609 * Check for allocating external data.
610 */
611 if (flags & IO_EXT) {
612 if (lbn >= NXADDR)
613 return (EFBIG);
614 /*
615 * If the next write will extend the data into a new block,
616 * and the data is currently composed of a fragment
617 * this fragment has to be extended to be a full block.
618 */
619 lastlbn = lblkno(fs, dp->di_extsize);
620 if (lastlbn < lbn) {
621 nb = lastlbn;
622 osize = sblksize(fs, dp->di_extsize, nb);
623 if (osize < fs->fs_bsize && osize > 0) {
624 mutex_enter(&ump->um_lock);
625 error = ffs_realloccg(ip, -1 - nb,
626 dp->di_extb[nb],
627 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
628 flags, &dp->di_extb[0]),
629 osize,
630 (int)fs->fs_bsize, cred, &bp);
631 if (error)
632 return (error);
633 if (DOINGSOFTDEP(vp))
634 softdep_setup_allocext(ip, nb,
635 dbtofsb(fs, bp->b_blkno),
636 dp->di_extb[nb],
637 fs->fs_bsize, osize, bp);
638 dp->di_extsize = smalllblktosize(fs, nb + 1);
639 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
640 bp->b_xflags |= BX_ALTDATA;
641 ip->i_flag |= IN_CHANGE | IN_UPDATE;
642 if (flags & IO_SYNC)
643 bwrite(bp);
644 else
645 bawrite(bp);
646 }
647 }
648 /*
649 * All blocks are direct blocks
650 */
651 if (flags & BA_METAONLY)
652 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
653 nb = dp->di_extb[lbn];
654 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
655 error = bread(vp, -1 - lbn, fs->fs_bsize,
656 NOCRED, 0, &bp);
657 if (error) {
658 brelse(bp, 0);
659 return (error);
660 }
661 mutex_enter(&bp->b_interlock);
662 bp->b_blkno = fsbtodb(fs, nb);
663 bp->b_xflags |= BX_ALTDATA;
664 mutex_exit(&bp->b_interlock);
665 *bpp = bp;
666 return (0);
667 }
668 if (nb != 0) {
669 /*
670 * Consider need to reallocate a fragment.
671 */
672 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
673 nsize = fragroundup(fs, size);
674 if (nsize <= osize) {
675 error = bread(vp, -1 - lbn, osize,
676 NOCRED, 0, &bp);
677 if (error) {
678 brelse(bp, 0);
679 return (error);
680 }
681 mutex_enter(&bp->b_interlock);
682 bp->b_blkno = fsbtodb(fs, nb);
683 bp->b_xflags |= BX_ALTDATA;
684 mutex_exit(&bp->b_interlock);
685 } else {
686 mutex_enter(&ump->um_lock);
687 error = ffs_realloccg(ip, -1 - lbn,
688 dp->di_extb[lbn],
689 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
690 &dp->di_extb[0]),
691 osize, nsize, cred, &bp);
692 if (error)
693 return (error);
694 bp->b_xflags |= BX_ALTDATA;
695 if (DOINGSOFTDEP(vp))
696 softdep_setup_allocext(ip, lbn,
697 dbtofsb(fs, bp->b_blkno), nb,
698 nsize, osize, bp);
699 }
700 } else {
701 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
702 nsize = fragroundup(fs, size);
703 else
704 nsize = fs->fs_bsize;
705 mutex_enter(&ump->um_lock);
706 error = ffs_alloc(ip, lbn,
707 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
708 &dp->di_extb[0]),
709 nsize, flags, cred, &newb);
710 if (error)
711 return (error);
712 error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
713 nsize, (flags & BA_CLRBUF) != 0, &bp);
714 if (error)
715 return error;
716 bp->b_xflags |= BX_ALTDATA;
717 if (DOINGSOFTDEP(vp))
718 softdep_setup_allocext(ip, lbn, newb, 0,
719 nsize, 0, bp);
720 }
721 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
722 ip->i_flag |= IN_CHANGE | IN_UPDATE;
723 *bpp = bp;
724 return (0);
725 }
726 #endif
727 /*
728 * If the next write will extend the file into a new block,
729 * and the file is currently composed of a fragment
730 * this fragment has to be extended to be a full block.
731 */
732
733 lastlbn = lblkno(fs, ip->i_size);
734 if (lastlbn < NDADDR && lastlbn < lbn) {
735 nb = lastlbn;
736 osize = blksize(fs, ip, nb);
737 if (osize < fs->fs_bsize && osize > 0) {
738 mutex_enter(&ump->um_lock);
739 error = ffs_realloccg(ip, nb,
740 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
741 &ip->i_ffs2_db[0]),
742 osize, (int)fs->fs_bsize, cred, bpp, &newb);
743 if (error)
744 return (error);
745 if (DOINGSOFTDEP(vp))
746 softdep_setup_allocdirect(ip, nb, newb,
747 ufs_rw64(ip->i_ffs2_db[nb], needswap),
748 fs->fs_bsize, osize, bpp ? *bpp : NULL);
749 ip->i_size = lblktosize(fs, nb + 1);
750 ip->i_ffs2_size = ip->i_size;
751 uvm_vnp_setsize(vp, ip->i_size);
752 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
753 ip->i_flag |= IN_CHANGE | IN_UPDATE;
754 if (bpp) {
755 if (flags & B_SYNC)
756 bwrite(*bpp);
757 else
758 bawrite(*bpp);
759 }
760 }
761 }
762
763 /*
764 * The first NDADDR blocks are direct blocks
765 */
766
767 if (lbn < NDADDR) {
768 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
769 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
770
771 /*
772 * The block is an already-allocated direct block
773 * and the file already extends past this block,
774 * thus this must be a whole block.
775 * Just read the block (if requested).
776 */
777
778 if (bpp != NULL) {
779 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
780 B_MODIFY, bpp);
781 if (error) {
782 brelse(*bpp, 0);
783 return (error);
784 }
785 }
786 return (0);
787 }
788 if (nb != 0) {
789
790 /*
791 * Consider need to reallocate a fragment.
792 */
793
794 osize = fragroundup(fs, blkoff(fs, ip->i_size));
795 nsize = fragroundup(fs, size);
796 if (nsize <= osize) {
797
798 /*
799 * The existing block is already
800 * at least as big as we want.
801 * Just read the block (if requested).
802 */
803
804 if (bpp != NULL) {
805 error = bread(vp, lbn, osize, NOCRED,
806 B_MODIFY, bpp);
807 if (error) {
808 brelse(*bpp, 0);
809 return (error);
810 }
811 }
812 return 0;
813 } else {
814
815 /*
816 * The existing block is smaller than we want,
817 * grow it.
818 */
819 mutex_enter(&ump->um_lock);
820 error = ffs_realloccg(ip, lbn,
821 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
822 &ip->i_ffs2_db[0]),
823 osize, nsize, cred, bpp, &newb);
824 if (error)
825 return (error);
826 if (DOINGSOFTDEP(vp))
827 softdep_setup_allocdirect(ip, lbn,
828 newb, nb, nsize, osize,
829 bpp ? *bpp : NULL);
830 }
831 } else {
832
833 /*
834 * the block was not previously allocated,
835 * allocate a new block or fragment.
836 */
837
838 if (ip->i_size < lblktosize(fs, lbn + 1))
839 nsize = fragroundup(fs, size);
840 else
841 nsize = fs->fs_bsize;
842 mutex_enter(&ump->um_lock);
843 error = ffs_alloc(ip, lbn,
844 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
845 &ip->i_ffs2_db[0]),
846 nsize, flags, cred, &newb);
847 if (error)
848 return (error);
849 if (bpp != NULL) {
850 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
851 nsize, (flags & B_CLRBUF) != 0, bpp);
852 if (error)
853 return error;
854 }
855 if (DOINGSOFTDEP(vp)) {
856 softdep_setup_allocdirect(ip, lbn, newb, 0,
857 nsize, 0, bpp ? *bpp : NULL);
858 }
859 }
860 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
861 ip->i_flag |= IN_CHANGE | IN_UPDATE;
862 return (0);
863 }
864
865 /*
866 * Determine the number of levels of indirection.
867 */
868
869 pref = 0;
870 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
871 return (error);
872
873 /*
874 * Fetch the first indirect block allocating if necessary.
875 */
876
877 --num;
878 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
879 allocib = NULL;
880 allocblk = allociblk;
881 if (nb == 0) {
882 mutex_enter(&ump->um_lock);
883 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
884 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
885 flags | B_METAONLY, cred, &newb);
886 if (error)
887 goto fail;
888 nb = newb;
889 *allocblk++ = nb;
890 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
891 fs->fs_bsize, true, &bp);
892 if (error)
893 goto fail;
894 if (DOINGSOFTDEP(vp)) {
895 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
896 newb, 0, fs->fs_bsize, 0, bp);
897 bdwrite(bp);
898 } else {
899
900 /*
901 * Write synchronously so that indirect blocks
902 * never point at garbage.
903 */
904
905 if ((error = bwrite(bp)) != 0)
906 goto fail;
907 }
908 unwindidx = 0;
909 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
910 *allocib = ufs_rw64(nb, needswap);
911 ip->i_flag |= IN_CHANGE | IN_UPDATE;
912 }
913
914 /*
915 * Fetch through the indirect blocks, allocating as necessary.
916 */
917
918 for (i = 1;;) {
919 error = bread(vp,
920 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
921 if (error) {
922 brelse(bp, 0);
923 goto fail;
924 }
925 bap = (int64_t *)bp->b_data;
926 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
927 if (i == num)
928 break;
929 i++;
930 if (nb != 0) {
931 brelse(bp, 0);
932 continue;
933 }
934 if (fscow_run(bp, true) != 0) {
935 brelse(bp, 0);
936 goto fail;
937 }
938 mutex_enter(&ump->um_lock);
939 /* Try to keep snapshot indirect blocks contiguous. */
940 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
941 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
942 flags | B_METAONLY, &bap[0]);
943 if (pref == 0)
944 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
945 NULL);
946 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
947 flags | B_METAONLY, cred, &newb);
948 if (error) {
949 brelse(bp, 0);
950 goto fail;
951 }
952 nb = newb;
953 *allocblk++ = nb;
954 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
955 fs->fs_bsize, true, &nbp);
956 if (error) {
957 brelse(bp, 0);
958 goto fail;
959 }
960 if (DOINGSOFTDEP(vp)) {
961 softdep_setup_allocindir_meta(nbp, ip, bp,
962 indirs[i - 1].in_off, nb);
963 bdwrite(nbp);
964 } else {
965
966 /*
967 * Write synchronously so that indirect blocks
968 * never point at garbage.
969 */
970
971 if ((error = bwrite(nbp)) != 0) {
972 brelse(bp, 0);
973 goto fail;
974 }
975 }
976 if (unwindidx < 0)
977 unwindidx = i - 1;
978 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
979
980 /*
981 * If required, write synchronously, otherwise use
982 * delayed write.
983 */
984
985 if (flags & B_SYNC) {
986 bwrite(bp);
987 } else {
988 bdwrite(bp);
989 }
990 }
991
992 if (flags & B_METAONLY) {
993 KASSERT(bpp != NULL);
994 *bpp = bp;
995 return (0);
996 }
997
998 /*
999 * Get the data block, allocating if necessary.
1000 */
1001
1002 if (nb == 0) {
1003 if (fscow_run(bp, true) != 0) {
1004 brelse(bp, 0);
1005 goto fail;
1006 }
1007 mutex_enter(&ump->um_lock);
1008 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
1009 &bap[0]);
1010 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
1011 &newb);
1012 if (error) {
1013 brelse(bp, 0);
1014 goto fail;
1015 }
1016 nb = newb;
1017 *allocblk++ = nb;
1018 if (bpp != NULL) {
1019 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
1020 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
1021 if (error) {
1022 brelse(bp, 0);
1023 goto fail;
1024 }
1025 }
1026 if (DOINGSOFTDEP(vp))
1027 softdep_setup_allocindir_page(ip, lbn, bp,
1028 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
1029 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
1030 if (allocib == NULL && unwindidx < 0) {
1031 unwindidx = i - 1;
1032 }
1033
1034 /*
1035 * If required, write synchronously, otherwise use
1036 * delayed write.
1037 */
1038
1039 if (flags & B_SYNC) {
1040 bwrite(bp);
1041 } else {
1042 bdwrite(bp);
1043 }
1044 return (0);
1045 }
1046 brelse(bp, 0);
1047 if (bpp != NULL) {
1048 if (flags & B_CLRBUF) {
1049 error = bread(vp, lbn, (int)fs->fs_bsize,
1050 NOCRED, B_MODIFY, &nbp);
1051 if (error) {
1052 brelse(nbp, 0);
1053 goto fail;
1054 }
1055 } else {
1056 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
1057 fs->fs_bsize, true, &nbp);
1058 if (error)
1059 goto fail;
1060 }
1061 *bpp = nbp;
1062 }
1063 return (0);
1064
1065 fail:
1066 /*
1067 * If we have failed part way through block allocation, we
1068 * have to deallocate any indirect blocks that we have allocated.
1069 */
1070
1071 if (unwindidx >= 0) {
1072
1073 /*
1074 * First write out any buffers we've created to resolve their
1075 * softdeps. This must be done in reverse order of creation
1076 * so that we resolve the dependencies in one pass.
1077 * Write the cylinder group buffers for these buffers too.
1078 */
1079
1080 for (i = num; i >= unwindidx; i--) {
1081 if (i == 0) {
1082 break;
1083 }
1084 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1085 fs->fs_bsize, false, &bp) != 0)
1086 continue;
1087 if (bp->b_oflags & BO_DELWRI) {
1088 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
1089 dbtofsb(fs, bp->b_blkno))));
1090 bwrite(bp);
1091 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
1092 fs->fs_cgsize, false, &bp) != 0)
1093 continue;
1094 if (bp->b_oflags & BO_DELWRI) {
1095 bwrite(bp);
1096 } else {
1097 brelse(bp, BC_INVAL);
1098 }
1099 } else {
1100 brelse(bp, BC_INVAL);
1101 }
1102 }
1103
1104 /* Now flush the dependencies to disk. */
1105 #ifdef notyet
1106 /* XXX pages locked */
1107 (void)softdep_sync_metadata(vp);
1108 #endif
1109
1110 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
1111 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1112 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1113 }
1114
1115 /*
1116 * Now that any dependencies that we created have been
1117 * resolved, we can undo the partial allocation.
1118 */
1119
1120 if (unwindidx == 0) {
1121 *allocib = 0;
1122 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1123 if (DOINGSOFTDEP(vp))
1124 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1125 } else {
1126 int r;
1127
1128 r = bread(vp, indirs[unwindidx].in_lbn,
1129 (int)fs->fs_bsize, NOCRED, 0, &bp);
1130 if (r) {
1131 panic("Could not unwind indirect block, error %d", r);
1132 brelse(bp, 0);
1133 } else {
1134 bap = (int64_t *)bp->b_data;
1135 bap[indirs[unwindidx].in_off] = 0;
1136 bwrite(bp);
1137 }
1138 }
1139 for (i = unwindidx + 1; i <= num; i++) {
1140 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1141 fs->fs_bsize, false, &bp) == 0)
1142 brelse(bp, BC_INVAL);
1143 }
1144 }
1145 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1146 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1147 deallocated += fs->fs_bsize;
1148 }
1149 if (deallocated) {
1150 #ifdef QUOTA
1151 /*
1152 * Restore user's disk quota because allocation failed.
1153 */
1154 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1155 #endif
1156 ip->i_ffs2_blocks -= btodb(deallocated);
1157 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1158 }
1159
1160 /*
1161 * Flush all dependencies again so that the soft updates code
1162 * doesn't find any untracked changes.
1163 */
1164 #ifdef notyet
1165 /* XXX pages locked */
1166 (void)softdep_sync_metadata(vp);
1167 #endif
1168 return (error);
1169 }
1170