/* $NetBSD: ffs_balloc.c,v 1.52.6.1 2011/06/06 09:10:15 jruoho Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.52.6.1 2011/06/06 09:10:15 jruoho Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67
68 #include <uvm/uvm.h>
69
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
74
75 /*
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
79 */
80
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
84 {
85 int error;
86
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
94
95 return error;
96 }
97
98 static int
99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
101 {
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
111 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 #ifdef FFS_EI
115 const int needswap = UFS_FSNEEDSWAP(fs);
116 #endif
117 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
118
119 lbn = lblkno(fs, off);
120 size = blkoff(fs, off) + size;
121 if (size > fs->fs_bsize)
122 panic("ffs_balloc: blk too big");
123 if (bpp != NULL) {
124 *bpp = NULL;
125 }
126 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
127
128 if (lbn < 0)
129 return (EFBIG);
130
131 /*
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
135 */
136
137 lastlbn = lblkno(fs, ip->i_size);
138 if (lastlbn < NDADDR && lastlbn < lbn) {
139 nb = lastlbn;
140 osize = blksize(fs, ip, nb);
141 if (osize < fs->fs_bsize && osize > 0) {
142 mutex_enter(&ump->um_lock);
143 error = ffs_realloccg(ip, nb,
144 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
145 &ip->i_ffs1_db[0]),
146 osize, (int)fs->fs_bsize, cred, bpp, &newb);
147 if (error)
148 return (error);
149 ip->i_size = lblktosize(fs, nb + 1);
150 ip->i_ffs1_size = ip->i_size;
151 uvm_vnp_setsize(vp, ip->i_ffs1_size);
152 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
153 ip->i_flag |= IN_CHANGE | IN_UPDATE;
154 if (bpp && *bpp) {
155 if (flags & B_SYNC)
156 bwrite(*bpp);
157 else
158 bawrite(*bpp);
159 }
160 }
161 }
162
163 /*
164 * The first NDADDR blocks are direct blocks
165 */
166
167 if (lbn < NDADDR) {
168 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
169 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
170
171 /*
172 * The block is an already-allocated direct block
173 * and the file already extends past this block,
174 * thus this must be a whole block.
175 * Just read the block (if requested).
176 */
177
178 if (bpp != NULL) {
179 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
180 B_MODIFY, bpp);
181 if (error) {
182 brelse(*bpp, 0);
183 return (error);
184 }
185 }
186 return (0);
187 }
188 if (nb != 0) {
189
190 /*
191 * Consider need to reallocate a fragment.
192 */
193
194 osize = fragroundup(fs, blkoff(fs, ip->i_size));
195 nsize = fragroundup(fs, size);
196 if (nsize <= osize) {
197
198 /*
199 * The existing block is already
200 * at least as big as we want.
201 * Just read the block (if requested).
202 */
203
204 if (bpp != NULL) {
205 error = bread(vp, lbn, osize, NOCRED,
206 B_MODIFY, bpp);
207 if (error) {
208 brelse(*bpp, 0);
209 return (error);
210 }
211 }
212 return 0;
213 } else {
214
215 /*
216 * The existing block is smaller than we want,
217 * grow it.
218 */
219 mutex_enter(&ump->um_lock);
220 error = ffs_realloccg(ip, lbn,
221 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
222 &ip->i_ffs1_db[0]),
223 osize, nsize, cred, bpp, &newb);
224 if (error)
225 return (error);
226 }
227 } else {
228
229 /*
230 * the block was not previously allocated,
231 * allocate a new block or fragment.
232 */
233
234 if (ip->i_size < lblktosize(fs, lbn + 1))
235 nsize = fragroundup(fs, size);
236 else
237 nsize = fs->fs_bsize;
238 mutex_enter(&ump->um_lock);
239 error = ffs_alloc(ip, lbn,
240 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
241 &ip->i_ffs1_db[0]),
242 nsize, flags, cred, &newb);
243 if (error)
244 return (error);
245 if (bpp != NULL) {
246 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
247 nsize, (flags & B_CLRBUF) != 0, bpp);
248 if (error)
249 return error;
250 }
251 }
252 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
253 ip->i_flag |= IN_CHANGE | IN_UPDATE;
254 return (0);
255 }
256
257 /*
258 * Determine the number of levels of indirection.
259 */
260
261 pref = 0;
262 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
263 return (error);
264
265 /*
266 * Fetch the first indirect block allocating if necessary.
267 */
268
269 --num;
270 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
271 allocib = NULL;
272 allocblk = allociblk;
273 if (nb == 0) {
274 mutex_enter(&ump->um_lock);
275 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
276 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
277 flags | B_METAONLY, cred, &newb);
278 if (error)
279 goto fail;
280 nb = newb;
281 *allocblk++ = nb;
282 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
283 fs->fs_bsize, true, &bp);
284 if (error)
285 goto fail;
286 /*
287 * Write synchronously so that indirect blocks
288 * never point at garbage.
289 */
290 if ((error = bwrite(bp)) != 0)
291 goto fail;
292 unwindidx = 0;
293 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
294 *allocib = ufs_rw32(nb, needswap);
295 ip->i_flag |= IN_CHANGE | IN_UPDATE;
296 }
297
298 /*
299 * Fetch through the indirect blocks, allocating as necessary.
300 */
301
302 for (i = 1;;) {
303 error = bread(vp,
304 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
305 if (error) {
306 brelse(bp, 0);
307 goto fail;
308 }
309 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
310 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
311 if (i == num)
312 break;
313 i++;
314 if (nb != 0) {
315 brelse(bp, 0);
316 continue;
317 }
318 if (fscow_run(bp, true) != 0) {
319 brelse(bp, 0);
320 goto fail;
321 }
322 mutex_enter(&ump->um_lock);
323 /* Try to keep snapshot indirect blocks contiguous. */
324 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
325 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
326 flags | B_METAONLY, &bap[0]);
327 if (pref == 0)
328 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
329 NULL);
330 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
331 flags | B_METAONLY, cred, &newb);
332 if (error) {
333 brelse(bp, 0);
334 goto fail;
335 }
336 nb = newb;
337 *allocblk++ = nb;
338 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
339 fs->fs_bsize, true, &nbp);
340 if (error) {
341 brelse(bp, 0);
342 goto fail;
343 }
344 /*
345 * Write synchronously so that indirect blocks
346 * never point at garbage.
347 */
348 if ((error = bwrite(nbp)) != 0) {
349 brelse(bp, 0);
350 goto fail;
351 }
352 if (unwindidx < 0)
353 unwindidx = i - 1;
354 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
355
356 /*
357 * If required, write synchronously, otherwise use
358 * delayed write.
359 */
360
361 if (flags & B_SYNC) {
362 bwrite(bp);
363 } else {
364 bdwrite(bp);
365 }
366 }
367
368 if (flags & B_METAONLY) {
369 KASSERT(bpp != NULL);
370 *bpp = bp;
371 return (0);
372 }
373
374 /*
375 * Get the data block, allocating if necessary.
376 */
377
378 if (nb == 0) {
379 if (fscow_run(bp, true) != 0) {
380 brelse(bp, 0);
381 goto fail;
382 }
383 mutex_enter(&ump->um_lock);
384 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
385 &bap[0]);
386 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
387 &newb);
388 if (error) {
389 brelse(bp, 0);
390 goto fail;
391 }
392 nb = newb;
393 *allocblk++ = nb;
394 if (bpp != NULL) {
395 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
396 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
397 if (error) {
398 brelse(bp, 0);
399 goto fail;
400 }
401 }
402 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
403 if (allocib == NULL && unwindidx < 0) {
404 unwindidx = i - 1;
405 }
406
407 /*
408 * If required, write synchronously, otherwise use
409 * delayed write.
410 */
411
412 if (flags & B_SYNC) {
413 bwrite(bp);
414 } else {
415 bdwrite(bp);
416 }
417 return (0);
418 }
419 brelse(bp, 0);
420 if (bpp != NULL) {
421 if (flags & B_CLRBUF) {
422 error = bread(vp, lbn, (int)fs->fs_bsize,
423 NOCRED, B_MODIFY, &nbp);
424 if (error) {
425 brelse(nbp, 0);
426 goto fail;
427 }
428 } else {
429 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
430 fs->fs_bsize, true, &nbp);
431 if (error)
432 goto fail;
433 }
434 *bpp = nbp;
435 }
436 return (0);
437
438 fail:
439 /*
440 * If we have failed part way through block allocation, we
441 * have to deallocate any indirect blocks that we have allocated.
442 */
443
444 if (unwindidx >= 0) {
445
446 /*
447 * First write out any buffers we've created to resolve their
448 * softdeps. This must be done in reverse order of creation
449 * so that we resolve the dependencies in one pass.
450 * Write the cylinder group buffers for these buffers too.
451 */
452
453 for (i = num; i >= unwindidx; i--) {
454 if (i == 0) {
455 break;
456 }
457 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
458 fs->fs_bsize, false, &bp) != 0)
459 continue;
460 if (bp->b_oflags & BO_DELWRI) {
461 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
462 dbtofsb(fs, bp->b_blkno))));
463 bwrite(bp);
464 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
465 fs->fs_cgsize, false, &bp) != 0)
466 continue;
467 if (bp->b_oflags & BO_DELWRI) {
468 bwrite(bp);
469 } else {
470 brelse(bp, BC_INVAL);
471 }
472 } else {
473 brelse(bp, BC_INVAL);
474 }
475 }
476
477 /*
478 * Undo the partial allocation.
479 */
480 if (unwindidx == 0) {
481 *allocib = 0;
482 ip->i_flag |= IN_CHANGE | IN_UPDATE;
483 } else {
484 int r;
485
486 r = bread(vp, indirs[unwindidx].in_lbn,
487 (int)fs->fs_bsize, NOCRED, 0, &bp);
488 if (r) {
489 panic("Could not unwind indirect block, error %d", r);
490 brelse(bp, 0);
491 } else {
492 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
493 bap[indirs[unwindidx].in_off] = 0;
494 bwrite(bp);
495 }
496 }
497 for (i = unwindidx + 1; i <= num; i++) {
498 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
499 fs->fs_bsize, false, &bp) == 0)
500 brelse(bp, BC_INVAL);
501 }
502 }
503 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
504 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
505 deallocated += fs->fs_bsize;
506 }
507 if (deallocated) {
508 #if defined(QUOTA) || defined(QUOTA2)
509 /*
510 * Restore user's disk quota because allocation failed.
511 */
512 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
513 #endif
514 ip->i_ffs1_blocks -= btodb(deallocated);
515 ip->i_flag |= IN_CHANGE | IN_UPDATE;
516 }
517 return (error);
518 }
519
520 static int
521 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
522 int flags, struct buf **bpp)
523 {
524 daddr_t lbn, lastlbn;
525 struct buf *bp, *nbp;
526 struct inode *ip = VTOI(vp);
527 struct fs *fs = ip->i_fs;
528 struct ufsmount *ump = ip->i_ump;
529 struct indir indirs[NIADDR + 2];
530 daddr_t newb, pref, nb;
531 int64_t *bap;
532 int deallocated, osize, nsize, num, i, error;
533 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
534 int64_t *allocib;
535 int unwindidx = -1;
536 #ifdef FFS_EI
537 const int needswap = UFS_FSNEEDSWAP(fs);
538 #endif
539 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
540
541 lbn = lblkno(fs, off);
542 size = blkoff(fs, off) + size;
543 if (size > fs->fs_bsize)
544 panic("ffs_balloc: blk too big");
545 if (bpp != NULL) {
546 *bpp = NULL;
547 }
548 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
549
550 if (lbn < 0)
551 return (EFBIG);
552
553 #ifdef notyet
554 /*
555 * Check for allocating external data.
556 */
557 if (flags & IO_EXT) {
558 if (lbn >= NXADDR)
559 return (EFBIG);
560 /*
561 * If the next write will extend the data into a new block,
562 * and the data is currently composed of a fragment
563 * this fragment has to be extended to be a full block.
564 */
565 lastlbn = lblkno(fs, dp->di_extsize);
566 if (lastlbn < lbn) {
567 nb = lastlbn;
568 osize = sblksize(fs, dp->di_extsize, nb);
569 if (osize < fs->fs_bsize && osize > 0) {
570 mutex_enter(&ump->um_lock);
571 error = ffs_realloccg(ip, -1 - nb,
572 dp->di_extb[nb],
573 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
574 flags, &dp->di_extb[0]),
575 osize,
576 (int)fs->fs_bsize, cred, &bp);
577 if (error)
578 return (error);
579 dp->di_extsize = smalllblktosize(fs, nb + 1);
580 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
581 bp->b_xflags |= BX_ALTDATA;
582 ip->i_flag |= IN_CHANGE | IN_UPDATE;
583 if (flags & IO_SYNC)
584 bwrite(bp);
585 else
586 bawrite(bp);
587 }
588 }
589 /*
590 * All blocks are direct blocks
591 */
592 if (flags & BA_METAONLY)
593 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
594 nb = dp->di_extb[lbn];
595 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
596 error = bread(vp, -1 - lbn, fs->fs_bsize,
597 NOCRED, 0, &bp);
598 if (error) {
599 brelse(bp, 0);
600 return (error);
601 }
602 mutex_enter(&bp->b_interlock);
603 bp->b_blkno = fsbtodb(fs, nb);
604 bp->b_xflags |= BX_ALTDATA;
605 mutex_exit(&bp->b_interlock);
606 *bpp = bp;
607 return (0);
608 }
609 if (nb != 0) {
610 /*
611 * Consider need to reallocate a fragment.
612 */
613 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
614 nsize = fragroundup(fs, size);
615 if (nsize <= osize) {
616 error = bread(vp, -1 - lbn, osize,
617 NOCRED, 0, &bp);
618 if (error) {
619 brelse(bp, 0);
620 return (error);
621 }
622 mutex_enter(&bp->b_interlock);
623 bp->b_blkno = fsbtodb(fs, nb);
624 bp->b_xflags |= BX_ALTDATA;
625 mutex_exit(&bp->b_interlock);
626 } else {
627 mutex_enter(&ump->um_lock);
628 error = ffs_realloccg(ip, -1 - lbn,
629 dp->di_extb[lbn],
630 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
631 &dp->di_extb[0]),
632 osize, nsize, cred, &bp);
633 if (error)
634 return (error);
635 bp->b_xflags |= BX_ALTDATA;
636 }
637 } else {
638 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
639 nsize = fragroundup(fs, size);
640 else
641 nsize = fs->fs_bsize;
642 mutex_enter(&ump->um_lock);
643 error = ffs_alloc(ip, lbn,
644 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
645 &dp->di_extb[0]),
646 nsize, flags, cred, &newb);
647 if (error)
648 return (error);
649 error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
650 nsize, (flags & BA_CLRBUF) != 0, &bp);
651 if (error)
652 return error;
653 bp->b_xflags |= BX_ALTDATA;
654 }
655 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
656 ip->i_flag |= IN_CHANGE | IN_UPDATE;
657 *bpp = bp;
658 return (0);
659 }
660 #endif
661 /*
662 * If the next write will extend the file into a new block,
663 * and the file is currently composed of a fragment
664 * this fragment has to be extended to be a full block.
665 */
666
667 lastlbn = lblkno(fs, ip->i_size);
668 if (lastlbn < NDADDR && lastlbn < lbn) {
669 nb = lastlbn;
670 osize = blksize(fs, ip, nb);
671 if (osize < fs->fs_bsize && osize > 0) {
672 mutex_enter(&ump->um_lock);
673 error = ffs_realloccg(ip, nb,
674 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
675 &ip->i_ffs2_db[0]),
676 osize, (int)fs->fs_bsize, cred, bpp, &newb);
677 if (error)
678 return (error);
679 ip->i_size = lblktosize(fs, nb + 1);
680 ip->i_ffs2_size = ip->i_size;
681 uvm_vnp_setsize(vp, ip->i_size);
682 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
683 ip->i_flag |= IN_CHANGE | IN_UPDATE;
684 if (bpp) {
685 if (flags & B_SYNC)
686 bwrite(*bpp);
687 else
688 bawrite(*bpp);
689 }
690 }
691 }
692
693 /*
694 * The first NDADDR blocks are direct blocks
695 */
696
697 if (lbn < NDADDR) {
698 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
699 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
700
701 /*
702 * The block is an already-allocated direct block
703 * and the file already extends past this block,
704 * thus this must be a whole block.
705 * Just read the block (if requested).
706 */
707
708 if (bpp != NULL) {
709 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
710 B_MODIFY, bpp);
711 if (error) {
712 brelse(*bpp, 0);
713 return (error);
714 }
715 }
716 return (0);
717 }
718 if (nb != 0) {
719
720 /*
721 * Consider need to reallocate a fragment.
722 */
723
724 osize = fragroundup(fs, blkoff(fs, ip->i_size));
725 nsize = fragroundup(fs, size);
726 if (nsize <= osize) {
727
728 /*
729 * The existing block is already
730 * at least as big as we want.
731 * Just read the block (if requested).
732 */
733
734 if (bpp != NULL) {
735 error = bread(vp, lbn, osize, NOCRED,
736 B_MODIFY, bpp);
737 if (error) {
738 brelse(*bpp, 0);
739 return (error);
740 }
741 }
742 return 0;
743 } else {
744
745 /*
746 * The existing block is smaller than we want,
747 * grow it.
748 */
749 mutex_enter(&ump->um_lock);
750 error = ffs_realloccg(ip, lbn,
751 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
752 &ip->i_ffs2_db[0]),
753 osize, nsize, cred, bpp, &newb);
754 if (error)
755 return (error);
756 }
757 } else {
758
759 /*
760 * the block was not previously allocated,
761 * allocate a new block or fragment.
762 */
763
764 if (ip->i_size < lblktosize(fs, lbn + 1))
765 nsize = fragroundup(fs, size);
766 else
767 nsize = fs->fs_bsize;
768 mutex_enter(&ump->um_lock);
769 error = ffs_alloc(ip, lbn,
770 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
771 &ip->i_ffs2_db[0]),
772 nsize, flags, cred, &newb);
773 if (error)
774 return (error);
775 if (bpp != NULL) {
776 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
777 nsize, (flags & B_CLRBUF) != 0, bpp);
778 if (error)
779 return error;
780 }
781 }
782 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
783 ip->i_flag |= IN_CHANGE | IN_UPDATE;
784 return (0);
785 }
786
787 /*
788 * Determine the number of levels of indirection.
789 */
790
791 pref = 0;
792 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
793 return (error);
794
795 /*
796 * Fetch the first indirect block allocating if necessary.
797 */
798
799 --num;
800 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
801 allocib = NULL;
802 allocblk = allociblk;
803 if (nb == 0) {
804 mutex_enter(&ump->um_lock);
805 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
806 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
807 flags | B_METAONLY, cred, &newb);
808 if (error)
809 goto fail;
810 nb = newb;
811 *allocblk++ = nb;
812 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
813 fs->fs_bsize, true, &bp);
814 if (error)
815 goto fail;
816 /*
817 * Write synchronously so that indirect blocks
818 * never point at garbage.
819 */
820 if ((error = bwrite(bp)) != 0)
821 goto fail;
822 unwindidx = 0;
823 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
824 *allocib = ufs_rw64(nb, needswap);
825 ip->i_flag |= IN_CHANGE | IN_UPDATE;
826 }
827
828 /*
829 * Fetch through the indirect blocks, allocating as necessary.
830 */
831
832 for (i = 1;;) {
833 error = bread(vp,
834 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
835 if (error) {
836 brelse(bp, 0);
837 goto fail;
838 }
839 bap = (int64_t *)bp->b_data;
840 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
841 if (i == num)
842 break;
843 i++;
844 if (nb != 0) {
845 brelse(bp, 0);
846 continue;
847 }
848 if (fscow_run(bp, true) != 0) {
849 brelse(bp, 0);
850 goto fail;
851 }
852 mutex_enter(&ump->um_lock);
853 /* Try to keep snapshot indirect blocks contiguous. */
854 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
855 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
856 flags | B_METAONLY, &bap[0]);
857 if (pref == 0)
858 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
859 NULL);
860 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
861 flags | B_METAONLY, cred, &newb);
862 if (error) {
863 brelse(bp, 0);
864 goto fail;
865 }
866 nb = newb;
867 *allocblk++ = nb;
868 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
869 fs->fs_bsize, true, &nbp);
870 if (error) {
871 brelse(bp, 0);
872 goto fail;
873 }
874 /*
875 * Write synchronously so that indirect blocks
876 * never point at garbage.
877 */
878 if ((error = bwrite(nbp)) != 0) {
879 brelse(bp, 0);
880 goto fail;
881 }
882 if (unwindidx < 0)
883 unwindidx = i - 1;
884 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
885
886 /*
887 * If required, write synchronously, otherwise use
888 * delayed write.
889 */
890
891 if (flags & B_SYNC) {
892 bwrite(bp);
893 } else {
894 bdwrite(bp);
895 }
896 }
897
898 if (flags & B_METAONLY) {
899 KASSERT(bpp != NULL);
900 *bpp = bp;
901 return (0);
902 }
903
904 /*
905 * Get the data block, allocating if necessary.
906 */
907
908 if (nb == 0) {
909 if (fscow_run(bp, true) != 0) {
910 brelse(bp, 0);
911 goto fail;
912 }
913 mutex_enter(&ump->um_lock);
914 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
915 &bap[0]);
916 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
917 &newb);
918 if (error) {
919 brelse(bp, 0);
920 goto fail;
921 }
922 nb = newb;
923 *allocblk++ = nb;
924 if (bpp != NULL) {
925 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
926 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
927 if (error) {
928 brelse(bp, 0);
929 goto fail;
930 }
931 }
932 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
933 if (allocib == NULL && unwindidx < 0) {
934 unwindidx = i - 1;
935 }
936
937 /*
938 * If required, write synchronously, otherwise use
939 * delayed write.
940 */
941
942 if (flags & B_SYNC) {
943 bwrite(bp);
944 } else {
945 bdwrite(bp);
946 }
947 return (0);
948 }
949 brelse(bp, 0);
950 if (bpp != NULL) {
951 if (flags & B_CLRBUF) {
952 error = bread(vp, lbn, (int)fs->fs_bsize,
953 NOCRED, B_MODIFY, &nbp);
954 if (error) {
955 brelse(nbp, 0);
956 goto fail;
957 }
958 } else {
959 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
960 fs->fs_bsize, true, &nbp);
961 if (error)
962 goto fail;
963 }
964 *bpp = nbp;
965 }
966 return (0);
967
968 fail:
969 /*
970 * If we have failed part way through block allocation, we
971 * have to deallocate any indirect blocks that we have allocated.
972 */
973
974 if (unwindidx >= 0) {
975
976 /*
977 * First write out any buffers we've created to resolve their
978 * softdeps. This must be done in reverse order of creation
979 * so that we resolve the dependencies in one pass.
980 * Write the cylinder group buffers for these buffers too.
981 */
982
983 for (i = num; i >= unwindidx; i--) {
984 if (i == 0) {
985 break;
986 }
987 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
988 fs->fs_bsize, false, &bp) != 0)
989 continue;
990 if (bp->b_oflags & BO_DELWRI) {
991 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
992 dbtofsb(fs, bp->b_blkno))));
993 bwrite(bp);
994 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
995 fs->fs_cgsize, false, &bp) != 0)
996 continue;
997 if (bp->b_oflags & BO_DELWRI) {
998 bwrite(bp);
999 } else {
1000 brelse(bp, BC_INVAL);
1001 }
1002 } else {
1003 brelse(bp, BC_INVAL);
1004 }
1005 }
1006
1007 /*
1008 * Now that any dependencies that we created have been
1009 * resolved, we can undo the partial allocation.
1010 */
1011
1012 if (unwindidx == 0) {
1013 *allocib = 0;
1014 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1015 } else {
1016 int r;
1017
1018 r = bread(vp, indirs[unwindidx].in_lbn,
1019 (int)fs->fs_bsize, NOCRED, 0, &bp);
1020 if (r) {
1021 panic("Could not unwind indirect block, error %d", r);
1022 brelse(bp, 0);
1023 } else {
1024 bap = (int64_t *)bp->b_data;
1025 bap[indirs[unwindidx].in_off] = 0;
1026 bwrite(bp);
1027 }
1028 }
1029 for (i = unwindidx + 1; i <= num; i++) {
1030 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1031 fs->fs_bsize, false, &bp) == 0)
1032 brelse(bp, BC_INVAL);
1033 }
1034 }
1035 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1036 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1037 deallocated += fs->fs_bsize;
1038 }
1039 if (deallocated) {
1040 #if defined(QUOTA) || defined(QUOTA2)
1041 /*
1042 * Restore user's disk quota because allocation failed.
1043 */
1044 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1045 #endif
1046 ip->i_ffs2_blocks -= btodb(deallocated);
1047 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1048 }
1049
1050 return (error);
1051 }
1052