/*	$NetBSD: ffs_balloc.c,v 1.54.4.1 2013/01/23 00:06:32 yamt Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.54.4.1 2013/01/23 00:06:32 yamt Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67
68 #include <uvm/uvm.h>
69
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
74
75 /*
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
79 */
80
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
84 {
85 int error;
86
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
94
95 return error;
96 }
97
/*
 * UFS1 implementation of ffs_balloc: ensure that the block backing byte
 * offset "off" of "vp" exists on disk and is large enough to hold "size"
 * bytes starting at that offset, allocating data and indirect blocks as
 * needed.  If "bpp" is non-NULL a buffer for the data block is returned
 * through it.  On failure, every block allocated during this call is
 * freed and any indirect-block pointers installed here are cleared, so
 * the on-disk file is left consistent.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/*
	 * allociblk[] records every block allocated by this call
	 * (allocblk is the fill pointer) so the "fail" path can free
	 * them; allocib/unwindidx remember where the first new pointer
	 * was installed so it can be cleared on unwind.
	 */
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* From here on, "size" counts from the start of the block. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* um_lock is released inside ffs_realloccg. */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
				&ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Install the new direct-block pointer in the inode. */
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* Loop exits here holding "bp", the last indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Only now link the new child into its parent. */
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* Caller only wanted the last indirect block: hand it over. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already existed; just fetch it if requested. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* nb = cylinder-group block of this buffer. */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation: clear the pointer that
		 * first linked the new subtree into the file.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate any cached buffers of the now-freed blocks. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Free every block allocated during this call. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
514
/*
 * UFS2 implementation of ffs_balloc: same contract as ffs_balloc_ufs1
 * but operating on the 64-bit on-disk block pointers (i_ffs2_db/_ib,
 * ufs_rw64).  Ensures the block backing byte offset "off" exists and
 * covers "size" bytes, allocating data and indirect blocks as needed;
 * on failure all blocks allocated here are freed and newly-installed
 * pointers cleared.
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/*
	 * allociblk[] records every block allocated by this call
	 * (allocblk is the fill pointer) so the "fail" path can free
	 * them; allocib/unwindidx remember where the first new pointer
	 * was installed so it can be cleared on unwind.
	 */
	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int64_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	/* From here on, "size" counts from the start of the block. */
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 * NOTE(review): this whole section is compiled out; it refers to
	 * "dp" and FreeBSD-style flags (IO_EXT, BA_METAONLY, BA_CLRBUF)
	 * that do not exist in this file yet.
	 */
	if (flags & IO_EXT) {
		if (lbn >= UFS_NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					flags, &dp->di_extb[0]),
				    osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    NOCRED, 0, &bp);
			if (error) {
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    NOCRED, 0, &bp);
				if (error) {
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&dp->di_extb[0]),
				    osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&dp->di_extb[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
			    nsize, (flags & BA_CLRBUF) != 0, &bp);
			if (error)
				return error;
			bp->b_xflags |= BX_ALTDATA;
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* um_lock is released inside ffs_realloccg. */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
				&ip->i_ffs2_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&ip->i_ffs2_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&ip->i_ffs2_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		/* Install the new direct-block pointer in the inode. */
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		/* Loop exits here holding "bp", the last indirect block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* Only now link the new child into its parent. */
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* Caller only wanted the last indirect block: hand it over. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already existed; just fetch it if requested. */
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				/* nb = cylinder-group block of this buffer. */
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation: clear the
		 * pointer that first linked the new subtree into the file.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		/* Invalidate any cached buffers of the now-freed blocks. */
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Free every block allocated during this call. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	return (error);
}
1040