/*	$NetBSD: ffs_balloc.c,v 1.65 2020/09/05 16:30:13 riastradh Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.65 2020/09/05 16:30:13 riastradh Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #include "opt_uvmhist.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/buf.h>
54 #include <sys/file.h>
55 #include <sys/mount.h>
56 #include <sys/vnode.h>
57 #include <sys/kauth.h>
58 #include <sys/fstrans.h>
59
60 #include <ufs/ufs/quota.h>
61 #include <ufs/ufs/ufsmount.h>
62 #include <ufs/ufs/inode.h>
63 #include <ufs/ufs/ufs_extern.h>
64 #include <ufs/ufs/ufs_bswap.h>
65
66 #include <ufs/ffs/fs.h>
67 #include <ufs/ffs/ffs_extern.h>
68
69 #ifdef UVMHIST
70 #include <uvm/uvm.h>
71 #endif
72 #include <uvm/uvm_extern.h>
73 #include <uvm/uvm_stat.h>
74
75 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
76 struct buf **);
77 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
78 struct buf **);
79
80 static daddr_t
81 ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb)
82 {
83 return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs));
84 }
85
86 /*
87 * Balloc defines the structure of file system storage
88 * by allocating the physical blocks on a device given
89 * the inode and the logical block number in a file.
90 */
91
92 int
93 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
94 struct buf **bpp)
95 {
96 int error;
97
98 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
99 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
100 else
101 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
102
103 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
104 brelse(*bpp, 0);
105
106 return error;
107 }
108
/*
 * UFS1 flavour of ffs_balloc().
 *
 * Grow or allocate the logical block containing byte offset "off" of
 * vp so that it covers at least ffs_blkoff(fs, off) + size bytes.
 * Handles fragment extension, direct blocks, and up to UFS_NIADDR
 * levels of indirection; cred is charged for the new blocks.  On
 * success, if bpp is non-NULL, *bpp is the busy buffer for the block
 * (with B_METAONLY, the indirect block instead).  Returns 0 or an
 * errno; on failure all blocks allocated during this call are freed
 * and the indirect chain is unwound.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated here, for unwind on error. */
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	/* Index of the first indirect level we modified; -1 = none yet. */
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	/* "size" becomes the required length of the block, not the write. */
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
	    lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
			    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
				&ip->i_ffs1_db[0]),
			    osize, (int)fs->fs_bsize, flags, cred, bpp,
			    &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
				    B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
					    B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_getdb(fs, ip, lbn),
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, flags, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* At the last level, drop out holding bp; nb is the data block. */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/* Caller only wanted the last indirect block; hand it back busy. */
	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			/* We hooked into the inode itself; clear that pointer. */
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	/* Free every block allocated during this call and fix accounting. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
526
527 static int
528 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
529 int flags, struct buf **bpp)
530 {
531 daddr_t lbn, lastlbn;
532 struct buf *bp, *nbp;
533 struct inode *ip = VTOI(vp);
534 struct fs *fs = ip->i_fs;
535 struct ufsmount *ump = ip->i_ump;
536 struct indir indirs[UFS_NIADDR + 2];
537 daddr_t newb, pref, nb;
538 int64_t *bap;
539 int deallocated, osize, nsize, num, i, error;
540 daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
541 int64_t *allocib;
542 int unwindidx = -1;
543 const int needswap = UFS_FSNEEDSWAP(fs);
544 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
545
546 lbn = ffs_lblkno(fs, off);
547 size = ffs_blkoff(fs, off) + size;
548 if (size > fs->fs_bsize)
549 panic("ffs_balloc: blk too big");
550 if (bpp != NULL) {
551 *bpp = NULL;
552 }
553 UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
554 lbn, size, 0);
555
556 if (lbn < 0)
557 return (EFBIG);
558
559 /*
560 * Check for allocating external data.
561 */
562 if (flags & IO_EXT) {
563 struct ufs2_dinode *dp = ip->i_din.ffs2_din;
564 if (lbn >= UFS_NXADDR)
565 return (EFBIG);
566 /*
567 * If the next write will extend the data into a new block,
568 * and the data is currently composed of a fragment
569 * this fragment has to be extended to be a full block.
570 */
571 lastlbn = ffs_lblkno(fs, dp->di_extsize);
572 if (lastlbn < lbn) {
573 nb = lastlbn;
574 osize = ffs_sblksize(fs, dp->di_extsize, nb);
575 if (osize < fs->fs_bsize && osize > 0) {
576 mutex_enter(&ump->um_lock);
577 error = ffs_realloccg(ip, -1 - nb,
578 ffs_extb(fs, dp, nb),
579 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
580 flags, &dp->di_extb[0]),
581 osize, (int)fs->fs_bsize, flags, cred,
582 &bp, &newb);
583 if (error)
584 return (error);
585 dp->di_extsize = ffs_lblktosize(fs, nb + 1);
586 dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
587 ip->i_flag |= IN_CHANGE | IN_UPDATE;
588 if (flags & IO_SYNC)
589 bwrite(bp);
590 else
591 bawrite(bp);
592 }
593 }
594 /*
595 * All blocks are direct blocks
596 */
597 nb = dp->di_extb[lbn];
598 if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) {
599 error = bread(vp, -1 - lbn, fs->fs_bsize,
600 0, &bp);
601 if (error) {
602 return (error);
603 }
604 mutex_enter(bp->b_objlock);
605 bp->b_blkno = FFS_FSBTODB(fs, nb);
606 mutex_exit(bp->b_objlock);
607 *bpp = bp;
608 return (0);
609 }
610 if (nb != 0) {
611 /*
612 * Consider need to reallocate a fragment.
613 */
614 osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
615 nsize = ffs_fragroundup(fs, size);
616 if (nsize <= osize) {
617 error = bread(vp, -1 - lbn, osize,
618 0, &bp);
619 if (error) {
620 return (error);
621 }
622 mutex_enter(bp->b_objlock);
623 bp->b_blkno = FFS_FSBTODB(fs, nb);
624 mutex_exit(bp->b_objlock);
625 } else {
626 mutex_enter(&ump->um_lock);
627 error = ffs_realloccg(ip, -1 - lbn,
628 ffs_extb(fs, dp, lbn),
629 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
630 &dp->di_extb[0]),
631 osize, nsize, flags, cred, &bp, &newb);
632 if (error)
633 return (error);
634 }
635 } else {
636 if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1))
637 nsize = ffs_fragroundup(fs, size);
638 else
639 nsize = fs->fs_bsize;
640 mutex_enter(&ump->um_lock);
641 error = ffs_alloc(ip, lbn,
642 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
643 &dp->di_extb[0]),
644 nsize, flags, cred, &newb);
645 if (error)
646 return (error);
647 error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
648 nsize, (flags & B_CLRBUF) != 0, &bp);
649 if (error)
650 return error;
651 }
652 dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
653 ip->i_flag |= IN_CHANGE | IN_UPDATE;
654 *bpp = bp;
655 return (0);
656 }
657 /*
658 * If the next write will extend the file into a new block,
659 * and the file is currently composed of a fragment
660 * this fragment has to be extended to be a full block.
661 */
662
663 lastlbn = ffs_lblkno(fs, ip->i_size);
664 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
665 nb = lastlbn;
666 osize = ffs_blksize(fs, ip, nb);
667 if (osize < fs->fs_bsize && osize > 0) {
668 mutex_enter(&ump->um_lock);
669 error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, lbn),
670 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
671 &ip->i_ffs2_db[0]),
672 osize, (int)fs->fs_bsize, flags, cred, bpp,
673 &newb);
674 if (error)
675 return (error);
676 ip->i_size = ffs_lblktosize(fs, nb + 1);
677 ip->i_ffs2_size = ip->i_size;
678 uvm_vnp_setsize(vp, ip->i_size);
679 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
680 ip->i_flag |= IN_CHANGE | IN_UPDATE;
681 if (bpp) {
682 if (flags & B_SYNC)
683 bwrite(*bpp);
684 else
685 bawrite(*bpp);
686 }
687 }
688 }
689
690 /*
691 * The first UFS_NDADDR blocks are direct blocks
692 */
693
694 if (lbn < UFS_NDADDR) {
695 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
696 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
697
698 /*
699 * The block is an already-allocated direct block
700 * and the file already extends past this block,
701 * thus this must be a whole block.
702 * Just read the block (if requested).
703 */
704
705 if (bpp != NULL) {
706 error = bread(vp, lbn, fs->fs_bsize,
707 B_MODIFY, bpp);
708 if (error) {
709 return (error);
710 }
711 }
712 return (0);
713 }
714 if (nb != 0) {
715
716 /*
717 * Consider need to reallocate a fragment.
718 */
719
720 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
721 nsize = ffs_fragroundup(fs, size);
722 if (nsize <= osize) {
723
724 /*
725 * The existing block is already
726 * at least as big as we want.
727 * Just read the block (if requested).
728 */
729
730 if (bpp != NULL) {
731 error = bread(vp, lbn, osize,
732 B_MODIFY, bpp);
733 if (error) {
734 return (error);
735 }
736 }
737 return 0;
738 } else {
739
740 /*
741 * The existing block is smaller than we want,
742 * grow it.
743 */
744 mutex_enter(&ump->um_lock);
745 error = ffs_realloccg(ip, lbn,
746 ffs_getdb(fs, ip, lbn),
747 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
748 &ip->i_ffs2_db[0]),
749 osize, nsize, flags, cred, bpp, &newb);
750 if (error)
751 return (error);
752 }
753 } else {
754
755 /*
756 * the block was not previously allocated,
757 * allocate a new block or fragment.
758 */
759
760 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
761 nsize = ffs_fragroundup(fs, size);
762 else
763 nsize = fs->fs_bsize;
764 mutex_enter(&ump->um_lock);
765 error = ffs_alloc(ip, lbn,
766 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
767 &ip->i_ffs2_db[0]),
768 nsize, flags, cred, &newb);
769 if (error)
770 return (error);
771 if (bpp != NULL) {
772 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
773 nsize, (flags & B_CLRBUF) != 0, bpp);
774 if (error)
775 return error;
776 }
777 }
778 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
779 ip->i_flag |= IN_CHANGE | IN_UPDATE;
780 return (0);
781 }
782
783 /*
784 * Determine the number of levels of indirection.
785 */
786
787 pref = 0;
788 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
789 return (error);
790
791 /*
792 * Fetch the first indirect block allocating if necessary.
793 */
794
795 --num;
796 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
797 allocib = NULL;
798 allocblk = allociblk;
799 if (nb == 0) {
800 mutex_enter(&ump->um_lock);
801 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
802 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
803 flags | B_METAONLY, cred, &newb);
804 if (error)
805 goto fail;
806 nb = newb;
807 *allocblk++ = nb;
808 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
809 fs->fs_bsize, true, &bp);
810 if (error)
811 goto fail;
812 /*
813 * Write synchronously so that indirect blocks
814 * never point at garbage.
815 */
816 if ((error = bwrite(bp)) != 0)
817 goto fail;
818 unwindidx = 0;
819 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
820 *allocib = ufs_rw64(nb, needswap);
821 ip->i_flag |= IN_CHANGE | IN_UPDATE;
822 }
823
824 /*
825 * Fetch through the indirect blocks, allocating as necessary.
826 */
827
828 for (i = 1;;) {
829 error = bread(vp,
830 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
831 if (error) {
832 goto fail;
833 }
834 bap = (int64_t *)bp->b_data;
835 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
836 if (i == num)
837 break;
838 i++;
839 if (nb != 0) {
840 brelse(bp, 0);
841 continue;
842 }
843 if (fscow_run(bp, true) != 0) {
844 brelse(bp, 0);
845 goto fail;
846 }
847 mutex_enter(&ump->um_lock);
848 /* Try to keep snapshot indirect blocks contiguous. */
849 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
850 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
851 flags | B_METAONLY, &bap[0]);
852 if (pref == 0)
853 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
854 NULL);
855 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
856 flags | B_METAONLY, cred, &newb);
857 if (error) {
858 brelse(bp, 0);
859 goto fail;
860 }
861 nb = newb;
862 *allocblk++ = nb;
863 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
864 fs->fs_bsize, true, &nbp);
865 if (error) {
866 brelse(bp, 0);
867 goto fail;
868 }
869 /*
870 * Write synchronously so that indirect blocks
871 * never point at garbage.
872 */
873 if ((error = bwrite(nbp)) != 0) {
874 brelse(bp, 0);
875 goto fail;
876 }
877 if (unwindidx < 0)
878 unwindidx = i - 1;
879 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
880
881 /*
882 * If required, write synchronously, otherwise use
883 * delayed write.
884 */
885
886 if (flags & B_SYNC) {
887 bwrite(bp);
888 } else {
889 bdwrite(bp);
890 }
891 }
892
893 if (flags & B_METAONLY) {
894 KASSERT(bpp != NULL);
895 *bpp = bp;
896 return (0);
897 }
898
899 /*
900 * Get the data block, allocating if necessary.
901 */
902
903 if (nb == 0) {
904 if (fscow_run(bp, true) != 0) {
905 brelse(bp, 0);
906 goto fail;
907 }
908 mutex_enter(&ump->um_lock);
909 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
910 &bap[0]);
911 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
912 &newb);
913 if (error) {
914 brelse(bp, 0);
915 goto fail;
916 }
917 nb = newb;
918 *allocblk++ = nb;
919 if (bpp != NULL) {
920 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
921 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
922 if (error) {
923 brelse(bp, 0);
924 goto fail;
925 }
926 }
927 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
928 if (allocib == NULL && unwindidx < 0) {
929 unwindidx = i - 1;
930 }
931
932 /*
933 * If required, write synchronously, otherwise use
934 * delayed write.
935 */
936
937 if (flags & B_SYNC) {
938 bwrite(bp);
939 } else {
940 bdwrite(bp);
941 }
942 return (0);
943 }
944 brelse(bp, 0);
945 if (bpp != NULL) {
946 if (flags & B_CLRBUF) {
947 error = bread(vp, lbn, (int)fs->fs_bsize,
948 B_MODIFY, &nbp);
949 if (error) {
950 goto fail;
951 }
952 } else {
953 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
954 fs->fs_bsize, true, &nbp);
955 if (error)
956 goto fail;
957 }
958 *bpp = nbp;
959 }
960 return (0);
961
962 fail:
963 /*
964 * If we have failed part way through block allocation, we
965 * have to deallocate any indirect blocks that we have allocated.
966 */
967
968 if (unwindidx >= 0) {
969
970 /*
971 * First write out any buffers we've created to resolve their
972 * softdeps. This must be done in reverse order of creation
973 * so that we resolve the dependencies in one pass.
974 * Write the cylinder group buffers for these buffers too.
975 */
976
977 for (i = num; i >= unwindidx; i--) {
978 if (i == 0) {
979 break;
980 }
981 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
982 fs->fs_bsize, false, &bp) != 0)
983 continue;
984 if (bp->b_oflags & BO_DELWRI) {
985 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
986 FFS_DBTOFSB(fs, bp->b_blkno))));
987 bwrite(bp);
988 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
989 fs->fs_cgsize, false, &bp) != 0)
990 continue;
991 if (bp->b_oflags & BO_DELWRI) {
992 bwrite(bp);
993 } else {
994 brelse(bp, BC_INVAL);
995 }
996 } else {
997 brelse(bp, BC_INVAL);
998 }
999 }
1000
1001 /*
1002 * Now that any dependencies that we created have been
1003 * resolved, we can undo the partial allocation.
1004 */
1005
1006 if (unwindidx == 0) {
1007 *allocib = 0;
1008 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1009 } else {
1010 int r;
1011
1012 r = bread(vp, indirs[unwindidx].in_lbn,
1013 (int)fs->fs_bsize, 0, &bp);
1014 if (r) {
1015 panic("Could not unwind indirect block, error %d", r);
1016 } else {
1017 bap = (int64_t *)bp->b_data;
1018 bap[indirs[unwindidx].in_off] = 0;
1019 bwrite(bp);
1020 }
1021 }
1022 for (i = unwindidx + 1; i <= num; i++) {
1023 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1024 fs->fs_bsize, false, &bp) == 0)
1025 brelse(bp, BC_INVAL);
1026 }
1027 }
1028 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1029 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1030 deallocated += fs->fs_bsize;
1031 }
1032 if (deallocated) {
1033 #if defined(QUOTA) || defined(QUOTA2)
1034 /*
1035 * Restore user's disk quota because allocation failed.
1036 */
1037 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1038 #endif
1039 ip->i_ffs2_blocks -= btodb(deallocated);
1040 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1041 }
1042
1043 return (error);
1044 }
1045