/*	$NetBSD: ffs_balloc.c,v 1.63.4.1 2020/04/21 18:42:45 martin Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.63.4.1 2020/04/21 18:42:45 martin Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67
68 #include <uvm/uvm.h>
69
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
74
75 static daddr_t
76 ffs_extb(struct fs *fs, struct ufs2_dinode *dp, daddr_t nb)
77 {
78 return ufs_rw64(dp->di_extb[nb], UFS_FSNEEDSWAP(fs));
79 }
80
81 /*
82 * Balloc defines the structure of file system storage
83 * by allocating the physical blocks on a device given
84 * the inode and the logical block number in a file.
85 */
86
87 int
88 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
89 struct buf **bpp)
90 {
91 int error;
92
93 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
94 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
95 else
96 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
97
98 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
99 brelse(*bpp, 0);
100
101 return error;
102 }
103
104 static int
105 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
106 int flags, struct buf **bpp)
107 {
108 daddr_t lbn, lastlbn;
109 struct buf *bp, *nbp;
110 struct inode *ip = VTOI(vp);
111 struct fs *fs = ip->i_fs;
112 struct ufsmount *ump = ip->i_ump;
113 struct indir indirs[UFS_NIADDR + 2];
114 daddr_t newb, pref, nb;
115 int32_t *bap; /* XXX ondisk32 */
116 int deallocated, osize, nsize, num, i, error;
117 int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
118 int32_t *allocib;
119 int unwindidx = -1;
120 const int needswap = UFS_FSNEEDSWAP(fs);
121 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
122
123 lbn = ffs_lblkno(fs, off);
124 size = ffs_blkoff(fs, off) + size;
125 if (size > fs->fs_bsize)
126 panic("ffs_balloc: blk too big");
127 if (bpp != NULL) {
128 *bpp = NULL;
129 }
130 UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
131 lbn, size, 0);
132
133 if (lbn < 0)
134 return (EFBIG);
135
136 /*
137 * If the next write will extend the file into a new block,
138 * and the file is currently composed of a fragment
139 * this fragment has to be extended to be a full block.
140 */
141
142 lastlbn = ffs_lblkno(fs, ip->i_size);
143 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
144 nb = lastlbn;
145 osize = ffs_blksize(fs, ip, nb);
146 if (osize < fs->fs_bsize && osize > 0) {
147 mutex_enter(&ump->um_lock);
148 error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, nb),
149 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
150 &ip->i_ffs1_db[0]),
151 osize, (int)fs->fs_bsize, flags, cred, bpp,
152 &newb);
153 if (error)
154 return (error);
155 ip->i_size = ffs_lblktosize(fs, nb + 1);
156 ip->i_ffs1_size = ip->i_size;
157 uvm_vnp_setsize(vp, ip->i_ffs1_size);
158 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
159 ip->i_flag |= IN_CHANGE | IN_UPDATE;
160 if (bpp && *bpp) {
161 if (flags & B_SYNC)
162 bwrite(*bpp);
163 else
164 bawrite(*bpp);
165 }
166 }
167 }
168
169 /*
170 * The first UFS_NDADDR blocks are direct blocks
171 */
172
173 if (lbn < UFS_NDADDR) {
174 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
175 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
176
177 /*
178 * The block is an already-allocated direct block
179 * and the file already extends past this block,
180 * thus this must be a whole block.
181 * Just read the block (if requested).
182 */
183
184 if (bpp != NULL) {
185 error = bread(vp, lbn, fs->fs_bsize,
186 B_MODIFY, bpp);
187 if (error) {
188 return (error);
189 }
190 }
191 return (0);
192 }
193 if (nb != 0) {
194
195 /*
196 * Consider need to reallocate a fragment.
197 */
198
199 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
200 nsize = ffs_fragroundup(fs, size);
201 if (nsize <= osize) {
202
203 /*
204 * The existing block is already
205 * at least as big as we want.
206 * Just read the block (if requested).
207 */
208
209 if (bpp != NULL) {
210 error = bread(vp, lbn, osize,
211 B_MODIFY, bpp);
212 if (error) {
213 return (error);
214 }
215 }
216 return 0;
217 } else {
218
219 /*
220 * The existing block is smaller than we want,
221 * grow it.
222 */
223 mutex_enter(&ump->um_lock);
224 error = ffs_realloccg(ip, lbn,
225 ffs_getdb(fs, ip, lbn),
226 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
227 &ip->i_ffs1_db[0]),
228 osize, nsize, flags, cred, bpp, &newb);
229 if (error)
230 return (error);
231 }
232 } else {
233
234 /*
235 * the block was not previously allocated,
236 * allocate a new block or fragment.
237 */
238
239 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
240 nsize = ffs_fragroundup(fs, size);
241 else
242 nsize = fs->fs_bsize;
243 mutex_enter(&ump->um_lock);
244 error = ffs_alloc(ip, lbn,
245 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
246 &ip->i_ffs1_db[0]),
247 nsize, flags, cred, &newb);
248 if (error)
249 return (error);
250 if (bpp != NULL) {
251 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
252 nsize, (flags & B_CLRBUF) != 0, bpp);
253 if (error)
254 return error;
255 }
256 }
257 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
258 ip->i_flag |= IN_CHANGE | IN_UPDATE;
259 return (0);
260 }
261
262 /*
263 * Determine the number of levels of indirection.
264 */
265
266 pref = 0;
267 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
268 return (error);
269
270 /*
271 * Fetch the first indirect block allocating if necessary.
272 */
273
274 --num;
275 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
276 allocib = NULL;
277 allocblk = allociblk;
278 if (nb == 0) {
279 mutex_enter(&ump->um_lock);
280 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
281 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
282 flags | B_METAONLY, cred, &newb);
283 if (error)
284 goto fail;
285 nb = newb;
286 *allocblk++ = nb;
287 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
288 fs->fs_bsize, true, &bp);
289 if (error)
290 goto fail;
291 /*
292 * Write synchronously so that indirect blocks
293 * never point at garbage.
294 */
295 if ((error = bwrite(bp)) != 0)
296 goto fail;
297 unwindidx = 0;
298 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
299 *allocib = ufs_rw32(nb, needswap);
300 ip->i_flag |= IN_CHANGE | IN_UPDATE;
301 }
302
303 /*
304 * Fetch through the indirect blocks, allocating as necessary.
305 */
306
307 for (i = 1;;) {
308 error = bread(vp,
309 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
310 if (error) {
311 goto fail;
312 }
313 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
314 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
315 if (i == num)
316 break;
317 i++;
318 if (nb != 0) {
319 brelse(bp, 0);
320 continue;
321 }
322 if (fscow_run(bp, true) != 0) {
323 brelse(bp, 0);
324 goto fail;
325 }
326 mutex_enter(&ump->um_lock);
327 /* Try to keep snapshot indirect blocks contiguous. */
328 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
329 pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
330 flags | B_METAONLY, &bap[0]);
331 if (pref == 0)
332 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
333 NULL);
334 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
335 flags | B_METAONLY, cred, &newb);
336 if (error) {
337 brelse(bp, 0);
338 goto fail;
339 }
340 nb = newb;
341 *allocblk++ = nb;
342 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
343 fs->fs_bsize, true, &nbp);
344 if (error) {
345 brelse(bp, 0);
346 goto fail;
347 }
348 /*
349 * Write synchronously so that indirect blocks
350 * never point at garbage.
351 */
352 if ((error = bwrite(nbp)) != 0) {
353 brelse(bp, 0);
354 goto fail;
355 }
356 if (unwindidx < 0)
357 unwindidx = i - 1;
358 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
359
360 /*
361 * If required, write synchronously, otherwise use
362 * delayed write.
363 */
364
365 if (flags & B_SYNC) {
366 bwrite(bp);
367 } else {
368 bdwrite(bp);
369 }
370 }
371
372 if (flags & B_METAONLY) {
373 KASSERT(bpp != NULL);
374 *bpp = bp;
375 return (0);
376 }
377
378 /*
379 * Get the data block, allocating if necessary.
380 */
381
382 if (nb == 0) {
383 if (fscow_run(bp, true) != 0) {
384 brelse(bp, 0);
385 goto fail;
386 }
387 mutex_enter(&ump->um_lock);
388 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
389 &bap[0]);
390 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
391 &newb);
392 if (error) {
393 brelse(bp, 0);
394 goto fail;
395 }
396 nb = newb;
397 *allocblk++ = nb;
398 if (bpp != NULL) {
399 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
400 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
401 if (error) {
402 brelse(bp, 0);
403 goto fail;
404 }
405 }
406 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
407 if (allocib == NULL && unwindidx < 0) {
408 unwindidx = i - 1;
409 }
410
411 /*
412 * If required, write synchronously, otherwise use
413 * delayed write.
414 */
415
416 if (flags & B_SYNC) {
417 bwrite(bp);
418 } else {
419 bdwrite(bp);
420 }
421 return (0);
422 }
423 brelse(bp, 0);
424 if (bpp != NULL) {
425 if (flags & B_CLRBUF) {
426 error = bread(vp, lbn, (int)fs->fs_bsize,
427 B_MODIFY, &nbp);
428 if (error) {
429 goto fail;
430 }
431 } else {
432 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
433 fs->fs_bsize, true, &nbp);
434 if (error)
435 goto fail;
436 }
437 *bpp = nbp;
438 }
439 return (0);
440
441 fail:
442 /*
443 * If we have failed part way through block allocation, we
444 * have to deallocate any indirect blocks that we have allocated.
445 */
446
447 if (unwindidx >= 0) {
448
449 /*
450 * First write out any buffers we've created to resolve their
451 * softdeps. This must be done in reverse order of creation
452 * so that we resolve the dependencies in one pass.
453 * Write the cylinder group buffers for these buffers too.
454 */
455
456 for (i = num; i >= unwindidx; i--) {
457 if (i == 0) {
458 break;
459 }
460 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
461 fs->fs_bsize, false, &bp) != 0)
462 continue;
463 if (bp->b_oflags & BO_DELWRI) {
464 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
465 FFS_DBTOFSB(fs, bp->b_blkno))));
466 bwrite(bp);
467 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
468 fs->fs_cgsize, false, &bp) != 0)
469 continue;
470 if (bp->b_oflags & BO_DELWRI) {
471 bwrite(bp);
472 } else {
473 brelse(bp, BC_INVAL);
474 }
475 } else {
476 brelse(bp, BC_INVAL);
477 }
478 }
479
480 /*
481 * Undo the partial allocation.
482 */
483 if (unwindidx == 0) {
484 *allocib = 0;
485 ip->i_flag |= IN_CHANGE | IN_UPDATE;
486 } else {
487 int r;
488
489 r = bread(vp, indirs[unwindidx].in_lbn,
490 (int)fs->fs_bsize, 0, &bp);
491 if (r) {
492 panic("Could not unwind indirect block, error %d", r);
493 } else {
494 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
495 bap[indirs[unwindidx].in_off] = 0;
496 bwrite(bp);
497 }
498 }
499 for (i = unwindidx + 1; i <= num; i++) {
500 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
501 fs->fs_bsize, false, &bp) == 0)
502 brelse(bp, BC_INVAL);
503 }
504 }
505 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
506 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
507 deallocated += fs->fs_bsize;
508 }
509 if (deallocated) {
510 #if defined(QUOTA) || defined(QUOTA2)
511 /*
512 * Restore user's disk quota because allocation failed.
513 */
514 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
515 #endif
516 ip->i_ffs1_blocks -= btodb(deallocated);
517 ip->i_flag |= IN_CHANGE | IN_UPDATE;
518 }
519 return (error);
520 }
521
522 static int
523 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
524 int flags, struct buf **bpp)
525 {
526 daddr_t lbn, lastlbn;
527 struct buf *bp, *nbp;
528 struct inode *ip = VTOI(vp);
529 struct fs *fs = ip->i_fs;
530 struct ufsmount *ump = ip->i_ump;
531 struct indir indirs[UFS_NIADDR + 2];
532 daddr_t newb, pref, nb;
533 int64_t *bap;
534 int deallocated, osize, nsize, num, i, error;
535 daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
536 int64_t *allocib;
537 int unwindidx = -1;
538 const int needswap = UFS_FSNEEDSWAP(fs);
539 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
540
541 lbn = ffs_lblkno(fs, off);
542 size = ffs_blkoff(fs, off) + size;
543 if (size > fs->fs_bsize)
544 panic("ffs_balloc: blk too big");
545 if (bpp != NULL) {
546 *bpp = NULL;
547 }
548 UVMHIST_LOG(ubchist, "vp %#jx lbn 0x%jx size 0x%jx", (uintptr_t)vp,
549 lbn, size, 0);
550
551 if (lbn < 0)
552 return (EFBIG);
553
554 /*
555 * Check for allocating external data.
556 */
557 if (flags & IO_EXT) {
558 struct ufs2_dinode *dp = ip->i_din.ffs2_din;
559 if (lbn >= UFS_NXADDR)
560 return (EFBIG);
561 /*
562 * If the next write will extend the data into a new block,
563 * and the data is currently composed of a fragment
564 * this fragment has to be extended to be a full block.
565 */
566 lastlbn = ffs_lblkno(fs, dp->di_extsize);
567 if (lastlbn < lbn) {
568 nb = lastlbn;
569 osize = ffs_sblksize(fs, dp->di_extsize, nb);
570 if (osize < fs->fs_bsize && osize > 0) {
571 mutex_enter(&ump->um_lock);
572 error = ffs_realloccg(ip, -1 - nb,
573 ffs_extb(fs, dp, nb),
574 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
575 flags, &dp->di_extb[0]),
576 osize, (int)fs->fs_bsize, flags, cred,
577 &bp, &newb);
578 if (error)
579 return (error);
580 dp->di_extsize = ffs_lblktosize(fs, nb + 1);
581 dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
582 ip->i_flag |= IN_CHANGE | IN_UPDATE;
583 if (flags & IO_SYNC)
584 bwrite(bp);
585 else
586 bawrite(bp);
587 }
588 }
589 /*
590 * All blocks are direct blocks
591 */
592 nb = dp->di_extb[lbn];
593 if (nb != 0 && dp->di_extsize >= ffs_lblktosize(fs, lbn + 1)) {
594 error = bread(vp, -1 - lbn, fs->fs_bsize,
595 0, &bp);
596 if (error) {
597 return (error);
598 }
599 mutex_enter(bp->b_objlock);
600 bp->b_blkno = FFS_FSBTODB(fs, nb);
601 mutex_exit(bp->b_objlock);
602 *bpp = bp;
603 return (0);
604 }
605 if (nb != 0) {
606 /*
607 * Consider need to reallocate a fragment.
608 */
609 osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
610 nsize = ffs_fragroundup(fs, size);
611 if (nsize <= osize) {
612 error = bread(vp, -1 - lbn, osize,
613 0, &bp);
614 if (error) {
615 return (error);
616 }
617 mutex_enter(bp->b_objlock);
618 bp->b_blkno = FFS_FSBTODB(fs, nb);
619 mutex_exit(bp->b_objlock);
620 } else {
621 mutex_enter(&ump->um_lock);
622 error = ffs_realloccg(ip, -1 - lbn,
623 ffs_extb(fs, dp, lbn),
624 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
625 &dp->di_extb[0]),
626 osize, nsize, flags, cred, &bp, &newb);
627 if (error)
628 return (error);
629 }
630 } else {
631 if (dp->di_extsize < ffs_lblktosize(fs, lbn + 1))
632 nsize = ffs_fragroundup(fs, size);
633 else
634 nsize = fs->fs_bsize;
635 mutex_enter(&ump->um_lock);
636 error = ffs_alloc(ip, lbn,
637 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
638 &dp->di_extb[0]),
639 nsize, flags, cred, &newb);
640 if (error)
641 return (error);
642 error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
643 nsize, (flags & B_CLRBUF) != 0, &bp);
644 if (error)
645 return error;
646 }
647 dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
648 ip->i_flag |= IN_CHANGE | IN_UPDATE;
649 *bpp = bp;
650 return (0);
651 }
652 /*
653 * If the next write will extend the file into a new block,
654 * and the file is currently composed of a fragment
655 * this fragment has to be extended to be a full block.
656 */
657
658 lastlbn = ffs_lblkno(fs, ip->i_size);
659 if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
660 nb = lastlbn;
661 osize = ffs_blksize(fs, ip, nb);
662 if (osize < fs->fs_bsize && osize > 0) {
663 mutex_enter(&ump->um_lock);
664 error = ffs_realloccg(ip, nb, ffs_getdb(fs, ip, lbn),
665 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
666 &ip->i_ffs2_db[0]),
667 osize, (int)fs->fs_bsize, flags, cred, bpp,
668 &newb);
669 if (error)
670 return (error);
671 ip->i_size = ffs_lblktosize(fs, nb + 1);
672 ip->i_ffs2_size = ip->i_size;
673 uvm_vnp_setsize(vp, ip->i_size);
674 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
675 ip->i_flag |= IN_CHANGE | IN_UPDATE;
676 if (bpp) {
677 if (flags & B_SYNC)
678 bwrite(*bpp);
679 else
680 bawrite(*bpp);
681 }
682 }
683 }
684
685 /*
686 * The first UFS_NDADDR blocks are direct blocks
687 */
688
689 if (lbn < UFS_NDADDR) {
690 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
691 if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {
692
693 /*
694 * The block is an already-allocated direct block
695 * and the file already extends past this block,
696 * thus this must be a whole block.
697 * Just read the block (if requested).
698 */
699
700 if (bpp != NULL) {
701 error = bread(vp, lbn, fs->fs_bsize,
702 B_MODIFY, bpp);
703 if (error) {
704 return (error);
705 }
706 }
707 return (0);
708 }
709 if (nb != 0) {
710
711 /*
712 * Consider need to reallocate a fragment.
713 */
714
715 osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
716 nsize = ffs_fragroundup(fs, size);
717 if (nsize <= osize) {
718
719 /*
720 * The existing block is already
721 * at least as big as we want.
722 * Just read the block (if requested).
723 */
724
725 if (bpp != NULL) {
726 error = bread(vp, lbn, osize,
727 B_MODIFY, bpp);
728 if (error) {
729 return (error);
730 }
731 }
732 return 0;
733 } else {
734
735 /*
736 * The existing block is smaller than we want,
737 * grow it.
738 */
739 mutex_enter(&ump->um_lock);
740 error = ffs_realloccg(ip, lbn,
741 ffs_getdb(fs, ip, lbn),
742 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
743 &ip->i_ffs2_db[0]),
744 osize, nsize, flags, cred, bpp, &newb);
745 if (error)
746 return (error);
747 }
748 } else {
749
750 /*
751 * the block was not previously allocated,
752 * allocate a new block or fragment.
753 */
754
755 if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
756 nsize = ffs_fragroundup(fs, size);
757 else
758 nsize = fs->fs_bsize;
759 mutex_enter(&ump->um_lock);
760 error = ffs_alloc(ip, lbn,
761 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
762 &ip->i_ffs2_db[0]),
763 nsize, flags, cred, &newb);
764 if (error)
765 return (error);
766 if (bpp != NULL) {
767 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
768 nsize, (flags & B_CLRBUF) != 0, bpp);
769 if (error)
770 return error;
771 }
772 }
773 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
774 ip->i_flag |= IN_CHANGE | IN_UPDATE;
775 return (0);
776 }
777
778 /*
779 * Determine the number of levels of indirection.
780 */
781
782 pref = 0;
783 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
784 return (error);
785
786 /*
787 * Fetch the first indirect block allocating if necessary.
788 */
789
790 --num;
791 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
792 allocib = NULL;
793 allocblk = allociblk;
794 if (nb == 0) {
795 mutex_enter(&ump->um_lock);
796 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
797 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
798 flags | B_METAONLY, cred, &newb);
799 if (error)
800 goto fail;
801 nb = newb;
802 *allocblk++ = nb;
803 error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
804 fs->fs_bsize, true, &bp);
805 if (error)
806 goto fail;
807 /*
808 * Write synchronously so that indirect blocks
809 * never point at garbage.
810 */
811 if ((error = bwrite(bp)) != 0)
812 goto fail;
813 unwindidx = 0;
814 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
815 *allocib = ufs_rw64(nb, needswap);
816 ip->i_flag |= IN_CHANGE | IN_UPDATE;
817 }
818
819 /*
820 * Fetch through the indirect blocks, allocating as necessary.
821 */
822
823 for (i = 1;;) {
824 error = bread(vp,
825 indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
826 if (error) {
827 goto fail;
828 }
829 bap = (int64_t *)bp->b_data;
830 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
831 if (i == num)
832 break;
833 i++;
834 if (nb != 0) {
835 brelse(bp, 0);
836 continue;
837 }
838 if (fscow_run(bp, true) != 0) {
839 brelse(bp, 0);
840 goto fail;
841 }
842 mutex_enter(&ump->um_lock);
843 /* Try to keep snapshot indirect blocks contiguous. */
844 if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
845 pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
846 flags | B_METAONLY, &bap[0]);
847 if (pref == 0)
848 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
849 NULL);
850 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
851 flags | B_METAONLY, cred, &newb);
852 if (error) {
853 brelse(bp, 0);
854 goto fail;
855 }
856 nb = newb;
857 *allocblk++ = nb;
858 error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
859 fs->fs_bsize, true, &nbp);
860 if (error) {
861 brelse(bp, 0);
862 goto fail;
863 }
864 /*
865 * Write synchronously so that indirect blocks
866 * never point at garbage.
867 */
868 if ((error = bwrite(nbp)) != 0) {
869 brelse(bp, 0);
870 goto fail;
871 }
872 if (unwindidx < 0)
873 unwindidx = i - 1;
874 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
875
876 /*
877 * If required, write synchronously, otherwise use
878 * delayed write.
879 */
880
881 if (flags & B_SYNC) {
882 bwrite(bp);
883 } else {
884 bdwrite(bp);
885 }
886 }
887
888 if (flags & B_METAONLY) {
889 KASSERT(bpp != NULL);
890 *bpp = bp;
891 return (0);
892 }
893
894 /*
895 * Get the data block, allocating if necessary.
896 */
897
898 if (nb == 0) {
899 if (fscow_run(bp, true) != 0) {
900 brelse(bp, 0);
901 goto fail;
902 }
903 mutex_enter(&ump->um_lock);
904 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
905 &bap[0]);
906 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
907 &newb);
908 if (error) {
909 brelse(bp, 0);
910 goto fail;
911 }
912 nb = newb;
913 *allocblk++ = nb;
914 if (bpp != NULL) {
915 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
916 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
917 if (error) {
918 brelse(bp, 0);
919 goto fail;
920 }
921 }
922 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
923 if (allocib == NULL && unwindidx < 0) {
924 unwindidx = i - 1;
925 }
926
927 /*
928 * If required, write synchronously, otherwise use
929 * delayed write.
930 */
931
932 if (flags & B_SYNC) {
933 bwrite(bp);
934 } else {
935 bdwrite(bp);
936 }
937 return (0);
938 }
939 brelse(bp, 0);
940 if (bpp != NULL) {
941 if (flags & B_CLRBUF) {
942 error = bread(vp, lbn, (int)fs->fs_bsize,
943 B_MODIFY, &nbp);
944 if (error) {
945 goto fail;
946 }
947 } else {
948 error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
949 fs->fs_bsize, true, &nbp);
950 if (error)
951 goto fail;
952 }
953 *bpp = nbp;
954 }
955 return (0);
956
957 fail:
958 /*
959 * If we have failed part way through block allocation, we
960 * have to deallocate any indirect blocks that we have allocated.
961 */
962
963 if (unwindidx >= 0) {
964
965 /*
966 * First write out any buffers we've created to resolve their
967 * softdeps. This must be done in reverse order of creation
968 * so that we resolve the dependencies in one pass.
969 * Write the cylinder group buffers for these buffers too.
970 */
971
972 for (i = num; i >= unwindidx; i--) {
973 if (i == 0) {
974 break;
975 }
976 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
977 fs->fs_bsize, false, &bp) != 0)
978 continue;
979 if (bp->b_oflags & BO_DELWRI) {
980 nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
981 FFS_DBTOFSB(fs, bp->b_blkno))));
982 bwrite(bp);
983 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
984 fs->fs_cgsize, false, &bp) != 0)
985 continue;
986 if (bp->b_oflags & BO_DELWRI) {
987 bwrite(bp);
988 } else {
989 brelse(bp, BC_INVAL);
990 }
991 } else {
992 brelse(bp, BC_INVAL);
993 }
994 }
995
996 /*
997 * Now that any dependencies that we created have been
998 * resolved, we can undo the partial allocation.
999 */
1000
1001 if (unwindidx == 0) {
1002 *allocib = 0;
1003 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1004 } else {
1005 int r;
1006
1007 r = bread(vp, indirs[unwindidx].in_lbn,
1008 (int)fs->fs_bsize, 0, &bp);
1009 if (r) {
1010 panic("Could not unwind indirect block, error %d", r);
1011 } else {
1012 bap = (int64_t *)bp->b_data;
1013 bap[indirs[unwindidx].in_off] = 0;
1014 bwrite(bp);
1015 }
1016 }
1017 for (i = unwindidx + 1; i <= num; i++) {
1018 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1019 fs->fs_bsize, false, &bp) == 0)
1020 brelse(bp, BC_INVAL);
1021 }
1022 }
1023 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1024 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1025 deallocated += fs->fs_bsize;
1026 }
1027 if (deallocated) {
1028 #if defined(QUOTA) || defined(QUOTA2)
1029 /*
1030 * Restore user's disk quota because allocation failed.
1031 */
1032 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1033 #endif
1034 ip->i_ffs2_blocks -= btodb(deallocated);
1035 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1036 }
1037
1038 return (error);
1039 }
1040