/* $NetBSD: ffs_balloc.c,v 1.48.12.2 2008/09/18 04:37:05 wrstuden Exp $ */
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48.12.2 2008/09/18 04:37:05 wrstuden Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67
68 #include <uvm/uvm.h>
69
70 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
71 struct buf **);
72 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
73 struct buf **);
74
75 /*
76 * Balloc defines the structure of file system storage
77 * by allocating the physical blocks on a device given
78 * the inode and the logical block number in a file.
79 */
80
81 int
82 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
83 struct buf **bpp)
84 {
85 int error;
86
87 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
88 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
89 else
90 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
91
92 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
93 brelse(*bpp, 0);
94
95 return error;
96 }
97
/*
 * ffs_balloc_ufs1:
 *   UFS1 flavor of block allocation.  Ensure the logical block that
 *   contains byte offset "off" of vnode "vp" is backed by storage,
 *   allocating a direct block, fragment, or chain of indirect blocks
 *   as needed.  "size" is the byte count being written at "off" (the
 *   resulting in-block extent must not exceed fs_bsize); "flags"
 *   carries B_SYNC / B_CLRBUF / B_METAONLY.  On success, if bpp is
 *   non-NULL, *bpp holds a buffer for the data (or, with B_METAONLY,
 *   the last indirect) block.  Returns 0 or an errno; on failure all
 *   blocks allocated during the call are unwound at "fail:" below.
 */
98 static int
99 ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
100 int flags, struct buf **bpp)
101 {
102 daddr_t lbn, lastlbn;
103 struct buf *bp, *nbp;
104 struct inode *ip = VTOI(vp);
105 struct fs *fs = ip->i_fs;
106 struct ufsmount *ump = ip->i_ump;
107 struct indir indirs[NIADDR + 2];
108 daddr_t newb, pref, nb;
109 int32_t *bap; /* XXX ondisk32 */
110 int deallocated, osize, nsize, num, i, error;
/* allociblk records every block allocated here so "fail:" can free them. */
111 int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
112 int32_t *allocib;
113 int unwindidx = -1;
114 #ifdef FFS_EI
115 const int needswap = UFS_FSNEEDSWAP(fs);
116 #endif
117 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
118 
119 lbn = lblkno(fs, off);
120 size = blkoff(fs, off) + size;
121 if (size > fs->fs_bsize)
122 panic("ffs_balloc: blk too big");
123 if (bpp != NULL) {
124 *bpp = NULL;
125 }
126 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
127 
128 if (lbn < 0)
129 return (EFBIG);
130 
131 /*
132 * If the next write will extend the file into a new block,
133 * and the file is currently composed of a fragment
134 * this fragment has to be extended to be a full block.
135 */
136 
137 lastlbn = lblkno(fs, ip->i_size);
138 if (lastlbn < NDADDR && lastlbn < lbn) {
139 nb = lastlbn;
140 osize = blksize(fs, ip, nb);
141 if (osize < fs->fs_bsize && osize > 0) {
142 mutex_enter(&ump->um_lock);
143 error = ffs_realloccg(ip, nb,
144 ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
145 &ip->i_ffs1_db[0]),
146 osize, (int)fs->fs_bsize, cred, bpp, &newb);
147 if (error)
148 return (error);
149 if (DOINGSOFTDEP(vp))
150 softdep_setup_allocdirect(ip, nb, newb,
151 ufs_rw32(ip->i_ffs1_db[nb], needswap),
152 fs->fs_bsize, osize, bpp ? *bpp : NULL);
153 ip->i_size = lblktosize(fs, nb + 1);
154 ip->i_ffs1_size = ip->i_size;
155 uvm_vnp_setsize(vp, ip->i_ffs1_size);
156 ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
157 ip->i_flag |= IN_CHANGE | IN_UPDATE;
158 if (bpp && *bpp) {
159 if (flags & B_SYNC)
160 bwrite(*bpp);
161 else
162 bawrite(*bpp);
163 }
164 }
165 }
166 
167 /*
168 * The first NDADDR blocks are direct blocks
169 */
170 
171 if (lbn < NDADDR) {
172 nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
173 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
174 
175 /*
176 * The block is an already-allocated direct block
177 * and the file already extends past this block,
178 * thus this must be a whole block.
179 * Just read the block (if requested).
180 */
181 
182 if (bpp != NULL) {
183 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
184 B_MODIFY, bpp);
185 if (error) {
186 brelse(*bpp, 0);
187 return (error);
188 }
189 }
190 return (0);
191 }
192 if (nb != 0) {
193 
194 /*
195 * Consider need to reallocate a fragment.
196 */
197 
198 osize = fragroundup(fs, blkoff(fs, ip->i_size));
199 nsize = fragroundup(fs, size);
200 if (nsize <= osize) {
201 
202 /*
203 * The existing block is already
204 * at least as big as we want.
205 * Just read the block (if requested).
206 */
207 
208 if (bpp != NULL) {
209 error = bread(vp, lbn, osize, NOCRED,
210 B_MODIFY, bpp);
211 if (error) {
212 brelse(*bpp, 0);
213 return (error);
214 }
215 }
216 return 0;
217 } else {
218 
219 /*
220 * The existing block is smaller than we want,
221 * grow it.
222 */
223 mutex_enter(&ump->um_lock);
224 error = ffs_realloccg(ip, lbn,
225 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
226 &ip->i_ffs1_db[0]),
227 osize, nsize, cred, bpp, &newb);
228 if (error)
229 return (error);
230 if (DOINGSOFTDEP(vp))
231 softdep_setup_allocdirect(ip, lbn,
232 newb, nb, nsize, osize,
233 bpp ? *bpp : NULL);
234 }
235 } else {
236 
237 /*
238 * the block was not previously allocated,
239 * allocate a new block or fragment.
240 */
241 
242 if (ip->i_size < lblktosize(fs, lbn + 1))
243 nsize = fragroundup(fs, size);
244 else
245 nsize = fs->fs_bsize;
246 mutex_enter(&ump->um_lock);
247 error = ffs_alloc(ip, lbn,
248 ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
249 &ip->i_ffs1_db[0]),
250 nsize, flags, cred, &newb);
251 if (error)
252 return (error);
253 if (bpp != NULL) {
254 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
255 nsize, (flags & B_CLRBUF) != 0, bpp);
256 if (error)
257 return error;
258 }
259 if (DOINGSOFTDEP(vp)) {
260 softdep_setup_allocdirect(ip, lbn, newb, 0,
261 nsize, 0, bpp ? *bpp : NULL);
262 }
263 }
264 ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
265 ip->i_flag |= IN_CHANGE | IN_UPDATE;
266 return (0);
267 }
268 
269 /*
270 * Determine the number of levels of indirection.
271 */
272 
273 pref = 0;
274 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
275 return (error);
276 
277 /*
278 * Fetch the first indirect block allocating if necessary.
279 */
280 
281 --num;
282 nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
283 allocib = NULL;
284 allocblk = allociblk;
285 if (nb == 0) {
286 mutex_enter(&ump->um_lock);
287 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
288 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
289 flags | B_METAONLY, cred, &newb);
290 if (error)
291 goto fail;
292 nb = newb;
293 *allocblk++ = nb;
294 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
295 fs->fs_bsize, true, &bp);
296 if (error)
297 goto fail;
298 if (DOINGSOFTDEP(vp)) {
299 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
300 newb, 0, fs->fs_bsize, 0, bp);
301 bdwrite(bp);
302 } else {
303 
304 /*
305 * Write synchronously so that indirect blocks
306 * never point at garbage.
307 */
308 
309 if ((error = bwrite(bp)) != 0)
310 goto fail;
311 }
312 unwindidx = 0;
313 allocib = &ip->i_ffs1_ib[indirs[0].in_off];
314 *allocib = ufs_rw32(nb, needswap);
315 ip->i_flag |= IN_CHANGE | IN_UPDATE;
316 }
317 
318 /*
319 * Fetch through the indirect blocks, allocating as necessary.
320 */
321 
322 for (i = 1;;) {
323 error = bread(vp,
324 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
325 if (error) {
326 brelse(bp, 0);
327 goto fail;
328 }
329 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
330 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
331 if (i == num)
332 break;
333 i++;
334 if (nb != 0) {
335 brelse(bp, 0);
336 continue;
337 }
338 if (fscow_run(bp, true) != 0) {
339 brelse(bp, 0);
340 goto fail;
341 }
342 mutex_enter(&ump->um_lock);
343 if (pref == 0)
344 pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
345 NULL);
346 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
347 flags | B_METAONLY, cred, &newb);
348 if (error) {
349 brelse(bp, 0);
350 goto fail;
351 }
352 nb = newb;
353 *allocblk++ = nb;
354 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
355 fs->fs_bsize, true, &nbp);
356 if (error) {
357 brelse(bp, 0);
358 goto fail;
359 }
360 if (DOINGSOFTDEP(vp)) {
361 softdep_setup_allocindir_meta(nbp, ip, bp,
362 indirs[i - 1].in_off, nb);
363 bdwrite(nbp);
364 } else {
365 
366 /*
367 * Write synchronously so that indirect blocks
368 * never point at garbage.
369 */
370 
371 if ((error = bwrite(nbp)) != 0) {
372 brelse(bp, 0);
373 goto fail;
374 }
375 }
376 if (unwindidx < 0)
377 unwindidx = i - 1;
378 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
379 
380 /*
381 * If required, write synchronously, otherwise use
382 * delayed write.
383 */
384 
385 if (flags & B_SYNC) {
386 bwrite(bp);
387 } else {
388 bdwrite(bp);
389 }
390 }
391 
392 if (flags & B_METAONLY) {
393 KASSERT(bpp != NULL);
394 *bpp = bp;
395 return (0);
396 }
397 
398 /*
399 * Get the data block, allocating if necessary.
400 */
401 
402 if (nb == 0) {
403 if (fscow_run(bp, true) != 0) {
404 brelse(bp, 0);
405 goto fail;
406 }
407 mutex_enter(&ump->um_lock);
408 pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
409 &bap[0]);
410 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
411 &newb);
412 if (error) {
413 brelse(bp, 0);
414 goto fail;
415 }
416 nb = newb;
417 *allocblk++ = nb;
418 if (bpp != NULL) {
419 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
420 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
421 if (error) {
422 brelse(bp, 0);
423 goto fail;
424 }
425 }
426 if (DOINGSOFTDEP(vp))
427 softdep_setup_allocindir_page(ip, lbn, bp,
428 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
429 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
430 if (allocib == NULL && unwindidx < 0) {
431 unwindidx = i - 1;
432 }
433 
434 /*
435 * If required, write synchronously, otherwise use
436 * delayed write.
437 */
438 
439 if (flags & B_SYNC) {
440 bwrite(bp);
441 } else {
442 bdwrite(bp);
443 }
444 return (0);
445 }
446 brelse(bp, 0);
447 if (bpp != NULL) {
448 if (flags & B_CLRBUF) {
449 error = bread(vp, lbn, (int)fs->fs_bsize,
450 NOCRED, B_MODIFY, &nbp);
451 if (error) {
452 brelse(nbp, 0);
453 goto fail;
454 }
455 } else {
456 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
457 fs->fs_bsize, true, &nbp);
458 if (error)
459 goto fail;
460 }
461 *bpp = nbp;
462 }
463 return (0);
464 
465 fail:
466 /*
467 * If we have failed part way through block allocation, we
468 * have to deallocate any indirect blocks that we have allocated.
469 */
470 
471 if (unwindidx >= 0) {
472 
473 /*
474 * First write out any buffers we've created to resolve their
475 * softdeps. This must be done in reverse order of creation
476 * so that we resolve the dependencies in one pass.
477 * Write the cylinder group buffers for these buffers too.
478 */
479 
480 for (i = num; i >= unwindidx; i--) {
481 if (i == 0) {
482 break;
483 }
484 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
485 fs->fs_bsize, false, &bp) != 0)
486 continue;
487 if (bp->b_oflags & BO_DELWRI) {
488 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
489 dbtofsb(fs, bp->b_blkno))));
490 bwrite(bp);
491 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
492 fs->fs_cgsize, false, &bp) != 0)
493 continue;
494 if (bp->b_oflags & BO_DELWRI) {
495 bwrite(bp);
496 } else {
497 brelse(bp, BC_INVAL);
498 }
499 } else {
500 brelse(bp, BC_INVAL);
501 }
502 }
503 
504 /* Now flush all dependencies to disk. */
505 #ifdef notyet
506 /* XXX pages locked */
507 (void)softdep_sync_metadata(vp);
508 #endif
509 
510 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
511 ip->i_flag |= IN_CHANGE | IN_UPDATE;
512 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
513 }
514 
515 /*
516 * Now that any dependencies that we created have been
517 * resolved, we can undo the partial allocation.
518 */
519 
520 if (unwindidx == 0) {
521 *allocib = 0;
522 ip->i_flag |= IN_CHANGE | IN_UPDATE;
523 if (DOINGSOFTDEP(vp))
524 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
525 } else {
526 int r;
527 
528 r = bread(vp, indirs[unwindidx].in_lbn,
529 (int)fs->fs_bsize, NOCRED, 0, &bp);
530 if (r) {
/* NOTE(review): panic() does not return, so the brelse() below is dead code. */
531 panic("Could not unwind indirect block, error %d", r);
532 brelse(bp, 0);
533 } else {
534 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
535 bap[indirs[unwindidx].in_off] = 0;
536 bwrite(bp);
537 }
538 }
539 for (i = unwindidx + 1; i <= num; i++) {
540 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
541 fs->fs_bsize, false, &bp) == 0)
542 brelse(bp, BC_INVAL);
543 }
544 }
/* Free every block recorded in allociblk and give back the quota charge. */
545 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
546 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
547 deallocated += fs->fs_bsize;
548 }
549 if (deallocated) {
550 #ifdef QUOTA
551 /*
552 * Restore user's disk quota because allocation failed.
553 */
554 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
555 #endif
556 ip->i_ffs1_blocks -= btodb(deallocated);
557 ip->i_flag |= IN_CHANGE | IN_UPDATE;
558 }
559 /*
560 * Flush all dependencies again so that the soft updates code
561 * doesn't find any untracked changes.
562 */
563 #ifdef notyet
564 /* XXX pages locked */
565 (void)softdep_sync_metadata(vp);
566 #endif
567 return (error);
568 }
569
/*
 * ffs_balloc_ufs2:
 *   UFS2 flavor of block allocation; structurally parallel to
 *   ffs_balloc_ufs1 but with 64-bit on-disk block pointers
 *   (ufs_rw64, i_ffs2_db/i_ffs2_ib).  Ensure the logical block that
 *   contains byte offset "off" of vnode "vp" is backed by storage,
 *   allocating a direct block, fragment, or chain of indirect blocks
 *   as needed.  On success, if bpp is non-NULL, *bpp holds a buffer
 *   for the data (or, with B_METAONLY, the last indirect) block.
 *   Returns 0 or an errno; failures unwind all allocation at "fail:".
 */
570 static int
571 ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
572 int flags, struct buf **bpp)
573 {
574 daddr_t lbn, lastlbn;
575 struct buf *bp, *nbp;
576 struct inode *ip = VTOI(vp);
577 struct fs *fs = ip->i_fs;
578 struct ufsmount *ump = ip->i_ump;
579 struct indir indirs[NIADDR + 2];
580 daddr_t newb, pref, nb;
581 int64_t *bap;
582 int deallocated, osize, nsize, num, i, error;
/* allociblk records every block allocated here so "fail:" can free them. */
583 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
584 int64_t *allocib;
585 int unwindidx = -1;
586 #ifdef FFS_EI
587 const int needswap = UFS_FSNEEDSWAP(fs);
588 #endif
589 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
590 
591 lbn = lblkno(fs, off);
592 size = blkoff(fs, off) + size;
593 if (size > fs->fs_bsize)
594 panic("ffs_balloc: blk too big");
595 if (bpp != NULL) {
596 *bpp = NULL;
597 }
598 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
599 
600 if (lbn < 0)
601 return (EFBIG);
602 
/*
 * NOTE(review): the compiled-out IO_EXT section below references names
 * not declared in this function (dp, NXADDR, BA_METAONLY, BA_CLRBUF,
 * IO_SYNC) -- presumably a FreeBSD extended-attribute import that would
 * need adaptation before "notyet" is ever enabled; verify before use.
 */
603 #ifdef notyet
604 /*
605 * Check for allocating external data.
606 */
607 if (flags & IO_EXT) {
608 if (lbn >= NXADDR)
609 return (EFBIG);
610 /*
611 * If the next write will extend the data into a new block,
612 * and the data is currently composed of a fragment
613 * this fragment has to be extended to be a full block.
614 */
615 lastlbn = lblkno(fs, dp->di_extsize);
616 if (lastlbn < lbn) {
617 nb = lastlbn;
618 osize = sblksize(fs, dp->di_extsize, nb);
619 if (osize < fs->fs_bsize && osize > 0) {
620 mutex_enter(&ump->um_lock);
621 error = ffs_realloccg(ip, -1 - nb,
622 dp->di_extb[nb],
623 ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
624 flags, &dp->di_extb[0]),
625 osize,
626 (int)fs->fs_bsize, cred, &bp);
627 if (error)
628 return (error);
629 if (DOINGSOFTDEP(vp))
630 softdep_setup_allocext(ip, nb,
631 dbtofsb(fs, bp->b_blkno),
632 dp->di_extb[nb],
633 fs->fs_bsize, osize, bp);
634 dp->di_extsize = smalllblktosize(fs, nb + 1);
635 dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
636 bp->b_xflags |= BX_ALTDATA;
637 ip->i_flag |= IN_CHANGE | IN_UPDATE;
638 if (flags & IO_SYNC)
639 bwrite(bp);
640 else
641 bawrite(bp);
642 }
643 }
644 /*
645 * All blocks are direct blocks
646 */
647 if (flags & BA_METAONLY)
648 panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
649 nb = dp->di_extb[lbn];
650 if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
651 error = bread(vp, -1 - lbn, fs->fs_bsize,
652 NOCRED, 0, &bp);
653 if (error) {
654 brelse(bp, 0);
655 return (error);
656 }
657 mutex_enter(&bp->b_interlock);
658 bp->b_blkno = fsbtodb(fs, nb);
659 bp->b_xflags |= BX_ALTDATA;
660 mutex_exit(&bp->b_interlock);
661 *bpp = bp;
662 return (0);
663 }
664 if (nb != 0) {
665 /*
666 * Consider need to reallocate a fragment.
667 */
668 osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
669 nsize = fragroundup(fs, size);
670 if (nsize <= osize) {
671 error = bread(vp, -1 - lbn, osize,
672 NOCRED, 0, &bp);
673 if (error) {
674 brelse(bp, 0);
675 return (error);
676 }
677 mutex_enter(&bp->b_interlock);
678 bp->b_blkno = fsbtodb(fs, nb);
679 bp->b_xflags |= BX_ALTDATA;
680 mutex_exit(&bp->b_interlock);
681 } else {
682 mutex_enter(&ump->um_lock);
683 error = ffs_realloccg(ip, -1 - lbn,
684 dp->di_extb[lbn],
685 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
686 &dp->di_extb[0]),
687 osize, nsize, cred, &bp);
688 if (error)
689 return (error);
690 bp->b_xflags |= BX_ALTDATA;
691 if (DOINGSOFTDEP(vp))
692 softdep_setup_allocext(ip, lbn,
693 dbtofsb(fs, bp->b_blkno), nb,
694 nsize, osize, bp);
695 }
696 } else {
697 if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
698 nsize = fragroundup(fs, size);
699 else
700 nsize = fs->fs_bsize;
701 mutex_enter(&ump->um_lock);
702 error = ffs_alloc(ip, lbn,
703 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
704 &dp->di_extb[0]),
705 nsize, flags, cred, &newb);
706 if (error)
707 return (error);
708 error = ffs_getblk(vp, -1 - lbn, fsbtodb(fs, newb),
709 nsize, (flags & BA_CLRBUF) != 0, &bp);
710 if (error)
711 return error;
712 bp->b_xflags |= BX_ALTDATA;
713 if (DOINGSOFTDEP(vp))
714 softdep_setup_allocext(ip, lbn, newb, 0,
715 nsize, 0, bp);
716 }
717 dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
718 ip->i_flag |= IN_CHANGE | IN_UPDATE;
719 *bpp = bp;
720 return (0);
721 }
722 #endif
723 /*
724 * If the next write will extend the file into a new block,
725 * and the file is currently composed of a fragment
726 * this fragment has to be extended to be a full block.
727 */
728 
729 lastlbn = lblkno(fs, ip->i_size);
730 if (lastlbn < NDADDR && lastlbn < lbn) {
731 nb = lastlbn;
732 osize = blksize(fs, ip, nb);
733 if (osize < fs->fs_bsize && osize > 0) {
734 mutex_enter(&ump->um_lock);
735 error = ffs_realloccg(ip, nb,
736 ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
737 &ip->i_ffs2_db[0]),
738 osize, (int)fs->fs_bsize, cred, bpp, &newb);
739 if (error)
740 return (error);
741 if (DOINGSOFTDEP(vp))
742 softdep_setup_allocdirect(ip, nb, newb,
743 ufs_rw64(ip->i_ffs2_db[nb], needswap),
744 fs->fs_bsize, osize, bpp ? *bpp : NULL);
745 ip->i_size = lblktosize(fs, nb + 1);
746 ip->i_ffs2_size = ip->i_size;
747 uvm_vnp_setsize(vp, ip->i_size);
748 ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
749 ip->i_flag |= IN_CHANGE | IN_UPDATE;
/*
 * NOTE(review): the UFS1 twin guards this with (bpp && *bpp); here only
 * bpp is checked -- presumably ffs_realloccg always sets *bpp when bpp
 * is non-NULL and no error occurred, but confirm before relying on it.
 */
750 if (bpp) {
751 if (flags & B_SYNC)
752 bwrite(*bpp);
753 else
754 bawrite(*bpp);
755 }
756 }
757 }
758 
759 /*
760 * The first NDADDR blocks are direct blocks
761 */
762 
763 if (lbn < NDADDR) {
764 nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
765 if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {
766 
767 /*
768 * The block is an already-allocated direct block
769 * and the file already extends past this block,
770 * thus this must be a whole block.
771 * Just read the block (if requested).
772 */
773 
774 if (bpp != NULL) {
775 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
776 B_MODIFY, bpp);
777 if (error) {
778 brelse(*bpp, 0);
779 return (error);
780 }
781 }
782 return (0);
783 }
784 if (nb != 0) {
785 
786 /*
787 * Consider need to reallocate a fragment.
788 */
789 
790 osize = fragroundup(fs, blkoff(fs, ip->i_size));
791 nsize = fragroundup(fs, size);
792 if (nsize <= osize) {
793 
794 /*
795 * The existing block is already
796 * at least as big as we want.
797 * Just read the block (if requested).
798 */
799 
800 if (bpp != NULL) {
801 error = bread(vp, lbn, osize, NOCRED,
802 B_MODIFY, bpp);
803 if (error) {
804 brelse(*bpp, 0);
805 return (error);
806 }
807 }
808 return 0;
809 } else {
810 
811 /*
812 * The existing block is smaller than we want,
813 * grow it.
814 */
815 mutex_enter(&ump->um_lock);
816 error = ffs_realloccg(ip, lbn,
817 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
818 &ip->i_ffs2_db[0]),
819 osize, nsize, cred, bpp, &newb);
820 if (error)
821 return (error);
822 if (DOINGSOFTDEP(vp))
823 softdep_setup_allocdirect(ip, lbn,
824 newb, nb, nsize, osize,
825 bpp ? *bpp : NULL);
826 }
827 } else {
828 
829 /*
830 * the block was not previously allocated,
831 * allocate a new block or fragment.
832 */
833 
834 if (ip->i_size < lblktosize(fs, lbn + 1))
835 nsize = fragroundup(fs, size);
836 else
837 nsize = fs->fs_bsize;
838 mutex_enter(&ump->um_lock);
839 error = ffs_alloc(ip, lbn,
840 ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
841 &ip->i_ffs2_db[0]),
842 nsize, flags, cred, &newb);
843 if (error)
844 return (error);
845 if (bpp != NULL) {
846 error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
847 nsize, (flags & B_CLRBUF) != 0, bpp);
848 if (error)
849 return error;
850 }
851 if (DOINGSOFTDEP(vp)) {
852 softdep_setup_allocdirect(ip, lbn, newb, 0,
853 nsize, 0, bpp ? *bpp : NULL);
854 }
855 }
856 ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
857 ip->i_flag |= IN_CHANGE | IN_UPDATE;
858 return (0);
859 }
860 
861 /*
862 * Determine the number of levels of indirection.
863 */
864 
865 pref = 0;
866 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
867 return (error);
868 
869 /*
870 * Fetch the first indirect block allocating if necessary.
871 */
872 
873 --num;
874 nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
875 allocib = NULL;
876 allocblk = allociblk;
877 if (nb == 0) {
878 mutex_enter(&ump->um_lock);
879 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
880 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
881 flags | B_METAONLY, cred, &newb);
882 if (error)
883 goto fail;
884 nb = newb;
885 *allocblk++ = nb;
886 error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
887 fs->fs_bsize, true, &bp);
888 if (error)
889 goto fail;
890 if (DOINGSOFTDEP(vp)) {
891 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
892 newb, 0, fs->fs_bsize, 0, bp);
893 bdwrite(bp);
894 } else {
895 
896 /*
897 * Write synchronously so that indirect blocks
898 * never point at garbage.
899 */
900 
901 if ((error = bwrite(bp)) != 0)
902 goto fail;
903 }
904 unwindidx = 0;
905 allocib = &ip->i_ffs2_ib[indirs[0].in_off];
906 *allocib = ufs_rw64(nb, needswap);
907 ip->i_flag |= IN_CHANGE | IN_UPDATE;
908 }
909 
910 /*
911 * Fetch through the indirect blocks, allocating as necessary.
912 */
913 
914 for (i = 1;;) {
915 error = bread(vp,
916 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
917 if (error) {
918 brelse(bp, 0);
919 goto fail;
920 }
921 bap = (int64_t *)bp->b_data;
922 nb = ufs_rw64(bap[indirs[i].in_off], needswap);
923 if (i == num)
924 break;
925 i++;
926 if (nb != 0) {
927 brelse(bp, 0);
928 continue;
929 }
930 if (fscow_run(bp, true) != 0) {
931 brelse(bp, 0);
932 goto fail;
933 }
934 mutex_enter(&ump->um_lock);
935 if (pref == 0)
936 pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
937 NULL);
938 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
939 flags | B_METAONLY, cred, &newb);
940 if (error) {
941 brelse(bp, 0);
942 goto fail;
943 }
944 nb = newb;
945 *allocblk++ = nb;
946 error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
947 fs->fs_bsize, true, &nbp);
948 if (error) {
949 brelse(bp, 0);
950 goto fail;
951 }
952 if (DOINGSOFTDEP(vp)) {
953 softdep_setup_allocindir_meta(nbp, ip, bp,
954 indirs[i - 1].in_off, nb);
955 bdwrite(nbp);
956 } else {
957 
958 /*
959 * Write synchronously so that indirect blocks
960 * never point at garbage.
961 */
962 
963 if ((error = bwrite(nbp)) != 0) {
964 brelse(bp, 0);
965 goto fail;
966 }
967 }
968 if (unwindidx < 0)
969 unwindidx = i - 1;
970 bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);
971 
972 /*
973 * If required, write synchronously, otherwise use
974 * delayed write.
975 */
976 
977 if (flags & B_SYNC) {
978 bwrite(bp);
979 } else {
980 bdwrite(bp);
981 }
982 }
983 
984 if (flags & B_METAONLY) {
985 KASSERT(bpp != NULL);
986 *bpp = bp;
987 return (0);
988 }
989 
990 /*
991 * Get the data block, allocating if necessary.
992 */
993 
994 if (nb == 0) {
995 if (fscow_run(bp, true) != 0) {
996 brelse(bp, 0);
997 goto fail;
998 }
999 mutex_enter(&ump->um_lock);
1000 pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
1001 &bap[0]);
1002 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
1003 &newb);
1004 if (error) {
1005 brelse(bp, 0);
1006 goto fail;
1007 }
1008 nb = newb;
1009 *allocblk++ = nb;
1010 if (bpp != NULL) {
1011 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
1012 fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
1013 if (error) {
1014 brelse(bp, 0);
1015 goto fail;
1016 }
1017 }
1018 if (DOINGSOFTDEP(vp))
1019 softdep_setup_allocindir_page(ip, lbn, bp,
1020 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
1021 bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
1022 if (allocib == NULL && unwindidx < 0) {
1023 unwindidx = i - 1;
1024 }
1025 
1026 /*
1027 * If required, write synchronously, otherwise use
1028 * delayed write.
1029 */
1030 
1031 if (flags & B_SYNC) {
1032 bwrite(bp);
1033 } else {
1034 bdwrite(bp);
1035 }
1036 return (0);
1037 }
1038 brelse(bp, 0);
1039 if (bpp != NULL) {
1040 if (flags & B_CLRBUF) {
1041 error = bread(vp, lbn, (int)fs->fs_bsize,
1042 NOCRED, B_MODIFY, &nbp);
1043 if (error) {
1044 brelse(nbp, 0);
1045 goto fail;
1046 }
1047 } else {
1048 error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
1049 fs->fs_bsize, true, &nbp);
1050 if (error)
1051 goto fail;
1052 }
1053 *bpp = nbp;
1054 }
1055 return (0);
1056 
1057 fail:
1058 /*
1059 * If we have failed part way through block allocation, we
1060 * have to deallocate any indirect blocks that we have allocated.
1061 */
1062 
1063 if (unwindidx >= 0) {
1064 
1065 /*
1066 * First write out any buffers we've created to resolve their
1067 * softdeps. This must be done in reverse order of creation
1068 * so that we resolve the dependencies in one pass.
1069 * Write the cylinder group buffers for these buffers too.
1070 */
1071 
1072 for (i = num; i >= unwindidx; i--) {
1073 if (i == 0) {
1074 break;
1075 }
1076 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1077 fs->fs_bsize, false, &bp) != 0)
1078 continue;
1079 if (bp->b_oflags & BO_DELWRI) {
1080 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
1081 dbtofsb(fs, bp->b_blkno))));
1082 bwrite(bp);
1083 if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
1084 fs->fs_cgsize, false, &bp) != 0)
1085 continue;
1086 if (bp->b_oflags & BO_DELWRI) {
1087 bwrite(bp);
1088 } else {
1089 brelse(bp, BC_INVAL);
1090 }
1091 } else {
1092 brelse(bp, BC_INVAL);
1093 }
1094 }
1095 
1096 /* Now flush the dependencies to disk. */
1097 #ifdef notyet
1098 /* XXX pages locked */
1099 (void)softdep_sync_metadata(vp);
1100 #endif
1101 
1102 if (DOINGSOFTDEP(vp) && unwindidx == 0) {
1103 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1104 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1105 }
1106 
1107 /*
1108 * Now that any dependencies that we created have been
1109 * resolved, we can undo the partial allocation.
1110 */
1111 
1112 if (unwindidx == 0) {
1113 *allocib = 0;
1114 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1115 if (DOINGSOFTDEP(vp))
1116 ffs_update(vp, NULL, NULL, UPDATE_WAIT);
1117 } else {
1118 int r;
1119 
1120 r = bread(vp, indirs[unwindidx].in_lbn,
1121 (int)fs->fs_bsize, NOCRED, 0, &bp);
1122 if (r) {
/* NOTE(review): panic() does not return, so the brelse() below is dead code. */
1123 panic("Could not unwind indirect block, error %d", r);
1124 brelse(bp, 0);
1125 } else {
1126 bap = (int64_t *)bp->b_data;
1127 bap[indirs[unwindidx].in_off] = 0;
1128 bwrite(bp);
1129 }
1130 }
1131 for (i = unwindidx + 1; i <= num; i++) {
1132 if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
1133 fs->fs_bsize, false, &bp) == 0)
1134 brelse(bp, BC_INVAL);
1135 }
1136 }
/* Free every block recorded in allociblk and give back the quota charge. */
1137 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
1138 ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
1139 deallocated += fs->fs_bsize;
1140 }
1141 if (deallocated) {
1142 #ifdef QUOTA
1143 /*
1144 * Restore user's disk quota because allocation failed.
1145 */
1146 (void)chkdq(ip, -btodb(deallocated), cred, FORCE);
1147 #endif
1148 ip->i_ffs2_blocks -= btodb(deallocated);
1149 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1150 }
1151 
1152 /*
1153 * Flush all dependencies again so that the soft updates code
1154 * doesn't find any untracked changes.
1155 */
1156 #ifdef notyet
1157 /* XXX pages locked */
1158 (void)softdep_sync_metadata(vp);
1159 #endif
1160 return (error);
1161 }
1162