/*	$NetBSD: ffs_balloc.c,v 1.48.6.1 2008/06/02 13:24:35 mjf Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.48.6.1 2008/06/02 13:24:35 mjf Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_quota.h"
48 #endif
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/buf.h>
53 #include <sys/file.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/kauth.h>
57 #include <sys/fstrans.h>
58
59 #include <ufs/ufs/quota.h>
60 #include <ufs/ufs/ufsmount.h>
61 #include <ufs/ufs/inode.h>
62 #include <ufs/ufs/ufs_extern.h>
63 #include <ufs/ufs/ufs_bswap.h>
64
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67
68 #include <uvm/uvm.h>
69
70 static int ffs_getblk(struct vnode *, daddr_t, daddr_t, int, bool, buf_t **);
71 static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
72 struct buf **);
73 static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
74 struct buf **);
75
76 /*
77 * Balloc defines the structure of file system storage
78 * by allocating the physical blocks on a device given
79 * the inode and the logical block number in a file.
80 */
81
82 int
83 ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
84 struct buf **bpp)
85 {
86 int error;
87
88 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
89 error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
90 else
91 error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);
92
93 if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
94 brelse(*bpp, 0);
95
96 return error;
97 }
98
99 static int
100 ffs_getblk(struct vnode *vp, daddr_t lblkno, daddr_t blkno, int size,
101 bool clearbuf, buf_t **bpp)
102 {
103 int error;
104
105 if ((*bpp = getblk(vp, lblkno, size, 0, 0)) == NULL)
106 return ENOMEM;
107 (*bpp)->b_blkno = blkno;
108 if (clearbuf)
109 clrbuf(*bpp);
110 if ((error = fscow_run(*bpp, false)) != 0)
111 brelse(*bpp, BC_INVAL);
112 return error;
113 }
114
/*
 * Allocate a block in a UFS1 file system.
 *
 * off/size select the byte range within the file; flags may carry
 * B_SYNC (write metadata synchronously), B_CLRBUF (zero a newly
 * allocated data buffer) and B_METAONLY (return the indirect block
 * buffer instead of the data block).  If bpp is non-NULL it receives
 * the buffer for the allocated block.  On failure, any blocks
 * allocated along the way are freed and indirect pointers unwound.
 */
static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated in this call so the
	 * fail path can free them; allocblk points past the last entry. */
	int32_t *blkp, *allocblk, allociblk[NIADDR + 1];
	/* allocib, when set, points at the in-core indirect pointer we
	 * installed, so the fail path can clear it. */
	int32_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* NOTE(review): um_lock appears to be handed off to
			 * ffs_realloccg (no mutex_exit here) — confirm. */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs1(ip, lastlbn, nb,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw32(ip->i_ffs1_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
					&ip->i_ffs1_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				&ip->i_ffs1_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		/* On loop exit, bp holds the last-level indirect block and
		 * nb the data block it points at (0 if unallocated). */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (int32_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_oflags & BO_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/* Now flush all dependencies to disk. */
#ifdef notyet
		/* XXX pages locked */
		(void)softdep_sync_metadata(vp);
#endif

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				/* NOTE(review): the brelse below is
				 * unreachable after panic(). */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			brelse(bp, BC_INVAL);
		}
	}
	/* Free every block allocated during this call and restore the
	 * inode's block count (and quota) accordingly. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Flush all dependencies again so that the soft updates code
	 * doesn't find any untracked changes.
	 */
#ifdef notyet
	/* XXX pages locked */
	(void)softdep_sync_metadata(vp);
#endif
	return (error);
}
582
/*
 * Allocate a block in a UFS2 file system.
 *
 * Mirrors ffs_balloc_ufs1() but operates on 64-bit on-disk block
 * pointers (i_ffs2_db/i_ffs2_ib).  The leading "#ifdef notyet"
 * section is disabled extended-attribute (external data) allocation
 * code ported from FreeBSD; it references a "dp" variable that is not
 * declared here and is not compiled.
 */
static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	/* allociblk records every block allocated in this call so the
	 * fail path can free them; allocblk points past the last entry. */
	daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
	/* allocib, when set, points at the in-core indirect pointer we
	 * installed, so the fail path can clear it. */
	int64_t *allocib;
	int unwindidx = -1;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, off);
	size = blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					&dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    NOCRED, 0, &bp);
			if (error) {
				brelse(bp, 0);
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    NOCRED, 0, &bp);
				if (error) {
					brelse(bp, 0);
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&dp->di_extb[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* NOTE(review): um_lock appears to be handed off to
			 * ffs_realloccg (no mutex_exit here) — confirm. */
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs2(ip, lastlbn, nb,
					&ip->i_ffs2_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw64(ip->i_ffs2_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_size = lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    B_MODIFY, bpp);
				if (error) {
					brelse(*bpp, 0);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    B_MODIFY, bpp);
					if (error) {
						brelse(*bpp, 0);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
					&ip->i_ffs2_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&ip->i_ffs2_db[0]), nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, fsbtodb(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, 0, &bp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		/* On loop exit, bp holds the last-level indirect block and
		 * nb the data block it points at (0 if unallocated). */
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, (int64_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, fsbtodb(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {

			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */

			if ((error = bwrite(nbp)) != 0) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, B_MODIFY, &nbp);
			if (error) {
				brelse(nbp, 0);
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, fsbtodb(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps. This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			if (bp->b_oflags & BO_DELWRI) {
				nb = fsbtodb(fs, cgtod(fs, dtog(fs,
				    dbtofsb(fs, bp->b_blkno))));
				bwrite(bp);
				bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
				    0, 0);
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/* Now flush the dependencies to disk. */
#ifdef notyet
		/* XXX pages locked */
		(void)softdep_sync_metadata(vp);
#endif

		if (DOINGSOFTDEP(vp) && unwindidx == 0) {
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (DOINGSOFTDEP(vp))
				ffs_update(vp, NULL, NULL, UPDATE_WAIT);
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, 0, &bp);
			if (r) {
				/* NOTE(review): the brelse below is
				 * unreachable after panic(). */
				panic("Could not unwind indirect block, error %d", r);
				brelse(bp, 0);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			brelse(bp, BC_INVAL);
		}
	}
	/* Free every block allocated during this call and restore the
	 * inode's block count (and quota) accordingly. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Flush all dependencies again so that the soft updates code
	 * doesn't find any untracked changes.
	 */
#ifdef notyet
	/* XXX pages locked */
	(void)softdep_sync_metadata(vp);
#endif
	return (error);
}
1167