ffs_balloc.c revision 1.31 1 /* $NetBSD: ffs_balloc.c,v 1.31 2003/01/24 21:55:21 fvdl Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.31 2003/01/24 21:55:21 fvdl Exp $");
40
41 #if defined(_KERNEL_OPT)
42 #include "opt_quota.h"
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/file.h>
49 #include <sys/mount.h>
50 #include <sys/vnode.h>
51 #include <sys/mount.h>
52
53 #include <ufs/ufs/quota.h>
54 #include <ufs/ufs/ufsmount.h>
55 #include <ufs/ufs/inode.h>
56 #include <ufs/ufs/ufs_extern.h>
57 #include <ufs/ufs/ufs_bswap.h>
58
59 #include <ufs/ffs/fs.h>
60 #include <ufs/ffs/ffs_extern.h>
61
62 #include <uvm/uvm.h>
63
64 /*
65 * Balloc defines the structure of file system storage
66 * by allocating the physical blocks on a device given
67 * the inode and the logical block number in a file.
68 */
69 int
70 ffs_balloc(v)
71 void *v;
72 {
73 struct vop_balloc_args /* {
74 struct vnode *a_vp;
75 off_t a_startoffset;
76 int a_size;
77 struct ucred *a_cred;
78 int a_flags;
79 struct buf **a_bpp;
80 } */ *ap = v;
81 daddr_t lbn;
82 int size;
83 struct ucred *cred;
84 int flags;
85 daddr_t nb;
86 struct buf *bp, *nbp;
87 struct vnode *vp = ap->a_vp;
88 struct inode *ip = VTOI(vp);
89 struct fs *fs = ip->i_fs;
90 struct indir indirs[NIADDR + 2];
91 daddr_t newb, pref;
92 int32_t *bap; /* XXX ondisk32 */
93 int deallocated, osize, nsize, num, i, error;
94 daddr_t *blkp, *allocblk, allociblk[NIADDR + 1];
95 int32_t *allocib; /* XXX ondisk32 */
96 int unwindidx = -1;
97 struct buf **bpp = ap->a_bpp;
98 #ifdef FFS_EI
99 const int needswap = UFS_FSNEEDSWAP(fs);
100 #endif
101 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
102
103 lbn = lblkno(fs, ap->a_startoffset);
104 size = blkoff(fs, ap->a_startoffset) + ap->a_size;
105 if (size > fs->fs_bsize)
106 panic("ffs_balloc: blk too big");
107 if (bpp != NULL) {
108 *bpp = NULL;
109 }
110 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
111
112 KASSERT(size <= fs->fs_bsize);
113 if (lbn < 0)
114 return (EFBIG);
115 cred = ap->a_cred;
116 flags = ap->a_flags;
117
118 /*
119 * If the next write will extend the file into a new block,
120 * and the file is currently composed of a fragment
121 * this fragment has to be extended to be a full block.
122 */
123
124 nb = lblkno(fs, ip->i_ffs_size);
125 if (nb < NDADDR && nb < lbn) {
126 osize = blksize(fs, ip, nb);
127 if (osize < fs->fs_bsize && osize > 0) {
128 error = ffs_realloccg(ip, nb,
129 ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
130 osize, (int)fs->fs_bsize, cred, bpp, &newb);
131 if (error)
132 return (error);
133 if (DOINGSOFTDEP(vp))
134 softdep_setup_allocdirect(ip, nb, newb,
135 ufs_rw32(ip->i_ffs_db[nb], needswap),
136 fs->fs_bsize, osize, bpp ? *bpp : NULL);
137 ip->i_ffs_size = lblktosize(fs, nb + 1);
138 uvm_vnp_setsize(vp, ip->i_ffs_size);
139 /* XXX ondisk32 */
140 ip->i_ffs_db[nb] = ufs_rw32((int32_t)newb, needswap);
141 ip->i_flag |= IN_CHANGE | IN_UPDATE;
142 if (bpp) {
143 if (flags & B_SYNC)
144 bwrite(*bpp);
145 else
146 bawrite(*bpp);
147 }
148 }
149 }
150
151 /*
152 * The first NDADDR blocks are direct blocks
153 */
154
155 if (lbn < NDADDR) {
156 /* XXX ondisk32 */
157 nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
158 if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {
159
160 /*
161 * The block is an already-allocated direct block
162 * and the file already extends past this block,
163 * thus this must be a whole block.
164 * Just read the block (if requested).
165 */
166
167 if (bpp != NULL) {
168 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
169 bpp);
170 if (error) {
171 brelse(*bpp);
172 return (error);
173 }
174 }
175 return (0);
176 }
177 if (nb != 0) {
178
179 /*
180 * Consider need to reallocate a fragment.
181 */
182
183 osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
184 nsize = fragroundup(fs, size);
185 if (nsize <= osize) {
186
187 /*
188 * The existing block is already
189 * at least as big as we want.
190 * Just read the block (if requested).
191 */
192
193 if (bpp != NULL) {
194 error = bread(vp, lbn, osize, NOCRED,
195 bpp);
196 if (error) {
197 brelse(*bpp);
198 return (error);
199 }
200 }
201 return 0;
202 } else {
203
204 /*
205 * The existing block is smaller than we want,
206 * grow it.
207 */
208
209 error = ffs_realloccg(ip, lbn,
210 ffs_blkpref(ip, lbn, (int)lbn,
211 &ip->i_ffs_db[0]), osize, nsize, cred,
212 bpp, &newb);
213 if (error)
214 return (error);
215 if (DOINGSOFTDEP(vp))
216 softdep_setup_allocdirect(ip, lbn,
217 newb, nb, nsize, osize,
218 bpp ? *bpp : NULL);
219 }
220 } else {
221
222 /*
223 * the block was not previously allocated,
224 * allocate a new block or fragment.
225 */
226
227 if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
228 nsize = fragroundup(fs, size);
229 else
230 nsize = fs->fs_bsize;
231 error = ffs_alloc(ip, lbn,
232 ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
233 nsize, cred, &newb);
234 if (error)
235 return (error);
236 if (bpp != NULL) {
237 bp = getblk(vp, lbn, nsize, 0, 0);
238 bp->b_blkno = fsbtodb(fs, newb);
239 if (flags & B_CLRBUF)
240 clrbuf(bp);
241 *bpp = bp;
242 }
243 if (DOINGSOFTDEP(vp)) {
244 softdep_setup_allocdirect(ip, lbn, newb, 0,
245 nsize, 0, bpp ? *bpp : NULL);
246 }
247 }
248 /* XXX ondisk32 */
249 ip->i_ffs_db[lbn] = ufs_rw32((int32_t)newb, needswap);
250 ip->i_flag |= IN_CHANGE | IN_UPDATE;
251 return (0);
252 }
253
254 /*
255 * Determine the number of levels of indirection.
256 */
257
258 pref = 0;
259 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
260 return (error);
261
262 /*
263 * Fetch the first indirect block allocating if necessary.
264 */
265
266 --num;
267 nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
268 allocib = NULL;
269 allocblk = allociblk;
270 if (nb == 0) {
271 pref = ffs_blkpref(ip, lbn, 0, (int32_t *)0);
272 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
273 &newb);
274 if (error)
275 goto fail;
276 nb = newb;
277 *allocblk++ = nb;
278 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
279 bp->b_blkno = fsbtodb(fs, nb);
280 clrbuf(bp);
281 if (DOINGSOFTDEP(vp)) {
282 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
283 newb, 0, fs->fs_bsize, 0, bp);
284 bdwrite(bp);
285 } else {
286
287 /*
288 * Write synchronously so that indirect blocks
289 * never point at garbage.
290 */
291
292 if ((error = bwrite(bp)) != 0)
293 goto fail;
294 }
295 unwindidx = 0;
296 allocib = &ip->i_ffs_ib[indirs[0].in_off];
297 /* XXX ondisk32 */
298 *allocib = ufs_rw32((int32_t)nb, needswap);
299 ip->i_flag |= IN_CHANGE | IN_UPDATE;
300 }
301
302 /*
303 * Fetch through the indirect blocks, allocating as necessary.
304 */
305
306 for (i = 1;;) {
307 error = bread(vp,
308 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
309 if (error) {
310 brelse(bp);
311 goto fail;
312 }
313 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
314 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
315 if (i == num)
316 break;
317 i++;
318 if (nb != 0) {
319 brelse(bp);
320 continue;
321 }
322 if (pref == 0)
323 pref = ffs_blkpref(ip, lbn, 0, (int32_t *)0);
324 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
325 &newb);
326 if (error) {
327 brelse(bp);
328 goto fail;
329 }
330 nb = newb;
331 *allocblk++ = nb;
332 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
333 nbp->b_blkno = fsbtodb(fs, nb);
334 clrbuf(nbp);
335 if (DOINGSOFTDEP(vp)) {
336 softdep_setup_allocindir_meta(nbp, ip, bp,
337 indirs[i - 1].in_off, nb);
338 bdwrite(nbp);
339 } else {
340
341 /*
342 * Write synchronously so that indirect blocks
343 * never point at garbage.
344 */
345
346 if ((error = bwrite(nbp)) != 0) {
347 brelse(bp);
348 goto fail;
349 }
350 }
351 if (unwindidx < 0)
352 unwindidx = i - 1;
353 /* XXX ondisk32 */
354 bap[indirs[i - 1].in_off] = ufs_rw32((int32_t)nb, needswap);
355
356 /*
357 * If required, write synchronously, otherwise use
358 * delayed write.
359 */
360
361 if (flags & B_SYNC) {
362 bwrite(bp);
363 } else {
364 bdwrite(bp);
365 }
366 }
367
368 /*
369 * Get the data block, allocating if necessary.
370 */
371
372 if (nb == 0) {
373 pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
374 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
375 &newb);
376 if (error) {
377 brelse(bp);
378 goto fail;
379 }
380 nb = newb;
381 *allocblk++ = nb;
382 if (bpp != NULL) {
383 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
384 nbp->b_blkno = fsbtodb(fs, nb);
385 if (flags & B_CLRBUF)
386 clrbuf(nbp);
387 *bpp = nbp;
388 }
389 if (DOINGSOFTDEP(vp))
390 softdep_setup_allocindir_page(ip, lbn, bp,
391 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
392 /* XXX ondisk32 */
393 bap[indirs[num].in_off] = ufs_rw32((int32_t)nb, needswap);
394 if (allocib == NULL && unwindidx < 0) {
395 unwindidx = i - 1;
396 }
397
398 /*
399 * If required, write synchronously, otherwise use
400 * delayed write.
401 */
402
403 if (flags & B_SYNC) {
404 bwrite(bp);
405 } else {
406 bdwrite(bp);
407 }
408 return (0);
409 }
410 brelse(bp);
411 if (bpp != NULL) {
412 if (flags & B_CLRBUF) {
413 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
414 if (error) {
415 brelse(nbp);
416 goto fail;
417 }
418 } else {
419 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
420 nbp->b_blkno = fsbtodb(fs, nb);
421 clrbuf(nbp);
422 }
423 *bpp = nbp;
424 }
425 return (0);
426
427 fail:
428 /*
429 * If we have failed part way through block allocation, we
430 * have to deallocate any indirect blocks that we have allocated.
431 */
432
433 if (unwindidx >= 0) {
434
435 /*
436 * First write out any buffers we've created to resolve their
437 * softdeps. This must be done in reverse order of creation
438 * so that we resolve the dependencies in one pass.
439 * Write the cylinder group buffers for these buffers too.
440 */
441
442 for (i = num; i >= unwindidx; i--) {
443 if (i == 0) {
444 break;
445 }
446 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
447 0);
448 if (bp->b_flags & B_DELWRI) {
449 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
450 dbtofsb(fs, bp->b_blkno))));
451 bwrite(bp);
452 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
453 0, 0);
454 if (bp->b_flags & B_DELWRI) {
455 bwrite(bp);
456 } else {
457 bp->b_flags |= B_INVAL;
458 brelse(bp);
459 }
460 } else {
461 bp->b_flags |= B_INVAL;
462 brelse(bp);
463 }
464 }
465 if (unwindidx == 0) {
466 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
467 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
468 }
469
470 /*
471 * Now that any dependencies that we created have been
472 * resolved, we can undo the partial allocation.
473 */
474
475 if (unwindidx == 0) {
476 *allocib = 0;
477 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
478 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
479 } else {
480 int r;
481
482 r = bread(vp, indirs[unwindidx].in_lbn,
483 (int)fs->fs_bsize, NOCRED, &bp);
484 if (r) {
485 panic("Could not unwind indirect block, error %d", r);
486 brelse(bp);
487 } else {
488 bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
489 bap[indirs[unwindidx].in_off] = 0;
490 bwrite(bp);
491 }
492 }
493 for (i = unwindidx + 1; i <= num; i++) {
494 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
495 0);
496 bp->b_flags |= B_INVAL;
497 brelse(bp);
498 }
499 }
500 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
501 ffs_blkfree(ip, *blkp, fs->fs_bsize);
502 deallocated += fs->fs_bsize;
503 }
504 if (deallocated) {
505 #ifdef QUOTA
506 /*
507 * Restore user's disk quota because allocation failed.
508 */
509 (void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
510 #endif
511 ip->i_ffs_blocks -= btodb(deallocated);
512 ip->i_flag |= IN_CHANGE | IN_UPDATE;
513 }
514 return (error);
515 }
516
517
518 int
519 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
520 struct ucred *cred)
521 {
522 struct inode *ip = VTOI(vp);
523 struct fs *fs = ip->i_fs;
524 int error, delta, bshift, bsize;
525 UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
526
527 error = 0;
528 bshift = fs->fs_bshift;
529 bsize = 1 << bshift;
530
531 delta = off & (bsize - 1);
532 off -= delta;
533 len += delta;
534
535 while (len > 0) {
536 bsize = MIN(bsize, len);
537
538 error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
539 if (error) {
540 goto out;
541 }
542
543 /*
544 * increase file size now, VOP_BALLOC() requires that
545 * EOF be up-to-date before each call.
546 */
547
548 if (ip->i_ffs_size < off + bsize) {
549 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
550 vp, ip->i_ffs_size, off + bsize, 0);
551 ip->i_ffs_size = off + bsize;
552 }
553
554 off += bsize;
555 len -= bsize;
556 }
557
558 out:
559 return error;
560 }
561