/*	$NetBSD: ffs_balloc.c,v 1.30 2002/06/05 05:23:51 chs Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.30 2002/06/05 05:23:51 chs Exp $");
40
41 #if defined(_KERNEL_OPT)
42 #include "opt_quota.h"
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/buf.h>
48 #include <sys/file.h>
49 #include <sys/mount.h>
50 #include <sys/vnode.h>
51 #include <sys/mount.h>
52
53 #include <ufs/ufs/quota.h>
54 #include <ufs/ufs/ufsmount.h>
55 #include <ufs/ufs/inode.h>
56 #include <ufs/ufs/ufs_extern.h>
57 #include <ufs/ufs/ufs_bswap.h>
58
59 #include <ufs/ffs/fs.h>
60 #include <ufs/ffs/ffs_extern.h>
61
62 #include <uvm/uvm.h>
63
64 /*
65 * Balloc defines the structure of file system storage
66 * by allocating the physical blocks on a device given
67 * the inode and the logical block number in a file.
68 */
69 int
70 ffs_balloc(v)
71 void *v;
72 {
73 struct vop_balloc_args /* {
74 struct vnode *a_vp;
75 off_t a_startoffset;
76 int a_size;
77 struct ucred *a_cred;
78 int a_flags;
79 struct buf **a_bpp;
80 } */ *ap = v;
81 ufs_daddr_t lbn;
82 int size;
83 struct ucred *cred;
84 int flags;
85 ufs_daddr_t nb;
86 struct buf *bp, *nbp;
87 struct vnode *vp = ap->a_vp;
88 struct inode *ip = VTOI(vp);
89 struct fs *fs = ip->i_fs;
90 struct indir indirs[NIADDR + 2];
91 ufs_daddr_t newb, *bap, pref;
92 int deallocated, osize, nsize, num, i, error;
93 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
94 int unwindidx = -1;
95 struct buf **bpp = ap->a_bpp;
96 #ifdef FFS_EI
97 const int needswap = UFS_FSNEEDSWAP(fs);
98 #endif
99 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
100
101 lbn = lblkno(fs, ap->a_startoffset);
102 size = blkoff(fs, ap->a_startoffset) + ap->a_size;
103 if (size > fs->fs_bsize)
104 panic("ffs_balloc: blk too big");
105 if (bpp != NULL) {
106 *bpp = NULL;
107 }
108 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
109
110 KASSERT(size <= fs->fs_bsize);
111 if (lbn < 0)
112 return (EFBIG);
113 cred = ap->a_cred;
114 flags = ap->a_flags;
115
116 /*
117 * If the next write will extend the file into a new block,
118 * and the file is currently composed of a fragment
119 * this fragment has to be extended to be a full block.
120 */
121
122 nb = lblkno(fs, ip->i_ffs_size);
123 if (nb < NDADDR && nb < lbn) {
124 osize = blksize(fs, ip, nb);
125 if (osize < fs->fs_bsize && osize > 0) {
126 error = ffs_realloccg(ip, nb,
127 ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
128 osize, (int)fs->fs_bsize, cred, bpp, &newb);
129 if (error)
130 return (error);
131 if (DOINGSOFTDEP(vp))
132 softdep_setup_allocdirect(ip, nb, newb,
133 ufs_rw32(ip->i_ffs_db[nb], needswap),
134 fs->fs_bsize, osize, bpp ? *bpp : NULL);
135 ip->i_ffs_size = lblktosize(fs, nb + 1);
136 uvm_vnp_setsize(vp, ip->i_ffs_size);
137 ip->i_ffs_db[nb] = ufs_rw32(newb, needswap);
138 ip->i_flag |= IN_CHANGE | IN_UPDATE;
139 if (bpp) {
140 if (flags & B_SYNC)
141 bwrite(*bpp);
142 else
143 bawrite(*bpp);
144 }
145 }
146 }
147
148 /*
149 * The first NDADDR blocks are direct blocks
150 */
151
152 if (lbn < NDADDR) {
153 nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
154 if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {
155
156 /*
157 * The block is an already-allocated direct block
158 * and the file already extends past this block,
159 * thus this must be a whole block.
160 * Just read the block (if requested).
161 */
162
163 if (bpp != NULL) {
164 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
165 bpp);
166 if (error) {
167 brelse(*bpp);
168 return (error);
169 }
170 }
171 return (0);
172 }
173 if (nb != 0) {
174
175 /*
176 * Consider need to reallocate a fragment.
177 */
178
179 osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
180 nsize = fragroundup(fs, size);
181 if (nsize <= osize) {
182
183 /*
184 * The existing block is already
185 * at least as big as we want.
186 * Just read the block (if requested).
187 */
188
189 if (bpp != NULL) {
190 error = bread(vp, lbn, osize, NOCRED,
191 bpp);
192 if (error) {
193 brelse(*bpp);
194 return (error);
195 }
196 }
197 return 0;
198 } else {
199
200 /*
201 * The existing block is smaller than we want,
202 * grow it.
203 */
204
205 error = ffs_realloccg(ip, lbn,
206 ffs_blkpref(ip, lbn, (int)lbn,
207 &ip->i_ffs_db[0]), osize, nsize, cred,
208 bpp, &newb);
209 if (error)
210 return (error);
211 if (DOINGSOFTDEP(vp))
212 softdep_setup_allocdirect(ip, lbn,
213 newb, nb, nsize, osize,
214 bpp ? *bpp : NULL);
215 }
216 } else {
217
218 /*
219 * the block was not previously allocated,
220 * allocate a new block or fragment.
221 */
222
223 if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
224 nsize = fragroundup(fs, size);
225 else
226 nsize = fs->fs_bsize;
227 error = ffs_alloc(ip, lbn,
228 ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
229 nsize, cred, &newb);
230 if (error)
231 return (error);
232 if (bpp != NULL) {
233 bp = getblk(vp, lbn, nsize, 0, 0);
234 bp->b_blkno = fsbtodb(fs, newb);
235 if (flags & B_CLRBUF)
236 clrbuf(bp);
237 *bpp = bp;
238 }
239 if (DOINGSOFTDEP(vp)) {
240 softdep_setup_allocdirect(ip, lbn, newb, 0,
241 nsize, 0, bpp ? *bpp : NULL);
242 }
243 }
244 ip->i_ffs_db[lbn] = ufs_rw32(newb, needswap);
245 ip->i_flag |= IN_CHANGE | IN_UPDATE;
246 return (0);
247 }
248
249 /*
250 * Determine the number of levels of indirection.
251 */
252
253 pref = 0;
254 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
255 return (error);
256
257 /*
258 * Fetch the first indirect block allocating if necessary.
259 */
260
261 --num;
262 nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
263 allocib = NULL;
264 allocblk = allociblk;
265 if (nb == 0) {
266 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
267 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
268 &newb);
269 if (error)
270 goto fail;
271 nb = newb;
272 *allocblk++ = nb;
273 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
274 bp->b_blkno = fsbtodb(fs, nb);
275 clrbuf(bp);
276 if (DOINGSOFTDEP(vp)) {
277 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
278 newb, 0, fs->fs_bsize, 0, bp);
279 bdwrite(bp);
280 } else {
281
282 /*
283 * Write synchronously so that indirect blocks
284 * never point at garbage.
285 */
286
287 if ((error = bwrite(bp)) != 0)
288 goto fail;
289 }
290 unwindidx = 0;
291 allocib = &ip->i_ffs_ib[indirs[0].in_off];
292 *allocib = ufs_rw32(nb, needswap);
293 ip->i_flag |= IN_CHANGE | IN_UPDATE;
294 }
295
296 /*
297 * Fetch through the indirect blocks, allocating as necessary.
298 */
299
300 for (i = 1;;) {
301 error = bread(vp,
302 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
303 if (error) {
304 brelse(bp);
305 goto fail;
306 }
307 bap = (ufs_daddr_t *)bp->b_data;
308 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
309 if (i == num)
310 break;
311 i++;
312 if (nb != 0) {
313 brelse(bp);
314 continue;
315 }
316 if (pref == 0)
317 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
318 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
319 &newb);
320 if (error) {
321 brelse(bp);
322 goto fail;
323 }
324 nb = newb;
325 *allocblk++ = nb;
326 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
327 nbp->b_blkno = fsbtodb(fs, nb);
328 clrbuf(nbp);
329 if (DOINGSOFTDEP(vp)) {
330 softdep_setup_allocindir_meta(nbp, ip, bp,
331 indirs[i - 1].in_off, nb);
332 bdwrite(nbp);
333 } else {
334
335 /*
336 * Write synchronously so that indirect blocks
337 * never point at garbage.
338 */
339
340 if ((error = bwrite(nbp)) != 0) {
341 brelse(bp);
342 goto fail;
343 }
344 }
345 if (unwindidx < 0)
346 unwindidx = i - 1;
347 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
348
349 /*
350 * If required, write synchronously, otherwise use
351 * delayed write.
352 */
353
354 if (flags & B_SYNC) {
355 bwrite(bp);
356 } else {
357 bdwrite(bp);
358 }
359 }
360
361 /*
362 * Get the data block, allocating if necessary.
363 */
364
365 if (nb == 0) {
366 pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
367 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
368 &newb);
369 if (error) {
370 brelse(bp);
371 goto fail;
372 }
373 nb = newb;
374 *allocblk++ = nb;
375 if (bpp != NULL) {
376 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
377 nbp->b_blkno = fsbtodb(fs, nb);
378 if (flags & B_CLRBUF)
379 clrbuf(nbp);
380 *bpp = nbp;
381 }
382 if (DOINGSOFTDEP(vp))
383 softdep_setup_allocindir_page(ip, lbn, bp,
384 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
385 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
386 if (allocib == NULL && unwindidx < 0) {
387 unwindidx = i - 1;
388 }
389
390 /*
391 * If required, write synchronously, otherwise use
392 * delayed write.
393 */
394
395 if (flags & B_SYNC) {
396 bwrite(bp);
397 } else {
398 bdwrite(bp);
399 }
400 return (0);
401 }
402 brelse(bp);
403 if (bpp != NULL) {
404 if (flags & B_CLRBUF) {
405 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
406 if (error) {
407 brelse(nbp);
408 goto fail;
409 }
410 } else {
411 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
412 nbp->b_blkno = fsbtodb(fs, nb);
413 clrbuf(nbp);
414 }
415 *bpp = nbp;
416 }
417 return (0);
418
419 fail:
420 /*
421 * If we have failed part way through block allocation, we
422 * have to deallocate any indirect blocks that we have allocated.
423 */
424
425 if (unwindidx >= 0) {
426
427 /*
428 * First write out any buffers we've created to resolve their
429 * softdeps. This must be done in reverse order of creation
430 * so that we resolve the dependencies in one pass.
431 * Write the cylinder group buffers for these buffers too.
432 */
433
434 for (i = num; i >= unwindidx; i--) {
435 if (i == 0) {
436 break;
437 }
438 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
439 0);
440 if (bp->b_flags & B_DELWRI) {
441 nb = fsbtodb(fs, cgtod(fs, dtog(fs,
442 dbtofsb(fs, bp->b_blkno))));
443 bwrite(bp);
444 bp = getblk(ip->i_devvp, nb, (int)fs->fs_cgsize,
445 0, 0);
446 if (bp->b_flags & B_DELWRI) {
447 bwrite(bp);
448 } else {
449 bp->b_flags |= B_INVAL;
450 brelse(bp);
451 }
452 } else {
453 bp->b_flags |= B_INVAL;
454 brelse(bp);
455 }
456 }
457 if (unwindidx == 0) {
458 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
459 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
460 }
461
462 /*
463 * Now that any dependencies that we created have been
464 * resolved, we can undo the partial allocation.
465 */
466
467 if (unwindidx == 0) {
468 *allocib = 0;
469 ip->i_flag |= IN_MODIFIED | IN_CHANGE | IN_UPDATE;
470 VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
471 } else {
472 int r;
473
474 r = bread(vp, indirs[unwindidx].in_lbn,
475 (int)fs->fs_bsize, NOCRED, &bp);
476 if (r) {
477 panic("Could not unwind indirect block, error %d", r);
478 brelse(bp);
479 } else {
480 bap = (ufs_daddr_t *)bp->b_data;
481 bap[indirs[unwindidx].in_off] = 0;
482 bwrite(bp);
483 }
484 }
485 for (i = unwindidx + 1; i <= num; i++) {
486 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
487 0);
488 bp->b_flags |= B_INVAL;
489 brelse(bp);
490 }
491 }
492 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
493 ffs_blkfree(ip, *blkp, fs->fs_bsize);
494 deallocated += fs->fs_bsize;
495 }
496 if (deallocated) {
497 #ifdef QUOTA
498 /*
499 * Restore user's disk quota because allocation failed.
500 */
501 (void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
502 #endif
503 ip->i_ffs_blocks -= btodb(deallocated);
504 ip->i_flag |= IN_CHANGE | IN_UPDATE;
505 }
506 return (error);
507 }
508
509
510 int
511 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
512 struct ucred *cred)
513 {
514 struct inode *ip = VTOI(vp);
515 struct fs *fs = ip->i_fs;
516 int error, delta, bshift, bsize;
517 UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
518
519 error = 0;
520 bshift = fs->fs_bshift;
521 bsize = 1 << bshift;
522
523 delta = off & (bsize - 1);
524 off -= delta;
525 len += delta;
526
527 while (len > 0) {
528 bsize = MIN(bsize, len);
529
530 error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
531 if (error) {
532 goto out;
533 }
534
535 /*
536 * increase file size now, VOP_BALLOC() requires that
537 * EOF be up-to-date before each call.
538 */
539
540 if (ip->i_ffs_size < off + bsize) {
541 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
542 vp, ip->i_ffs_size, off + bsize, 0);
543 ip->i_ffs_size = off + bsize;
544 }
545
546 off += bsize;
547 len -= bsize;
548 }
549
550 out:
551 return error;
552 }
553