/*	$NetBSD: ffs_balloc.c,v 1.25.2.1 2001/10/01 12:48:21 fvdl Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
36 */
37
38 #if defined(_KERNEL_OPT)
39 #include "opt_quota.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/buf.h>
45 #include <sys/proc.h>
46 #include <sys/file.h>
47 #include <sys/mount.h>
48 #include <sys/vnode.h>
49 #include <sys/mount.h>
50
51 #include <ufs/ufs/quota.h>
52 #include <ufs/ufs/ufsmount.h>
53 #include <ufs/ufs/inode.h>
54 #include <ufs/ufs/ufs_extern.h>
55 #include <ufs/ufs/ufs_bswap.h>
56
57 #include <ufs/ffs/fs.h>
58 #include <ufs/ffs/ffs_extern.h>
59
60 #include <uvm/uvm.h>
61
62 /*
63 * Balloc defines the structure of file system storage
64 * by allocating the physical blocks on a device given
65 * the inode and the logical block number in a file.
66 */
67 int
68 ffs_balloc(v)
69 void *v;
70 {
71 struct vop_balloc_args /* {
72 struct vnode *a_vp;
73 off_t a_startoffset;
74 int a_size;
75 struct ucred *a_cred;
76 int a_flags;
77 struct buf **a_bpp;
78 } */ *ap = v;
79 ufs_daddr_t lbn;
80 int size;
81 struct ucred *cred;
82 int flags;
83 ufs_daddr_t nb;
84 struct buf *bp, *nbp;
85 struct vnode *vp = ap->a_vp;
86 struct inode *ip = VTOI(vp);
87 struct fs *fs = ip->i_fs;
88 struct indir indirs[NIADDR + 2];
89 ufs_daddr_t newb, *bap, pref;
90 int deallocated, osize, nsize, num, i, error;
91 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
92 int unwindidx = -1;
93 struct buf **bpp = ap->a_bpp;
94 off_t off;
95 #ifdef FFS_EI
96 const int needswap = UFS_FSNEEDSWAP(fs);
97 #endif
98 UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);
99
100 lbn = lblkno(fs, ap->a_startoffset);
101 size = blkoff(fs, ap->a_startoffset) + ap->a_size;
102 if (size > fs->fs_bsize)
103 panic("ffs_balloc: blk too big");
104 if (bpp != NULL) {
105 *bpp = NULL;
106 }
107 UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);
108
109 KASSERT(size <= fs->fs_bsize);
110 if (lbn < 0)
111 return (EFBIG);
112 cred = ap->a_cred;
113 flags = ap->a_flags;
114
115 /*
116 * If the next write will extend the file into a new block,
117 * and the file is currently composed of a fragment
118 * this fragment has to be extended to be a full block.
119 */
120
121 nb = lblkno(fs, ip->i_ffs_size);
122 if (nb < NDADDR && nb < lbn) {
123 osize = blksize(fs, ip, nb);
124 if (osize < fs->fs_bsize && osize > 0) {
125 error = ffs_realloccg(ip, nb,
126 ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
127 osize, (int)fs->fs_bsize, cred, bpp, &newb);
128 if (error)
129 return (error);
130 if (DOINGSOFTDEP(vp))
131 softdep_setup_allocdirect(ip, nb, newb,
132 ufs_rw32(ip->i_ffs_db[nb], needswap),
133 fs->fs_bsize, osize, bpp ? *bpp : NULL);
134 ip->i_ffs_size = lblktosize(fs, nb + 1);
135 uvm_vnp_setsize(vp, ip->i_ffs_size);
136 ip->i_ffs_db[nb] = ufs_rw32(newb, needswap);
137 ip->i_flag |= IN_CHANGE | IN_UPDATE;
138 if (bpp) {
139 if (flags & B_SYNC)
140 bwrite(*bpp);
141 else
142 bawrite(*bpp);
143 }
144 }
145 }
146
147 /*
148 * The first NDADDR blocks are direct blocks
149 */
150
151 if (lbn < NDADDR) {
152 nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
153 if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {
154
155 /*
156 * The block is an already-allocated direct block
157 * and the file already extends past this block,
158 * thus this must be a whole block.
159 * Just read the block (if requested).
160 */
161
162 if (bpp != NULL) {
163 error = bread(vp, lbn, fs->fs_bsize, NOCRED,
164 bpp);
165 if (error) {
166 brelse(*bpp);
167 return (error);
168 }
169 }
170 return (0);
171 }
172 if (nb != 0) {
173
174 /*
175 * Consider need to reallocate a fragment.
176 */
177
178 osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
179 nsize = fragroundup(fs, size);
180 if (nsize <= osize) {
181
182 /*
183 * The existing block is already
184 * at least as big as we want.
185 * Just read the block (if requested).
186 */
187
188 if (bpp != NULL) {
189 error = bread(vp, lbn, osize, NOCRED,
190 bpp);
191 if (error) {
192 brelse(*bpp);
193 return (error);
194 }
195 }
196 return 0;
197 } else {
198
199 /*
200 * The existing block is smaller than we want,
201 * grow it.
202 */
203
204 error = ffs_realloccg(ip, lbn,
205 ffs_blkpref(ip, lbn, (int)lbn,
206 &ip->i_ffs_db[0]), osize, nsize, cred,
207 bpp, &newb);
208 if (error)
209 return (error);
210 if (DOINGSOFTDEP(vp))
211 softdep_setup_allocdirect(ip, lbn,
212 newb, nb, nsize, osize,
213 bpp ? *bpp : NULL);
214 }
215 } else {
216
217 /*
218 * the block was not previously allocated,
219 * allocate a new block or fragment.
220 */
221
222 if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
223 nsize = fragroundup(fs, size);
224 else
225 nsize = fs->fs_bsize;
226 error = ffs_alloc(ip, lbn,
227 ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
228 nsize, cred, &newb);
229 if (error)
230 return (error);
231 if (bpp != NULL) {
232 bp = getblk(vp, lbn, nsize, 0, 0);
233 bp->b_blkno = fsbtodb(fs, newb);
234 if (flags & B_CLRBUF)
235 clrbuf(bp);
236 *bpp = bp;
237 }
238 if (DOINGSOFTDEP(vp)) {
239 softdep_setup_allocdirect(ip, lbn, newb, 0,
240 nsize, 0, bpp ? *bpp : NULL);
241 }
242 }
243 ip->i_ffs_db[lbn] = ufs_rw32(newb, needswap);
244 ip->i_flag |= IN_CHANGE | IN_UPDATE;
245 return (0);
246 }
247 /*
248 * Determine the number of levels of indirection.
249 */
250 pref = 0;
251 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
252 return(error);
253
254 #ifdef DIAGNOSTIC
255 if (num < 1)
256 panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
257 #endif
258 /*
259 * Fetch the first indirect block allocating if necessary.
260 */
261 --num;
262 nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
263 allocib = NULL;
264 allocblk = allociblk;
265 if (nb == 0) {
266 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
267 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
268 &newb);
269 if (error)
270 goto fail;
271 nb = newb;
272 *allocblk++ = nb;
273 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
274 bp->b_blkno = fsbtodb(fs, nb);
275 clrbuf(bp);
276 if (DOINGSOFTDEP(vp)) {
277 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
278 newb, 0, fs->fs_bsize, 0, bp);
279 bdwrite(bp);
280 } else {
281 /*
282 * Write synchronously so that indirect blocks
283 * never point at garbage.
284 */
285 if ((error = bwrite(bp)) != 0)
286 goto fail;
287 }
288 unwindidx = 0;
289 allocib = &ip->i_ffs_ib[indirs[0].in_off];
290 *allocib = ufs_rw32(nb, needswap);
291 ip->i_flag |= IN_CHANGE | IN_UPDATE;
292 }
293 /*
294 * Fetch through the indirect blocks, allocating as necessary.
295 */
296 for (i = 1;;) {
297 error = bread(vp,
298 indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
299 if (error) {
300 brelse(bp);
301 goto fail;
302 }
303 bap = (ufs_daddr_t *)bp->b_data;
304 nb = ufs_rw32(bap[indirs[i].in_off], needswap);
305 if (i == num)
306 break;
307 i++;
308 if (nb != 0) {
309 brelse(bp);
310 continue;
311 }
312 if (pref == 0)
313 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
314 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
315 &newb);
316 if (error) {
317 brelse(bp);
318 goto fail;
319 }
320 nb = newb;
321 *allocblk++ = nb;
322 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
323 nbp->b_blkno = fsbtodb(fs, nb);
324 clrbuf(nbp);
325 if (DOINGSOFTDEP(vp)) {
326 softdep_setup_allocindir_meta(nbp, ip, bp,
327 indirs[i - 1].in_off, nb);
328 bdwrite(nbp);
329 } else {
330 /*
331 * Write synchronously so that indirect blocks
332 * never point at garbage.
333 */
334 if ((error = bwrite(nbp)) != 0) {
335 brelse(bp);
336 goto fail;
337 }
338 }
339 if (unwindidx < 0)
340 unwindidx = i - 1;
341 bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
342 /*
343 * If required, write synchronously, otherwise use
344 * delayed write.
345 */
346 if (flags & B_SYNC) {
347 bwrite(bp);
348 } else {
349 bdwrite(bp);
350 }
351 }
352 /*
353 * Get the data block, allocating if necessary.
354 */
355 if (nb == 0) {
356 pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
357 error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
358 &newb);
359 if (error) {
360 brelse(bp);
361 goto fail;
362 }
363 nb = newb;
364 *allocblk++ = nb;
365 if (bpp != NULL) {
366 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
367 nbp->b_blkno = fsbtodb(fs, nb);
368 if (flags & B_CLRBUF)
369 clrbuf(nbp);
370 *bpp = nbp;
371 }
372 if (DOINGSOFTDEP(vp))
373 softdep_setup_allocindir_page(ip, lbn, bp,
374 indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
375 bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
376 if (allocib == NULL && unwindidx < 0) {
377 unwindidx = i - 1;
378 }
379 /*
380 * If required, write synchronously, otherwise use
381 * delayed write.
382 */
383 if (flags & B_SYNC) {
384 bwrite(bp);
385 } else {
386 bdwrite(bp);
387 }
388 return (0);
389 }
390 brelse(bp);
391 if (bpp != NULL) {
392 if (flags & B_CLRBUF) {
393 error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
394 if (error) {
395 brelse(nbp);
396 goto fail;
397 }
398 } else {
399 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
400 nbp->b_blkno = fsbtodb(fs, nb);
401 clrbuf(nbp);
402 }
403 *bpp = nbp;
404 }
405 return (0);
406
407 fail:
408
409 /*
410 * Restore the UVM state to what the rest of the FFS code is
411 * expecting. Unbusy any pages that we allocated and left busy up in
412 * ufs_balloc_range(). the following VOP_FSYNC() will try to busy
413 * those pages again, which would deadlock if they are still busy
414 * from before. After this we're back to a state where we can undo
415 * any partial allocation.
416 */
417
418 simple_lock(&vp->v_uobj.vmobjlock);
419 for (off = ap->a_startoffset; off < ap->a_startoffset + fs->fs_bsize;
420 off += PAGE_SIZE) {
421 struct vm_page *pg;
422
423 pg = uvm_pagelookup(&vp->v_uobj, off);
424 if (pg == NULL) {
425 break;
426 }
427 uvm_pageactivate(pg);
428 KASSERT((pg->flags & PG_FAKE) == 0);
429 pg->flags &= ~(PG_BUSY);
430 UVM_PAGE_OWN(pg, NULL);
431 }
432 simple_unlock(&vp->v_uobj.vmobjlock);
433
434 /*
435 * If we have failed part way through block allocation, we
436 * have to deallocate any indirect blocks that we have allocated.
437 * We have to fsync the file before we start to get rid of all
438 * of its dependencies so that we do not leave them dangling.
439 * We have to sync it at the end so that the soft updates code
440 * does not find any untracked changes. Although this is really
441 * slow, running out of disk space is not expected to be a common
442 * occurence. The error return from fsync is ignored as we already
443 * have an error to return to the user.
444 */
445
446 (void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc);
447 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
448 ffs_blkfree(ip, *blkp, fs->fs_bsize);
449 deallocated += fs->fs_bsize;
450 }
451 if (unwindidx >= 0) {
452 if (unwindidx == 0) {
453 *allocib = 0;
454 } else {
455 int r;
456
457 r = bread(vp, indirs[unwindidx].in_lbn,
458 (int)fs->fs_bsize, NOCRED, &bp);
459 if (r) {
460 panic("Could not unwind indirect block, error %d", r);
461 brelse(bp);
462 } else {
463 bap = (ufs_daddr_t *)bp->b_data;
464 bap[indirs[unwindidx].in_off] = 0;
465 if (flags & B_SYNC)
466 bwrite(bp);
467 else
468 bdwrite(bp);
469 }
470 }
471 for (i = unwindidx + 1; i <= num; i++) {
472 bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
473 0);
474 bp->b_flags |= B_INVAL;
475 brelse(bp);
476 }
477 }
478 if (deallocated) {
479 #ifdef QUOTA
480 /*
481 * Restore user's disk quota because allocation failed.
482 */
483 (void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
484 #endif
485 ip->i_ffs_blocks -= btodb(deallocated);
486 ip->i_flag |= IN_CHANGE | IN_UPDATE;
487 }
488 (void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc);
489 return (error);
490 }
491
492
493 int
494 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
495 struct ucred *cred)
496 {
497 struct inode *ip = VTOI(vp);
498 struct fs *fs = ip->i_fs;
499 int error, delta, bshift, bsize;
500 UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
501
502 error = 0;
503 bshift = fs->fs_bshift;
504 bsize = 1 << bshift;
505
506 delta = off & (bsize - 1);
507 off -= delta;
508 len += delta;
509
510 while (len > 0) {
511 bsize = MIN(bsize, len);
512
513 error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
514 if (error) {
515 goto out;
516 }
517
518 /*
519 * increase file size now, VOP_BALLOC() requires that
520 * EOF be up-to-date before each call.
521 */
522
523 if (ip->i_ffs_size < off + bsize) {
524 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
525 vp, ip->i_ffs_size, off + bsize, 0);
526 ip->i_ffs_size = off + bsize;
527 }
528
529 off += bsize;
530 len -= bsize;
531 }
532
533 out:
534 return error;
535 }
536