/*	$NetBSD: ffs_balloc.c,v 1.23.2.5 2001/10/08 20:11:51 nathanw Exp $	*/
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95
36 */
37
38 #if defined(_KERNEL_OPT)
39 #include "opt_quota.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/buf.h>
45 #include <sys/lwp.h>
46 #include <sys/proc.h>
47 #include <sys/file.h>
48 #include <sys/mount.h>
49 #include <sys/vnode.h>
50 #include <sys/mount.h>
51
52 #include <ufs/ufs/quota.h>
53 #include <ufs/ufs/ufsmount.h>
54 #include <ufs/ufs/inode.h>
55 #include <ufs/ufs/ufs_extern.h>
56 #include <ufs/ufs/ufs_bswap.h>
57
58 #include <ufs/ffs/fs.h>
59 #include <ufs/ffs/ffs_extern.h>
60
61 #include <uvm/uvm.h>
62
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 *
 * This is the FFS implementation of VOP_BALLOC.  It guarantees backing
 * store for the byte range [a_startoffset, a_startoffset + a_size):
 * growing a trailing fragment to a full block if the write extends the
 * file, allocating a direct block or fragment, or walking (and filling
 * in) the indirect-block chain as needed.  If a_bpp is non-NULL, the
 * buffer containing the data block is returned through it; otherwise
 * only the allocation is performed.  On failure, every block allocated
 * along the way is freed again and the metadata pointers are restored
 * (see the "fail" label below).
 *
 * Returns 0 on success or an errno value.
 */
int
ffs_balloc(v)
	void *v;
{
	struct vop_balloc_args /* {
		struct vnode *a_vp;
		off_t a_startoffset;
		int a_size;
		struct ucred *a_cred;
		int a_flags;
		struct buf **a_bpp;
	} */ *ap = v;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	/*
	 * allociblk[] records every block allocated by this call and
	 * allocblk points one past the last entry, so the blocks can be
	 * freed again if we fail part way through.  allocib remembers
	 * the inode slot of a newly allocated first-level indirect
	 * block so it can be cleared on failure.  unwindidx is the
	 * shallowest indirs[] level whose new pointer must be undone.
	 */
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int unwindidx = -1;
	struct buf **bpp = ap->a_bpp;
	off_t off;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = lblkno(fs, ap->a_startoffset);
	/*
	 * "size" is the in-block offset plus the request length, i.e.
	 * how much of the target block must be backed by storage.
	 */
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size,0);

	KASSERT(size <= fs->fs_bsize);
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */

	nb = lblkno(fs, ip->i_ffs_size);
	if (nb < NDADDR && nb < lbn) {
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			/* Grow the last fragment to a full block. */
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
			    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb, newb,
				    ufs_rw32(ip->i_ffs_db[nb], needswap),
				    fs->fs_bsize, osize, bpp ? *bpp : NULL);
			ip->i_ffs_size = lblktosize(fs, nb + 1);
			uvm_vnp_setsize(vp, ip->i_ffs_size);
			ip->i_ffs_db[nb] = ufs_rw32(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */

	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
		if (nb != 0 && ip->i_ffs_size >= lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
				    bpp);
				if (error) {
					brelse(*bpp);
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider need to reallocate a fragment.
			 */

			osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize, NOCRED,
					    bpp);
					if (error) {
						brelse(*bpp);
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */

				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_ffs_db[0]), osize, nsize, cred,
				    bpp, &newb);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    newb, nb, nsize, osize,
					    bpp ? *bpp : NULL);
			}
		} else {

			/*
			 * the block was not previously allocated,
			 * allocate a new block or fragment.
			 */

			if (ip->i_ffs_size < lblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				bp = getblk(vp, lbn, nsize, 0, 0);
				bp->b_blkno = fsbtodb(fs, newb);
				if (flags & B_CLRBUF)
					clrbuf(bp);
				*bpp = bp;
			}
			if (DOINGSOFTDEP(vp)) {
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bpp ? *bpp : NULL);
			}
		}
		/* Record the new direct block and mark the inode dirty. */
		ip->i_ffs_db[lbn] = ufs_rw32(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);

#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 * After --num, num is the index of the deepest indirs[] level;
	 * indirs[0].in_off indexes the inode's i_ffs_ib[] array.
	 */
	--num;
	nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		unwindidx = 0;
		allocib = &ip->i_ffs_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 * The loop exits via the break at i == num, leaving bp holding
	 * the deepest indirect block and nb the data-block pointer in it.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Remember the shallowest level that needs unwinding. */
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[num].in_off, nb, 0, bpp ? *bpp : NULL);
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	/* Data block already exists; hand back a buffer if requested. */
	brelse(bp);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				brelse(nbp);
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
			nbp->b_blkno = fsbtodb(fs, nb);
			clrbuf(nbp);
		}
		*bpp = nbp;
	}
	return (0);

fail:

	/*
	 * Restore the UVM state to what the rest of the FFS code is
	 * expecting. Unbusy any pages that we allocated and left busy up in
	 * ufs_balloc_range(). the following VOP_FSYNC() will try to busy
	 * those pages again, which would deadlock if they are still busy
	 * from before. After this we're back to a state where we can undo
	 * any partial allocation.
	 */

	simple_lock(&vp->v_uobj.vmobjlock);
	for (off = ap->a_startoffset; off < ap->a_startoffset + fs->fs_bsize;
	     off += PAGE_SIZE) {
		struct vm_page *pg;

		pg = uvm_pagelookup(&vp->v_uobj, off);
		if (pg == NULL) {
			break;
		}
		uvm_pageactivate(pg);
		KASSERT((pg->flags & PG_FAKE) == 0);
		pg->flags &= ~(PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);
	}
	simple_unlock(&vp->v_uobj.vmobjlock);

	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */

	(void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc->l_proc);
	/* Free every block that was allocated during this call. */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (unwindidx >= 0) {
		/*
		 * Clear the pointer to the first block we allocated:
		 * either the inode's indirect slot (level 0) or the
		 * appropriate entry in the parent indirect block.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, NOCRED, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
				/*
				 * NOTE(review): panic() does not return, so
				 * this brelse() is unreachable dead code.
				 */
				brelse(bp);
			} else {
				bap = (ufs_daddr_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				if (flags & B_SYNC)
					bwrite(bp);
				else
					bdwrite(bp);
			}
		}
		/*
		 * Invalidate any cached buffers for the deeper indirect
		 * blocks that are no longer reachable.
		 */
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->fs_bsize, 0,
			    0);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) VOP_FSYNC(vp, cred, FSYNC_WAIT, 0, 0, curproc->l_proc);
	return (error);
}
492
493
494 int
495 ffs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
496 struct ucred *cred)
497 {
498 struct inode *ip = VTOI(vp);
499 struct fs *fs = ip->i_fs;
500 int error, delta, bshift, bsize;
501 UVMHIST_FUNC("ffs_gop_alloc"); UVMHIST_CALLED(ubchist);
502
503 error = 0;
504 bshift = fs->fs_bshift;
505 bsize = 1 << bshift;
506
507 delta = off & (bsize - 1);
508 off -= delta;
509 len += delta;
510
511 while (len > 0) {
512 bsize = MIN(bsize, len);
513
514 error = VOP_BALLOC(vp, off, bsize, cred, flags, NULL);
515 if (error) {
516 goto out;
517 }
518
519 /*
520 * increase file size now, VOP_BALLOC() requires that
521 * EOF be up-to-date before each call.
522 */
523
524 if (ip->i_ffs_size < off + bsize) {
525 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x",
526 vp, ip->i_ffs_size, off + bsize, 0);
527 ip->i_ffs_size = off + bsize;
528 }
529
530 off += bsize;
531 len -= bsize;
532 }
533
534 out:
535 return error;
536 }
537