ext2fs_balloc.c revision 1.3.14.2 1 /* $NetBSD: ext2fs_balloc.c,v 1.3.14.2 2000/12/08 09:20:09 bouyer Exp $ */
2
3 /*
4 * Copyright (c) 1997 Manuel Bouyer.
5 * Copyright (c) 1982, 1986, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93
37 * Modified for ext2fs by Manuel Bouyer.
38 */
39
40 #if defined(_KERNEL) && !defined(_LKM)
41 #include "opt_uvmhist.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/buf.h>
47 #include <sys/proc.h>
48 #include <sys/file.h>
49 #include <sys/vnode.h>
50 #include <sys/mount.h>
51
52 #include <uvm/uvm.h>
53
54 #include <ufs/ufs/quota.h>
55 #include <ufs/ufs/inode.h>
56 #include <ufs/ufs/ufs_extern.h>
57
58 #include <ufs/ext2fs/ext2fs.h>
59 #include <ufs/ext2fs/ext2fs_extern.h>
60
61 /*
62 * Balloc defines the structure of file system storage
63 * by allocating the physical blocks on a device given
64 * the inode and the logical block number in a file.
65 */
66 int
67 ext2fs_balloc(ip, bn, size, cred, bpp, flags)
68 struct inode *ip;
69 ufs_daddr_t bn;
70 int size;
71 struct ucred *cred;
72 struct buf **bpp;
73 int flags;
74 {
75 struct m_ext2fs *fs;
76 ufs_daddr_t nb;
77 struct buf *bp, *nbp;
78 struct vnode *vp = ITOV(ip);
79 struct indir indirs[NIADDR + 2];
80 ufs_daddr_t newb, lbn, *bap, pref;
81 int num, i, error;
82 u_int deallocated;
83 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
84 int unwindidx = -1;
85 UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist);
86
87 UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);
88
89 if (bpp != NULL) {
90 *bpp = NULL;
91 }
92 if (bn < 0)
93 return (EFBIG);
94 fs = ip->i_e2fs;
95 lbn = bn;
96
97 /*
98 * The first NDADDR blocks are direct blocks
99 */
100 if (bn < NDADDR) {
101 nb = fs2h32(ip->i_e2fs_blocks[bn]);
102 if (nb != 0) {
103
104 /*
105 * the block is already allocated, just read it.
106 */
107
108 if (bpp != NULL) {
109 error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
110 &bp);
111 if (error) {
112 brelse(bp);
113 return (error);
114 }
115 *bpp = bp;
116 }
117 return (0);
118 }
119
120 /*
121 * allocate a new direct block.
122 */
123
124 error = ext2fs_alloc(ip, bn,
125 ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
126 cred, &newb);
127 if (error)
128 return (error);
129 ip->i_e2fs_last_lblk = lbn;
130 ip->i_e2fs_last_blk = newb;
131 ip->i_e2fs_blocks[bn] = h2fs32(newb);
132 ip->i_flag |= IN_CHANGE | IN_UPDATE;
133 if (bpp != NULL) {
134 bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
135 bp->b_blkno = fsbtodb(fs, newb);
136 if (flags & B_CLRBUF)
137 clrbuf(bp);
138 *bpp = bp;
139 }
140 return (0);
141 }
142 /*
143 * Determine the number of levels of indirection.
144 */
145 pref = 0;
146 if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0)
147 return(error);
148 #ifdef DIAGNOSTIC
149 if (num < 1)
150 panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n");
151 #endif
152 /*
153 * Fetch the first indirect block allocating if necessary.
154 */
155 --num;
156 nb = fs2h32(ip->i_e2fs_blocks[NDADDR + indirs[0].in_off]);
157 allocib = NULL;
158 allocblk = allociblk;
159 if (nb == 0) {
160 pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
161 error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
162 if (error)
163 return (error);
164 nb = newb;
165 *allocblk++ = nb;
166 ip->i_e2fs_last_blk = newb;
167 bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0);
168 bp->b_blkno = fsbtodb(fs, newb);
169 clrbuf(bp);
170 /*
171 * Write synchronously so that indirect blocks
172 * never point at garbage.
173 */
174 if ((error = bwrite(bp)) != 0)
175 goto fail;
176 unwindidx = 0;
177 allocib = &ip->i_e2fs_blocks[NDADDR + indirs[0].in_off];
178 *allocib = h2fs32(newb);
179 ip->i_flag |= IN_CHANGE | IN_UPDATE;
180 }
181 /*
182 * Fetch through the indirect blocks, allocating as necessary.
183 */
184 for (i = 1;;) {
185 error = bread(vp,
186 indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp);
187 if (error) {
188 brelse(bp);
189 goto fail;
190 }
191 bap = (ufs_daddr_t *)bp->b_data;
192 nb = fs2h32(bap[indirs[i].in_off]);
193 if (i == num)
194 break;
195 i++;
196 if (nb != 0) {
197 brelse(bp);
198 continue;
199 }
200 pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
201 error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
202 if (error) {
203 brelse(bp);
204 goto fail;
205 }
206 nb = newb;
207 *allocblk++ = nb;
208 ip->i_e2fs_last_blk = newb;
209 nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0);
210 nbp->b_blkno = fsbtodb(fs, nb);
211 clrbuf(nbp);
212 /*
213 * Write synchronously so that indirect blocks
214 * never point at garbage.
215 */
216 if ((error = bwrite(nbp)) != 0) {
217 brelse(bp);
218 goto fail;
219 }
220 if (unwindidx < 0)
221 unwindidx = i - 1;
222 bap[indirs[i - 1].in_off] = h2fs32(nb);
223 /*
224 * If required, write synchronously, otherwise use
225 * delayed write.
226 */
227 if (flags & B_SYNC) {
228 bwrite(bp);
229 } else {
230 bdwrite(bp);
231 }
232 }
233 /*
234 * Get the data block, allocating if necessary.
235 */
236 if (nb == 0) {
237 pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
238 error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
239 if (error) {
240 brelse(bp);
241 goto fail;
242 }
243 nb = newb;
244 *allocblk++ = nb;
245 ip->i_e2fs_last_lblk = lbn;
246 ip->i_e2fs_last_blk = newb;
247 bap[indirs[num].in_off] = h2fs32(nb);
248 /*
249 * If required, write synchronously, otherwise use
250 * delayed write.
251 */
252 if (flags & B_SYNC) {
253 bwrite(bp);
254 } else {
255 bdwrite(bp);
256 }
257 if (bpp != NULL) {
258 nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
259 nbp->b_blkno = fsbtodb(fs, nb);
260 if (flags & B_CLRBUF)
261 clrbuf(nbp);
262 *bpp = nbp;
263 }
264 return (0);
265 }
266 brelse(bp);
267 if (bpp != NULL) {
268 if (flags & B_CLRBUF) {
269 error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
270 &nbp);
271 if (error) {
272 brelse(nbp);
273 goto fail;
274 }
275 } else {
276 nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
277 nbp->b_blkno = fsbtodb(fs, nb);
278 }
279 *bpp = nbp;
280 }
281 return (0);
282 fail:
283 /*
284 * If we have failed part way through block allocation, we
285 * have to deallocate any indirect blocks that we have allocated.
286 */
287 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
288 ext2fs_blkfree(ip, *blkp);
289 deallocated += fs->e2fs_bsize;
290 }
291 if (unwindidx >= 0) {
292 if (unwindidx == 0) {
293 *allocib = 0;
294 } else {
295 int r;
296
297 r = bread(vp, indirs[unwindidx].in_lbn,
298 (int)fs->e2fs_bsize, NOCRED, &bp);
299 if (r) {
300 panic("Could not unwind indirect block, error %d", r);
301 brelse(bp);
302 } else {
303 bap = (ufs_daddr_t *)bp->b_data;
304 bap[indirs[unwindidx].in_off] = 0;
305 if (flags & B_SYNC)
306 bwrite(bp);
307 else
308 bdwrite(bp);
309 }
310 }
311 for (i = unwindidx + 1; i <= num; i++) {
312 bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize,
313 0, 0);
314 bp->b_flags |= B_INVAL;
315 brelse(bp);
316 }
317 }
318 if (deallocated) {
319 ip->i_e2fs_nblock -= btodb(deallocated);
320 ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE;
321 }
322 return error;
323 }
324
325 int
326 ext2fs_ballocn(v)
327 void *v;
328 {
329 struct vop_ballocn_args /* {
330 struct vnode *a_vp;
331 off_t a_offset;
332 off_t a_length;
333 struct ucred *a_cred;
334 int a_flags;
335 } */ *ap = v;
336 off_t off, len;
337 struct vnode *vp = ap->a_vp;
338 struct inode *ip = VTOI(vp);
339 struct m_ext2fs *fs = ip->i_e2fs;
340 int error, delta, bshift, bsize;
341 UVMHIST_FUNC("ext2fs_ballocn"); UVMHIST_CALLED(ubchist);
342
343 bshift = fs->e2fs_bshift;
344 bsize = 1 << bshift;
345
346 off = ap->a_offset;
347 len = ap->a_length;
348
349 delta = off & (bsize - 1);
350 off -= delta;
351 len += delta;
352
353 while (len > 0) {
354 bsize = min(bsize, len);
355 UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
356 off, len, bsize, 0);
357
358 error = ext2fs_balloc(ip, lblkno(fs, off), bsize, ap->a_cred,
359 NULL, ap->a_flags);
360 if (error) {
361 UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
362 return error;
363 }
364
365 /*
366 * increase file size now, VOP_BALLOC() requires that
367 * EOF be up-to-date before each call.
368 */
369
370 if (ip->i_e2fs_size < off + bsize) {
371 UVMHIST_LOG(ubchist, "old 0x%x new 0x%x",
372 ip->i_e2fs_size, off + bsize,0,0);
373 ip->i_e2fs_size = off + bsize;
374 if (vp->v_uvm.u_size < ip->i_e2fs_size) {
375 uvm_vnp_setsize(vp, ip->i_e2fs_size);
376 }
377 }
378
379 off += bsize;
380 len -= bsize;
381 }
382 return 0;
383 }
384
385 /*
386 * allocate a range of blocks in a file.
387 * after this function returns, any page entirely contained within the range
388 * will map to invalid data and thus must be overwritten before it is made
389 * accessible to others.
390 */
391
392 int
393 ext2fs_balloc_range(vp, off, len, cred, flags)
394 struct vnode *vp;
395 off_t off, len;
396 struct ucred *cred;
397 int flags;
398 {
399 off_t eof, pagestart, pageend;
400 struct uvm_object *uobj;
401 struct inode *ip = VTOI(vp);
402 int i, delta, error, npages1, npages2;
403 int bshift = vp->v_mount->mnt_fs_bshift;
404 int bsize = 1 << bshift;
405 int ppb = max(bsize >> PAGE_SHIFT, 1);
406 struct vm_page *pgs1[ppb], *pgs2[ppb];
407 UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist);
408 UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
409 vp, off, len, vp->v_uvm.u_size);
410
411 error = 0;
412 uobj = &vp->v_uvm.u_obj;
413 eof = max(vp->v_uvm.u_size, off + len);
414 vp->v_uvm.u_size = eof;
415 UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0);
416 pgs1[0] = pgs2[0] = NULL;
417
418 /*
419 * if the range does not start on a page and block boundary,
420 * cache the first block if the file so the page(s) will contain
421 * the correct data. hold the page(s) busy while we allocate
422 * the backing store for the range.
423 */
424
425 pagestart = trunc_page(off) & ~(bsize - 1);
426 if (off != pagestart) {
427 npages1 = min(ppb, (round_page(eof) - pagestart) >>
428 PAGE_SHIFT);
429 memset(pgs1, 0, npages1);
430 simple_lock(&uobj->vmobjlock);
431 error = VOP_GETPAGES(vp, pagestart, pgs1, &npages1, 0,
432 VM_PROT_READ, 0, PGO_SYNCIO);
433 if (error) {
434 UVMHIST_LOG(ubchist, "gp1 %d", error,0,0,0);
435 goto errout;
436 }
437 for (i = 0; i < npages1; i++) {
438 UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0);
439 }
440 }
441
442 /*
443 * similarly if the range does not end on a page and block boundary.
444 */
445
446 pageend = trunc_page(off + len) & ~(bsize - 1);
447 if (off + len < ip->i_e2fs_size &&
448 off + len != pageend &&
449 pagestart != pageend) {
450 npages2 = min(ppb, (round_page(eof) - pageend) >>
451 PAGE_SHIFT);
452 memset(pgs2, 0, npages2);
453 simple_lock(&uobj->vmobjlock);
454 error = VOP_GETPAGES(vp, pageend, pgs2, &npages2, 0,
455 VM_PROT_READ, 0, PGO_SYNCIO);
456 if (error) {
457 UVMHIST_LOG(ubchist, "gp2 %d", error,0,0,0);
458 goto errout;
459 }
460 for (i = 0; i < npages2; i++) {
461 UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0);
462 }
463 }
464
465 /*
466 * adjust off to be block-aligned.
467 */
468
469 delta = off & (bsize - 1);
470 off -= delta;
471 len += delta;
472
473 /*
474 * now allocate the range.
475 */
476
477 lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL);
478 error = VOP_BALLOCN(vp, off, len, cred, flags);
479 UVMHIST_LOG(ubchist, "ballocn %d", error,0,0,0);
480 lockmgr(&vp->v_glock, LK_RELEASE, NULL);
481
482 /*
483 * unbusy any pages we are holding.
484 */
485
486 errout:
487 simple_lock(&uobj->vmobjlock);
488 if (pgs1[0] != NULL) {
489 uvm_page_unbusy(pgs1, npages1);
490 }
491 if (pgs2[0] != NULL) {
492 uvm_page_unbusy(pgs2, npages2);
493 }
494 simple_unlock(&uobj->vmobjlock);
495 return (error);
496 }
497