ext2fs_balloc.c revision 1.10.4.1 1 /* $NetBSD: ext2fs_balloc.c,v 1.10.4.1 2001/10/01 12:48:18 fvdl Exp $ */
2
3 /*
4 * Copyright (c) 1997 Manuel Bouyer.
5 * Copyright (c) 1982, 1986, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93
37 * Modified for ext2fs by Manuel Bouyer.
38 */
39
40 #if defined(_KERNEL_OPT)
41 #include "opt_uvmhist.h"
42 #endif
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/buf.h>
47 #include <sys/proc.h>
48 #include <sys/file.h>
49 #include <sys/vnode.h>
50 #include <sys/mount.h>
51
52 #include <uvm/uvm.h>
53
54 #include <ufs/ufs/quota.h>
55 #include <ufs/ufs/inode.h>
56 #include <ufs/ufs/ufs_extern.h>
57
58 #include <ufs/ext2fs/ext2fs.h>
59 #include <ufs/ext2fs/ext2fs_extern.h>
60
61 /*
62 * Balloc defines the structure of file system storage
63 * by allocating the physical blocks on a device given
64 * the inode and the logical block number in a file.
65 */
66 int
67 ext2fs_balloc(ip, bn, size, cred, bpp, flags)
68 struct inode *ip;
69 ufs_daddr_t bn;
70 int size;
71 struct ucred *cred;
72 struct buf **bpp;
73 int flags;
74 {
75 struct m_ext2fs *fs;
76 ufs_daddr_t nb;
77 struct buf *bp, *nbp;
78 struct vnode *vp = ITOV(ip);
79 struct indir indirs[NIADDR + 2];
80 ufs_daddr_t newb, lbn, *bap, pref;
81 int num, i, error;
82 u_int deallocated;
83 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
84 int unwindidx = -1;
85 UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist);
86
87 UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);
88
89 if (bpp != NULL) {
90 *bpp = NULL;
91 }
92 if (bn < 0)
93 return (EFBIG);
94 fs = ip->i_e2fs;
95 lbn = bn;
96
97 /*
98 * The first NDADDR blocks are direct blocks
99 */
100 if (bn < NDADDR) {
101 nb = fs2h32(ip->i_e2fs_blocks[bn]);
102 if (nb != 0) {
103
104 /*
105 * the block is already allocated, just read it.
106 */
107
108 if (bpp != NULL) {
109 error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
110 &bp);
111 if (error) {
112 brelse(bp);
113 return (error);
114 }
115 *bpp = bp;
116 }
117 return (0);
118 }
119
120 /*
121 * allocate a new direct block.
122 */
123
124 error = ext2fs_alloc(ip, bn,
125 ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
126 cred, &newb);
127 if (error)
128 return (error);
129 ip->i_e2fs_last_lblk = lbn;
130 ip->i_e2fs_last_blk = newb;
131 ip->i_e2fs_blocks[bn] = h2fs32(newb);
132 ip->i_flag |= IN_CHANGE | IN_UPDATE;
133 if (bpp != NULL) {
134 bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
135 bp->b_blkno = fsbtodb(fs, newb);
136 if (flags & B_CLRBUF)
137 clrbuf(bp);
138 *bpp = bp;
139 }
140 return (0);
141 }
142 /*
143 * Determine the number of levels of indirection.
144 */
145 pref = 0;
146 if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0)
147 return(error);
148 #ifdef DIAGNOSTIC
149 if (num < 1)
150 panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n");
151 #endif
152 /*
153 * Fetch the first indirect block allocating if necessary.
154 */
155 --num;
156 nb = fs2h32(ip->i_e2fs_blocks[NDADDR + indirs[0].in_off]);
157 allocib = NULL;
158 allocblk = allociblk;
159 if (nb == 0) {
160 pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
161 error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
162 if (error)
163 return (error);
164 nb = newb;
165 *allocblk++ = nb;
166 ip->i_e2fs_last_blk = newb;
167 bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0);
168 bp->b_blkno = fsbtodb(fs, newb);
169 clrbuf(bp);
170 /*
171 * Write synchronously so that indirect blocks
172 * never point at garbage.
173 */
174 if ((error = bwrite(bp)) != 0)
175 goto fail;
176 unwindidx = 0;
177 allocib = &ip->i_e2fs_blocks[NDADDR + indirs[0].in_off];
178 *allocib = h2fs32(newb);
179 ip->i_flag |= IN_CHANGE | IN_UPDATE;
180 }
181 /*
182 * Fetch through the indirect blocks, allocating as necessary.
183 */
184 for (i = 1;;) {
185 error = bread(vp,
186 indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp);
187 if (error) {
188 brelse(bp);
189 goto fail;
190 }
191 bap = (ufs_daddr_t *)bp->b_data;
192 nb = fs2h32(bap[indirs[i].in_off]);
193 if (i == num)
194 break;
195 i++;
196 if (nb != 0) {
197 brelse(bp);
198 continue;
199 }
200 pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
201 error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
202 if (error) {
203 brelse(bp);
204 goto fail;
205 }
206 nb = newb;
207 *allocblk++ = nb;
208 ip->i_e2fs_last_blk = newb;
209 nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0);
210 nbp->b_blkno = fsbtodb(fs, nb);
211 clrbuf(nbp);
212 /*
213 * Write synchronously so that indirect blocks
214 * never point at garbage.
215 */
216 if ((error = bwrite(nbp)) != 0) {
217 brelse(bp);
218 goto fail;
219 }
220 if (unwindidx < 0)
221 unwindidx = i - 1;
222 bap[indirs[i - 1].in_off] = h2fs32(nb);
223 /*
224 * If required, write synchronously, otherwise use
225 * delayed write.
226 */
227 if (flags & B_SYNC) {
228 bwrite(bp);
229 } else {
230 bdwrite(bp);
231 }
232 }
233 /*
234 * Get the data block, allocating if necessary.
235 */
236 if (nb == 0) {
237 pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
238 error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
239 if (error) {
240 brelse(bp);
241 goto fail;
242 }
243 nb = newb;
244 *allocblk++ = nb;
245 ip->i_e2fs_last_lblk = lbn;
246 ip->i_e2fs_last_blk = newb;
247 bap[indirs[num].in_off] = h2fs32(nb);
248 /*
249 * If required, write synchronously, otherwise use
250 * delayed write.
251 */
252 if (flags & B_SYNC) {
253 bwrite(bp);
254 } else {
255 bdwrite(bp);
256 }
257 if (bpp != NULL) {
258 nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
259 nbp->b_blkno = fsbtodb(fs, nb);
260 if (flags & B_CLRBUF)
261 clrbuf(nbp);
262 *bpp = nbp;
263 }
264 return (0);
265 }
266 brelse(bp);
267 if (bpp != NULL) {
268 if (flags & B_CLRBUF) {
269 error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
270 &nbp);
271 if (error) {
272 brelse(nbp);
273 goto fail;
274 }
275 } else {
276 nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
277 nbp->b_blkno = fsbtodb(fs, nb);
278 }
279 *bpp = nbp;
280 }
281 return (0);
282 fail:
283 /*
284 * If we have failed part way through block allocation, we
285 * have to deallocate any indirect blocks that we have allocated.
286 */
287 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
288 ext2fs_blkfree(ip, *blkp);
289 deallocated += fs->e2fs_bsize;
290 }
291 if (unwindidx >= 0) {
292 if (unwindidx == 0) {
293 *allocib = 0;
294 } else {
295 int r;
296
297 r = bread(vp, indirs[unwindidx].in_lbn,
298 (int)fs->e2fs_bsize, NOCRED, &bp);
299 if (r) {
300 panic("Could not unwind indirect block, error %d", r);
301 brelse(bp);
302 } else {
303 bap = (ufs_daddr_t *)bp->b_data;
304 bap[indirs[unwindidx].in_off] = 0;
305 if (flags & B_SYNC)
306 bwrite(bp);
307 else
308 bdwrite(bp);
309 }
310 }
311 for (i = unwindidx + 1; i <= num; i++) {
312 bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize,
313 0, 0);
314 bp->b_flags |= B_INVAL;
315 brelse(bp);
316 }
317 }
318 if (deallocated) {
319 ip->i_e2fs_nblock -= btodb(deallocated);
320 ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE;
321 }
322 return error;
323 }
324
325 int
326 ext2fs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags,
327 struct ucred *cred)
328 {
329 struct inode *ip = VTOI(vp);
330 struct m_ext2fs *fs = ip->i_e2fs;
331 int error, delta, bshift, bsize;
332 UVMHIST_FUNC("ext2fs_gop_alloc"); UVMHIST_CALLED(ubchist);
333
334 bshift = fs->e2fs_bshift;
335 bsize = 1 << bshift;
336
337 delta = off & (bsize - 1);
338 off -= delta;
339 len += delta;
340
341 while (len > 0) {
342 bsize = min(bsize, len);
343 UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
344 off, len, bsize, 0);
345
346 error = ext2fs_balloc(ip, lblkno(fs, off), bsize, cred,
347 NULL, flags);
348 if (error) {
349 UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
350 return error;
351 }
352
353 /*
354 * increase file size now, VOP_BALLOC() requires that
355 * EOF be up-to-date before each call.
356 */
357
358 if (ip->i_e2fs_size < off + bsize) {
359 UVMHIST_LOG(ubchist, "old 0x%x new 0x%x",
360 ip->i_e2fs_size, off + bsize,0,0);
361 ip->i_e2fs_size = off + bsize;
362 if (vp->v_size < ip->i_e2fs_size) {
363 uvm_vnp_setsize(vp, ip->i_e2fs_size);
364 }
365 }
366
367 off += bsize;
368 len -= bsize;
369 }
370 return 0;
371 }
372
373 /*
374 * allocate a range of blocks in a file.
375 * after this function returns, any page entirely contained within the range
376 * will map to invalid data and thus must be overwritten before it is made
377 * accessible to others.
378 */
379
380 int
381 ext2fs_balloc_range(vp, off, len, cred, flags)
382 struct vnode *vp;
383 off_t off, len;
384 struct ucred *cred;
385 int flags;
386 {
387 off_t oldeof, eof, pagestart;
388 struct uvm_object *uobj;
389 struct genfs_node *gp = VTOG(vp);
390 int i, delta, error, npages;
391 int bshift = vp->v_mount->mnt_fs_bshift;
392 int bsize = 1 << bshift;
393 int ppb = max(bsize >> PAGE_SHIFT, 1);
394 struct vm_page *pgs[ppb];
395 UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist);
396 UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
397 vp, off, len, vp->v_size);
398
399 error = 0;
400 uobj = &vp->v_uobj;
401 oldeof = vp->v_size;
402 eof = max(oldeof, off + len);
403 UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0);
404 pgs[0] = NULL;
405
406 /*
407 * cache the new range of the file. this will create zeroed pages
408 * where the new block will be and keep them locked until the
409 * new block is allocated, so there will be no window where
410 * the old contents of the new block is visible to racing threads.
411 */
412
413 pagestart = trunc_page(off) & ~(bsize - 1);
414 npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT);
415 memset(pgs, 0, npages);
416 simple_lock(&uobj->vmobjlock);
417 error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
418 VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF);
419 if (error) {
420 UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0);
421 goto errout;
422 }
423 for (i = 0; i < npages; i++) {
424 UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
425 KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
426 pgs[i]->flags &= ~PG_CLEAN;
427 uvm_pageactivate(pgs[i]);
428 }
429
430 /*
431 * adjust off to be block-aligned.
432 */
433
434 delta = off & (bsize - 1);
435 off -= delta;
436 len += delta;
437
438 /*
439 * now allocate the range.
440 */
441
442 lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
443 error = GOP_ALLOC(vp, off, len, flags, cred);
444 UVMHIST_LOG(ubchist, "alloc %d", error,0,0,0);
445 lockmgr(&gp->g_glock, LK_RELEASE, NULL);
446
447 /*
448 * clear PG_RDONLY on any pages we are holding
449 * (since they now have backing store) and unbusy them.
450 * if we got an error, free any pages we created past the old eob.
451 */
452
453 errout:
454 simple_lock(&uobj->vmobjlock);
455 if (error) {
456 (void) (uobj->pgops->pgo_put)(uobj, oldeof, pagestart + ppb,
457 PGO_FREE);
458 simple_lock(&uobj->vmobjlock);
459 }
460 if (pgs[0] != NULL) {
461 for (i = 0; i < npages; i++) {
462 pgs[i]->flags &= ~PG_RDONLY;
463 }
464 uvm_page_unbusy(pgs, npages);
465 }
466 simple_unlock(&uobj->vmobjlock);
467 return (error);
468 }
469