/*	$NetBSD: ffs_balloc.c,v 1.17 2000/02/25 19:58:25 fvdl Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#if defined(_KERNEL) && !defined(_LKM)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/vnode.h>

#include <vm/vm.h>

#include <uvm/uvm_extern.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
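/*
 * The work proceeds in stages: a trailing fragment is first grown to a
 * full block when the write extends past it, logical blocks below NDADDR
 * are handled through the direct block pointers in the inode, and larger
 * offsets are reached by walking (and allocating) the chain of indirect
 * blocks.  If any step fails, the blocks allocated so far are released
 * at the "fail" label.
 */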
int
ffs_balloc(v)
	void *v;
{
	struct vop_balloc_args /* {
		struct vnode *a_vp;
		off_t a_startoffset;
		int a_size;
		struct ucred *a_cred;
		int a_flags;
		struct buf **a_bpp;
	} */ *ap = v;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int unwindidx = -1;
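	/*
	 * needswap is consumed only through the ufs_rw32() calls below,
	 * which byte-swap on-disk values when the file system endianness
	 * differs from that of the host (the FFS_EI option).
	 */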
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_ffs_size);
	if (nb < NDADDR && nb < lbn) {
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			error = ffs_realloccg(ip, nb,
			    ffs_blkpref(ip, nb, (int)nb, &ip->i_ffs_db[0]),
			    osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    ufs_rw32(ip->i_ffs_db[nb], needswap),
				    fs->fs_bsize, osize, bp);
			ip->i_ffs_size = (nb + 1) * fs->fs_bsize;
			uvm_vnp_setsize(vp, ip->i_ffs_size);
			ip->i_ffs_db[nb] = ufs_rw32(dbtofsb(fs, bp->b_blkno),
			    needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
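	/*
	 * Three cases for a direct block: it exists and the file already
	 * covers a full block here, so it is simply read; it exists as a
	 * fragment, which may have to be reallocated to a larger size; or
	 * it does not exist yet and a block or fragment is allocated.
	 */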
	if (lbn < NDADDR) {
		nb = ufs_rw32(ip->i_ffs_db[lbn], needswap);
		if (nb != 0 && ip->i_ffs_size >= (lbn + 1) * fs->fs_bsize) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
			} else {
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_ffs_db[0]), osize, nsize, cred,
				    &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_ffs_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & B_CLRBUF)
				clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		ip->i_ffs_db[lbn] = ufs_rw32(dbtofsb(fs, bp->b_blkno),
		    needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*ap->a_bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic("ffs_balloc: ufs_getlbns returned indirect block");
#endif
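	/*
	 * ufs_getlbns() has filled indirs[] with the path from the inode's
	 * indirect pointer (indirs[0].in_off indexes i_ffs_ib) down to the
	 * data block, and num with the number of indirect levels involved.
	 */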
	/*
	 * Fetch the first indirect block, allocating if necessary.
	 */
	--num;
	nb = ufs_rw32(ip->i_ffs_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb);
		if (error)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ffs_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
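	/*
	 * Each pass reads the indirect block for level i into bp and looks
	 * at the entry indirs[i].in_off; a missing deeper indirect block is
	 * allocated and linked into its parent before moving down.  The
	 * loop exits with i == num, bp locked on the last indirect block,
	 * and nb holding its entry for the data block (0 if unallocated).
	 */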
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred,
		    &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & B_CLRBUF)
			clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		*ap->a_bpp = nbp;
		return (0);
	}
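	/*
	 * The data block already exists: release the indirect block and
	 * return a buffer for it, reading its contents only when the
	 * caller asked for them with B_CLRBUF.
	 */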
	brelse(bp);
	if (flags & B_CLRBUF) {
		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	*ap->a_bpp = nbp;
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 */
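	/*
	 * allocib, when set, points at the inode indirect pointer filled in
	 * above and is simply cleared; otherwise unwindidx selects the
	 * indirect block entry (indirs[unwindidx]) that is zeroed below.
	 */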
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, curproc);
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bdwrite(bp);
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) VOP_FSYNC(vp, cred, MNT_WAIT, curproc);
	return (error);
}