ext2fs_alloc.c revision 1.13 1 /* $NetBSD: ext2fs_alloc.c,v 1.13 2001/11/08 02:39:06 lukem Exp $ */
2
3 /*
4 * Copyright (c) 1997 Manuel Bouyer.
5 * Copyright (c) 1982, 1986, 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94
37 * Modified for ext2fs by Manuel Bouyer.
38 */
39
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ext2fs_alloc.c,v 1.13 2001/11/08 02:39:06 lukem Exp $");
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/vnode.h>
48 #include <sys/mount.h>
49 #include <sys/kernel.h>
50 #include <sys/syslog.h>
51
52 #include <ufs/ufs/inode.h>
53 #include <ufs/ufs/ufs_extern.h>
54
55 #include <ufs/ext2fs/ext2fs.h>
56 #include <ufs/ext2fs/ext2fs_extern.h>
57
58 u_long ext2gennumber;
59
60 static ufs_daddr_t ext2fs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
61 static u_long ext2fs_dirpref __P((struct m_ext2fs *));
62 static void ext2fs_fserr __P((struct m_ext2fs *, u_int, char *));
63 static u_long ext2fs_hashalloc __P((struct inode *, int, long, int,
64 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t,
65 int)));
66 static ufs_daddr_t ext2fs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
67 static ufs_daddr_t ext2fs_mapsearch __P((struct m_ext2fs *, char *, ufs_daddr_t));
68
69 /*
70 * Allocate a block in the file system.
71 *
72 * A preference may be optionally specified. If a preference is given
73 * the following hierarchy is used to allocate a block:
74 * 1) allocate the requested block.
75 * 2) allocate a rotationally optimal block in the same cylinder.
76 * 3) allocate a block in the same cylinder group.
77 * 4) quadradically rehash into other cylinder groups, until an
78 * available block is located.
79 * If no block preference is given the following hierarchy is used
80 * to allocate a block:
81 * 1) allocate a block in the cylinder group that contains the
82 * inode for the file.
83 * 2) quadradically rehash into other cylinder groups, until an
84 * available block is located.
85 */
86 int
87 ext2fs_alloc(ip, lbn, bpref, cred, bnp)
88 struct inode *ip;
89 ufs_daddr_t lbn, bpref;
90 struct ucred *cred;
91 ufs_daddr_t *bnp;
92 {
93 struct m_ext2fs *fs;
94 ufs_daddr_t bno;
95 int cg;
96
97 *bnp = 0;
98 fs = ip->i_e2fs;
99 #ifdef DIAGNOSTIC
100 if (cred == NOCRED)
101 panic("ext2fs_alloc: missing credential\n");
102 #endif /* DIAGNOSTIC */
103 if (fs->e2fs.e2fs_fbcount == 0)
104 goto nospace;
105 if (cred->cr_uid != 0 && freespace(fs) <= 0)
106 goto nospace;
107 if (bpref >= fs->e2fs.e2fs_bcount)
108 bpref = 0;
109 if (bpref == 0)
110 cg = ino_to_cg(fs, ip->i_number);
111 else
112 cg = dtog(fs, bpref);
113 bno = (ufs_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
114 ext2fs_alloccg);
115 if (bno > 0) {
116 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize);
117 ip->i_flag |= IN_CHANGE | IN_UPDATE;
118 *bnp = bno;
119 return (0);
120 }
121 nospace:
122 ext2fs_fserr(fs, cred->cr_uid, "file system full");
123 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
124 return (ENOSPC);
125 }
126
127 /*
128 * Allocate an inode in the file system.
129 *
130 * If allocating a directory, use ext2fs_dirpref to select the inode.
131 * If allocating in a directory, the following hierarchy is followed:
132 * 1) allocate the preferred inode.
133 * 2) allocate an inode in the same cylinder group.
134 * 3) quadradically rehash into other cylinder groups, until an
135 * available inode is located.
136 * If no inode preference is given the following hierarchy is used
137 * to allocate an inode:
138 * 1) allocate an inode in cylinder group 0.
139 * 2) quadradically rehash into other cylinder groups, until an
140 * available inode is located.
141 */
142 int
143 ext2fs_valloc(v)
144 void *v;
145 {
146 struct vop_valloc_args /* {
147 struct vnode *a_pvp;
148 int a_mode;
149 struct ucred *a_cred;
150 struct vnode **a_vpp;
151 } */ *ap = v;
152 struct vnode *pvp = ap->a_pvp;
153 struct inode *pip;
154 struct m_ext2fs *fs;
155 struct inode *ip;
156 mode_t mode = ap->a_mode;
157 ino_t ino, ipref;
158 int cg, error;
159
160 *ap->a_vpp = NULL;
161 pip = VTOI(pvp);
162 fs = pip->i_e2fs;
163 if (fs->e2fs.e2fs_ficount == 0)
164 goto noinodes;
165
166 if ((mode & IFMT) == IFDIR)
167 cg = ext2fs_dirpref(fs);
168 else
169 cg = ino_to_cg(fs, pip->i_number);
170 ipref = cg * fs->e2fs.e2fs_ipg + 1;
171 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg);
172 if (ino == 0)
173 goto noinodes;
174 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
175 if (error) {
176 VOP_VFREE(pvp, ino, mode);
177 return (error);
178 }
179 ip = VTOI(*ap->a_vpp);
180 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) {
181 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n",
182 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt);
183 panic("ext2fs_valloc: dup alloc");
184 }
185
186 memset(&ip->i_din, 0, sizeof(ip->i_din));
187
188 /*
189 * Set up a new generation number for this inode.
190 */
191 if (++ext2gennumber < (u_long)time.tv_sec)
192 ext2gennumber = time.tv_sec;
193 ip->i_e2fs_gen = ext2gennumber;
194 return (0);
195 noinodes:
196 ext2fs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
197 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt);
198 return (ENOSPC);
199 }
200
201 /*
202 * Find a cylinder to place a directory.
203 *
204 * The policy implemented by this algorithm is to select from
205 * among those cylinder groups with above the average number of
206 * free inodes, the one with the smallest number of directories.
207 */
208 static u_long
209 ext2fs_dirpref(fs)
210 struct m_ext2fs *fs;
211 {
212 int cg, maxspace, mincg, avgifree;
213
214 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg;
215 maxspace = 0;
216 mincg = -1;
217 for (cg = 0; cg < fs->e2fs_ncg; cg++)
218 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) {
219 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) {
220 mincg = cg;
221 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree;
222 }
223 }
224 return mincg;
225 }
226
227 /*
228 * Select the desired position for the next block in a file. The file is
229 * logically divided into sections. The first section is composed of the
230 * direct blocks. Each additional section contains fs_maxbpg blocks.
231 *
232 * If no blocks have been allocated in the first section, the policy is to
233 * request a block in the same cylinder group as the inode that describes
234 * the file. Otherwise, the policy is to try to allocate the blocks
235 * contigously. The two fields of the ext2 inode extension (see
236 * ufs/ufs/inode.h) help this.
237 */
238 ufs_daddr_t
239 ext2fs_blkpref(ip, lbn, indx, bap)
240 struct inode *ip;
241 ufs_daddr_t lbn;
242 int indx;
243 ufs_daddr_t *bap;
244 {
245 struct m_ext2fs *fs;
246 int cg, i;
247
248 fs = ip->i_e2fs;
249 /*
250 * if we are doing contigous lbn allocation, try to alloc blocks
251 * contigously on disk
252 */
253
254 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) {
255 return ip->i_e2fs_last_blk + 1;
256 }
257
258 /*
259 * bap, if provided, gives us a list of blocks to which we want to
260 * stay close
261 */
262
263 if (bap) {
264 for (i = indx; i >= 0 ; i--) {
265 if (bap[i]) {
266 return fs2h32(bap[i]) + 1;
267 }
268 }
269 }
270
271 /* fall back to the first block of the cylinder containing the inode */
272
273 cg = ino_to_cg(fs, ip->i_number);
274 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1;
275 }
276
277 /*
278 * Implement the cylinder overflow algorithm.
279 *
280 * The policy implemented by this algorithm is:
281 * 1) allocate the block in its requested cylinder group.
282 * 2) quadradically rehash on the cylinder group number.
283 * 3) brute force search for a free block.
284 */
285 static u_long
286 ext2fs_hashalloc(ip, cg, pref, size, allocator)
287 struct inode *ip;
288 int cg;
289 long pref;
290 int size; /* size for data blocks, mode for inodes */
291 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int));
292 {
293 struct m_ext2fs *fs;
294 long result;
295 int i, icg = cg;
296
297 fs = ip->i_e2fs;
298 /*
299 * 1: preferred cylinder group
300 */
301 result = (*allocator)(ip, cg, pref, size);
302 if (result)
303 return (result);
304 /*
305 * 2: quadratic rehash
306 */
307 for (i = 1; i < fs->e2fs_ncg; i *= 2) {
308 cg += i;
309 if (cg >= fs->e2fs_ncg)
310 cg -= fs->e2fs_ncg;
311 result = (*allocator)(ip, cg, 0, size);
312 if (result)
313 return (result);
314 }
315 /*
316 * 3: brute force search
317 * Note that we start at i == 2, since 0 was checked initially,
318 * and 1 is always checked in the quadratic rehash.
319 */
320 cg = (icg + 2) % fs->e2fs_ncg;
321 for (i = 2; i < fs->e2fs_ncg; i++) {
322 result = (*allocator)(ip, cg, 0, size);
323 if (result)
324 return (result);
325 cg++;
326 if (cg == fs->e2fs_ncg)
327 cg = 0;
328 }
329 return (0);
330 }
331
332 /*
333 * Determine whether a block can be allocated.
334 *
335 * Check to see if a block of the appropriate size is available,
336 * and if it is, allocate it.
337 */
338
339 static ufs_daddr_t
340 ext2fs_alloccg(ip, cg, bpref, size)
341 struct inode *ip;
342 int cg;
343 ufs_daddr_t bpref;
344 int size;
345 {
346 struct m_ext2fs *fs;
347 char *bbp;
348 struct buf *bp;
349 int error, bno, start, end, loc;
350
351 fs = ip->i_e2fs;
352 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
353 return (0);
354 error = bread(ip->i_devvp, fsbtodb(fs,
355 fs->e2fs_gd[cg].ext2bgd_b_bitmap),
356 (int)fs->e2fs_bsize, NOCRED, &bp);
357 if (error) {
358 brelse(bp);
359 return (0);
360 }
361 bbp = (char *)bp->b_data;
362
363 if (dtog(fs, bpref) != cg)
364 bpref = 0;
365 if (bpref != 0) {
366 bpref = dtogd(fs, bpref);
367 /*
368 * if the requested block is available, use it
369 */
370 if (isclr(bbp, bpref)) {
371 bno = bpref;
372 goto gotit;
373 }
374 }
375 /*
376 * no blocks in the requested cylinder, so take next
377 * available one in this cylinder group.
378 * first try to get 8 contigous blocks, then fall back to a single
379 * block.
380 */
381 if (bpref)
382 start = dtogd(fs, bpref) / NBBY;
383 else
384 start = 0;
385 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
386 for (loc = start; loc < end; loc++) {
387 if (bbp[loc] == 0) {
388 bno = loc * NBBY;
389 goto gotit;
390 }
391 }
392 for (loc = 0; loc < start; loc++) {
393 if (bbp[loc] == 0) {
394 bno = loc * NBBY;
395 goto gotit;
396 }
397 }
398
399 bno = ext2fs_mapsearch(fs, bbp, bpref);
400 if (bno < 0)
401 return (0);
402 gotit:
403 #ifdef DIAGNOSTIC
404 if (isset(bbp, (long)bno)) {
405 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n",
406 cg, bno, fs->e2fs_fsmnt);
407 panic("ext2fs_alloccg: dup alloc");
408 }
409 #endif
410 setbit(bbp, (long)bno);
411 fs->e2fs.e2fs_fbcount--;
412 fs->e2fs_gd[cg].ext2bgd_nbfree--;
413 fs->e2fs_fmod = 1;
414 bdwrite(bp);
415 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno);
416 }
417
418 /*
419 * Determine whether an inode can be allocated.
420 *
421 * Check to see if an inode is available, and if it is,
422 * allocate it using the following policy:
423 * 1) allocate the requested inode.
424 * 2) allocate the next available inode after the requested
425 * inode in the specified cylinder group.
426 */
427 static ufs_daddr_t
428 ext2fs_nodealloccg(ip, cg, ipref, mode)
429 struct inode *ip;
430 int cg;
431 ufs_daddr_t ipref;
432 int mode;
433 {
434 struct m_ext2fs *fs;
435 char *ibp;
436 struct buf *bp;
437 int error, start, len, loc, map, i;
438
439 ipref--; /* to avoid a lot of (ipref -1) */
440 fs = ip->i_e2fs;
441 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0)
442 return (0);
443 error = bread(ip->i_devvp, fsbtodb(fs,
444 fs->e2fs_gd[cg].ext2bgd_i_bitmap),
445 (int)fs->e2fs_bsize, NOCRED, &bp);
446 if (error) {
447 brelse(bp);
448 return (0);
449 }
450 ibp = (char *)bp->b_data;
451 if (ipref) {
452 ipref %= fs->e2fs.e2fs_ipg;
453 if (isclr(ibp, ipref))
454 goto gotit;
455 }
456 start = ipref / NBBY;
457 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY);
458 loc = skpc(0xff, len, &ibp[start]);
459 if (loc == 0) {
460 len = start + 1;
461 start = 0;
462 loc = skpc(0xff, len, &ibp[0]);
463 if (loc == 0) {
464 printf("cg = %d, ipref = %d, fs = %s\n",
465 cg, ipref, fs->e2fs_fsmnt);
466 panic("ext2fs_nodealloccg: map corrupted");
467 /* NOTREACHED */
468 }
469 }
470 i = start + len - loc;
471 map = ibp[i];
472 ipref = i * NBBY;
473 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
474 if ((map & i) == 0) {
475 goto gotit;
476 }
477 }
478 printf("fs = %s\n", fs->e2fs_fsmnt);
479 panic("ext2fs_nodealloccg: block not in map");
480 /* NOTREACHED */
481 gotit:
482 setbit(ibp, ipref);
483 fs->e2fs.e2fs_ficount--;
484 fs->e2fs_gd[cg].ext2bgd_nifree--;
485 fs->e2fs_fmod = 1;
486 if ((mode & IFMT) == IFDIR) {
487 fs->e2fs_gd[cg].ext2bgd_ndirs++;
488 }
489 bdwrite(bp);
490 return (cg * fs->e2fs.e2fs_ipg + ipref +1);
491 }
492
493 /*
494 * Free a block.
495 *
496 * The specified block is placed back in the
497 * free map.
498 */
499 void
500 ext2fs_blkfree(ip, bno)
501 struct inode *ip;
502 ufs_daddr_t bno;
503 {
504 struct m_ext2fs *fs;
505 char *bbp;
506 struct buf *bp;
507 int error, cg;
508
509 fs = ip->i_e2fs;
510 cg = dtog(fs, bno);
511 if ((u_int)bno >= fs->e2fs.e2fs_bcount) {
512 printf("bad block %d, ino %d\n", bno, ip->i_number);
513 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block");
514 return;
515 }
516 error = bread(ip->i_devvp,
517 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
518 (int)fs->e2fs_bsize, NOCRED, &bp);
519 if (error) {
520 brelse(bp);
521 return;
522 }
523 bbp = (char *)bp->b_data;
524 bno = dtogd(fs, bno);
525 if (isclr(bbp, bno)) {
526 printf("dev = 0x%x, block = %d, fs = %s\n",
527 ip->i_dev, bno, fs->e2fs_fsmnt);
528 panic("blkfree: freeing free block");
529 }
530 clrbit(bbp, bno);
531 fs->e2fs.e2fs_fbcount++;
532 fs->e2fs_gd[cg].ext2bgd_nbfree++;
533
534 fs->e2fs_fmod = 1;
535 bdwrite(bp);
536 }
537
538 /*
539 * Free an inode.
540 *
541 * The specified inode is placed back in the free map.
542 */
543 int
544 ext2fs_vfree(v)
545 void *v;
546 {
547 struct vop_vfree_args /* {
548 struct vnode *a_pvp;
549 ino_t a_ino;
550 int a_mode;
551 } */ *ap = v;
552 struct m_ext2fs *fs;
553 char *ibp;
554 struct inode *pip;
555 ino_t ino = ap->a_ino;
556 struct buf *bp;
557 int error, cg;
558
559 pip = VTOI(ap->a_pvp);
560 fs = pip->i_e2fs;
561 if ((u_int)ino >= fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO)
562 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
563 pip->i_dev, ino, fs->e2fs_fsmnt);
564 cg = ino_to_cg(fs, ino);
565 error = bread(pip->i_devvp,
566 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap),
567 (int)fs->e2fs_bsize, NOCRED, &bp);
568 if (error) {
569 brelse(bp);
570 return (0);
571 }
572 ibp = (char *)bp->b_data;
573 ino = (ino - 1) % fs->e2fs.e2fs_ipg;
574 if (isclr(ibp, ino)) {
575 printf("dev = 0x%x, ino = %d, fs = %s\n",
576 pip->i_dev, ino, fs->e2fs_fsmnt);
577 if (fs->e2fs_ronly == 0)
578 panic("ifree: freeing free inode");
579 }
580 clrbit(ibp, ino);
581 fs->e2fs.e2fs_ficount++;
582 fs->e2fs_gd[cg].ext2bgd_nifree++;
583 if ((ap->a_mode & IFMT) == IFDIR) {
584 fs->e2fs_gd[cg].ext2bgd_ndirs--;
585 }
586 fs->e2fs_fmod = 1;
587 bdwrite(bp);
588 return (0);
589 }
590
591 /*
592 * Find a block in the specified cylinder group.
593 *
594 * It is a panic if a request is made to find a block if none are
595 * available.
596 */
597
598 static ufs_daddr_t
599 ext2fs_mapsearch(fs, bbp, bpref)
600 struct m_ext2fs *fs;
601 char *bbp;
602 ufs_daddr_t bpref;
603 {
604 ufs_daddr_t bno;
605 int start, len, loc, i, map;
606
607 /*
608 * find the fragment by searching through the free block
609 * map for an appropriate bit pattern
610 */
611 if (bpref)
612 start = dtogd(fs, bpref) / NBBY;
613 else
614 start = 0;
615 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start;
616 loc = skpc(0xff, len, &bbp[start]);
617 if (loc == 0) {
618 len = start + 1;
619 start = 0;
620 loc = skpc(0xff, len, &bbp[start]);
621 if (loc == 0) {
622 printf("start = %d, len = %d, fs = %s\n",
623 start, len, fs->e2fs_fsmnt);
624 panic("ext2fs_alloccg: map corrupted");
625 /* NOTREACHED */
626 }
627 }
628 i = start + len - loc;
629 map = bbp[i];
630 bno = i * NBBY;
631 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
632 if ((map & i) == 0)
633 return (bno);
634 }
635 printf("fs = %s\n", fs->e2fs_fsmnt);
636 panic("ext2fs_mapsearch: block not in map");
637 /* NOTREACHED */
638 }
639
640 /*
641 * Fserr prints the name of a file system with an error diagnostic.
642 *
643 * The form of the error message is:
644 * fs: error message
645 */
646 static void
647 ext2fs_fserr(fs, uid, cp)
648 struct m_ext2fs *fs;
649 u_int uid;
650 char *cp;
651 {
652
653 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp);
654 }
655