ffs_alloc.c revision 1.1 1 /*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * from: @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94
34 * $Id: ffs_alloc.c,v 1.1 1994/06/08 11:41:58 mycroft Exp $
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/vnode.h>
42 #include <sys/mount.h>
43 #include <sys/kernel.h>
44 #include <sys/syslog.h>
45
46 #include <vm/vm.h>
47
48 #include <ufs/ufs/quota.h>
49 #include <ufs/ufs/inode.h>
50
51 #include <ufs/ffs/fs.h>
52 #include <ufs/ffs/ffs_extern.h>
53
54 extern u_long nextgennumber;
55
56 static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int));
57 static daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
58 static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
59 static ino_t ffs_dirpref __P((struct fs *));
60 static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
61 static void ffs_fserr __P((struct fs *, u_int, char *));
62 static u_long ffs_hashalloc
63 __P((struct inode *, int, long, int, u_long (*)()));
64 static ino_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
65 static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));
66
67 /*
68 * Allocate a block in the file system.
69 *
70 * The size of the requested block is given, which must be some
71 * multiple of fs_fsize and <= fs_bsize.
72 * A preference may be optionally specified. If a preference is given
73 * the following hierarchy is used to allocate a block:
74 * 1) allocate the requested block.
75 * 2) allocate a rotationally optimal block in the same cylinder.
76 * 3) allocate a block in the same cylinder group.
77 * 4) quadradically rehash into other cylinder groups, until an
78 * available block is located.
79 * If no block preference is given the following heirarchy is used
80 * to allocate a block:
81 * 1) allocate a block in the cylinder group that contains the
82 * inode for the file.
83 * 2) quadradically rehash into other cylinder groups, until an
84 * available block is located.
85 */
86 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
87 register struct inode *ip;
88 daddr_t lbn, bpref;
89 int size;
90 struct ucred *cred;
91 daddr_t *bnp;
92 {
93 register struct fs *fs;
94 daddr_t bno;
95 int cg, error;
96
97 *bnp = 0;
98 fs = ip->i_fs;
99 #ifdef DIAGNOSTIC
100 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
101 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
102 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
103 panic("ffs_alloc: bad size");
104 }
105 if (cred == NOCRED)
106 panic("ffs_alloc: missing credential\n");
107 #endif /* DIAGNOSTIC */
108 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
109 goto nospace;
110 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
111 goto nospace;
112 #ifdef QUOTA
113 if (error = chkdq(ip, (long)btodb(size), cred, 0))
114 return (error);
115 #endif
116 if (bpref >= fs->fs_size)
117 bpref = 0;
118 if (bpref == 0)
119 cg = ino_to_cg(fs, ip->i_number);
120 else
121 cg = dtog(fs, bpref);
122 bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
123 (u_long (*)())ffs_alloccg);
124 if (bno > 0) {
125 ip->i_blocks += btodb(size);
126 ip->i_flag |= IN_CHANGE | IN_UPDATE;
127 *bnp = bno;
128 return (0);
129 }
130 #ifdef QUOTA
131 /*
132 * Restore user's disk quota because allocation failed.
133 */
134 (void) chkdq(ip, (long)-btodb(size), cred, FORCE);
135 #endif
136 nospace:
137 ffs_fserr(fs, cred->cr_uid, "file system full");
138 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
139 return (ENOSPC);
140 }
141
142 /*
143 * Reallocate a fragment to a bigger size
144 *
145 * The number and size of the old block is given, and a preference
146 * and new size is also specified. The allocator attempts to extend
147 * the original block. Failing that, the regular block allocator is
148 * invoked to get an appropriate block.
149 */
150 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
151 register struct inode *ip;
152 daddr_t lbprev;
153 daddr_t bpref;
154 int osize, nsize;
155 struct ucred *cred;
156 struct buf **bpp;
157 {
158 register struct fs *fs;
159 struct buf *bp;
160 int cg, request, error;
161 daddr_t bprev, bno;
162
163 *bpp = 0;
164 fs = ip->i_fs;
165 #ifdef DIAGNOSTIC
166 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
167 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
168 printf(
169 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
170 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
171 panic("ffs_realloccg: bad size");
172 }
173 if (cred == NOCRED)
174 panic("ffs_realloccg: missing credential\n");
175 #endif /* DIAGNOSTIC */
176 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
177 goto nospace;
178 if ((bprev = ip->i_db[lbprev]) == 0) {
179 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
180 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
181 panic("ffs_realloccg: bad bprev");
182 }
183 /*
184 * Allocate the extra space in the buffer.
185 */
186 if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
187 brelse(bp);
188 return (error);
189 }
190 #ifdef QUOTA
191 if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) {
192 brelse(bp);
193 return (error);
194 }
195 #endif
196 /*
197 * Check for extension in the existing location.
198 */
199 cg = dtog(fs, bprev);
200 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
201 if (bp->b_blkno != fsbtodb(fs, bno))
202 panic("bad blockno");
203 ip->i_blocks += btodb(nsize - osize);
204 ip->i_flag |= IN_CHANGE | IN_UPDATE;
205 allocbuf(bp, nsize);
206 bp->b_flags |= B_DONE;
207 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
208 *bpp = bp;
209 return (0);
210 }
211 /*
212 * Allocate a new disk location.
213 */
214 if (bpref >= fs->fs_size)
215 bpref = 0;
216 switch ((int)fs->fs_optim) {
217 case FS_OPTSPACE:
218 /*
219 * Allocate an exact sized fragment. Although this makes
220 * best use of space, we will waste time relocating it if
221 * the file continues to grow. If the fragmentation is
222 * less than half of the minimum free reserve, we choose
223 * to begin optimizing for time.
224 */
225 request = nsize;
226 if (fs->fs_minfree < 5 ||
227 fs->fs_cstotal.cs_nffree >
228 fs->fs_dsize * fs->fs_minfree / (2 * 100))
229 break;
230 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
231 fs->fs_fsmnt);
232 fs->fs_optim = FS_OPTTIME;
233 break;
234 case FS_OPTTIME:
235 /*
236 * At this point we have discovered a file that is trying to
237 * grow a small fragment to a larger fragment. To save time,
238 * we allocate a full sized block, then free the unused portion.
239 * If the file continues to grow, the `ffs_fragextend' call
240 * above will be able to grow it in place without further
241 * copying. If aberrant programs cause disk fragmentation to
242 * grow within 2% of the free reserve, we choose to begin
243 * optimizing for space.
244 */
245 request = fs->fs_bsize;
246 if (fs->fs_cstotal.cs_nffree <
247 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
248 break;
249 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
250 fs->fs_fsmnt);
251 fs->fs_optim = FS_OPTSPACE;
252 break;
253 default:
254 printf("dev = 0x%x, optim = %d, fs = %s\n",
255 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
256 panic("ffs_realloccg: bad optim");
257 /* NOTREACHED */
258 }
259 bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
260 (u_long (*)())ffs_alloccg);
261 if (bno > 0) {
262 bp->b_blkno = fsbtodb(fs, bno);
263 (void) vnode_pager_uncache(ITOV(ip));
264 ffs_blkfree(ip, bprev, (long)osize);
265 if (nsize < request)
266 ffs_blkfree(ip, bno + numfrags(fs, nsize),
267 (long)(request - nsize));
268 ip->i_blocks += btodb(nsize - osize);
269 ip->i_flag |= IN_CHANGE | IN_UPDATE;
270 allocbuf(bp, nsize);
271 bp->b_flags |= B_DONE;
272 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
273 *bpp = bp;
274 return (0);
275 }
276 #ifdef QUOTA
277 /*
278 * Restore user's disk quota because allocation failed.
279 */
280 (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
281 #endif
282 brelse(bp);
283 nospace:
284 /*
285 * no space available
286 */
287 ffs_fserr(fs, cred->cr_uid, "file system full");
288 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
289 return (ENOSPC);
290 }
291
292 /*
293 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
294 *
295 * The vnode and an array of buffer pointers for a range of sequential
296 * logical blocks to be made contiguous is given. The allocator attempts
297 * to find a range of sequential blocks starting as close as possible to
298 * an fs_rotdelay offset from the end of the allocation for the logical
299 * block immediately preceeding the current range. If successful, the
300 * physical block numbers in the buffer pointers and in the inode are
301 * changed to reflect the new allocation. If unsuccessful, the allocation
302 * is left unchanged. The success in doing the reallocation is returned.
303 * Note that the error return is not reflected back to the user. Rather
304 * the previous block allocation will be used.
305 */
306 #include <sys/sysctl.h>
307 int doasyncfree = 1;
308
309 #ifdef DEBUG
310 struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
311 #endif
312
313 int
314 ffs_reallocblks(ap)
315 struct vop_reallocblks_args /* {
316 struct vnode *a_vp;
317 struct cluster_save *a_buflist;
318 } */ *ap;
319 {
320 struct fs *fs;
321 struct inode *ip;
322 struct vnode *vp;
323 struct buf *sbp, *ebp;
324 daddr_t *bap, *sbap, *ebap;
325 struct cluster_save *buflist;
326 daddr_t start_lbn, end_lbn, soff, eoff, newblk, blkno;
327 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
328 int i, len, start_lvl, end_lvl, pref, ssize;
329
330 vp = ap->a_vp;
331 ip = VTOI(vp);
332 fs = ip->i_fs;
333 if (fs->fs_contigsumsize <= 0)
334 return (ENOSPC);
335 buflist = ap->a_buflist;
336 len = buflist->bs_nchildren;
337 start_lbn = buflist->bs_children[0]->b_lblkno;
338 end_lbn = start_lbn + len - 1;
339 #ifdef DIAGNOSTIC
340 for (i = 1; i < len; i++)
341 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
342 panic("ffs_reallocblks: non-cluster");
343 #endif
344 /*
345 * If the latest allocation is in a new cylinder group, assume that
346 * the filesystem has decided to move and do not force it back to
347 * the previous cylinder group.
348 */
349 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
350 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
351 return (ENOSPC);
352 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
353 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
354 return (ENOSPC);
355 /*
356 * Get the starting offset and block map for the first block.
357 */
358 if (start_lvl == 0) {
359 sbap = &ip->i_db[0];
360 soff = start_lbn;
361 } else {
362 idp = &start_ap[start_lvl - 1];
363 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
364 brelse(sbp);
365 return (ENOSPC);
366 }
367 sbap = (daddr_t *)sbp->b_data;
368 soff = idp->in_off;
369 }
370 /*
371 * Find the preferred location for the cluster.
372 */
373 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
374 /*
375 * If the block range spans two block maps, get the second map.
376 */
377 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
378 ssize = len;
379 } else {
380 #ifdef DIAGNOSTIC
381 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
382 panic("ffs_reallocblk: start == end");
383 #endif
384 ssize = len - (idp->in_off + 1);
385 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
386 goto fail;
387 ebap = (daddr_t *)ebp->b_data;
388 }
389 /*
390 * Search the block map looking for an allocation of the desired size.
391 */
392 if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
393 len, (u_long (*)())ffs_clusteralloc)) == 0)
394 goto fail;
395 /*
396 * We have found a new contiguous block.
397 *
398 * First we have to replace the old block pointers with the new
399 * block pointers in the inode and indirect blocks associated
400 * with the file.
401 */
402 blkno = newblk;
403 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
404 if (i == ssize)
405 bap = ebap;
406 #ifdef DIAGNOSTIC
407 if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
408 panic("ffs_reallocblks: alloc mismatch");
409 #endif
410 *bap++ = blkno;
411 }
412 /*
413 * Next we must write out the modified inode and indirect blocks.
414 * For strict correctness, the writes should be synchronous since
415 * the old block values may have been written to disk. In practise
416 * they are almost never written, but if we are concerned about
417 * strict correctness, the `doasyncfree' flag should be set to zero.
418 *
419 * The test on `doasyncfree' should be changed to test a flag
420 * that shows whether the associated buffers and inodes have
421 * been written. The flag should be set when the cluster is
422 * started and cleared whenever the buffer or inode is flushed.
423 * We can then check below to see if it is set, and do the
424 * synchronous write only when it has been cleared.
425 */
426 if (sbap != &ip->i_db[0]) {
427 if (doasyncfree)
428 bdwrite(sbp);
429 else
430 bwrite(sbp);
431 } else {
432 ip->i_flag |= IN_CHANGE | IN_UPDATE;
433 if (!doasyncfree)
434 VOP_UPDATE(vp, &time, &time, MNT_WAIT);
435 }
436 if (ssize < len)
437 if (doasyncfree)
438 bdwrite(ebp);
439 else
440 bwrite(ebp);
441 /*
442 * Last, free the old blocks and assign the new blocks to the buffers.
443 */
444 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
445 ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
446 fs->fs_bsize);
447 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
448 }
449 return (0);
450
451 fail:
452 if (ssize < len)
453 brelse(ebp);
454 if (sbap != &ip->i_db[0])
455 brelse(sbp);
456 return (ENOSPC);
457 }
458
459 /*
460 * Allocate an inode in the file system.
461 *
462 * If allocating a directory, use ffs_dirpref to select the inode.
463 * If allocating in a directory, the following hierarchy is followed:
464 * 1) allocate the preferred inode.
465 * 2) allocate an inode in the same cylinder group.
466 * 3) quadradically rehash into other cylinder groups, until an
467 * available inode is located.
468 * If no inode preference is given the following heirarchy is used
469 * to allocate an inode:
470 * 1) allocate an inode in cylinder group 0.
471 * 2) quadradically rehash into other cylinder groups, until an
472 * available inode is located.
473 */
474 ffs_valloc(ap)
475 struct vop_valloc_args /* {
476 struct vnode *a_pvp;
477 int a_mode;
478 struct ucred *a_cred;
479 struct vnode **a_vpp;
480 } */ *ap;
481 {
482 register struct vnode *pvp = ap->a_pvp;
483 register struct inode *pip;
484 register struct fs *fs;
485 register struct inode *ip;
486 mode_t mode = ap->a_mode;
487 ino_t ino, ipref;
488 int cg, error;
489
490 *ap->a_vpp = NULL;
491 pip = VTOI(pvp);
492 fs = pip->i_fs;
493 if (fs->fs_cstotal.cs_nifree == 0)
494 goto noinodes;
495
496 if ((mode & IFMT) == IFDIR)
497 ipref = ffs_dirpref(fs);
498 else
499 ipref = pip->i_number;
500 if (ipref >= fs->fs_ncg * fs->fs_ipg)
501 ipref = 0;
502 cg = ino_to_cg(fs, ipref);
503 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
504 if (ino == 0)
505 goto noinodes;
506 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
507 if (error) {
508 VOP_VFREE(pvp, ino, mode);
509 return (error);
510 }
511 ip = VTOI(*ap->a_vpp);
512 if (ip->i_mode) {
513 printf("mode = 0%o, inum = %d, fs = %s\n",
514 ip->i_mode, ip->i_number, fs->fs_fsmnt);
515 panic("ffs_valloc: dup alloc");
516 }
517 if (ip->i_blocks) { /* XXX */
518 printf("free inode %s/%d had %d blocks\n",
519 fs->fs_fsmnt, ino, ip->i_blocks);
520 ip->i_blocks = 0;
521 }
522 ip->i_flags = 0;
523 /*
524 * Set up a new generation number for this inode.
525 */
526 if (++nextgennumber < (u_long)time.tv_sec)
527 nextgennumber = time.tv_sec;
528 ip->i_gen = nextgennumber;
529 return (0);
530 noinodes:
531 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
532 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
533 return (ENOSPC);
534 }
535
536 /*
537 * Find a cylinder to place a directory.
538 *
539 * The policy implemented by this algorithm is to select from
540 * among those cylinder groups with above the average number of
541 * free inodes, the one with the smallest number of directories.
542 */
543 static ino_t
544 ffs_dirpref(fs)
545 register struct fs *fs;
546 {
547 int cg, minndir, mincg, avgifree;
548
549 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
550 minndir = fs->fs_ipg;
551 mincg = 0;
552 for (cg = 0; cg < fs->fs_ncg; cg++)
553 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
554 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
555 mincg = cg;
556 minndir = fs->fs_cs(fs, cg).cs_ndir;
557 }
558 return ((ino_t)(fs->fs_ipg * mincg));
559 }
560
561 /*
562 * Select the desired position for the next block in a file. The file is
563 * logically divided into sections. The first section is composed of the
564 * direct blocks. Each additional section contains fs_maxbpg blocks.
565 *
566 * If no blocks have been allocated in the first section, the policy is to
567 * request a block in the same cylinder group as the inode that describes
568 * the file. If no blocks have been allocated in any other section, the
569 * policy is to place the section in a cylinder group with a greater than
570 * average number of free blocks. An appropriate cylinder group is found
571 * by using a rotor that sweeps the cylinder groups. When a new group of
572 * blocks is needed, the sweep begins in the cylinder group following the
573 * cylinder group from which the previous allocation was made. The sweep
574 * continues until a cylinder group with greater than the average number
575 * of free blocks is found. If the allocation is for the first block in an
576 * indirect block, the information on the previous allocation is unavailable;
577 * here a best guess is made based upon the logical block number being
578 * allocated.
579 *
580 * If a section is already partially allocated, the policy is to
581 * contiguously allocate fs_maxcontig blocks. The end of one of these
582 * contiguous blocks and the beginning of the next is physically separated
583 * so that the disk head will be in transit between them for at least
584 * fs_rotdelay milliseconds. This is to allow time for the processor to
585 * schedule another I/O transfer.
586 */
587 daddr_t
588 ffs_blkpref(ip, lbn, indx, bap)
589 struct inode *ip;
590 daddr_t lbn;
591 int indx;
592 daddr_t *bap;
593 {
594 register struct fs *fs;
595 register int cg;
596 int avgbfree, startcg;
597 daddr_t nextblk;
598
599 fs = ip->i_fs;
600 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
601 if (lbn < NDADDR) {
602 cg = ino_to_cg(fs, ip->i_number);
603 return (fs->fs_fpg * cg + fs->fs_frag);
604 }
605 /*
606 * Find a cylinder with greater than average number of
607 * unused data blocks.
608 */
609 if (indx == 0 || bap[indx - 1] == 0)
610 startcg =
611 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
612 else
613 startcg = dtog(fs, bap[indx - 1]) + 1;
614 startcg %= fs->fs_ncg;
615 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
616 for (cg = startcg; cg < fs->fs_ncg; cg++)
617 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
618 fs->fs_cgrotor = cg;
619 return (fs->fs_fpg * cg + fs->fs_frag);
620 }
621 for (cg = 0; cg <= startcg; cg++)
622 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
623 fs->fs_cgrotor = cg;
624 return (fs->fs_fpg * cg + fs->fs_frag);
625 }
626 return (NULL);
627 }
628 /*
629 * One or more previous blocks have been laid out. If less
630 * than fs_maxcontig previous blocks are contiguous, the
631 * next block is requested contiguously, otherwise it is
632 * requested rotationally delayed by fs_rotdelay milliseconds.
633 */
634 nextblk = bap[indx - 1] + fs->fs_frag;
635 if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
636 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
637 return (nextblk);
638 if (fs->fs_rotdelay != 0)
639 /*
640 * Here we convert ms of delay to frags as:
641 * (frags) = (ms) * (rev/sec) * (sect/rev) /
642 * ((sect/frag) * (ms/sec))
643 * then round up to the next block.
644 */
645 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
646 (NSPF(fs) * 1000), fs->fs_frag);
647 return (nextblk);
648 }
649
650 /*
651 * Implement the cylinder overflow algorithm.
652 *
653 * The policy implemented by this algorithm is:
654 * 1) allocate the block in its requested cylinder group.
655 * 2) quadradically rehash on the cylinder group number.
656 * 3) brute force search for a free block.
657 */
658 /*VARARGS5*/
659 static u_long
660 ffs_hashalloc(ip, cg, pref, size, allocator)
661 struct inode *ip;
662 int cg;
663 long pref;
664 int size; /* size for data blocks, mode for inodes */
665 u_long (*allocator)();
666 {
667 register struct fs *fs;
668 long result;
669 int i, icg = cg;
670
671 fs = ip->i_fs;
672 /*
673 * 1: preferred cylinder group
674 */
675 result = (*allocator)(ip, cg, pref, size);
676 if (result)
677 return (result);
678 /*
679 * 2: quadratic rehash
680 */
681 for (i = 1; i < fs->fs_ncg; i *= 2) {
682 cg += i;
683 if (cg >= fs->fs_ncg)
684 cg -= fs->fs_ncg;
685 result = (*allocator)(ip, cg, 0, size);
686 if (result)
687 return (result);
688 }
689 /*
690 * 3: brute force search
691 * Note that we start at i == 2, since 0 was checked initially,
692 * and 1 is always checked in the quadratic rehash.
693 */
694 cg = (icg + 2) % fs->fs_ncg;
695 for (i = 2; i < fs->fs_ncg; i++) {
696 result = (*allocator)(ip, cg, 0, size);
697 if (result)
698 return (result);
699 cg++;
700 if (cg == fs->fs_ncg)
701 cg = 0;
702 }
703 return (NULL);
704 }
705
706 /*
707 * Determine whether a fragment can be extended.
708 *
709 * Check to see if the necessary fragments are available, and
710 * if they are, allocate them.
711 */
712 static daddr_t
713 ffs_fragextend(ip, cg, bprev, osize, nsize)
714 struct inode *ip;
715 int cg;
716 long bprev;
717 int osize, nsize;
718 {
719 register struct fs *fs;
720 register struct cg *cgp;
721 struct buf *bp;
722 long bno;
723 int frags, bbase;
724 int i, error;
725
726 fs = ip->i_fs;
727 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
728 return (NULL);
729 frags = numfrags(fs, nsize);
730 bbase = fragnum(fs, bprev);
731 if (bbase > fragnum(fs, (bprev + frags - 1))) {
732 /* cannot extend across a block boundary */
733 return (NULL);
734 }
735 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
736 (int)fs->fs_cgsize, NOCRED, &bp);
737 if (error) {
738 brelse(bp);
739 return (NULL);
740 }
741 cgp = (struct cg *)bp->b_data;
742 if (!cg_chkmagic(cgp)) {
743 brelse(bp);
744 return (NULL);
745 }
746 cgp->cg_time = time.tv_sec;
747 bno = dtogd(fs, bprev);
748 for (i = numfrags(fs, osize); i < frags; i++)
749 if (isclr(cg_blksfree(cgp), bno + i)) {
750 brelse(bp);
751 return (NULL);
752 }
753 /*
754 * the current fragment can be extended
755 * deduct the count on fragment being extended into
756 * increase the count on the remaining fragment (if any)
757 * allocate the extended piece
758 */
759 for (i = frags; i < fs->fs_frag - bbase; i++)
760 if (isclr(cg_blksfree(cgp), bno + i))
761 break;
762 cgp->cg_frsum[i - numfrags(fs, osize)]--;
763 if (i != frags)
764 cgp->cg_frsum[i - frags]++;
765 for (i = numfrags(fs, osize); i < frags; i++) {
766 clrbit(cg_blksfree(cgp), bno + i);
767 cgp->cg_cs.cs_nffree--;
768 fs->fs_cstotal.cs_nffree--;
769 fs->fs_cs(fs, cg).cs_nffree--;
770 }
771 fs->fs_fmod = 1;
772 bdwrite(bp);
773 return (bprev);
774 }
775
776 /*
777 * Determine whether a block can be allocated.
778 *
779 * Check to see if a block of the appropriate size is available,
780 * and if it is, allocate it.
781 */
782 static daddr_t
783 ffs_alloccg(ip, cg, bpref, size)
784 struct inode *ip;
785 int cg;
786 daddr_t bpref;
787 int size;
788 {
789 register struct fs *fs;
790 register struct cg *cgp;
791 struct buf *bp;
792 register int i;
793 int error, bno, frags, allocsiz;
794
795 fs = ip->i_fs;
796 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
797 return (NULL);
798 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
799 (int)fs->fs_cgsize, NOCRED, &bp);
800 if (error) {
801 brelse(bp);
802 return (NULL);
803 }
804 cgp = (struct cg *)bp->b_data;
805 if (!cg_chkmagic(cgp) ||
806 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
807 brelse(bp);
808 return (NULL);
809 }
810 cgp->cg_time = time.tv_sec;
811 if (size == fs->fs_bsize) {
812 bno = ffs_alloccgblk(fs, cgp, bpref);
813 bdwrite(bp);
814 return (bno);
815 }
816 /*
817 * check to see if any fragments are already available
818 * allocsiz is the size which will be allocated, hacking
819 * it down to a smaller size if necessary
820 */
821 frags = numfrags(fs, size);
822 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
823 if (cgp->cg_frsum[allocsiz] != 0)
824 break;
825 if (allocsiz == fs->fs_frag) {
826 /*
827 * no fragments were available, so a block will be
828 * allocated, and hacked up
829 */
830 if (cgp->cg_cs.cs_nbfree == 0) {
831 brelse(bp);
832 return (NULL);
833 }
834 bno = ffs_alloccgblk(fs, cgp, bpref);
835 bpref = dtogd(fs, bno);
836 for (i = frags; i < fs->fs_frag; i++)
837 setbit(cg_blksfree(cgp), bpref + i);
838 i = fs->fs_frag - frags;
839 cgp->cg_cs.cs_nffree += i;
840 fs->fs_cstotal.cs_nffree += i;
841 fs->fs_cs(fs, cg).cs_nffree += i;
842 fs->fs_fmod = 1;
843 cgp->cg_frsum[i]++;
844 bdwrite(bp);
845 return (bno);
846 }
847 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
848 if (bno < 0) {
849 brelse(bp);
850 return (NULL);
851 }
852 for (i = 0; i < frags; i++)
853 clrbit(cg_blksfree(cgp), bno + i);
854 cgp->cg_cs.cs_nffree -= frags;
855 fs->fs_cstotal.cs_nffree -= frags;
856 fs->fs_cs(fs, cg).cs_nffree -= frags;
857 fs->fs_fmod = 1;
858 cgp->cg_frsum[allocsiz]--;
859 if (frags != allocsiz)
860 cgp->cg_frsum[allocsiz - frags]++;
861 bdwrite(bp);
862 return (cg * fs->fs_fpg + bno);
863 }
864
865 /*
866 * Allocate a block in a cylinder group.
867 *
868 * This algorithm implements the following policy:
869 * 1) allocate the requested block.
870 * 2) allocate a rotationally optimal block in the same cylinder.
871 * 3) allocate the next available block on the block rotor for the
872 * specified cylinder group.
873 * Note that this routine only allocates fs_bsize blocks; these
874 * blocks may be fragmented by the routine that allocates them.
875 */
876 static daddr_t
877 ffs_alloccgblk(fs, cgp, bpref)
878 register struct fs *fs;
879 register struct cg *cgp;
880 daddr_t bpref;
881 {
882 daddr_t bno, blkno;
883 int cylno, pos, delta;
884 short *cylbp;
885 register int i;
886
887 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
888 bpref = cgp->cg_rotor;
889 goto norot;
890 }
891 bpref = blknum(fs, bpref);
892 bpref = dtogd(fs, bpref);
893 /*
894 * if the requested block is available, use it
895 */
896 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
897 bno = bpref;
898 goto gotit;
899 }
900 /*
901 * check for a block available on the same cylinder
902 */
903 cylno = cbtocylno(fs, bpref);
904 if (cg_blktot(cgp)[cylno] == 0)
905 goto norot;
906 if (fs->fs_cpc == 0) {
907 /*
908 * Block layout information is not available.
909 * Leaving bpref unchanged means we take the
910 * next available free block following the one
911 * we just allocated. Hopefully this will at
912 * least hit a track cache on drives of unknown
913 * geometry (e.g. SCSI).
914 */
915 goto norot;
916 }
917 /*
918 * check the summary information to see if a block is
919 * available in the requested cylinder starting at the
920 * requested rotational position and proceeding around.
921 */
922 cylbp = cg_blks(fs, cgp, cylno);
923 pos = cbtorpos(fs, bpref);
924 for (i = pos; i < fs->fs_nrpos; i++)
925 if (cylbp[i] > 0)
926 break;
927 if (i == fs->fs_nrpos)
928 for (i = 0; i < pos; i++)
929 if (cylbp[i] > 0)
930 break;
931 if (cylbp[i] > 0) {
932 /*
933 * found a rotational position, now find the actual
934 * block. A panic if none is actually there.
935 */
936 pos = cylno % fs->fs_cpc;
937 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
938 if (fs_postbl(fs, pos)[i] == -1) {
939 printf("pos = %d, i = %d, fs = %s\n",
940 pos, i, fs->fs_fsmnt);
941 panic("ffs_alloccgblk: cyl groups corrupted");
942 }
943 for (i = fs_postbl(fs, pos)[i];; ) {
944 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
945 bno = blkstofrags(fs, (bno + i));
946 goto gotit;
947 }
948 delta = fs_rotbl(fs)[i];
949 if (delta <= 0 ||
950 delta + i > fragstoblks(fs, fs->fs_fpg))
951 break;
952 i += delta;
953 }
954 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
955 panic("ffs_alloccgblk: can't find blk in cyl");
956 }
957 norot:
958 /*
959 * no blocks in the requested cylinder, so take next
960 * available one in this cylinder group.
961 */
962 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
963 if (bno < 0)
964 return (NULL);
965 cgp->cg_rotor = bno;
966 gotit:
967 blkno = fragstoblks(fs, bno);
968 ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
969 ffs_clusteracct(fs, cgp, blkno, -1);
970 cgp->cg_cs.cs_nbfree--;
971 fs->fs_cstotal.cs_nbfree--;
972 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
973 cylno = cbtocylno(fs, bno);
974 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
975 cg_blktot(cgp)[cylno]--;
976 fs->fs_fmod = 1;
977 return (cgp->cg_cgx * fs->fs_fpg + bno);
978 }
979
980 /*
981 * Determine whether a cluster can be allocated.
982 *
983 * We do not currently check for optimal rotational layout if there
984 * are multiple choices in the same cylinder group. Instead we just
985 * take the first one that we find following bpref.
986 */
987 static daddr_t
988 ffs_clusteralloc(ip, cg, bpref, len)
989 struct inode *ip;
990 int cg;
991 daddr_t bpref;
992 int len;
993 {
994 register struct fs *fs;
995 register struct cg *cgp;
996 struct buf *bp;
997 int i, run, bno, bit, map;
998 u_char *mapp;
999
1000 fs = ip->i_fs;
1001 if (fs->fs_cs(fs, cg).cs_nbfree < len)
1002 return (NULL);
1003 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1004 NOCRED, &bp))
1005 goto fail;
1006 cgp = (struct cg *)bp->b_data;
1007 if (!cg_chkmagic(cgp))
1008 goto fail;
1009 /*
1010 * Check to see if a cluster of the needed size (or bigger) is
1011 * available in this cylinder group.
1012 */
1013 for (i = len; i <= fs->fs_contigsumsize; i++)
1014 if (cg_clustersum(cgp)[i] > 0)
1015 break;
1016 if (i > fs->fs_contigsumsize)
1017 goto fail;
1018 /*
1019 * Search the cluster map to find a big enough cluster.
1020 * We take the first one that we find, even if it is larger
1021 * than we need as we prefer to get one close to the previous
1022 * block allocation. We do not search before the current
1023 * preference point as we do not want to allocate a block
1024 * that is allocated before the previous one (as we will
1025 * then have to wait for another pass of the elevator
1026 * algorithm before it will be read). We prefer to fail and
1027 * be recalled to try an allocation in the next cylinder group.
1028 */
1029 if (dtog(fs, bpref) != cg)
1030 bpref = 0;
1031 else
1032 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1033 mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1034 map = *mapp++;
1035 bit = 1 << (bpref % NBBY);
1036 for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
1037 if ((map & bit) == 0) {
1038 run = 0;
1039 } else {
1040 run++;
1041 if (run == len)
1042 break;
1043 }
1044 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1045 bit <<= 1;
1046 } else {
1047 map = *mapp++;
1048 bit = 1;
1049 }
1050 }
1051 if (i == cgp->cg_nclusterblks)
1052 goto fail;
1053 /*
1054 * Allocate the cluster that we have found.
1055 */
1056 bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
1057 len = blkstofrags(fs, len);
1058 for (i = 0; i < len; i += fs->fs_frag)
1059 if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
1060 panic("ffs_clusteralloc: lost block");
1061 brelse(bp);
1062 return (bno);
1063
1064 fail:
1065 brelse(bp);
1066 return (0);
1067 }
1068
1069 /*
1070 * Determine whether an inode can be allocated.
1071 *
1072 * Check to see if an inode is available, and if it is,
1073 * allocate it using the following policy:
1074 * 1) allocate the requested inode.
1075 * 2) allocate the next available inode after the requested
1076 * inode in the specified cylinder group.
1077 */
1078 static ino_t
1079 ffs_nodealloccg(ip, cg, ipref, mode)
1080 struct inode *ip;
1081 int cg;
1082 daddr_t ipref;
1083 int mode;
1084 {
1085 register struct fs *fs;
1086 register struct cg *cgp;
1087 struct buf *bp;
1088 int error, start, len, loc, map, i;
1089
1090 fs = ip->i_fs;
1091 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1092 return (NULL);
1093 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1094 (int)fs->fs_cgsize, NOCRED, &bp);
1095 if (error) {
1096 brelse(bp);
1097 return (NULL);
1098 }
1099 cgp = (struct cg *)bp->b_data;
1100 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1101 brelse(bp);
1102 return (NULL);
1103 }
1104 cgp->cg_time = time.tv_sec;
1105 if (ipref) {
1106 ipref %= fs->fs_ipg;
1107 if (isclr(cg_inosused(cgp), ipref))
1108 goto gotit;
1109 }
1110 start = cgp->cg_irotor / NBBY;
1111 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1112 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
1113 if (loc == 0) {
1114 len = start + 1;
1115 start = 0;
1116 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
1117 if (loc == 0) {
1118 printf("cg = %d, irotor = %d, fs = %s\n",
1119 cg, cgp->cg_irotor, fs->fs_fsmnt);
1120 panic("ffs_nodealloccg: map corrupted");
1121 /* NOTREACHED */
1122 }
1123 }
1124 i = start + len - loc;
1125 map = cg_inosused(cgp)[i];
1126 ipref = i * NBBY;
1127 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1128 if ((map & i) == 0) {
1129 cgp->cg_irotor = ipref;
1130 goto gotit;
1131 }
1132 }
1133 printf("fs = %s\n", fs->fs_fsmnt);
1134 panic("ffs_nodealloccg: block not in map");
1135 /* NOTREACHED */
1136 gotit:
1137 setbit(cg_inosused(cgp), ipref);
1138 cgp->cg_cs.cs_nifree--;
1139 fs->fs_cstotal.cs_nifree--;
1140 fs->fs_cs(fs, cg).cs_nifree--;
1141 fs->fs_fmod = 1;
1142 if ((mode & IFMT) == IFDIR) {
1143 cgp->cg_cs.cs_ndir++;
1144 fs->fs_cstotal.cs_ndir++;
1145 fs->fs_cs(fs, cg).cs_ndir++;
1146 }
1147 bdwrite(bp);
1148 return (cg * fs->fs_ipg + ipref);
1149 }
1150
1151 /*
1152 * Free a block or fragment.
1153 *
1154 * The specified block or fragment is placed back in the
1155 * free map. If a fragment is deallocated, a possible
1156 * block reassembly is checked.
1157 */
1158 ffs_blkfree(ip, bno, size)
1159 register struct inode *ip;
1160 daddr_t bno;
1161 long size;
1162 {
1163 register struct fs *fs;
1164 register struct cg *cgp;
1165 struct buf *bp;
1166 daddr_t blkno;
1167 int i, error, cg, blk, frags, bbase;
1168
1169 fs = ip->i_fs;
1170 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1171 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
1172 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1173 panic("blkfree: bad size");
1174 }
1175 cg = dtog(fs, bno);
1176 if ((u_int)bno >= fs->fs_size) {
1177 printf("bad block %d, ino %d\n", bno, ip->i_number);
1178 ffs_fserr(fs, ip->i_uid, "bad block");
1179 return;
1180 }
1181 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1182 (int)fs->fs_cgsize, NOCRED, &bp);
1183 if (error) {
1184 brelse(bp);
1185 return;
1186 }
1187 cgp = (struct cg *)bp->b_data;
1188 if (!cg_chkmagic(cgp)) {
1189 brelse(bp);
1190 return;
1191 }
1192 cgp->cg_time = time.tv_sec;
1193 bno = dtogd(fs, bno);
1194 if (size == fs->fs_bsize) {
1195 blkno = fragstoblks(fs, bno);
1196 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1197 printf("dev = 0x%x, block = %d, fs = %s\n",
1198 ip->i_dev, bno, fs->fs_fsmnt);
1199 panic("blkfree: freeing free block");
1200 }
1201 ffs_setblock(fs, cg_blksfree(cgp), blkno);
1202 ffs_clusteracct(fs, cgp, blkno, 1);
1203 cgp->cg_cs.cs_nbfree++;
1204 fs->fs_cstotal.cs_nbfree++;
1205 fs->fs_cs(fs, cg).cs_nbfree++;
1206 i = cbtocylno(fs, bno);
1207 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
1208 cg_blktot(cgp)[i]++;
1209 } else {
1210 bbase = bno - fragnum(fs, bno);
1211 /*
1212 * decrement the counts associated with the old frags
1213 */
1214 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1215 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1216 /*
1217 * deallocate the fragment
1218 */
1219 frags = numfrags(fs, size);
1220 for (i = 0; i < frags; i++) {
1221 if (isset(cg_blksfree(cgp), bno + i)) {
1222 printf("dev = 0x%x, block = %d, fs = %s\n",
1223 ip->i_dev, bno + i, fs->fs_fsmnt);
1224 panic("blkfree: freeing free frag");
1225 }
1226 setbit(cg_blksfree(cgp), bno + i);
1227 }
1228 cgp->cg_cs.cs_nffree += i;
1229 fs->fs_cstotal.cs_nffree += i;
1230 fs->fs_cs(fs, cg).cs_nffree += i;
1231 /*
1232 * add back in counts associated with the new frags
1233 */
1234 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1235 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1236 /*
1237 * if a complete block has been reassembled, account for it
1238 */
1239 blkno = fragstoblks(fs, bbase);
1240 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1241 cgp->cg_cs.cs_nffree -= fs->fs_frag;
1242 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1243 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1244 ffs_clusteracct(fs, cgp, blkno, 1);
1245 cgp->cg_cs.cs_nbfree++;
1246 fs->fs_cstotal.cs_nbfree++;
1247 fs->fs_cs(fs, cg).cs_nbfree++;
1248 i = cbtocylno(fs, bbase);
1249 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
1250 cg_blktot(cgp)[i]++;
1251 }
1252 }
1253 fs->fs_fmod = 1;
1254 bdwrite(bp);
1255 }
1256
1257 /*
1258 * Free an inode.
1259 *
1260 * The specified inode is placed back in the free map.
1261 */
1262 int
1263 ffs_vfree(ap)
1264 struct vop_vfree_args /* {
1265 struct vnode *a_pvp;
1266 ino_t a_ino;
1267 int a_mode;
1268 } */ *ap;
1269 {
1270 register struct fs *fs;
1271 register struct cg *cgp;
1272 register struct inode *pip;
1273 ino_t ino = ap->a_ino;
1274 struct buf *bp;
1275 int error, cg;
1276
1277 pip = VTOI(ap->a_pvp);
1278 fs = pip->i_fs;
1279 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1280 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1281 pip->i_dev, ino, fs->fs_fsmnt);
1282 cg = ino_to_cg(fs, ino);
1283 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1284 (int)fs->fs_cgsize, NOCRED, &bp);
1285 if (error) {
1286 brelse(bp);
1287 return (0);
1288 }
1289 cgp = (struct cg *)bp->b_data;
1290 if (!cg_chkmagic(cgp)) {
1291 brelse(bp);
1292 return (0);
1293 }
1294 cgp->cg_time = time.tv_sec;
1295 ino %= fs->fs_ipg;
1296 if (isclr(cg_inosused(cgp), ino)) {
1297 printf("dev = 0x%x, ino = %d, fs = %s\n",
1298 pip->i_dev, ino, fs->fs_fsmnt);
1299 if (fs->fs_ronly == 0)
1300 panic("ifree: freeing free inode");
1301 }
1302 clrbit(cg_inosused(cgp), ino);
1303 if (ino < cgp->cg_irotor)
1304 cgp->cg_irotor = ino;
1305 cgp->cg_cs.cs_nifree++;
1306 fs->fs_cstotal.cs_nifree++;
1307 fs->fs_cs(fs, cg).cs_nifree++;
1308 if ((ap->a_mode & IFMT) == IFDIR) {
1309 cgp->cg_cs.cs_ndir--;
1310 fs->fs_cstotal.cs_ndir--;
1311 fs->fs_cs(fs, cg).cs_ndir--;
1312 }
1313 fs->fs_fmod = 1;
1314 bdwrite(bp);
1315 return (0);
1316 }
1317
1318 /*
1319 * Find a block of the specified size in the specified cylinder group.
1320 *
1321 * It is a panic if a request is made to find a block if none are
1322 * available.
1323 */
1324 static daddr_t
1325 ffs_mapsearch(fs, cgp, bpref, allocsiz)
1326 register struct fs *fs;
1327 register struct cg *cgp;
1328 daddr_t bpref;
1329 int allocsiz;
1330 {
1331 daddr_t bno;
1332 int start, len, loc, i;
1333 int blk, field, subfield, pos;
1334
1335 /*
1336 * find the fragment by searching through the free block
1337 * map for an appropriate bit pattern
1338 */
1339 if (bpref)
1340 start = dtogd(fs, bpref) / NBBY;
1341 else
1342 start = cgp->cg_frotor / NBBY;
1343 len = howmany(fs->fs_fpg, NBBY) - start;
1344 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
1345 (u_char *)fragtbl[fs->fs_frag],
1346 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1347 if (loc == 0) {
1348 len = start + 1;
1349 start = 0;
1350 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
1351 (u_char *)fragtbl[fs->fs_frag],
1352 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1353 if (loc == 0) {
1354 printf("start = %d, len = %d, fs = %s\n",
1355 start, len, fs->fs_fsmnt);
1356 panic("ffs_alloccg: map corrupted");
1357 /* NOTREACHED */
1358 }
1359 }
1360 bno = (start + len - loc) * NBBY;
1361 cgp->cg_frotor = bno;
1362 /*
1363 * found the byte in the map
1364 * sift through the bits to find the selected frag
1365 */
1366 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1367 blk = blkmap(fs, cg_blksfree(cgp), bno);
1368 blk <<= 1;
1369 field = around[allocsiz];
1370 subfield = inside[allocsiz];
1371 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1372 if ((blk & field) == subfield)
1373 return (bno + pos);
1374 field <<= 1;
1375 subfield <<= 1;
1376 }
1377 }
1378 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1379 panic("ffs_alloccg: block not in map");
1380 return (-1);
1381 }
1382
1383 /*
1384 * Update the cluster map because of an allocation or free.
1385 *
1386 * Cnt == 1 means free; cnt == -1 means allocating.
1387 */
1388 ffs_clusteracct(fs, cgp, blkno, cnt)
1389 struct fs *fs;
1390 struct cg *cgp;
1391 daddr_t blkno;
1392 int cnt;
1393 {
1394 long *sump;
1395 u_char *freemapp, *mapp;
1396 int i, start, end, forw, back, map, bit;
1397
1398 if (fs->fs_contigsumsize <= 0)
1399 return;
1400 freemapp = cg_clustersfree(cgp);
1401 sump = cg_clustersum(cgp);
1402 /*
1403 * Allocate or clear the actual block.
1404 */
1405 if (cnt > 0)
1406 setbit(freemapp, blkno);
1407 else
1408 clrbit(freemapp, blkno);
1409 /*
1410 * Find the size of the cluster going forward.
1411 */
1412 start = blkno + 1;
1413 end = start + fs->fs_contigsumsize;
1414 if (end >= cgp->cg_nclusterblks)
1415 end = cgp->cg_nclusterblks;
1416 mapp = &freemapp[start / NBBY];
1417 map = *mapp++;
1418 bit = 1 << (start % NBBY);
1419 for (i = start; i < end; i++) {
1420 if ((map & bit) == 0)
1421 break;
1422 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1423 bit <<= 1;
1424 } else {
1425 map = *mapp++;
1426 bit = 1;
1427 }
1428 }
1429 forw = i - start;
1430 /*
1431 * Find the size of the cluster going backward.
1432 */
1433 start = blkno - 1;
1434 end = start - fs->fs_contigsumsize;
1435 if (end < 0)
1436 end = -1;
1437 mapp = &freemapp[start / NBBY];
1438 map = *mapp--;
1439 bit = 1 << (start % NBBY);
1440 for (i = start; i > end; i--) {
1441 if ((map & bit) == 0)
1442 break;
1443 if ((i & (NBBY - 1)) != 0) {
1444 bit >>= 1;
1445 } else {
1446 map = *mapp--;
1447 bit = 1 << (NBBY - 1);
1448 }
1449 }
1450 back = start - i;
1451 /*
1452 * Account for old cluster and the possibly new forward and
1453 * back clusters.
1454 */
1455 i = back + forw + 1;
1456 if (i > fs->fs_contigsumsize)
1457 i = fs->fs_contigsumsize;
1458 sump[i] += cnt;
1459 if (back > 0)
1460 sump[back] -= cnt;
1461 if (forw > 0)
1462 sump[forw] -= cnt;
1463 }
1464
1465 /*
1466 * Fserr prints the name of a file system with an error diagnostic.
1467 *
1468 * The form of the error message is:
1469 * fs: error message
1470 */
1471 static void
1472 ffs_fserr(fs, uid, cp)
1473 struct fs *fs;
1474 u_int uid;
1475 char *cp;
1476 {
1477
1478 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1479 }
1480