1 /* $NetBSD: ffs_alloc.c,v 1.21 1998/06/08 04:27:51 scottr Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
36 */
37
38 #include "opt_quota.h"
39 #include "opt_uvm.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/vnode.h>
46 #include <sys/mount.h>
47 #include <sys/kernel.h>
48 #include <sys/syslog.h>
49
50 #include <vm/vm.h>
51
52 #include <ufs/ufs/quota.h>
53 #include <ufs/ufs/ufsmount.h>
54 #include <ufs/ufs/inode.h>
55 #include <ufs/ufs/ufs_extern.h>
56 #include <ufs/ufs/ufs_bswap.h>
57
58 #include <ufs/ffs/fs.h>
59 #include <ufs/ffs/ffs_extern.h>
60
61 static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
62 static ufs_daddr_t ffs_alloccgblk __P((struct mount *, struct fs *,
63 struct cg *, ufs_daddr_t));
64 static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t, int));
65 static ino_t ffs_dirpref __P((struct fs *));
66 static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
67 static void ffs_fserr __P((struct fs *, u_int, char *));
68 static u_long ffs_hashalloc __P((struct inode *, int, long, int,
69 ufs_daddr_t (*)(struct inode *, int, ufs_daddr_t,
70 int)));
71 static ufs_daddr_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
72 static ufs_daddr_t ffs_mapsearch __P((int, struct fs *, struct cg *,
73 ufs_daddr_t, int));
74 #if defined(DIAGNOSTIC) || defined(DEBUG)
75 static int ffs_checkblk __P((struct inode *, ufs_daddr_t, long size));
76 #endif
77
78 /*
79 * Allocate a block in the file system.
80 *
81 * The size of the requested block is given, which must be some
82 * multiple of fs_fsize and <= fs_bsize.
83 * A preference may be optionally specified. If a preference is given
84 * the following hierarchy is used to allocate a block:
85 * 1) allocate the requested block.
86 * 2) allocate a rotationally optimal block in the same cylinder.
87 * 3) allocate a block in the same cylinder group.
88 * 4) quadratically rehash into other cylinder groups, until an
89 * available block is located.
90 * If no block preference is given, the following hierarchy is used
91 * to allocate a block:
92 * 1) allocate a block in the cylinder group that contains the
93 * inode for the file.
94 * 2) quadratically rehash into other cylinder groups, until an
95 * available block is located.
96 */
97 int
98 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
99 register struct inode *ip;
100 ufs_daddr_t lbn, bpref;
101 int size;
102 struct ucred *cred;
103 ufs_daddr_t *bnp;
104 {
105 register struct fs *fs;
106 ufs_daddr_t bno;
107 int cg;
108 #ifdef QUOTA
109 int error;
110 #endif
111
112 *bnp = 0;
113 fs = ip->i_fs;
114 #ifdef DIAGNOSTIC
115 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
116 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
117 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
118 panic("ffs_alloc: bad size");
119 }
120 if (cred == NOCRED)
121 panic("ffs_alloc: missing credential\n");
122 #endif /* DIAGNOSTIC */
123 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
124 goto nospace;
125 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
126 goto nospace;
127 #ifdef QUOTA
128 if ((error = chkdq(ip, (long)btodb(size), cred, 0)) != 0)
129 return (error);
130 #endif
131 if (bpref >= fs->fs_size)
132 bpref = 0;
133 if (bpref == 0)
134 cg = ino_to_cg(fs, ip->i_number);
135 else
136 cg = dtog(fs, bpref);
137 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
138 ffs_alloccg);
139 if (bno > 0) {
140 ip->i_ffs_blocks += btodb(size);
141 ip->i_flag |= IN_CHANGE | IN_UPDATE;
142 *bnp = bno;
143 return (0);
144 }
145 #ifdef QUOTA
146 /*
147 * Restore user's disk quota because allocation failed.
148 */
149 (void) chkdq(ip, (long)-btodb(size), cred, FORCE);
150 #endif
151 nospace:
152 ffs_fserr(fs, cred->cr_uid, "file system full");
153 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
154 return (ENOSPC);
155 }
156
157 /*
158 * Reallocate a fragment to a bigger size
159 *
160 * The number and size of the old block is given, and a preference
161 * and new size is also specified. The allocator attempts to extend
162 * the original block. Failing that, the regular block allocator is
163 * invoked to get an appropriate block.
164 */
165 int
166 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
167 register struct inode *ip;
168 ufs_daddr_t lbprev;
169 ufs_daddr_t bpref;
170 int osize, nsize;
171 struct ucred *cred;
172 struct buf **bpp;
173 {
174 register struct fs *fs;
175 struct buf *bp;
176 int cg, request, error;
177 ufs_daddr_t bprev, bno;
178
179 *bpp = 0;
180 fs = ip->i_fs;
181 #ifdef DIAGNOSTIC
182 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
183 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
184 printf(
185 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
186 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
187 panic("ffs_realloccg: bad size");
188 }
189 if (cred == NOCRED)
190 panic("ffs_realloccg: missing credential\n");
191 #endif /* DIAGNOSTIC */
192 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
193 goto nospace;
194 if ((bprev = ufs_rw32(ip->i_ffs_db[lbprev], UFS_IPNEEDSWAP(ip))) == 0) {
195 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
196 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
197 panic("ffs_realloccg: bad bprev");
198 }
199 /*
200 * Allocate the extra space in the buffer.
201 */
202 if ((error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
203 brelse(bp);
204 return (error);
205 }
206 #ifdef QUOTA
207 if ((error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) != 0) {
208 brelse(bp);
209 return (error);
210 }
211 #endif
212 /*
213 * Check for extension in the existing location.
214 */
215 cg = dtog(fs, bprev);
216 if ((bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) != 0) {
217 if (bp->b_blkno != fsbtodb(fs, bno))
218 panic("bad blockno");
219 ip->i_ffs_blocks += btodb(nsize - osize);
220 ip->i_flag |= IN_CHANGE | IN_UPDATE;
221 allocbuf(bp, nsize);
222 bp->b_flags |= B_DONE;
223 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
224 *bpp = bp;
225 return (0);
226 }
227 /*
228 * Allocate a new disk location.
229 */
230 if (bpref >= fs->fs_size)
231 bpref = 0;
232 switch ((int)fs->fs_optim) {
233 case FS_OPTSPACE:
234 /*
235 * Allocate an exact sized fragment. Although this makes
236 * best use of space, we will waste time relocating it if
237 * the file continues to grow. If the fragmentation is
238 * less than half of the minimum free reserve, we choose
239 * to begin optimizing for time.
240 */
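 		/*
 		 * (Illustration with hypothetical numbers: with fs_minfree = 10%
 		 * and fs_dsize = 1,000,000 fragments, the threshold below works
 		 * out to 1,000,000 * 10 / 200 = 50,000 fragments; once the
 		 * fragmented free space falls to or below that, the optimization
 		 * flips to FS_OPTTIME.)
 		 */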
241 request = nsize;
242 if (fs->fs_minfree < 5 ||
243 fs->fs_cstotal.cs_nffree >
244 fs->fs_dsize * fs->fs_minfree / (2 * 100))
245 break;
246 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
247 fs->fs_fsmnt);
248 fs->fs_optim = FS_OPTTIME;
249 break;
250 case FS_OPTTIME:
251 /*
252 * At this point we have discovered a file that is trying to
253 * grow a small fragment to a larger fragment. To save time,
254 * we allocate a full sized block, then free the unused portion.
255 * If the file continues to grow, the `ffs_fragextend' call
256 * above will be able to grow it in place without further
257 * copying. If aberrant programs cause disk fragmentation to
258 * grow within 2% of the free reserve, we choose to begin
259 * optimizing for space.
260 */
261 request = fs->fs_bsize;
262 if (fs->fs_cstotal.cs_nffree <
263 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
264 break;
265 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
266 fs->fs_fsmnt);
267 fs->fs_optim = FS_OPTSPACE;
268 break;
269 default:
270 printf("dev = 0x%x, optim = %d, fs = %s\n",
271 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
272 panic("ffs_realloccg: bad optim");
273 /* NOTREACHED */
274 }
275 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
276 ffs_alloccg);
277 if (bno > 0) {
278 bp->b_blkno = fsbtodb(fs, bno);
279 #if defined(UVM)
280 (void) uvm_vnp_uncache(ITOV(ip));
281 #else
282 (void) vnode_pager_uncache(ITOV(ip));
283 #endif
284 ffs_blkfree(ip, bprev, (long)osize);
285 if (nsize < request)
286 ffs_blkfree(ip, bno + numfrags(fs, nsize),
287 (long)(request - nsize));
288 ip->i_ffs_blocks += btodb(nsize - osize);
289 ip->i_flag |= IN_CHANGE | IN_UPDATE;
290 allocbuf(bp, nsize);
291 bp->b_flags |= B_DONE;
292 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
293 *bpp = bp;
294 return (0);
295 }
296 #ifdef QUOTA
297 /*
298 * Restore user's disk quota because allocation failed.
299 */
300 (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
301 #endif
302 brelse(bp);
303 nospace:
304 /*
305 * no space available
306 */
307 ffs_fserr(fs, cred->cr_uid, "file system full");
308 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
309 return (ENOSPC);
310 }
311
312 /*
313 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
314 *
315 * The vnode and an array of buffer pointers for a range of sequential
316 * logical blocks to be made contiguous is given. The allocator attempts
317 * to find a range of sequential blocks starting as close as possible to
318 * an fs_rotdelay offset from the end of the allocation for the logical
319 * block immediately preceding the current range. If successful, the
320 * physical block numbers in the buffer pointers and in the inode are
321 * changed to reflect the new allocation. If unsuccessful, the allocation
322 * is left unchanged. The success in doing the reallocation is returned.
323 * Note that the error return is not reflected back to the user. Rather
324 * the previous block allocation will be used.
325 */
326 #ifdef DEBUG
327 #include <sys/sysctl.h>
328 int prtrealloc = 0;
329 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
330 #endif
331
332 int doasyncfree = 1;
333 extern int doreallocblks;
334
335 int
336 ffs_reallocblks(v)
337 void *v;
338 {
339 struct vop_reallocblks_args /* {
340 struct vnode *a_vp;
341 struct cluster_save *a_buflist;
342 } */ *ap = v;
343 struct fs *fs;
344 struct inode *ip;
345 struct vnode *vp;
346 struct buf *sbp, *ebp;
347 ufs_daddr_t *bap, *sbap, *ebap = NULL;
348 struct cluster_save *buflist;
349 ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno;
350 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
351 int i, len, start_lvl, end_lvl, pref, ssize;
352 struct timespec ts;
353
354 vp = ap->a_vp;
355 ip = VTOI(vp);
356 fs = ip->i_fs;
357 if (fs->fs_contigsumsize <= 0)
358 return (ENOSPC);
359 buflist = ap->a_buflist;
360 len = buflist->bs_nchildren;
361 start_lbn = buflist->bs_children[0]->b_lblkno;
362 end_lbn = start_lbn + len - 1;
363 #ifdef DIAGNOSTIC
364 for (i = 0; i < len; i++)
365 if (!ffs_checkblk(ip,
366 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
367 panic("ffs_reallocblks: unallocated block 1");
368 for (i = 1; i < len; i++)
369 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
370 panic("ffs_reallocblks: non-logical cluster");
371 blkno = buflist->bs_children[0]->b_blkno;
372 ssize = fsbtodb(fs, fs->fs_frag);
373 for (i = 1; i < len - 1; i++)
374 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
375 panic("ffs_reallocblks: non-physical cluster %d", i);
376 #endif
377 /*
378 * If the latest allocation is in a new cylinder group, assume that
379 * the filesystem has decided to move and do not force it back to
380 * the previous cylinder group.
381 */
382 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
383 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
384 return (ENOSPC);
385 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
386 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
387 return (ENOSPC);
388 /*
389 * Get the starting offset and block map for the first block.
390 */
391 if (start_lvl == 0) {
392 sbap = &ip->i_ffs_db[0];
393 soff = start_lbn;
394 } else {
395 idp = &start_ap[start_lvl - 1];
396 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
397 brelse(sbp);
398 return (ENOSPC);
399 }
400 sbap = (ufs_daddr_t *)sbp->b_data;
401 soff = idp->in_off;
402 }
403 /*
404 * Find the preferred location for the cluster.
405 */
406 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
407 /*
408 * If the block range spans two block maps, get the second map.
409 */
410 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
411 ssize = len;
412 } else {
413 #ifdef DIAGNOSTIC
414 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
415 panic("ffs_reallocblk: start == end");
416 #endif
417 ssize = len - (idp->in_off + 1);
418 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
419 goto fail;
420 ebap = (ufs_daddr_t *)ebp->b_data;
421 }
422 /*
423 * Search the block map looking for an allocation of the desired size.
424 */
425 if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
426 len, ffs_clusteralloc)) == 0)
427 goto fail;
428 /*
429 * We have found a new contiguous block.
430 *
431 * First we have to replace the old block pointers with the new
432 * block pointers in the inode and indirect blocks associated
433 * with the file.
434 */
435 #ifdef DEBUG
436 if (prtrealloc)
437 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
438 start_lbn, end_lbn);
439 #endif
440 blkno = newblk;
441 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
442 if (i == ssize)
443 bap = ebap;
444 #ifdef DIAGNOSTIC
445 if (!ffs_checkblk(ip,
446 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
447 panic("ffs_reallocblks: unallocated block 2");
448 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) !=
449 ufs_rw32(*bap, UFS_MPNEEDSWAP(vp->v_mount)))
450 panic("ffs_reallocblks: alloc mismatch");
451 #endif
452 #ifdef DEBUG
453 if (prtrealloc)
454 printf(" %d,", ufs_rw32(*bap, UFS_MPNEEDSWAP(vp->v_mount)));
455 #endif
456 *bap++ = ufs_rw32(blkno, UFS_MPNEEDSWAP(vp->v_mount));
457 }
458 /*
459 * Next we must write out the modified inode and indirect blocks.
460 * For strict correctness, the writes should be synchronous since
461 * the old block values may have been written to disk. In practice
462 * they are almost never written, but if we are concerned about
463 * strict correctness, the `doasyncfree' flag should be set to zero.
464 *
465 * The test on `doasyncfree' should be changed to test a flag
466 * that shows whether the associated buffers and inodes have
467 * been written. The flag should be set when the cluster is
468 * started and cleared whenever the buffer or inode is flushed.
469 * We can then check below to see if it is set, and do the
470 * synchronous write only when it has been cleared.
471 */
472 if (sbap != &ip->i_ffs_db[0]) {
473 if (doasyncfree)
474 bdwrite(sbp);
475 else
476 bwrite(sbp);
477 } else {
478 ip->i_flag |= IN_CHANGE | IN_UPDATE;
479 if (!doasyncfree) {
480 TIMEVAL_TO_TIMESPEC(&time, &ts);
481 VOP_UPDATE(vp, &ts, &ts, 1);
482 }
483 }
484 if (ssize < len)
485 if (doasyncfree)
486 bdwrite(ebp);
487 else
488 bwrite(ebp);
489 /*
490 * Last, free the old blocks and assign the new blocks to the buffers.
491 */
492 #ifdef DEBUG
493 if (prtrealloc)
494 printf("\n\tnew:");
495 #endif
496 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
497 ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
498 fs->fs_bsize);
499 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
500 #ifdef DEBUG
501 if (!ffs_checkblk(ip,
502 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
503 panic("ffs_reallocblks: unallocated block 3");
504 if (prtrealloc)
505 printf(" %d,", blkno);
506 #endif
507 }
508 #ifdef DEBUG
509 if (prtrealloc) {
510 prtrealloc--;
511 printf("\n");
512 }
513 #endif
514 return (0);
515
516 fail:
517 if (ssize < len)
518 brelse(ebp);
519 if (sbap != &ip->i_ffs_db[0])
520 brelse(sbp);
521 return (ENOSPC);
522 }
523
524 /*
525 * Allocate an inode in the file system.
526 *
527 * If allocating a directory, use ffs_dirpref to select the inode.
528 * If allocating in a directory, the following hierarchy is followed:
529 * 1) allocate the preferred inode.
530 * 2) allocate an inode in the same cylinder group.
531 * 3) quadratically rehash into other cylinder groups, until an
532 * available inode is located.
533 * If no inode preference is given, the following hierarchy is used
534 * to allocate an inode:
535 * 1) allocate an inode in cylinder group 0.
536 * 2) quadratically rehash into other cylinder groups, until an
537 * available inode is located.
538 */
539 int
540 ffs_valloc(v)
541 void *v;
542 {
543 struct vop_valloc_args /* {
544 struct vnode *a_pvp;
545 int a_mode;
546 struct ucred *a_cred;
547 struct vnode **a_vpp;
548 } */ *ap = v;
549 register struct vnode *pvp = ap->a_pvp;
550 register struct inode *pip;
551 register struct fs *fs;
552 register struct inode *ip;
553 mode_t mode = ap->a_mode;
554 ino_t ino, ipref;
555 int cg, error;
556
557 *ap->a_vpp = NULL;
558 pip = VTOI(pvp);
559 fs = pip->i_fs;
560 if (fs->fs_cstotal.cs_nifree == 0)
561 goto noinodes;
562
563 if ((mode & IFMT) == IFDIR)
564 ipref = ffs_dirpref(fs);
565 else
566 ipref = pip->i_number;
567 if (ipref >= fs->fs_ncg * fs->fs_ipg)
568 ipref = 0;
569 cg = ino_to_cg(fs, ipref);
570 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
571 if (ino == 0)
572 goto noinodes;
573 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
574 if (error) {
575 VOP_VFREE(pvp, ino, mode);
576 return (error);
577 }
578 ip = VTOI(*ap->a_vpp);
579 if (ip->i_ffs_mode) {
580 printf("mode = 0%o, inum = %d, fs = %s\n",
581 ip->i_ffs_mode, ip->i_number, fs->fs_fsmnt);
582 panic("ffs_valloc: dup alloc");
583 }
584 if (ip->i_ffs_blocks) { /* XXX */
585 printf("free inode %s/%d had %d blocks\n",
586 fs->fs_fsmnt, ino, ip->i_ffs_blocks);
587 ip->i_ffs_blocks = 0;
588 }
589 ip->i_ffs_flags = 0;
590 /*
591 * Set up a new generation number for this inode.
592 */
593 ip->i_ffs_gen++;
594 return (0);
595 noinodes:
596 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
597 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
598 return (ENOSPC);
599 }
600
601 /*
602 * Find a cylinder to place a directory.
603 *
604 * The policy implemented by this algorithm is to select from
605 * among those cylinder groups with above the average number of
606 * free inodes, the one with the smallest number of directories.
607 */
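/*
 * (The scan below keeps the first minimum it finds, so ties on the
 * directory count go to the lowest-numbered cylinder group; the value
 * returned is the number of the first inode in that group, which the
 * caller then passes to ffs_hashalloc as its preference.)
 */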
608 static ino_t
609 ffs_dirpref(fs)
610 register struct fs *fs;
611 {
612 int cg, minndir, mincg, avgifree;
613
614 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
615 minndir = fs->fs_ipg;
616 mincg = 0;
617 for (cg = 0; cg < fs->fs_ncg; cg++)
618 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
619 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
620 mincg = cg;
621 minndir = fs->fs_cs(fs, cg).cs_ndir;
622 }
623 return ((ino_t)(fs->fs_ipg * mincg));
624 }
625
626 /*
627 * Select the desired position for the next block in a file. The file is
628 * logically divided into sections. The first section is composed of the
629 * direct blocks. Each additional section contains fs_maxbpg blocks.
630 *
631 * If no blocks have been allocated in the first section, the policy is to
632 * request a block in the same cylinder group as the inode that describes
633 * the file. If no blocks have been allocated in any other section, the
634 * policy is to place the section in a cylinder group with a greater than
635 * average number of free blocks. An appropriate cylinder group is found
636 * by using a rotor that sweeps the cylinder groups. When a new group of
637 * blocks is needed, the sweep begins in the cylinder group following the
638 * cylinder group from which the previous allocation was made. The sweep
639 * continues until a cylinder group with greater than the average number
640 * of free blocks is found. If the allocation is for the first block in an
641 * indirect block, the information on the previous allocation is unavailable;
642 * here a best guess is made based upon the logical block number being
643 * allocated.
644 *
645 * If a section is already partially allocated, the policy is to
646 * contiguously allocate fs_maxcontig blocks. The end of one of these
647 * contiguous blocks and the beginning of the next is physically separated
648 * so that the disk head will be in transit between them for at least
649 * fs_rotdelay milliseconds. This is to allow time for the processor to
650 * schedule another I/O transfer.
651 */
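/*
 * (A rough sense of scale, using hypothetical parameters: with
 * fs_maxbpg = 2048 and 8 KB blocks, each "section" above covers 16 MB,
 * so a steadily growing file is only considered for placement in a new
 * cylinder group once every 16 MB of data.)
 */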
652 ufs_daddr_t
653 ffs_blkpref(ip, lbn, indx, bap)
654 struct inode *ip;
655 ufs_daddr_t lbn;
656 int indx;
657 ufs_daddr_t *bap;
658 {
659 register struct fs *fs;
660 register int cg;
661 int avgbfree, startcg;
662 ufs_daddr_t nextblk;
663
664 fs = ip->i_fs;
665 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
666 if (lbn < NDADDR) {
667 cg = ino_to_cg(fs, ip->i_number);
668 return (fs->fs_fpg * cg + fs->fs_frag);
669 }
670 /*
671 * Find a cylinder with greater than average number of
672 * unused data blocks.
673 */
674 if (indx == 0 || bap[indx - 1] == 0)
675 startcg =
676 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
677 else
678 startcg = dtog(fs,
679 ufs_rw32(bap[indx - 1], UFS_IPNEEDSWAP(ip)) + 1);
680 startcg %= fs->fs_ncg;
681 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
682 for (cg = startcg; cg < fs->fs_ncg; cg++)
683 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
684 fs->fs_cgrotor = cg;
685 return (fs->fs_fpg * cg + fs->fs_frag);
686 }
687 for (cg = 0; cg <= startcg; cg++)
688 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
689 fs->fs_cgrotor = cg;
690 return (fs->fs_fpg * cg + fs->fs_frag);
691 }
692 return (NULL);
693 }
694 /*
695 * One or more previous blocks have been laid out. If less
696 * than fs_maxcontig previous blocks are contiguous, the
697 * next block is requested contiguously, otherwise it is
698 * requested rotationally delayed by fs_rotdelay milliseconds.
699 */
700 nextblk = ufs_rw32(bap[indx - 1], UFS_IPNEEDSWAP(ip)) + fs->fs_frag;
701 if (indx < fs->fs_maxcontig ||
702 ufs_rw32(bap[indx - fs->fs_maxcontig], UFS_IPNEEDSWAP(ip)) +
703 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
704 return (nextblk);
705 if (fs->fs_rotdelay != 0)
706 /*
707 * Here we convert ms of delay to frags as:
708 * (frags) = (ms) * (rev/sec) * (sect/rev) /
709 * ((sect/frag) * (ms/sec))
710 * then round up to the next block.
711 */
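 		/*
 		 * (Worked example with hypothetical geometry: a rotdelay of
 		 * 4 ms, 60 rev/sec, 32 sectors per track and 2 sectors per
 		 * fragment gives 4 * 60 * 32 / (2 * 1000) = 3 fragments,
 		 * which roundup() then raises to a full block of gap when
 		 * fs_frag is 8.)
 		 */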
712 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
713 (NSPF(fs) * 1000), fs->fs_frag);
714 return (nextblk);
715 }
716
717 /*
718 * Implement the cylinder overflow algorithm.
719 *
720 * The policy implemented by this algorithm is:
721 * 1) allocate the block in its requested cylinder group.
722 * 2) quadratically rehash on the cylinder group number.
723 * 3) brute force search for a free block.
724 */
725 /*VARARGS5*/
726 static u_long
727 ffs_hashalloc(ip, cg, pref, size, allocator)
728 struct inode *ip;
729 int cg;
730 long pref;
731 int size; /* size for data blocks, mode for inodes */
732 ufs_daddr_t (*allocator) __P((struct inode *, int, ufs_daddr_t, int));
733 {
734 register struct fs *fs;
735 long result;
736 int i, icg = cg;
737
738 fs = ip->i_fs;
739 /*
740 * 1: preferred cylinder group
741 */
742 result = (*allocator)(ip, cg, pref, size);
743 if (result)
744 return (result);
745 /*
746 * 2: quadratic rehash
747 */
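 	/*
 	 * (The loop doubles the step each time, so starting from the
 	 * preferred group icg the groups probed are icg+1, icg+3, icg+7,
 	 * icg+15, ... (mod fs_ncg), i.e. offsets of 2^k - 1.)
 	 */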
748 for (i = 1; i < fs->fs_ncg; i *= 2) {
749 cg += i;
750 if (cg >= fs->fs_ncg)
751 cg -= fs->fs_ncg;
752 result = (*allocator)(ip, cg, 0, size);
753 if (result)
754 return (result);
755 }
756 /*
757 * 3: brute force search
758 * Note that we start at i == 2, since 0 was checked initially,
759 * and 1 is always checked in the quadratic rehash.
760 */
761 cg = (icg + 2) % fs->fs_ncg;
762 for (i = 2; i < fs->fs_ncg; i++) {
763 result = (*allocator)(ip, cg, 0, size);
764 if (result)
765 return (result);
766 cg++;
767 if (cg == fs->fs_ncg)
768 cg = 0;
769 }
770 return (NULL);
771 }
772
773 /*
774 * Determine whether a fragment can be extended.
775 *
776 * Check to see if the necessary fragments are available, and
777 * if they are, allocate them.
778 */
779 static ufs_daddr_t
780 ffs_fragextend(ip, cg, bprev, osize, nsize)
781 struct inode *ip;
782 int cg;
783 long bprev;
784 int osize, nsize;
785 {
786 register struct fs *fs;
787 register struct cg *cgp;
788 struct buf *bp;
789 long bno;
790 int frags, bbase;
791 int i, error;
792
793 fs = ip->i_fs;
794 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
795 return (NULL);
796 frags = numfrags(fs, nsize);
797 bbase = fragnum(fs, bprev);
798 if (bbase > fragnum(fs, (bprev + frags - 1))) {
799 /* cannot extend across a block boundary */
800 return (NULL);
801 }
802 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
803 (int)fs->fs_cgsize, NOCRED, &bp);
804 if (error) {
805 brelse(bp);
806 return (NULL);
807 }
808 cgp = (struct cg *)bp->b_data;
809 if (!cg_chkmagic(cgp, UFS_IPNEEDSWAP(ip))) {
810 brelse(bp);
811 return (NULL);
812 }
813 cgp->cg_time = ufs_rw32(time.tv_sec, UFS_IPNEEDSWAP(ip));
814 bno = dtogd(fs, bprev);
815 for (i = numfrags(fs, osize); i < frags; i++)
816 if (isclr(cg_blksfree(cgp, UFS_IPNEEDSWAP(ip)), bno + i)) {
817 brelse(bp);
818 return (NULL);
819 }
820 /*
821 * the current fragment can be extended
822 * deduct the count on fragment being extended into
823 * increase the count on the remaining fragment (if any)
824 * allocate the extended piece
825 */
826 for (i = frags; i < fs->fs_frag - bbase; i++)
827 if (isclr(cg_blksfree(cgp, UFS_IPNEEDSWAP(ip)), bno + i))
828 break;
829 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_IPNEEDSWAP(ip));
830 if (i != frags)
831 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_IPNEEDSWAP(ip));
832 for (i = numfrags(fs, osize); i < frags; i++) {
833 clrbit(cg_blksfree(cgp, UFS_IPNEEDSWAP(ip)), bno + i);
834 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_IPNEEDSWAP(ip));
835 fs->fs_cstotal.cs_nffree--;
836 fs->fs_cs(fs, cg).cs_nffree--;
837 }
838 fs->fs_fmod = 1;
839 bdwrite(bp);
840 return (bprev);
841 }
842
843 /*
844 * Determine whether a block can be allocated.
845 *
846 * Check to see if a block of the appropriate size is available,
847 * and if it is, allocate it.
848 */
849 static ufs_daddr_t
850 ffs_alloccg(ip, cg, bpref, size)
851 struct inode *ip;
852 int cg;
853 ufs_daddr_t bpref;
854 int size;
855 {
856 register struct fs *fs;
857 register struct cg *cgp;
858 struct buf *bp;
859 register int i;
860 int error, bno, frags, allocsiz;
861 const int needswap = UFS_IPNEEDSWAP(ip);
862
863 fs = ip->i_fs;
864 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
865 return (NULL);
866 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
867 (int)fs->fs_cgsize, NOCRED, &bp);
868 if (error) {
869 brelse(bp);
870 return (NULL);
871 }
872 cgp = (struct cg *)bp->b_data;
873 if (!cg_chkmagic(cgp, needswap) ||
874 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
875 brelse(bp);
876 return (NULL);
877 }
878 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
879 if (size == fs->fs_bsize) {
880 bno = ffs_alloccgblk(ITOV(ip)->v_mount, fs, cgp, bpref);
881 bdwrite(bp);
882 return (bno);
883 }
884 /*
885 * check to see if any fragments are already available
886 * allocsiz is the size which will be allocated, hacking
887 * it down to a smaller size if necessary
888 */
889 frags = numfrags(fs, size);
890 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
891 if (cgp->cg_frsum[allocsiz] != 0)
892 break;
893 if (allocsiz == fs->fs_frag) {
894 /*
895 * no fragments were available, so a block will be
896 * allocated, and hacked up
897 */
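 		/*
 		 * (For instance, a 2-fragment request with fs_frag == 8: one
 		 * full block is claimed, its last 6 fragments are marked free
 		 * again, cs_nffree is credited with 6, and the leftover run is
 		 * recorded in cg_frsum[6].)
 		 */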
898 if (cgp->cg_cs.cs_nbfree == 0) {
899 brelse(bp);
900 return (NULL);
901 }
902 bno = ffs_alloccgblk(ITOV(ip)->v_mount, fs, cgp, bpref);
903 bpref = dtogd(fs, bno);
904 for (i = frags; i < fs->fs_frag; i++)
905 setbit(cg_blksfree(cgp, needswap), bpref + i);
906 i = fs->fs_frag - frags;
907 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
908 fs->fs_cstotal.cs_nffree += i;
909 fs->fs_cs(fs, cg).cs_nffree += i;
910 fs->fs_fmod = 1;
911 ufs_add32(cgp->cg_frsum[i], 1, needswap);
912 bdwrite(bp);
913 return (bno);
914 }
915 bno = ffs_mapsearch(needswap, fs, cgp, bpref, allocsiz);
916 if (bno < 0) {
917 brelse(bp);
918 return (NULL);
919 }
920 for (i = 0; i < frags; i++)
921 clrbit(cg_blksfree(cgp, needswap), bno + i);
922 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
923 fs->fs_cstotal.cs_nffree -= frags;
924 fs->fs_cs(fs, cg).cs_nffree -= frags;
925 fs->fs_fmod = 1;
926 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
927 if (frags != allocsiz)
928 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
929 bdwrite(bp);
930 return (cg * fs->fs_fpg + bno);
931 }
932
933 /*
934 * Allocate a block in a cylinder group.
935 *
936 * This algorithm implements the following policy:
937 * 1) allocate the requested block.
938 * 2) allocate a rotationally optimal block in the same cylinder.
939 * 3) allocate the next available block on the block rotor for the
940 * specified cylinder group.
941 * Note that this routine only allocates fs_bsize blocks; these
942 * blocks may be fragmented by the routine that allocates them.
943 */
944 static ufs_daddr_t
945 ffs_alloccgblk(mp, fs, cgp, bpref)
946 struct mount *mp;
947 register struct fs *fs;
948 register struct cg *cgp;
949 ufs_daddr_t bpref;
950 {
951 ufs_daddr_t bno, blkno;
952 int cylno, pos, delta;
953 short *cylbp;
954 register int i;
955 const int needswap = UFS_MPNEEDSWAP(mp);
956
957 if (bpref == 0 ||
958 dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
959 bpref = ufs_rw32(cgp->cg_rotor, needswap);
960 goto norot;
961 }
962 bpref = blknum(fs, bpref);
963 bpref = dtogd(fs, bpref);
964 /*
965 * if the requested block is available, use it
966 */
967 if (ffs_isblock(fs, cg_blksfree(cgp, needswap),
968 fragstoblks(fs, bpref))) {
969 bno = bpref;
970 goto gotit;
971 }
972 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
973 /*
974 * Block layout information is not available.
975 * Leaving bpref unchanged means we take the
976 * next available free block following the one
977 * we just allocated. Hopefully this will at
978 * least hit a track cache on drives of unknown
979 * geometry (e.g. SCSI).
980 */
981 goto norot;
982 }
983 /*
984 * check for a block available on the same cylinder
985 */
986 cylno = cbtocylno(fs, bpref);
987 if (cg_blktot(cgp, needswap)[cylno] == 0)
988 goto norot;
989 /*
990 * check the summary information to see if a block is
991 * available in the requested cylinder starting at the
992 * requested rotational position and proceeding around.
993 */
994 cylbp = cg_blks(fs, cgp, cylno, needswap);
995 pos = cbtorpos(fs, bpref);
996 for (i = pos; i < fs->fs_nrpos; i++)
997 if (ufs_rw16(cylbp[i], needswap) > 0)
998 break;
999 if (i == fs->fs_nrpos)
1000 for (i = 0; i < pos; i++)
1001 if (ufs_rw16(cylbp[i], needswap) > 0)
1002 break;
1003 if (ufs_rw16(cylbp[i], needswap) > 0) {
1004 /*
1005 * found a rotational position, now find the actual
1006 * found a rotational position, now find the actual
1007 * block; panic if none is actually there.
1008 pos = cylno % fs->fs_cpc;
1009 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1010 if (fs_postbl(fs, pos)[i] == -1) {
1011 printf("pos = %d, i = %d, fs = %s\n",
1012 pos, i, fs->fs_fsmnt);
1013 panic("ffs_alloccgblk: cyl groups corrupted");
1014 }
1015 for (i = fs_postbl(fs, pos)[i];; ) {
1016 if (ffs_isblock(fs, cg_blksfree(cgp, needswap), bno + i)) {
1017 bno = blkstofrags(fs, (bno + i));
1018 goto gotit;
1019 }
1020 delta = fs_rotbl(fs)[i];
1021 if (delta <= 0 ||
1022 delta + i > fragstoblks(fs, fs->fs_fpg))
1023 break;
1024 i += delta;
1025 }
1026 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
1027 panic("ffs_alloccgblk: can't find blk in cyl");
1028 }
1029 norot:
1030 /*
1031 * no blocks in the requested cylinder, so take next
1032 * available one in this cylinder group.
1033 */
1034 bno = ffs_mapsearch(needswap, fs, cgp, bpref, (int)fs->fs_frag);
1035 if (bno < 0)
1036 return (NULL);
1037 cgp->cg_rotor = ufs_rw32(bno, needswap);
1038 gotit:
1039 blkno = fragstoblks(fs, bno);
1040 ffs_clrblock(fs, cg_blksfree(cgp, needswap), (long)blkno);
1041 ffs_clusteracct(needswap, fs, cgp, blkno, -1);
1042 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1043 fs->fs_cstotal.cs_nbfree--;
1044 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1045 cylno = cbtocylno(fs, bno);
1046 ufs_add16(cg_blks(fs, cgp, cylno, needswap)[cbtorpos(fs, bno)], -1,
1047 needswap);
1048 ufs_add32(cg_blktot(cgp, needswap)[cylno], -1, needswap);
1049 fs->fs_fmod = 1;
1050 return (ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno);
1051 }
1052
1053 /*
1054 * Determine whether a cluster can be allocated.
1055 *
1056 * We do not currently check for optimal rotational layout if there
1057 * are multiple choices in the same cylinder group. Instead we just
1058 * take the first one that we find following bpref.
1059 */
1060 static ufs_daddr_t
1061 ffs_clusteralloc(ip, cg, bpref, len)
1062 struct inode *ip;
1063 int cg;
1064 ufs_daddr_t bpref;
1065 int len;
1066 {
1067 register struct fs *fs;
1068 register struct cg *cgp;
1069 struct buf *bp;
1070 int i, got, run, bno, bit, map;
1071 u_char *mapp;
1072 int32_t *lp;
1073
1074 fs = ip->i_fs;
1075 if (fs->fs_maxcluster[cg] < len)
1076 return (NULL);
1077 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1078 NOCRED, &bp))
1079 goto fail;
1080 cgp = (struct cg *)bp->b_data;
1081 if (!cg_chkmagic(cgp, UFS_IPNEEDSWAP(ip)))
1082 goto fail;
1083 /*
1084 * Check to see if a cluster of the needed size (or bigger) is
1085 * available in this cylinder group.
1086 */
1087 lp = &cg_clustersum(cgp, UFS_IPNEEDSWAP(ip))[len];
1088 for (i = len; i <= fs->fs_contigsumsize; i++)
1089 if (ufs_rw32(*lp++, UFS_IPNEEDSWAP(ip)) > 0)
1090 break;
1091 if (i > fs->fs_contigsumsize) {
1092 /*
1093 * This is the first time looking for a cluster in this
1094 * cylinder group. Update the cluster summary information
1095 * to reflect the true maximum sized cluster so that
1096 * future cluster allocation requests can avoid reading
1097 * the cylinder group map only to find no clusters.
1098 */
1099 lp = &cg_clustersum(cgp, UFS_IPNEEDSWAP(ip))[len - 1];
1100 for (i = len - 1; i > 0; i--)
1101 if (ufs_rw32(*lp--, UFS_IPNEEDSWAP(ip)) > 0)
1102 break;
1103 fs->fs_maxcluster[cg] = i;
1104 goto fail;
1105 }
1106 /*
1107 * Search the cluster map to find a big enough cluster.
1108 * We take the first one that we find, even if it is larger
1109 * than we need as we prefer to get one close to the previous
1110 * block allocation. We do not search before the current
1111 * preference point as we do not want to allocate a block
1112 * that is allocated before the previous one (as we will
1113 * then have to wait for another pass of the elevator
1114 * algorithm before it will be read). We prefer to fail and
1115 * be recalled to try an allocation in the next cylinder group.
1116 */
1117 if (dtog(fs, bpref) != cg)
1118 bpref = 0;
1119 else
1120 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1121 mapp = &cg_clustersfree(cgp, UFS_IPNEEDSWAP(ip))[bpref / NBBY];
1122 map = *mapp++;
1123 bit = 1 << (bpref % NBBY);
1124 for (run = 0, got = bpref;
1125 got < ufs_rw32(cgp->cg_nclusterblks, UFS_IPNEEDSWAP(ip)); got++) {
1126 if ((map & bit) == 0) {
1127 run = 0;
1128 } else {
1129 run++;
1130 if (run == len)
1131 break;
1132 }
1133 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1134 bit <<= 1;
1135 } else {
1136 map = *mapp++;
1137 bit = 1;
1138 }
1139 }
1140 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_IPNEEDSWAP(ip)))
1141 goto fail;
1142 /*
1143 * Allocate the cluster that we have found.
1144 */
1145 for (i = 1; i <= len; i++)
1146 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_IPNEEDSWAP(ip)),
1147 got - run + i))
1148 panic("ffs_clusteralloc: map mismatch");
1149 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1150 if (dtog(fs, bno) != cg)
1151 panic("ffs_clusteralloc: allocated out of group");
1152 len = blkstofrags(fs, len);
1153 for (i = 0; i < len; i += fs->fs_frag)
1154 if ((got = ffs_alloccgblk(ITOV(ip)->v_mount, fs, cgp, bno + i))
1155 != bno + i)
1156 panic("ffs_clusteralloc: lost block");
1157 bdwrite(bp);
1158 return (bno);
1159
1160 fail:
1161 brelse(bp);
1162 return (0);
1163 }
1164
1165 /*
1166 * Determine whether an inode can be allocated.
1167 *
1168 * Check to see if an inode is available, and if it is,
1169 * allocate it using the following policy:
1170 * 1) allocate the requested inode.
1171 * 2) allocate the next available inode after the requested
1172 * inode in the specified cylinder group.
1173 */
1174 static ufs_daddr_t
1175 ffs_nodealloccg(ip, cg, ipref, mode)
1176 struct inode *ip;
1177 int cg;
1178 ufs_daddr_t ipref;
1179 int mode;
1180 {
1181 register struct fs *fs;
1182 register struct cg *cgp;
1183 struct buf *bp;
1184 int error, start, len, loc, map, i;
1185 #ifdef FFS_EI
1186 const int needswap = UFS_IPNEEDSWAP(ip);
1187 #endif
1188
1189 fs = ip->i_fs;
1190 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1191 return (NULL);
1192 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1193 (int)fs->fs_cgsize, NOCRED, &bp);
1194 if (error) {
1195 brelse(bp);
1196 return (NULL);
1197 }
1198 cgp = (struct cg *)bp->b_data;
1199 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0) {
1200 brelse(bp);
1201 return (NULL);
1202 }
1203 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
1204 if (ipref) {
1205 ipref %= fs->fs_ipg;
1206 if (isclr(cg_inosused(cgp, needswap), ipref))
1207 goto gotit;
1208 }
1209 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1210 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1211 NBBY);
1212 loc = skpc(0xff, len, &cg_inosused(cgp, needswap)[start]);
1213 if (loc == 0) {
1214 len = start + 1;
1215 start = 0;
1216 loc = skpc(0xff, len, &cg_inosused(cgp, needswap)[0]);
1217 if (loc == 0) {
1218 printf("cg = %d, irotor = %d, fs = %s\n",
1219 cg, ufs_rw32(cgp->cg_irotor, needswap),
1220 fs->fs_fsmnt);
1221 panic("ffs_nodealloccg: map corrupted");
1222 /* NOTREACHED */
1223 }
1224 }
1225 i = start + len - loc;
1226 map = cg_inosused(cgp, needswap)[i];
1227 ipref = i * NBBY;
1228 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1229 if ((map & i) == 0) {
1230 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1231 goto gotit;
1232 }
1233 }
1234 printf("fs = %s\n", fs->fs_fsmnt);
1235 panic("ffs_nodealloccg: block not in map");
1236 /* NOTREACHED */
1237 gotit:
1238 setbit(cg_inosused(cgp, needswap), ipref);
1239 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1240 fs->fs_cstotal.cs_nifree--;
1241 fs->fs_cs(fs, cg).cs_nifree--;
1242 fs->fs_fmod = 1;
1243 if ((mode & IFMT) == IFDIR) {
1244 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1245 fs->fs_cstotal.cs_ndir++;
1246 fs->fs_cs(fs, cg).cs_ndir++;
1247 }
1248 bdwrite(bp);
1249 return (cg * fs->fs_ipg + ipref);
1250 }
1251
1252 /*
1253 * Free a block or fragment.
1254 *
1255 * The specified block or fragment is placed back in the
1256 * free map. If a fragment is deallocated, a possible
1257 * block reassembly is checked.
1258 */
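/*
 * (For example, freeing the last two busy fragments of a block whose
 * other six fragments were already free first credits those two to
 * cs_nffree; the reassembly check then notices the block is whole,
 * removes all eight fragments from the fragment counts and credits one
 * full block to cs_nbfree instead.)
 */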
1259 void
1260 ffs_blkfree(ip, bno, size)
1261 register struct inode *ip;
1262 ufs_daddr_t bno;
1263 long size;
1264 {
1265 register struct fs *fs;
1266 register struct cg *cgp;
1267 struct buf *bp;
1268 ufs_daddr_t blkno;
1269 int i, error, cg, blk, frags, bbase;
1270 const int needswap = UFS_MPNEEDSWAP(ITOV(ip)->v_mount);
1271
1272 fs = ip->i_fs;
1273 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1274 printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n",
1275 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1276 panic("blkfree: bad size");
1277 }
1278 cg = dtog(fs, bno);
1279 if ((u_int)bno >= fs->fs_size) {
1280 printf("bad block %d, ino %d\n", bno, ip->i_number);
1281 ffs_fserr(fs, ip->i_ffs_uid, "bad block");
1282 return;
1283 }
1284 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1285 (int)fs->fs_cgsize, NOCRED, &bp);
1286 if (error) {
1287 brelse(bp);
1288 return;
1289 }
1290 cgp = (struct cg *)bp->b_data;
1291 if (!cg_chkmagic(cgp, needswap)) {
1292 brelse(bp);
1293 return;
1294 }
1295 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
1296 bno = dtogd(fs, bno);
1297 if (size == fs->fs_bsize) {
1298 blkno = fragstoblks(fs, bno);
1299 if (ffs_isblock(fs, cg_blksfree(cgp, needswap), blkno)) {
1300 printf("dev = 0x%x, block = %d, fs = %s\n",
1301 ip->i_dev, bno, fs->fs_fsmnt);
1302 panic("blkfree: freeing free block");
1303 }
1304 ffs_setblock(fs, cg_blksfree(cgp, needswap), blkno);
1305 ffs_clusteracct(needswap, fs, cgp, blkno, 1);
1306 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1307 fs->fs_cstotal.cs_nbfree++;
1308 fs->fs_cs(fs, cg).cs_nbfree++;
1309 i = cbtocylno(fs, bno);
1310 ufs_add16(cg_blks(fs, cgp, i, needswap)[cbtorpos(fs, bno)], 1,
1311 needswap);
1312 ufs_add32(cg_blktot(cgp, needswap)[i], 1, needswap);
1313 } else {
1314 bbase = bno - fragnum(fs, bno);
1315 /*
1316 * decrement the counts associated with the old frags
1317 */
1318 blk = blkmap(fs, cg_blksfree(cgp, needswap), bbase);
1319 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1320 /*
1321 * deallocate the fragment
1322 */
1323 frags = numfrags(fs, size);
1324 for (i = 0; i < frags; i++) {
1325 if (isset(cg_blksfree(cgp, needswap), bno + i)) {
1326 printf("dev = 0x%x, block = %d, fs = %s\n",
1327 ip->i_dev, bno + i, fs->fs_fsmnt);
1328 panic("blkfree: freeing free frag");
1329 }
1330 setbit(cg_blksfree(cgp, needswap), bno + i);
1331 }
1332 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1333 fs->fs_cstotal.cs_nffree += i;
1334 fs->fs_cs(fs, cg).cs_nffree += i;
1335 /*
1336 * add back in counts associated with the new frags
1337 */
1338 blk = blkmap(fs, cg_blksfree(cgp, needswap), bbase);
1339 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1340 /*
1341 * if a complete block has been reassembled, account for it
1342 */
1343 blkno = fragstoblks(fs, bbase);
1344 if (ffs_isblock(fs, cg_blksfree(cgp, needswap), blkno)) {
1345 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1346 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1347 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1348 ffs_clusteracct(needswap, fs, cgp, blkno, 1);
1349 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1350 fs->fs_cstotal.cs_nbfree++;
1351 fs->fs_cs(fs, cg).cs_nbfree++;
1352 i = cbtocylno(fs, bbase);
1353 ufs_add16(cg_blks(fs, cgp, i, needswap)[cbtorpos(fs, bbase)], 1,
1354 needswap);
1355 ufs_add32(cg_blktot(cgp, needswap)[i], 1, needswap);
1356 }
1357 }
1358 fs->fs_fmod = 1;
1359 bdwrite(bp);
1360 }
1361
1362 #if defined(DIAGNOSTIC) || defined(DEBUG)
1363 /*
1364 * Verify allocation of a block or fragment. Returns true if block or
1365 * fragment is allocated, false if it is free.
1366 */
1367 static int
1368 ffs_checkblk(ip, bno, size)
1369 struct inode *ip;
1370 ufs_daddr_t bno;
1371 long size;
1372 {
1373 struct fs *fs;
1374 struct cg *cgp;
1375 struct buf *bp;
1376 int i, error, frags, free;
1377
1378 fs = ip->i_fs;
1379 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1380 printf("bsize = %d, size = %ld, fs = %s\n",
1381 fs->fs_bsize, size, fs->fs_fsmnt);
1382 panic("checkblk: bad size");
1383 }
1384 if ((u_int)bno >= fs->fs_size)
1385 panic("checkblk: bad block %d", bno);
1386 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1387 (int)fs->fs_cgsize, NOCRED, &bp);
1388 if (error) {
1389 brelse(bp);
1390 return 0;
1391 }
1392 cgp = (struct cg *)bp->b_data;
1393 if (!cg_chkmagic(cgp, UFS_IPNEEDSWAP(ip))) {
1394 brelse(bp);
1395 return 0;
1396 }
1397 bno = dtogd(fs, bno);
1398 if (size == fs->fs_bsize) {
1399 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_IPNEEDSWAP(ip)),
1400 fragstoblks(fs, bno));
1401 } else {
1402 frags = numfrags(fs, size);
1403 for (free = 0, i = 0; i < frags; i++)
1404 if (isset(cg_blksfree(cgp, UFS_IPNEEDSWAP(ip)), bno + i))
1405 free++;
1406 if (free != 0 && free != frags)
1407 panic("checkblk: partially free fragment");
1408 }
1409 brelse(bp);
1410 return (!free);
1411 }
1412 #endif /* DIAGNOSTIC || DEBUG */
1413
1414 /*
1415 * Free an inode.
1416 *
1417 * The specified inode is placed back in the free map.
1418 */
1419 int
1420 ffs_vfree(v)
1421 void *v;
1422 {
1423 struct vop_vfree_args /* {
1424 struct vnode *a_pvp;
1425 ino_t a_ino;
1426 int a_mode;
1427 } */ *ap = v;
1428 register struct fs *fs;
1429 register struct cg *cgp;
1430 register struct inode *pip;
1431 ino_t ino = ap->a_ino;
1432 struct buf *bp;
1433 int error, cg;
1434 #ifdef FFS_EI
1435 const int needswap = UFS_MPNEEDSWAP(ap->a_pvp->v_mount);
1436 #endif
1437
1438 pip = VTOI(ap->a_pvp);
1439 fs = pip->i_fs;
1440 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1441 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1442 pip->i_dev, ino, fs->fs_fsmnt);
1443 cg = ino_to_cg(fs, ino);
1444 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1445 (int)fs->fs_cgsize, NOCRED, &bp);
1446 if (error) {
1447 brelse(bp);
1448 return (0);
1449 }
1450 cgp = (struct cg *)bp->b_data;
1451 if (!cg_chkmagic(cgp, needswap)) {
1452 brelse(bp);
1453 return (0);
1454 }
1455 cgp->cg_time = ufs_rw32(time.tv_sec, needswap);
1456 ino %= fs->fs_ipg;
1457 if (isclr(cg_inosused(cgp, needswap), ino)) {
1458 printf("dev = 0x%x, ino = %d, fs = %s\n",
1459 pip->i_dev, ino, fs->fs_fsmnt);
1460 if (fs->fs_ronly == 0)
1461 panic("ifree: freeing free inode");
1462 }
1463 clrbit(cg_inosused(cgp, needswap), ino);
1464 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
1465 cgp->cg_irotor = ufs_rw32(ino, needswap);
1466 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
1467 fs->fs_cstotal.cs_nifree++;
1468 fs->fs_cs(fs, cg).cs_nifree++;
1469 if ((ap->a_mode & IFMT) == IFDIR) {
1470 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
1471 fs->fs_cstotal.cs_ndir--;
1472 fs->fs_cs(fs, cg).cs_ndir--;
1473 }
1474 fs->fs_fmod = 1;
1475 bdwrite(bp);
1476 return (0);
1477 }
1478
1479 /*
1480 * Find a block of the specified size in the specified cylinder group.
1481 *
1482 * It is a panic if a request is made to find a block when none are
1483 * available.
1484 */
1485 static ufs_daddr_t
1486 ffs_mapsearch(needswap, fs, cgp, bpref, allocsiz)
1487 int needswap;
1488 register struct fs *fs;
1489 register struct cg *cgp;
1490 ufs_daddr_t bpref;
1491 int allocsiz;
1492 {
1493 ufs_daddr_t bno;
1494 int start, len, loc, i;
1495 int blk, field, subfield, pos;
1496 int ostart, olen;
1497
1498 /*
1499 * find the fragment by searching through the free block
1500 * map for an appropriate bit pattern
1501 */
1502 if (bpref)
1503 start = dtogd(fs, bpref) / NBBY;
1504 else
1505 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
1506 len = howmany(fs->fs_fpg, NBBY) - start;
1507 ostart = start;
1508 olen = len;
1509 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp, needswap)[start],
1510 (u_char *)fragtbl[fs->fs_frag],
1511 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1512 if (loc == 0) {
1513 len = start + 1;
1514 start = 0;
1515 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp, needswap)[0],
1516 (u_char *)fragtbl[fs->fs_frag],
1517 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1518 if (loc == 0) {
1519 printf("start = %d, len = %d, fs = %s\n",
1520 ostart, olen, fs->fs_fsmnt);
1521 printf("offset=%d %ld\n",
1522 ufs_rw32(cgp->cg_freeoff, needswap),
1523 (long)cg_blksfree(cgp, needswap) - (long)cgp);
1524 panic("ffs_alloccg: map corrupted");
1525 /* NOTREACHED */
1526 }
1527 }
1528 bno = (start + len - loc) * NBBY;
1529 cgp->cg_frotor = ufs_rw32(bno, needswap);
1530 /*
1531 * found the byte in the map
1532 * sift through the bits to find the selected frag
1533 */
1534 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1535 blk = blkmap(fs, cg_blksfree(cgp, needswap), bno);
1536 blk <<= 1;
1537 field = around[allocsiz];
1538 subfield = inside[allocsiz];
1539 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1540 if ((blk & field) == subfield)
1541 return (bno + pos);
1542 field <<= 1;
1543 subfield <<= 1;
1544 }
1545 }
1546 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1547 panic("ffs_alloccg: block not in map");
1548 return (-1);
1549 }
1550
1551 /*
1552 * Update the cluster map because of an allocation or free.
1553 *
1554 * Cnt == 1 means free; cnt == -1 means allocating.
1555 */
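/*
 * (For example, freeing a block that has a free run of two blocks just
 * before it and one block just after it merges them into a single
 * cluster of length four, assuming fs_contigsumsize is at least four:
 * sump[4] is incremented while sump[2] and sump[1], the runs that were
 * absorbed, are decremented.)
 */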
1556 void
1557 ffs_clusteracct(needswap, fs, cgp, blkno, cnt)
1558 int needswap;
1559 struct fs *fs;
1560 struct cg *cgp;
1561 ufs_daddr_t blkno;
1562 int cnt;
1563 {
1564 int32_t *sump;
1565 int32_t *lp;
1566 u_char *freemapp, *mapp;
1567 int i, start, end, forw, back, map, bit;
1568
1569 if (fs->fs_contigsumsize <= 0)
1570 return;
1571 freemapp = cg_clustersfree(cgp, needswap);
1572 sump = cg_clustersum(cgp, needswap);
1573 /*
1574 * Allocate or clear the actual block.
1575 */
1576 if (cnt > 0)
1577 setbit(freemapp, blkno);
1578 else
1579 clrbit(freemapp, blkno);
1580 /*
1581 * Find the size of the cluster going forward.
1582 */
1583 start = blkno + 1;
1584 end = start + fs->fs_contigsumsize;
1585 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
1586 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
1587 mapp = &freemapp[start / NBBY];
1588 map = *mapp++;
1589 bit = 1 << (start % NBBY);
1590 for (i = start; i < end; i++) {
1591 if ((map & bit) == 0)
1592 break;
1593 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1594 bit <<= 1;
1595 } else {
1596 map = *mapp++;
1597 bit = 1;
1598 }
1599 }
1600 forw = i - start;
1601 /*
1602 * Find the size of the cluster going backward.
1603 */
1604 start = blkno - 1;
1605 end = start - fs->fs_contigsumsize;
1606 if (end < 0)
1607 end = -1;
1608 mapp = &freemapp[start / NBBY];
1609 map = *mapp--;
1610 bit = 1 << (start % NBBY);
1611 for (i = start; i > end; i--) {
1612 if ((map & bit) == 0)
1613 break;
1614 if ((i & (NBBY - 1)) != 0) {
1615 bit >>= 1;
1616 } else {
1617 map = *mapp--;
1618 bit = 1 << (NBBY - 1);
1619 }
1620 }
1621 back = start - i;
1622 /*
1623 * Account for old cluster and the possibly new forward and
1624 * back clusters.
1625 */
1626 i = back + forw + 1;
1627 if (i > fs->fs_contigsumsize)
1628 i = fs->fs_contigsumsize;
1629 ufs_add32(sump[i], cnt, needswap);
1630 if (back > 0)
1631 ufs_add32(sump[back], -cnt, needswap);
1632 if (forw > 0)
1633 ufs_add32(sump[forw], -cnt, needswap);
1634
1635 /*
1636 * Update cluster summary information.
1637 */
1638 lp = &sump[fs->fs_contigsumsize];
1639 for (i = fs->fs_contigsumsize; i > 0; i--)
1640 if (ufs_rw32(*lp--, needswap) > 0)
1641 break;
1642 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
1643 }
1644
1645 /*
1646 * Fserr prints the name of a file system with an error diagnostic.
1647 *
1648 * The form of the error message is:
1649 * fs: error message
1650 */
1651 static void
1652 ffs_fserr(fs, uid, cp)
1653 struct fs *fs;
1654 u_int uid;
1655 char *cp;
1656 {
1657
1658 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1659 }
1660