1 /*	$NetBSD: ffs_alloc.c,v 1.103 2007/10/18 17:39:04 hannken Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.103 2007/10/18 17:39:04 hannken Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_ffs.h"
48 #include "opt_quota.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/buf.h>
54 #include <sys/proc.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/kernel.h>
58 #include <sys/syslog.h>
59 #include <sys/kauth.h>
60
61 #include <miscfs/specfs/specdev.h>
62 #include <ufs/ufs/quota.h>
63 #include <ufs/ufs/ufsmount.h>
64 #include <ufs/ufs/inode.h>
65 #include <ufs/ufs/ufs_extern.h>
66 #include <ufs/ufs/ufs_bswap.h>
67
68 #include <ufs/ffs/fs.h>
69 #include <ufs/ffs/ffs_extern.h>
70
71 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
72 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
73 #ifdef XXXUBC
74 static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
75 #endif
76 static ino_t ffs_dirpref(struct inode *);
77 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
78 static void ffs_fserr(struct fs *, u_int, const char *);
79 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
80 daddr_t (*)(struct inode *, int, daddr_t, int));
81 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
82 static int32_t ffs_mapsearch(struct fs *, struct cg *,
83 daddr_t, int);
84 #if defined(DIAGNOSTIC) || defined(DEBUG)
85 #ifdef XXXUBC
86 static int ffs_checkblk(struct inode *, daddr_t, long size);
87 #endif
88 #endif
89
90 /* if 1, changes in optimization strategy are logged */
91 int ffs_log_changeopt = 0;
92
93 /* in ffs_tables.c */
94 extern const int inside[], around[];
95 extern const u_char * const fragtbl[];
96
97 /*
98 * Allocate a block in the file system.
99 *
100 * The size of the requested block is given, which must be some
101 * multiple of fs_fsize and <= fs_bsize.
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 * 1) allocate the requested block.
105 * 2) allocate a rotationally optimal block in the same cylinder.
106 * 3) allocate a block in the same cylinder group.
107 * 4) quadratically rehash into other cylinder groups, until an
108 * available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 * 1) allocate a block in the cylinder group that contains the
112 * inode for the file.
113 * 2) quadratically rehash into other cylinder groups, until an
114 * available block is located.
115 */
116 int
117 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
118 kauth_cred_t cred, daddr_t *bnp)
119 {
120 struct ufsmount *ump;
121 struct fs *fs;
122 daddr_t bno;
123 int cg;
124 #ifdef QUOTA
125 int error;
126 #endif
127
128 fs = ip->i_fs;
129 ump = ip->i_ump;
130
131 KASSERT(mutex_owned(&ump->um_lock));
132
133 #ifdef UVM_PAGE_TRKOWN
134 if (ITOV(ip)->v_type == VREG &&
135 lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
136 struct vm_page *pg;
137 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
138 voff_t off = trunc_page(lblktosize(fs, lbn));
139 voff_t endoff = round_page(lblktosize(fs, lbn) + size);
140
141 simple_lock(&uobj->vmobjlock);
142 while (off < endoff) {
143 pg = uvm_pagelookup(uobj, off);
144 KASSERT(pg != NULL);
145 KASSERT(pg->owner == curproc->p_pid);
146 off += PAGE_SIZE;
147 }
148 simple_unlock(&uobj->vmobjlock);
149 }
150 #endif
151
152 *bnp = 0;
153 #ifdef DIAGNOSTIC
154 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
155 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
156 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
157 panic("ffs_alloc: bad size");
158 }
159 if (cred == NOCRED)
160 panic("ffs_alloc: missing credential");
161 #endif /* DIAGNOSTIC */
162 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
163 goto nospace;
164 if (freespace(fs, fs->fs_minfree) <= 0 &&
165 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0)
166 goto nospace;
167 #ifdef QUOTA
168 mutex_exit(&ump->um_lock);
169 if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
170 return (error);
171 mutex_enter(&ump->um_lock);
172 #endif
173 if (bpref >= fs->fs_size)
174 bpref = 0;
175 if (bpref == 0)
176 cg = ino_to_cg(fs, ip->i_number);
177 else
178 cg = dtog(fs, bpref);
179 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
180 if (bno > 0) {
181 DIP_ADD(ip, blocks, btodb(size));
182 ip->i_flag |= IN_CHANGE | IN_UPDATE;
183 *bnp = bno;
184 return (0);
185 }
186 #ifdef QUOTA
187 /*
188 * Restore user's disk quota because allocation failed.
189 */
190 (void) chkdq(ip, -btodb(size), cred, FORCE);
191 #endif
192 nospace:
193 mutex_exit(&ump->um_lock);
194 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
195 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
196 return (ENOSPC);
197 }
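
/*
 * Illustrative sketch only, not called from anywhere: one way a UFS1
 * block-mapping routine might drive ffs_alloc() for a direct block
 * (lbn < NDADDR).  ffs_alloc() expects um_lock to be held on entry (see
 * the KASSERT above) and, judging from the code above, returns with it
 * released on every path.  The helper name and arguments are hypothetical.
 */
#if 0
static int
example_alloc_direct_block(struct inode *ip, daddr_t lbn, kauth_cred_t cred)
{
	struct fs *fs = ip->i_fs;
	daddr_t pref, newb;
	int error;

	mutex_enter(&ip->i_ump->um_lock);
	/* derive a preference from the existing direct block map */
	pref = ffs_blkpref_ufs1(ip, lbn, (int)lbn, &ip->i_ffs1_db[0]);
	error = ffs_alloc(ip, lbn, pref, fs->fs_bsize, cred, &newb);
	/* on success, newb now holds the allocated filesystem block */
	return error;
}
#endif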
198
199 /*
200 * Reallocate a fragment to a bigger size
201 *
202 * The number and size of the old block are given, and a preference
203 * and new size are also specified. The allocator attempts to extend
204 * the original block. Failing that, the regular block allocator is
205 * invoked to get an appropriate block.
206 */
207 int
208 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
209 int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
210 {
211 struct ufsmount *ump;
212 struct fs *fs;
213 struct buf *bp;
214 int cg, request, error;
215 daddr_t bprev, bno;
216
217 fs = ip->i_fs;
218 ump = ip->i_ump;
219
220 KASSERT(mutex_owned(&ump->um_lock));
221
222 #ifdef UVM_PAGE_TRKOWN
223 if (ITOV(ip)->v_type == VREG) {
224 struct vm_page *pg;
225 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
226 voff_t off = trunc_page(lblktosize(fs, lbprev));
227 voff_t endoff = round_page(lblktosize(fs, lbprev) + osize);
228
229 simple_lock(&uobj->vmobjlock);
230 while (off < endoff) {
231 pg = uvm_pagelookup(uobj, off);
232 KASSERT(pg != NULL);
233 KASSERT(pg->owner == curproc->p_pid);
234 KASSERT((pg->flags & PG_CLEAN) == 0);
235 off += PAGE_SIZE;
236 }
237 simple_unlock(&uobj->vmobjlock);
238 }
239 #endif
240
241 #ifdef DIAGNOSTIC
242 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
243 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
244 printf(
245 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
246 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
247 panic("ffs_realloccg: bad size");
248 }
249 if (cred == NOCRED)
250 panic("ffs_realloccg: missing credential");
251 #endif /* DIAGNOSTIC */
252 if (freespace(fs, fs->fs_minfree) <= 0 &&
253 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) {
254 mutex_exit(&ump->um_lock);
255 goto nospace;
256 }
257 if (fs->fs_magic == FS_UFS2_MAGIC)
258 bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
259 else
260 bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));
261
262 if (bprev == 0) {
263 printf("dev = 0x%x, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
264 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
265 panic("ffs_realloccg: bad bprev");
266 }
267 mutex_exit(&ump->um_lock);
268
269 /*
270 * Allocate the extra space in the buffer.
271 */
272 if (bpp != NULL &&
273 (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
274 brelse(bp, 0);
275 return (error);
276 }
277 #ifdef QUOTA
278 if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
279 if (bpp != NULL) {
280 brelse(bp, 0);
281 }
282 return (error);
283 }
284 #endif
285 /*
286 * Check for extension in the existing location.
287 */
288 cg = dtog(fs, bprev);
289 mutex_enter(&ump->um_lock);
290 if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
291 DIP_ADD(ip, blocks, btodb(nsize - osize));
292 ip->i_flag |= IN_CHANGE | IN_UPDATE;
293
294 if (bpp != NULL) {
295 if (bp->b_blkno != fsbtodb(fs, bno))
296 panic("bad blockno");
297 allocbuf(bp, nsize, 1);
298 bp->b_flags |= B_DONE;
299 memset((char *)bp->b_data + osize, 0, nsize - osize);
300 *bpp = bp;
301 }
302 if (blknop != NULL) {
303 *blknop = bno;
304 }
305 return (0);
306 }
307 /*
308 * Allocate a new disk location.
309 */
310 if (bpref >= fs->fs_size)
311 bpref = 0;
312 switch ((int)fs->fs_optim) {
313 case FS_OPTSPACE:
314 /*
315 * Allocate an exact sized fragment. Although this makes
316 * best use of space, we will waste time relocating it if
317 * the file continues to grow. If the fragmentation is
318 * less than half of the minimum free reserve, we choose
319 * to begin optimizing for time.
320 */
321 request = nsize;
322 if (fs->fs_minfree < 5 ||
323 fs->fs_cstotal.cs_nffree >
324 fs->fs_dsize * fs->fs_minfree / (2 * 100))
325 break;
326
327 if (ffs_log_changeopt) {
328 log(LOG_NOTICE,
329 "%s: optimization changed from SPACE to TIME\n",
330 fs->fs_fsmnt);
331 }
332
333 fs->fs_optim = FS_OPTTIME;
334 break;
335 case FS_OPTTIME:
336 /*
337 * At this point we have discovered a file that is trying to
338 * grow a small fragment to a larger fragment. To save time,
339 * we allocate a full sized block, then free the unused portion.
340 * If the file continues to grow, the `ffs_fragextend' call
341 * above will be able to grow it in place without further
342 * copying. If aberrant programs cause disk fragmentation to
343 * grow within 2% of the free reserve, we choose to begin
344 * optimizing for space.
345 */
346 request = fs->fs_bsize;
347 if (fs->fs_cstotal.cs_nffree <
348 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
349 break;
350
351 if (ffs_log_changeopt) {
352 log(LOG_NOTICE,
353 "%s: optimization changed from TIME to SPACE\n",
354 fs->fs_fsmnt);
355 }
356
357 fs->fs_optim = FS_OPTSPACE;
358 break;
359 default:
360 printf("dev = 0x%x, optim = %d, fs = %s\n",
361 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
362 panic("ffs_realloccg: bad optim");
363 /* NOTREACHED */
364 }
365 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
366 if (bno > 0) {
367 if (!DOINGSOFTDEP(ITOV(ip)))
368 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
369 ip->i_number);
370 if (nsize < request)
371 ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize),
372 (long)(request - nsize), ip->i_number);
373 DIP_ADD(ip, blocks, btodb(nsize - osize));
374 ip->i_flag |= IN_CHANGE | IN_UPDATE;
375 if (bpp != NULL) {
376 bp->b_blkno = fsbtodb(fs, bno);
377 allocbuf(bp, nsize, 1);
378 bp->b_flags |= B_DONE;
379 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
380 *bpp = bp;
381 }
382 if (blknop != NULL) {
383 *blknop = bno;
384 }
385 return (0);
386 }
387 mutex_exit(&ump->um_lock);
388
389 #ifdef QUOTA
390 /*
391 * Restore user's disk quota because allocation failed.
392 */
393 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
394 #endif
395 if (bpp != NULL) {
396 brelse(bp, 0);
397 }
398
399 nospace:
400 /*
401 * no space available
402 */
403 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
404 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
405 return (ENOSPC);
406 }
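
/*
 * Illustrative sketch only: the two optimization-switch thresholds used in
 * the FS_OPTSPACE and FS_OPTTIME cases above, restated side by side.  The
 * helper is hypothetical and never called; only the formulas are taken from
 * the switch statement above.
 */
#if 0
static void
example_optim_thresholds(const struct fs *fs)
{
	/* SPACE -> TIME once fragmentation exceeds half the minfree reserve */
	int64_t space_to_time =
	    (int64_t)fs->fs_dsize * fs->fs_minfree / (2 * 100);
	/* TIME -> SPACE once fragmentation comes within 2% of the reserve */
	int64_t time_to_space =
	    (int64_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100;

	printf("to TIME above %lld free frags, back to SPACE above %lld\n",
	    (long long)space_to_time, (long long)time_to_space);
}
#endif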
407
408 #if 0
409 /*
410 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
411 *
412 * The vnode and an array of buffer pointers for a range of sequential
413 * logical blocks to be made contiguous are given. The allocator attempts
414 * to find a range of sequential blocks starting as close as possible
415 * from the end of the allocation for the logical block immediately
416 * preceding the current range. If successful, the physical block numbers
417 * in the buffer pointers and in the inode are changed to reflect the new
418 * allocation. If unsuccessful, the allocation is left unchanged. The
419 * success in doing the reallocation is returned. Note that the error
420 * return is not reflected back to the user. Rather, the previous block
421 * allocation will be used.
422 *
423 */
424 #ifdef XXXUBC
425 #ifdef DEBUG
426 #include <sys/sysctl.h>
427 int prtrealloc = 0;
428 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
429 #endif
430 #endif
431
432 /*
433 * NOTE: when re-enabling this, it must be updated for UFS2.
434 */
435
436 int doasyncfree = 1;
437
438 int
439 ffs_reallocblks(void *v)
440 {
441 #ifdef XXXUBC
442 struct vop_reallocblks_args /* {
443 struct vnode *a_vp;
444 struct cluster_save *a_buflist;
445 } */ *ap = v;
446 struct fs *fs;
447 struct inode *ip;
448 struct vnode *vp;
449 struct buf *sbp, *ebp;
450 int32_t *bap, *ebap = NULL, *sbap; /* XXX ondisk32 */
451 struct cluster_save *buflist;
452 daddr_t start_lbn, end_lbn, soff, newblk, blkno;
453 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
454 int i, len, start_lvl, end_lvl, pref, ssize;
455 struct ufsmount *ump;
456 #endif /* XXXUBC */
457
458 /* XXXUBC don't reallocblks for now */
459 return ENOSPC;
460
461 #ifdef XXXUBC
462 vp = ap->a_vp;
463 ip = VTOI(vp);
464 fs = ip->i_fs;
465 ump = ip->i_ump;
466 if (fs->fs_contigsumsize <= 0)
467 return (ENOSPC);
468 buflist = ap->a_buflist;
469 len = buflist->bs_nchildren;
470 start_lbn = buflist->bs_children[0]->b_lblkno;
471 end_lbn = start_lbn + len - 1;
472 #ifdef DIAGNOSTIC
473 for (i = 0; i < len; i++)
474 if (!ffs_checkblk(ip,
475 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
476 panic("ffs_reallocblks: unallocated block 1");
477 for (i = 1; i < len; i++)
478 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
479 panic("ffs_reallocblks: non-logical cluster");
480 blkno = buflist->bs_children[0]->b_blkno;
481 ssize = fsbtodb(fs, fs->fs_frag);
482 for (i = 1; i < len - 1; i++)
483 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
484 panic("ffs_reallocblks: non-physical cluster %d", i);
485 #endif
486 /*
487 * If the latest allocation is in a new cylinder group, assume that
488 * the filesystem has decided to move and do not force it back to
489 * the previous cylinder group.
490 */
491 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
492 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
493 return (ENOSPC);
494 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
495 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
496 return (ENOSPC);
497 /*
498 * Get the starting offset and block map for the first block.
499 */
500 if (start_lvl == 0) {
501 sbap = &ip->i_ffs1_db[0];
502 soff = start_lbn;
503 } else {
504 idp = &start_ap[start_lvl - 1];
505 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
506 brelse(sbp, 0);
507 return (ENOSPC);
508 }
509 sbap = (int32_t *)sbp->b_data;
510 soff = idp->in_off;
511 }
512 /*
513 * Find the preferred location for the cluster.
514 */
515 mutex_enter(&ump->um_lock);
516 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
517 /*
518 * If the block range spans two block maps, get the second map.
519 */
520 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
521 ssize = len;
522 } else {
523 #ifdef DIAGNOSTIC
524 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
525 panic("ffs_reallocblk: start == end");
526 #endif
527 ssize = len - (idp->in_off + 1);
528 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
529 goto fail;
530 ebap = (int32_t *)ebp->b_data; /* XXX ondisk32 */
531 }
532 /*
533 * Search the block map looking for an allocation of the desired size.
534 */
535 if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
536 len, ffs_clusteralloc)) == 0) {
537 mutex_exit(&ump->um_lock);
538 goto fail;
539 }
540 /*
541 * We have found a new contiguous block.
542 *
543 * First we have to replace the old block pointers with the new
544 * block pointers in the inode and indirect blocks associated
545 * with the file.
546 */
547 #ifdef DEBUG
548 if (prtrealloc)
549 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
550 start_lbn, end_lbn);
551 #endif
552 blkno = newblk;
553 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
554 daddr_t ba;
555
556 if (i == ssize) {
557 bap = ebap;
558 soff = -i;
559 }
560 /* XXX ondisk32 */
561 ba = ufs_rw32(*bap, UFS_FSNEEDSWAP(fs));
562 #ifdef DIAGNOSTIC
563 if (!ffs_checkblk(ip,
564 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
565 panic("ffs_reallocblks: unallocated block 2");
566 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != ba)
567 panic("ffs_reallocblks: alloc mismatch");
568 #endif
569 #ifdef DEBUG
570 if (prtrealloc)
571 printf(" %d,", ba);
572 #endif
573 if (DOINGSOFTDEP(vp)) {
574 if (sbap == &ip->i_ffs1_db[0] && i < ssize)
575 softdep_setup_allocdirect(ip, start_lbn + i,
576 blkno, ba, fs->fs_bsize, fs->fs_bsize,
577 buflist->bs_children[i]);
578 else
579 softdep_setup_allocindir_page(ip, start_lbn + i,
580 i < ssize ? sbp : ebp, soff + i, blkno,
581 ba, buflist->bs_children[i]);
582 }
583 /* XXX ondisk32 */
584 *bap++ = ufs_rw32((u_int32_t)blkno, UFS_FSNEEDSWAP(fs));
585 }
586 /*
587 * Next we must write out the modified inode and indirect blocks.
588 * For strict correctness, the writes should be synchronous since
589 * the old block values may have been written to disk. In practice
590 * they are almost never written, but if we are concerned about
591 * strict correctness, the `doasyncfree' flag should be set to zero.
592 *
593 * The test on `doasyncfree' should be changed to test a flag
594 * that shows whether the associated buffers and inodes have
595 * been written. The flag should be set when the cluster is
596 * started and cleared whenever the buffer or inode is flushed.
597 * We can then check below to see if it is set, and do the
598 * synchronous write only when it has been cleared.
599 */
600 if (sbap != &ip->i_ffs1_db[0]) {
601 if (doasyncfree)
602 bdwrite(sbp);
603 else
604 bwrite(sbp);
605 } else {
606 ip->i_flag |= IN_CHANGE | IN_UPDATE;
607 if (!doasyncfree)
608 ffs_update(vp, NULL, NULL, 1);
609 }
610 if (ssize < len) {
611 if (doasyncfree)
612 bdwrite(ebp);
613 else
614 bwrite(ebp);
615 }
616 /*
617 * Last, free the old blocks and assign the new blocks to the buffers.
618 */
619 #ifdef DEBUG
620 if (prtrealloc)
621 printf("\n\tnew:");
622 #endif
623 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
624 if (!DOINGSOFTDEP(vp))
625 ffs_blkfree(fs, ip->i_devvp,
626 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
627 fs->fs_bsize, ip->i_number);
628 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
629 #ifdef DEBUG
630 if (!ffs_checkblk(ip,
631 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
632 panic("ffs_reallocblks: unallocated block 3");
633 if (prtrealloc)
634 printf(" %d,", blkno);
635 #endif
636 }
637 #ifdef DEBUG
638 if (prtrealloc) {
639 prtrealloc--;
640 printf("\n");
641 }
642 #endif
643 return (0);
644
645 fail:
646 if (ssize < len)
647 brelse(ebp, 0);
648 if (sbap != &ip->i_ffs1_db[0])
649 brelse(sbp, 0);
650 return (ENOSPC);
651 #endif /* XXXUBC */
652 }
653 #endif /* 0 */
654
655 /*
656 * Allocate an inode in the file system.
657 *
658 * If allocating a directory, use ffs_dirpref to select the inode.
659 * If allocating in a directory, the following hierarchy is followed:
660 * 1) allocate the preferred inode.
661 * 2) allocate an inode in the same cylinder group.
662 * 3) quadratically rehash into other cylinder groups, until an
663 * available inode is located.
664 * If no inode preference is given the following hierarchy is used
665 * to allocate an inode:
666 * 1) allocate an inode in cylinder group 0.
667 * 2) quadratically rehash into other cylinder groups, until an
668 * available inode is located.
669 */
670 int
671 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
672 struct vnode **vpp)
673 {
674 struct ufsmount *ump;
675 struct inode *pip;
676 struct fs *fs;
677 struct inode *ip;
678 struct timespec ts;
679 ino_t ino, ipref;
680 int cg, error;
681
682 *vpp = NULL;
683 pip = VTOI(pvp);
684 fs = pip->i_fs;
685 ump = pip->i_ump;
686
687 mutex_enter(&ump->um_lock);
688 if (fs->fs_cstotal.cs_nifree == 0)
689 goto noinodes;
690
691 if ((mode & IFMT) == IFDIR)
692 ipref = ffs_dirpref(pip);
693 else
694 ipref = pip->i_number;
695 if (ipref >= fs->fs_ncg * fs->fs_ipg)
696 ipref = 0;
697 cg = ino_to_cg(fs, ipref);
698 /*
699 * Track the number of dirs created one after another
700 * in the same cg without intervening files.
701 */
702 if ((mode & IFMT) == IFDIR) {
703 if (fs->fs_contigdirs[cg] < 255)
704 fs->fs_contigdirs[cg]++;
705 } else {
706 if (fs->fs_contigdirs[cg] > 0)
707 fs->fs_contigdirs[cg]--;
708 }
709 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
710 if (ino == 0)
711 goto noinodes;
712 error = VFS_VGET(pvp->v_mount, ino, vpp);
713 if (error) {
714 ffs_vfree(pvp, ino, mode);
715 return (error);
716 }
717 KASSERT((*vpp)->v_type == VNON);
718 ip = VTOI(*vpp);
719 if (ip->i_mode) {
720 #if 0
721 printf("mode = 0%o, inum = %d, fs = %s\n",
722 ip->i_mode, ip->i_number, fs->fs_fsmnt);
723 #else
724 printf("dmode %x mode %x dgen %x gen %x\n",
725 DIP(ip, mode), ip->i_mode,
726 DIP(ip, gen), ip->i_gen);
727 printf("size %llx blocks %llx\n",
728 (long long)DIP(ip, size), (long long)DIP(ip, blocks));
729 printf("ino %llu ipref %llu\n", (unsigned long long)ino,
730 (unsigned long long)ipref);
731 #if 0
732 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
733 (int)fs->fs_bsize, NOCRED, &bp);
734 #endif
735
736 #endif
737 panic("ffs_valloc: dup alloc");
738 }
739 if (DIP(ip, blocks)) { /* XXX */
740 printf("free inode %s/%llu had %" PRId64 " blocks\n",
741 fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks));
742 DIP_ASSIGN(ip, blocks, 0);
743 }
744 ip->i_flag &= ~IN_SPACECOUNTED;
745 ip->i_flags = 0;
746 DIP_ASSIGN(ip, flags, 0);
747 /*
748 * Set up a new generation number for this inode.
749 */
750 ip->i_gen++;
751 DIP_ASSIGN(ip, gen, ip->i_gen);
752 if (fs->fs_magic == FS_UFS2_MAGIC) {
753 vfs_timestamp(&ts);
754 ip->i_ffs2_birthtime = ts.tv_sec;
755 ip->i_ffs2_birthnsec = ts.tv_nsec;
756 }
757 return (0);
758 noinodes:
759 mutex_exit(&ump->um_lock);
760 ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
761 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
762 return (ENOSPC);
763 }
764
765 /*
766 * Find a cylinder group in which to place a directory.
767 *
768 * The policy implemented by this algorithm is to allocate a
769 * directory inode in the same cylinder group as its parent
770 * directory, but also to reserve space for its files' inodes
771 * and data. Restrict the number of directories which may be
772 * allocated one after another in the same cylinder group
773 * without intervening allocation of files.
774 *
775 * If we allocate a first level directory then force allocation
776 * in another cylinder group.
777 */
778 static ino_t
779 ffs_dirpref(struct inode *pip)
780 {
781 register struct fs *fs;
782 int cg, prefcg;
783 int64_t dirsize, cgsize, curdsz;
784 int avgifree, avgbfree, avgndir;
785 int minifree, minbfree, maxndir;
786 int mincg, minndir;
787 int maxcontigdirs;
788
789 KASSERT(mutex_owned(&pip->i_ump->um_lock));
790
791 fs = pip->i_fs;
792
793 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
794 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
795 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
796
797 /*
798 * Force allocation in another cg if creating a first level dir.
799 */
800 if (ITOV(pip)->v_vflag & VV_ROOT) {
801 prefcg = random() % fs->fs_ncg;
802 mincg = prefcg;
803 minndir = fs->fs_ipg;
804 for (cg = prefcg; cg < fs->fs_ncg; cg++)
805 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
806 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
807 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
808 mincg = cg;
809 minndir = fs->fs_cs(fs, cg).cs_ndir;
810 }
811 for (cg = 0; cg < prefcg; cg++)
812 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
813 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
814 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
815 mincg = cg;
816 minndir = fs->fs_cs(fs, cg).cs_ndir;
817 }
818 return ((ino_t)(fs->fs_ipg * mincg));
819 }
820
821 /*
822 * Compute various limits used for
823 * optimal allocation of a directory inode.
824 */
825 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
826 minifree = avgifree - fs->fs_ipg / 4;
827 if (minifree < 0)
828 minifree = 0;
829 minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4;
830 if (minbfree < 0)
831 minbfree = 0;
832 cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
833 dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
834 if (avgndir != 0) {
835 curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
836 if (dirsize < curdsz)
837 dirsize = curdsz;
838 }
839 if (cgsize < dirsize * 255)
840 maxcontigdirs = cgsize / dirsize;
841 else
842 maxcontigdirs = 255;
843 if (fs->fs_avgfpdir > 0)
844 maxcontigdirs = min(maxcontigdirs,
845 fs->fs_ipg / fs->fs_avgfpdir);
846 if (maxcontigdirs == 0)
847 maxcontigdirs = 1;
848
849 /*
850 * Limit number of dirs in one cg and reserve space for
851 * regular files, but only if we have no deficit in
852 * inodes or space.
853 */
854 prefcg = ino_to_cg(fs, pip->i_number);
855 for (cg = prefcg; cg < fs->fs_ncg; cg++)
856 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
857 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
858 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
859 if (fs->fs_contigdirs[cg] < maxcontigdirs)
860 return ((ino_t)(fs->fs_ipg * cg));
861 }
862 for (cg = 0; cg < prefcg; cg++)
863 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
864 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
865 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
866 if (fs->fs_contigdirs[cg] < maxcontigdirs)
867 return ((ino_t)(fs->fs_ipg * cg));
868 }
869 /*
870 * This is a backstop when we are deficient in space.
871 */
872 for (cg = prefcg; cg < fs->fs_ncg; cg++)
873 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
874 return ((ino_t)(fs->fs_ipg * cg));
875 for (cg = 0; cg < prefcg; cg++)
876 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
877 break;
878 return ((ino_t)(fs->fs_ipg * cg));
879 }
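
/*
 * Illustrative sketch only: the dirsize/maxcontigdirs estimate from
 * ffs_dirpref() above, pulled out into a hypothetical helper so the
 * arithmetic can be read in isolation.  Every field reference mirrors the
 * function above; nothing new is computed.
 */
#if 0
static int
example_maxcontigdirs(struct fs *fs)
{
	int64_t cgsize, dirsize, curdsz;
	int avgbfree, avgndir, maxcontigdirs;

	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
	cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
	/* expected bytes per directory from the tunables ... */
	dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
	/* ... bumped up to what existing directories actually consume */
	if (avgndir != 0) {
		curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
		if (dirsize < curdsz)
			dirsize = curdsz;
	}
	maxcontigdirs = (cgsize < dirsize * 255) ? cgsize / dirsize : 255;
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs, fs->fs_ipg / fs->fs_avgfpdir);
	return (maxcontigdirs == 0) ? 1 : maxcontigdirs;
}
#endif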
880
881 /*
882 * Select the desired position for the next block in a file. The file is
883 * logically divided into sections. The first section is composed of the
884 * direct blocks. Each additional section contains fs_maxbpg blocks.
885 *
886 * If no blocks have been allocated in the first section, the policy is to
887 * request a block in the same cylinder group as the inode that describes
888 * the file. If no blocks have been allocated in any other section, the
889 * policy is to place the section in a cylinder group with a greater than
890 * average number of free blocks. An appropriate cylinder group is found
891 * by using a rotor that sweeps the cylinder groups. When a new group of
892 * blocks is needed, the sweep begins in the cylinder group following the
893 * cylinder group from which the previous allocation was made. The sweep
894 * continues until a cylinder group with greater than the average number
895 * of free blocks is found. If the allocation is for the first block in an
896 * indirect block, the information on the previous allocation is unavailable;
897 * here a best guess is made based upon the logical block number being
898 * allocated.
899 *
900 * If a section is already partially allocated, the policy is to
901 * contiguously allocate fs_maxcontig blocks. The end of one of these
902 * contiguous blocks and the beginning of the next are laid out
903 * contiguously if possible.
904 */
905 daddr_t
906 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
907 int32_t *bap /* XXX ondisk32 */)
908 {
909 struct fs *fs;
910 int cg;
911 int avgbfree, startcg;
912
913 KASSERT(mutex_owned(&ip->i_ump->um_lock));
914
915 fs = ip->i_fs;
916 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
917 if (lbn < NDADDR + NINDIR(fs)) {
918 cg = ino_to_cg(fs, ip->i_number);
919 return (fs->fs_fpg * cg + fs->fs_frag);
920 }
921 /*
922 * Find a cylinder with greater than average number of
923 * unused data blocks.
924 */
925 if (indx == 0 || bap[indx - 1] == 0)
926 startcg =
927 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
928 else
929 startcg = dtog(fs,
930 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
931 startcg %= fs->fs_ncg;
932 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
933 for (cg = startcg; cg < fs->fs_ncg; cg++)
934 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
935 return (fs->fs_fpg * cg + fs->fs_frag);
936 }
937 for (cg = 0; cg < startcg; cg++)
938 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
939 return (fs->fs_fpg * cg + fs->fs_frag);
940 }
941 return (0);
942 }
943 /*
944 * We just always try to lay things out contiguously.
945 */
946 return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
947 }
948
949 daddr_t
950 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
951 {
952 struct fs *fs;
953 int cg;
954 int avgbfree, startcg;
955
956 KASSERT(mutex_owned(&ip->i_ump->um_lock));
957
958 fs = ip->i_fs;
959 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
960 if (lbn < NDADDR + NINDIR(fs)) {
961 cg = ino_to_cg(fs, ip->i_number);
962 return (fs->fs_fpg * cg + fs->fs_frag);
963 }
964 /*
965 * Find a cylinder with greater than average number of
966 * unused data blocks.
967 */
968 if (indx == 0 || bap[indx - 1] == 0)
969 startcg =
970 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
971 else
972 startcg = dtog(fs,
973 ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
974 startcg %= fs->fs_ncg;
975 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
976 for (cg = startcg; cg < fs->fs_ncg; cg++)
977 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
978 return (fs->fs_fpg * cg + fs->fs_frag);
979 }
980 for (cg = 0; cg < startcg; cg++)
981 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
982 return (fs->fs_fpg * cg + fs->fs_frag);
983 }
984 return (0);
985 }
986 /*
987 * We just always try to lay things out contiguously.
988 */
989 return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
990 }
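
/*
 * Illustrative sketch only: where the rotor sweep in the two functions above
 * begins.  "prevblk" stands for the already byte-swapped bap[indx - 1]; the
 * helper itself is hypothetical and never called.
 */
#if 0
static int
example_pick_start_cg(struct fs *fs, struct inode *ip, daddr_t lbn, int indx,
    int64_t prevblk)
{
	/* first block of a section with no history: guess from the lbn */
	if (indx == 0 || prevblk == 0)
		return (ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg) %
		    fs->fs_ncg;
	/* otherwise start sweeping just past the previous allocation */
	return dtog(fs, prevblk + 1) % fs->fs_ncg;
}
#endif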
991
992
993 /*
994 * Implement the cylinder overflow algorithm.
995 *
996 * The policy implemented by this algorithm is:
997 * 1) allocate the block in its requested cylinder group.
998 * 2) quadratically rehash on the cylinder group number.
999 * 3) brute force search for a free block.
1000 */
1001 /*VARARGS5*/
1002 static daddr_t
1003 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
1004 int size /* size for data blocks, mode for inodes */,
1005 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
1006 {
1007 struct fs *fs;
1008 daddr_t result;
1009 int i, icg = cg;
1010
1011 fs = ip->i_fs;
1012 /*
1013 * 1: preferred cylinder group
1014 */
1015 result = (*allocator)(ip, cg, pref, size);
1016 if (result)
1017 return (result);
1018 /*
1019 * 2: quadratic rehash
1020 */
1021 for (i = 1; i < fs->fs_ncg; i *= 2) {
1022 cg += i;
1023 if (cg >= fs->fs_ncg)
1024 cg -= fs->fs_ncg;
1025 result = (*allocator)(ip, cg, 0, size);
1026 if (result)
1027 return (result);
1028 }
1029 /*
1030 * 3: brute force search
1031 * Note that we start at i == 2, since 0 was checked initially,
1032 * and 1 is always checked in the quadratic rehash.
1033 */
1034 cg = (icg + 2) % fs->fs_ncg;
1035 for (i = 2; i < fs->fs_ncg; i++) {
1036 result = (*allocator)(ip, cg, 0, size);
1037 if (result)
1038 return (result);
1039 cg++;
1040 if (cg == fs->fs_ncg)
1041 cg = 0;
1042 }
1043 return (0);
1044 }
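
/*
 * Illustrative sketch only: the order in which ffs_hashalloc() above visits
 * cylinder groups, with the allocator calls replaced by printf().  For
 * icg = 5 and ncg = 32 the quadratic rehash tries 6, 8, 12, 20 and 4 before
 * the brute-force sweep takes over at cg 7.  The helper is hypothetical.
 */
#if 0
static void
example_hashalloc_order(int icg, int ncg)
{
	int i, cg = icg;

	printf("%d", cg);			/* 1: preferred cylinder group */
	for (i = 1; i < ncg; i *= 2) {		/* 2: quadratic rehash */
		cg += i;
		if (cg >= ncg)
			cg -= ncg;
		printf(" %d", cg);
	}
	cg = (icg + 2) % ncg;			/* 3: brute force search */
	for (i = 2; i < ncg; i++) {
		printf(" %d", cg);
		cg++;
		if (cg == ncg)
			cg = 0;
	}
	printf("\n");
}
#endif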
1045
1046 /*
1047 * Determine whether a fragment can be extended.
1048 *
1049 * Check to see if the necessary fragments are available, and
1050 * if they are, allocate them.
1051 */
1052 static daddr_t
1053 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
1054 {
1055 struct ufsmount *ump;
1056 struct fs *fs;
1057 struct cg *cgp;
1058 struct buf *bp;
1059 daddr_t bno;
1060 int frags, bbase;
1061 int i, error;
1062 u_int8_t *blksfree;
1063
1064 fs = ip->i_fs;
1065 ump = ip->i_ump;
1066
1067 KASSERT(mutex_owned(&ump->um_lock));
1068
1069 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
1070 return (0);
1071 frags = numfrags(fs, nsize);
1072 bbase = fragnum(fs, bprev);
1073 if (bbase > fragnum(fs, (bprev + frags - 1))) {
1074 /* cannot extend across a block boundary */
1075 return (0);
1076 }
1077 mutex_exit(&ump->um_lock);
1078 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1079 (int)fs->fs_cgsize, NOCRED, &bp);
1080 if (error)
1081 goto fail;
1082 cgp = (struct cg *)bp->b_data;
1083 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1084 goto fail;
1085 cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
1086 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1087 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1088 cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
1089 bno = dtogd(fs, bprev);
1090 blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
1091 for (i = numfrags(fs, osize); i < frags; i++)
1092 if (isclr(blksfree, bno + i))
1093 goto fail;
1094 /*
1095 * the current fragment can be extended
1096 * deduct the count on fragment being extended into
1097 * increase the count on the remaining fragment (if any)
1098 * allocate the extended piece
1099 */
1100 for (i = frags; i < fs->fs_frag - bbase; i++)
1101 if (isclr(blksfree, bno + i))
1102 break;
1103 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
1104 if (i != frags)
1105 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
1106 mutex_enter(&ump->um_lock);
1107 for (i = numfrags(fs, osize); i < frags; i++) {
1108 clrbit(blksfree, bno + i);
1109 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
1110 fs->fs_cstotal.cs_nffree--;
1111 fs->fs_cs(fs, cg).cs_nffree--;
1112 }
1113 fs->fs_fmod = 1;
1114 ACTIVECG_CLR(fs, cg);
1115 mutex_exit(&ump->um_lock);
1116 if (DOINGSOFTDEP(ITOV(ip)))
1117 softdep_setup_blkmapdep(bp, fs, bprev);
1118 bdwrite(bp);
1119 return (bprev);
1120
1121 fail:
1122 brelse(bp, 0);
1123 mutex_enter(&ump->um_lock);
1124 return (0);
1125 }
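
/*
 * Illustrative sketch only: the block-boundary test from ffs_fragextend()
 * above in isolation.  With fs_frag = 8, a fragment whose first frag sits at
 * offset 6 within its block can grow to at most 2 frags, so a request for 3
 * frags fails here and ffs_realloccg() falls back to a fresh allocation.
 * The helper is hypothetical and never called.
 */
#if 0
static int
example_fits_in_block(struct fs *fs, daddr_t bprev, int nsize)
{
	int frags = numfrags(fs, nsize);

	/* true iff the extended fragment would stay inside one block */
	return fragnum(fs, bprev) <= fragnum(fs, bprev + frags - 1);
}
#endif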
1126
1127 /*
1128 * Determine whether a block can be allocated.
1129 *
1130 * Check to see if a block of the appropriate size is available,
1131 * and if it is, allocate it.
1132 */
1133 static daddr_t
1134 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
1135 {
1136 struct ufsmount *ump;
1137 struct fs *fs = ip->i_fs;
1138 struct cg *cgp;
1139 struct buf *bp;
1140 int32_t bno;
1141 daddr_t blkno;
1142 int error, frags, allocsiz, i;
1143 u_int8_t *blksfree;
1144 #ifdef FFS_EI
1145 const int needswap = UFS_FSNEEDSWAP(fs);
1146 #endif
1147
1148 ump = ip->i_ump;
1149
1150 KASSERT(mutex_owned(&ump->um_lock));
1151
1152 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1153 return (0);
1154 mutex_exit(&ump->um_lock);
1155 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1156 (int)fs->fs_cgsize, NOCRED, &bp);
1157 if (error)
1158 goto fail;
1159 cgp = (struct cg *)bp->b_data;
1160 if (!cg_chkmagic(cgp, needswap) ||
1161 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
1162 goto fail;
1163 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1164 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1165 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1166 cgp->cg_time = ufs_rw64(time_second, needswap);
1167 if (size == fs->fs_bsize) {
1168 mutex_enter(&ump->um_lock);
1169 blkno = ffs_alloccgblk(ip, bp, bpref);
1170 ACTIVECG_CLR(fs, cg);
1171 mutex_exit(&ump->um_lock);
1172 bdwrite(bp);
1173 return (blkno);
1174 }
1175 /*
1176 * check to see if any fragments are already available
1177 * allocsiz is the size which will be allocated, hacking
1178 * it down to a smaller size if necessary
1179 */
1180 blksfree = cg_blksfree(cgp, needswap);
1181 frags = numfrags(fs, size);
1182 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1183 if (cgp->cg_frsum[allocsiz] != 0)
1184 break;
1185 if (allocsiz == fs->fs_frag) {
1186 /*
1187 * no fragments were available, so a block will be
1188 * allocated, and hacked up
1189 */
1190 if (cgp->cg_cs.cs_nbfree == 0)
1191 goto fail;
1192 mutex_enter(&ump->um_lock);
1193 blkno = ffs_alloccgblk(ip, bp, bpref);
1194 bno = dtogd(fs, blkno);
1195 for (i = frags; i < fs->fs_frag; i++)
1196 setbit(blksfree, bno + i);
1197 i = fs->fs_frag - frags;
1198 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1199 fs->fs_cstotal.cs_nffree += i;
1200 fs->fs_cs(fs, cg).cs_nffree += i;
1201 fs->fs_fmod = 1;
1202 ufs_add32(cgp->cg_frsum[i], 1, needswap);
1203 ACTIVECG_CLR(fs, cg);
1204 mutex_exit(&ump->um_lock);
1205 bdwrite(bp);
1206 return (blkno);
1207 }
1208 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1209 #if 0
1210 /*
1211 * XXX fvdl mapsearch will panic, and never return -1
1212 * also: returning NULL as daddr_t ?
1213 */
1214 if (bno < 0)
1215 goto fail;
1216 #endif
1217 for (i = 0; i < frags; i++)
1218 clrbit(blksfree, bno + i);
1219 mutex_enter(&ump->um_lock);
1220 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
1221 fs->fs_cstotal.cs_nffree -= frags;
1222 fs->fs_cs(fs, cg).cs_nffree -= frags;
1223 fs->fs_fmod = 1;
1224 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
1225 if (frags != allocsiz)
1226 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
1227 blkno = cg * fs->fs_fpg + bno;
1228 ACTIVECG_CLR(fs, cg);
1229 mutex_exit(&ump->um_lock);
1230 if (DOINGSOFTDEP(ITOV(ip)))
1231 softdep_setup_blkmapdep(bp, fs, blkno);
1232 bdwrite(bp);
1233 return blkno;
1234
1235 fail:
1236 brelse(bp, 0);
1237 mutex_enter(&ump->um_lock);
1238 return (0);
1239 }
1240
1241 /*
1242 * Allocate a block in a cylinder group.
1243 *
1244 * This algorithm implements the following policy:
1245 * 1) allocate the requested block.
1246 * 2) allocate a rotationally optimal block in the same cylinder.
1247 * 3) allocate the next available block on the block rotor for the
1248 * specified cylinder group.
1249 * Note that this routine only allocates fs_bsize blocks; these
1250 * blocks may be fragmented by the routine that allocates them.
1251 */
1252 static daddr_t
1253 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
1254 {
1255 struct ufsmount *ump;
1256 struct fs *fs = ip->i_fs;
1257 struct cg *cgp;
1258 daddr_t blkno;
1259 int32_t bno;
1260 u_int8_t *blksfree;
1261 #ifdef FFS_EI
1262 const int needswap = UFS_FSNEEDSWAP(fs);
1263 #endif
1264
1265 ump = ip->i_ump;
1266
1267 KASSERT(mutex_owned(&ump->um_lock));
1268
1269 cgp = (struct cg *)bp->b_data;
1270 blksfree = cg_blksfree(cgp, needswap);
1271 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1272 bpref = ufs_rw32(cgp->cg_rotor, needswap);
1273 } else {
1274 bpref = blknum(fs, bpref);
1275 bno = dtogd(fs, bpref);
1276 /*
1277 * if the requested block is available, use it
1278 */
1279 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
1280 goto gotit;
1281 }
1282 /*
1283 * Take the next available block in this cylinder group.
1284 */
1285 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1286 if (bno < 0)
1287 return (0);
1288 cgp->cg_rotor = ufs_rw32(bno, needswap);
1289 gotit:
1290 blkno = fragstoblks(fs, bno);
1291 ffs_clrblock(fs, blksfree, blkno);
1292 ffs_clusteracct(fs, cgp, blkno, -1);
1293 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1294 fs->fs_cstotal.cs_nbfree--;
1295 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1296 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1297 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1298 int cylno;
1299 cylno = old_cbtocylno(fs, bno);
1300 KASSERT(cylno >= 0);
1301 KASSERT(cylno < fs->fs_old_ncyl);
1302 KASSERT(old_cbtorpos(fs, bno) >= 0);
1303 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
1304 ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
1305 needswap);
1306 ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
1307 }
1308 fs->fs_fmod = 1;
1309 blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
1310 if (DOINGSOFTDEP(ITOV(ip))) {
1311 mutex_exit(&ump->um_lock);
1312 softdep_setup_blkmapdep(bp, fs, blkno);
1313 mutex_enter(&ump->um_lock);
1314 }
1315 return (blkno);
1316 }
1317
1318 #ifdef XXXUBC
1319 /*
1320 * Determine whether a cluster can be allocated.
1321 *
1322 * We do not currently check for optimal rotational layout if there
1323 * are multiple choices in the same cylinder group. Instead we just
1324 * take the first one that we find following bpref.
1325 */
1326
1327 /*
1328 * This function must be fixed for UFS2 if re-enabled.
1329 */
1330 static daddr_t
1331 ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1332 {
1333 struct ufsmount *ump;
1334 struct fs *fs;
1335 struct cg *cgp;
1336 struct buf *bp;
1337 int i, got, run, bno, bit, map;
1338 u_char *mapp;
1339 int32_t *lp;
1340
1341 fs = ip->i_fs;
1342 ump = ip->i_ump;
1343
1344 KASSERT(mutex_owned(&ump->um_lock));
1345 if (fs->fs_maxcluster[cg] < len)
1346 return (0);
1347 mutex_exit(&ump->um_lock);
1348 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1349 NOCRED, &bp))
1350 goto fail;
1351 cgp = (struct cg *)bp->b_data;
1352 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1353 goto fail;
1354 /*
1355 * Check to see if a cluster of the needed size (or bigger) is
1356 * available in this cylinder group.
1357 */
1358 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len];
1359 for (i = len; i <= fs->fs_contigsumsize; i++)
1360 if (ufs_rw32(*lp++, UFS_FSNEEDSWAP(fs)) > 0)
1361 break;
1362 if (i > fs->fs_contigsumsize) {
1363 /*
1364 * This is the first time looking for a cluster in this
1365 * cylinder group. Update the cluster summary information
1366 * to reflect the true maximum sized cluster so that
1367 * future cluster allocation requests can avoid reading
1368 * the cylinder group map only to find no clusters.
1369 */
1370 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len - 1];
1371 for (i = len - 1; i > 0; i--)
1372 if (ufs_rw32(*lp--, UFS_FSNEEDSWAP(fs)) > 0)
1373 break;
1374 mutex_enter(&ump->um_lock);
1375 fs->fs_maxcluster[cg] = i;
1376 mutex_exit(&ump->um_lock);
1377 goto fail;
1378 }
1379 /*
1380 * Search the cluster map to find a big enough cluster.
1381 * We take the first one that we find, even if it is larger
1382 * than we need as we prefer to get one close to the previous
1383 * block allocation. We do not search before the current
1384 * preference point as we do not want to allocate a block
1385 * that is allocated before the previous one (as we will
1386 * then have to wait for another pass of the elevator
1387 * algorithm before it will be read). We prefer to fail and
1388 * be recalled to try an allocation in the next cylinder group.
1389 */
1390 if (dtog(fs, bpref) != cg)
1391 bpref = 0;
1392 else
1393 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1394 mapp = &cg_clustersfree(cgp, UFS_FSNEEDSWAP(fs))[bpref / NBBY];
1395 map = *mapp++;
1396 bit = 1 << (bpref % NBBY);
1397 for (run = 0, got = bpref;
1398 got < ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)); got++) {
1399 if ((map & bit) == 0) {
1400 run = 0;
1401 } else {
1402 run++;
1403 if (run == len)
1404 break;
1405 }
1406 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1407 bit <<= 1;
1408 } else {
1409 map = *mapp++;
1410 bit = 1;
1411 }
1412 }
1413 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)))
1414 goto fail;
1415 /*
1416 * Allocate the cluster that we have found.
1417 */
1418 #ifdef DIAGNOSTIC
1419 for (i = 1; i <= len; i++)
1420 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1421 got - run + i))
1422 panic("ffs_clusteralloc: map mismatch");
1423 #endif
1424 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1425 if (dtog(fs, bno) != cg)
1426 panic("ffs_clusteralloc: allocated out of group");
1427 len = blkstofrags(fs, len);
1428 mutex_enter(&ump->um_lock);
1429 for (i = 0; i < len; i += fs->fs_frag)
1430 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
1431 panic("ffs_clusteralloc: lost block");
1432 ACTIVECG_CLR(fs, cg);
1433 mutex_exit(&ump->um_lock);
1434 bdwrite(bp);
1435 return (bno);
1436
1437 fail:
1438 brelse(bp, 0);
1439 mutex_enter(&ump->um_lock);
1440 return (0);
1441 }
1442 #endif /* XXXUBC */
1443
1444 /*
1445 * Determine whether an inode can be allocated.
1446 *
1447 * Check to see if an inode is available, and if it is,
1448 * allocate it using the following policy:
1449 * 1) allocate the requested inode.
1450 * 2) allocate the next available inode after the requested
1451 * inode in the specified cylinder group.
1452 */
1453 static daddr_t
1454 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1455 {
1456 struct ufsmount *ump = ip->i_ump;
1457 struct fs *fs = ip->i_fs;
1458 struct cg *cgp;
1459 struct buf *bp, *ibp;
1460 u_int8_t *inosused;
1461 int error, start, len, loc, map, i;
1462 int32_t initediblk;
1463 struct ufs2_dinode *dp2;
1464 #ifdef FFS_EI
1465 const int needswap = UFS_FSNEEDSWAP(fs);
1466 #endif
1467
1468 KASSERT(mutex_owned(&ump->um_lock));
1469
1470 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1471 return (0);
1472 mutex_exit(&ump->um_lock);
1473 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1474 (int)fs->fs_cgsize, NOCRED, &bp);
1475 if (error)
1476 goto fail;
1477 cgp = (struct cg *)bp->b_data;
1478 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
1479 goto fail;
1480 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1481 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1482 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1483 cgp->cg_time = ufs_rw64(time_second, needswap);
1484 inosused = cg_inosused(cgp, needswap);
1485 if (ipref) {
1486 ipref %= fs->fs_ipg;
1487 if (isclr(inosused, ipref))
1488 goto gotit;
1489 }
1490 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1491 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1492 NBBY);
1493 loc = skpc(0xff, len, &inosused[start]);
1494 if (loc == 0) {
1495 len = start + 1;
1496 start = 0;
1497 loc = skpc(0xff, len, &inosused[0]);
1498 if (loc == 0) {
1499 printf("cg = %d, irotor = %d, fs = %s\n",
1500 cg, ufs_rw32(cgp->cg_irotor, needswap),
1501 fs->fs_fsmnt);
1502 panic("ffs_nodealloccg: map corrupted");
1503 /* NOTREACHED */
1504 }
1505 }
1506 i = start + len - loc;
1507 map = inosused[i];
1508 ipref = i * NBBY;
1509 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1510 if ((map & i) == 0) {
1511 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1512 goto gotit;
1513 }
1514 }
1515 printf("fs = %s\n", fs->fs_fsmnt);
1516 panic("ffs_nodealloccg: block not in map");
1517 /* NOTREACHED */
1518 gotit:
1519 /*
1520 * Check to see if we need to initialize more inodes.
1521 */
1522 initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
1523 if (fs->fs_magic == FS_UFS2_MAGIC &&
1524 ipref + INOPB(fs) > initediblk &&
1525 initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
1526 ibp = getblk(ip->i_devvp, fsbtodb(fs,
1527 ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1528 (int)fs->fs_bsize, 0, 0);
1529 memset(ibp->b_data, 0, fs->fs_bsize);
1530 dp2 = (struct ufs2_dinode *)(ibp->b_data);
1531 for (i = 0; i < INOPB(fs); i++) {
1532 /*
1533 * Don't bother to swap, it's supposed to be
1534 * random, after all.
1535 */
1536 dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
1537 dp2++;
1538 }
1539 bawrite(ibp);
1540 initediblk += INOPB(fs);
1541 cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1542 }
1543
1544 mutex_enter(&ump->um_lock);
1545 ACTIVECG_CLR(fs, cg);
1546 setbit(inosused, ipref);
1547 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1548 fs->fs_cstotal.cs_nifree--;
1549 fs->fs_cs(fs, cg).cs_nifree--;
1550 fs->fs_fmod = 1;
1551 if ((mode & IFMT) == IFDIR) {
1552 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1553 fs->fs_cstotal.cs_ndir++;
1554 fs->fs_cs(fs, cg).cs_ndir++;
1555 }
1556 mutex_exit(&ump->um_lock);
1557 if (DOINGSOFTDEP(ITOV(ip)))
1558 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1559 bdwrite(bp);
1560 return (cg * fs->fs_ipg + ipref);
1561 fail:
1562 brelse(bp, 0);
1563 mutex_enter(&ump->um_lock);
1564 return (0);
1565 }
1566
1567 /*
1568 * Free a block or fragment.
1569 *
1570 * The specified block or fragment is placed back in the
1571 * free map. If a fragment is deallocated, a possible
1572 * block reassembly is checked.
1573 */
1574 void
1575 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1576 ino_t inum)
1577 {
1578 struct cg *cgp;
1579 struct buf *bp;
1580 struct ufsmount *ump;
1581 int32_t fragno, cgbno;
1582 daddr_t cgblkno;
1583 int i, error, cg, blk, frags, bbase;
1584 u_int8_t *blksfree;
1585 dev_t dev;
1586 const int needswap = UFS_FSNEEDSWAP(fs);
1587
1588 cg = dtog(fs, bno);
1589 if (devvp->v_type != VBLK) {
1590 /* devvp is a snapshot */
1591 dev = VTOI(devvp)->i_devvp->v_rdev;
1592 ump = VFSTOUFS(devvp->v_mount);
1593 cgblkno = fragstoblks(fs, cgtod(fs, cg));
1594 } else {
1595 dev = devvp->v_rdev;
1596 ump = VFSTOUFS(devvp->v_specmountpoint);
1597 cgblkno = fsbtodb(fs, cgtod(fs, cg));
1598 if (ffs_snapblkfree(fs, devvp, bno, size, inum))
1599 return;
1600 }
1601 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1602 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1603 printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
1604 "size = %ld, fs = %s\n",
1605 dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1606 panic("blkfree: bad size");
1607 }
1608
1609 if (bno >= fs->fs_size) {
1610 printf("bad block %" PRId64 ", ino %llu\n", bno,
1611 (unsigned long long)inum);
1612 ffs_fserr(fs, inum, "bad block");
1613 return;
1614 }
1615 error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp);
1616 if (error) {
1617 brelse(bp, 0);
1618 return;
1619 }
1620 cgp = (struct cg *)bp->b_data;
1621 if (!cg_chkmagic(cgp, needswap)) {
1622 brelse(bp, 0);
1623 return;
1624 }
1625 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1626 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1627 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1628 cgp->cg_time = ufs_rw64(time_second, needswap);
1629 cgbno = dtogd(fs, bno);
1630 blksfree = cg_blksfree(cgp, needswap);
1631 mutex_enter(&ump->um_lock);
1632 if (size == fs->fs_bsize) {
1633 fragno = fragstoblks(fs, cgbno);
1634 if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1635 if (devvp->v_type != VBLK) {
1636 /* devvp is a snapshot */
1637 mutex_exit(&ump->um_lock);
1638 brelse(bp, 0);
1639 return;
1640 }
1641 printf("dev = 0x%x, block = %" PRId64 ", fs = %s\n",
1642 dev, bno, fs->fs_fsmnt);
1643 panic("blkfree: freeing free block");
1644 }
1645 ffs_setblock(fs, blksfree, fragno);
1646 ffs_clusteracct(fs, cgp, fragno, 1);
1647 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1648 fs->fs_cstotal.cs_nbfree++;
1649 fs->fs_cs(fs, cg).cs_nbfree++;
1650 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1651 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1652 i = old_cbtocylno(fs, cgbno);
1653 KASSERT(i >= 0);
1654 KASSERT(i < fs->fs_old_ncyl);
1655 KASSERT(old_cbtorpos(fs, cgbno) >= 0);
1656 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
1657 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
1658 needswap);
1659 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1660 }
1661 } else {
1662 bbase = cgbno - fragnum(fs, cgbno);
1663 /*
1664 * decrement the counts associated with the old frags
1665 */
1666 blk = blkmap(fs, blksfree, bbase);
1667 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1668 /*
1669 * deallocate the fragment
1670 */
1671 frags = numfrags(fs, size);
1672 for (i = 0; i < frags; i++) {
1673 if (isset(blksfree, cgbno + i)) {
1674 printf("dev = 0x%x, block = %" PRId64
1675 ", fs = %s\n",
1676 dev, bno + i, fs->fs_fsmnt);
1677 panic("blkfree: freeing free frag");
1678 }
1679 setbit(blksfree, cgbno + i);
1680 }
1681 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1682 fs->fs_cstotal.cs_nffree += i;
1683 fs->fs_cs(fs, cg).cs_nffree += i;
1684 /*
1685 * add back in counts associated with the new frags
1686 */
1687 blk = blkmap(fs, blksfree, bbase);
1688 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1689 /*
1690 * if a complete block has been reassembled, account for it
1691 */
1692 fragno = fragstoblks(fs, bbase);
1693 if (ffs_isblock(fs, blksfree, fragno)) {
1694 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1695 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1696 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1697 ffs_clusteracct(fs, cgp, fragno, 1);
1698 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1699 fs->fs_cstotal.cs_nbfree++;
1700 fs->fs_cs(fs, cg).cs_nbfree++;
1701 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1702 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1703 i = old_cbtocylno(fs, bbase);
1704 KASSERT(i >= 0);
1705 KASSERT(i < fs->fs_old_ncyl);
1706 KASSERT(old_cbtorpos(fs, bbase) >= 0);
1707 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
1708 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
1709 bbase)], 1, needswap);
1710 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1711 }
1712 }
1713 }
1714 fs->fs_fmod = 1;
1715 ACTIVECG_CLR(fs, cg);
1716 mutex_exit(&ump->um_lock);
1717 bdwrite(bp);
1718 }
1719
1720 #if defined(DIAGNOSTIC) || defined(DEBUG)
1721 #ifdef XXXUBC
1722 /*
1723 * Verify allocation of a block or fragment. Returns true if block or
1724 * fragment is allocated, false if it is free.
1725 */
1726 static int
1727 ffs_checkblk(struct inode *ip, daddr_t bno, long size)
1728 {
1729 struct fs *fs;
1730 struct cg *cgp;
1731 struct buf *bp;
1732 int i, error, frags, free;
1733
1734 fs = ip->i_fs;
1735 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1736 printf("bsize = %d, size = %ld, fs = %s\n",
1737 fs->fs_bsize, size, fs->fs_fsmnt);
1738 panic("checkblk: bad size");
1739 }
1740 if (bno >= fs->fs_size)
1741 panic("checkblk: bad block %d", bno);
1742 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1743 (int)fs->fs_cgsize, NOCRED, &bp);
1744 if (error) {
1745 brelse(bp, 0);
1746 return 0;
1747 }
1748 cgp = (struct cg *)bp->b_data;
1749 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1750 brelse(bp, 0);
1751 return 0;
1752 }
1753 bno = dtogd(fs, bno);
1754 if (size == fs->fs_bsize) {
1755 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1756 fragstoblks(fs, bno));
1757 } else {
1758 frags = numfrags(fs, size);
1759 for (free = 0, i = 0; i < frags; i++)
1760 if (isset(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
1761 free++;
1762 if (free != 0 && free != frags)
1763 panic("checkblk: partially free fragment");
1764 }
1765 brelse(bp, 0);
1766 return (!free);
1767 }
1768 #endif /* XXXUBC */
1769 #endif /* DIAGNOSTIC */
1770
1771 /*
1772 * Free an inode.
1773 */
1774 int
1775 ffs_vfree(struct vnode *vp, ino_t ino, int mode)
1776 {
1777
1778 if (DOINGSOFTDEP(vp)) {
1779 softdep_freefile(vp, ino, mode);
1780 return (0);
1781 }
1782 return ffs_freefile(VTOI(vp)->i_fs, VTOI(vp)->i_devvp, ino, mode);
1783 }
1784
1785 /*
1786 * Do the actual free operation.
1787 * The specified inode is placed back in the free map.
1788 */
1789 int
1790 ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
1791 {
1792 struct ufsmount *ump;
1793 struct cg *cgp;
1794 struct buf *bp;
1795 int error, cg;
1796 daddr_t cgbno;
1797 u_int8_t *inosused;
1798 dev_t dev;
1799 #ifdef FFS_EI
1800 const int needswap = UFS_FSNEEDSWAP(fs);
1801 #endif
1802
1803 cg = ino_to_cg(fs, ino);
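/*
 * When devvp is a snapshot vnode the cylinder group is read through
 * the snapshot file, so cgbno is a logical file system block number;
 * when it is the raw device, cgbno is a device block number, hence
 * fsbtodb().
 */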
1804 if (devvp->v_type != VBLK) {
1805 /* devvp is a snapshot */
1806 dev = VTOI(devvp)->i_devvp->v_rdev;
1807 ump = VFSTOUFS(devvp->v_mount);
1808 cgbno = fragstoblks(fs, cgtod(fs, cg));
1809 } else {
1810 dev = devvp->v_rdev;
1811 ump = VFSTOUFS(devvp->v_specmountpoint);
1812 cgbno = fsbtodb(fs, cgtod(fs, cg));
1813 }
1814 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1815 panic("ifree: range: dev = 0x%x, ino = %llu, fs = %s",
1816 dev, (unsigned long long)ino, fs->fs_fsmnt);
1817 error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp);
1818 if (error) {
1819 brelse(bp, 0);
1820 return (error);
1821 }
1822 cgp = (struct cg *)bp->b_data;
1823 if (!cg_chkmagic(cgp, needswap)) {
1824 brelse(bp, 0);
1825 return (0);
1826 }
1827 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1828 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1829 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1830 cgp->cg_time = ufs_rw64(time_second, needswap);
1831 inosused = cg_inosused(cgp, needswap);
1832 ino %= fs->fs_ipg;
1833 if (isclr(inosused, ino)) {
1834 printf("ifree: dev = 0x%x, ino = %llu, fs = %s\n",
1835 dev, (unsigned long long)ino + cg * fs->fs_ipg,
1836 fs->fs_fsmnt);
1837 if (fs->fs_ronly == 0)
1838 panic("ifree: freeing free inode");
1839 }
1840 clrbit(inosused, ino);
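/*
 * Pull the inode rotor back so the next allocation scan in this
 * cylinder group starts no later than the slot just freed.
 */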
1841 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
1842 cgp->cg_irotor = ufs_rw32(ino, needswap);
1843 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
1844 mutex_enter(&ump->um_lock);
1845 fs->fs_cstotal.cs_nifree++;
1846 fs->fs_cs(fs, cg).cs_nifree++;
1847 if ((mode & IFMT) == IFDIR) {
1848 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
1849 fs->fs_cstotal.cs_ndir--;
1850 fs->fs_cs(fs, cg).cs_ndir--;
1851 }
1852 fs->fs_fmod = 1;
1853 ACTIVECG_CLR(fs, cg);
1854 mutex_exit(&ump->um_lock);
1855 bdwrite(bp);
1856 return (0);
1857 }
1858
1859 /*
1860 * Check to see if an inode is free.
1861 */
1862 int
1863 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
1864 {
1865 struct cg *cgp;
1866 struct buf *bp;
1867 daddr_t cgbno;
1868 int ret, cg;
1869 u_int8_t *inosused;
1870
1871 cg = ino_to_cg(fs, ino);
1872 if (devvp->v_type != VBLK) {
1873 /* devvp is a snapshot */
1874 cgbno = fragstoblks(fs, cgtod(fs, cg));
1875 } else
1876 cgbno = fsbtodb(fs, cgtod(fs, cg));
1877 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1878 return 1;
1879 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
1880 brelse(bp, 0);
1881 return 1;
1882 }
1883 cgp = (struct cg *)bp->b_data;
1884 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1885 brelse(bp, 0);
1886 return 1;
1887 }
1888 inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
1889 ino %= fs->fs_ipg;
1890 ret = isclr(inosused, ino);
1891 brelse(bp, 0);
1892 return ret;
1893 }
1894
1895 /*
1896 * Find a block of the specified size in the specified cylinder group.
1897 *
1898 * It is a panic if a request is made to find a block when none
1899 * are available.
1900 */
1901 static int32_t
1902 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
1903 {
1904 int32_t bno;
1905 int start, len, loc, i;
1906 int blk, field, subfield, pos;
1907 int ostart, olen;
1908 u_int8_t *blksfree;
1909 #ifdef FFS_EI
1910 const int needswap = UFS_FSNEEDSWAP(fs);
1911 #endif
1912
1913 /* KASSERT(mutex_owned(&ump->um_lock)); */
1914
1915 /*
1916 * find the fragment by searching through the free block
1917 * map for an appropriate bit pattern
1918 */
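/*
 * A preferred block (bpref) fixes the byte where the scan starts;
 * otherwise it resumes at cg_frotor, the rotor left behind by the
 * previous search (updated below once a usable byte is found).  The
 * map is scanned from there to the end and, failing that, wrapped
 * around from the beginning.
 */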
1919 if (bpref)
1920 start = dtogd(fs, bpref) / NBBY;
1921 else
1922 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
1923 blksfree = cg_blksfree(cgp, needswap);
1924 len = howmany(fs->fs_fpg, NBBY) - start;
1925 ostart = start;
1926 olen = len;
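/*
 * fragtbl[fs->fs_frag][byte] summarizes which free-fragment run sizes
 * occur in that byte of the map; scanc() returns the count of bytes
 * from the matching byte to the end of the range, or 0 if no byte in
 * the range has a summary bit in common with the mask derived from
 * allocsiz.
 */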
1927 loc = scanc((u_int)len,
1928 (const u_char *)&blksfree[start],
1929 (const u_char *)fragtbl[fs->fs_frag],
1930 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1931 if (loc == 0) {
1932 len = start + 1;
1933 start = 0;
1934 loc = scanc((u_int)len,
1935 (const u_char *)&blksfree[0],
1936 (const u_char *)fragtbl[fs->fs_frag],
1937 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1938 if (loc == 0) {
1939 printf("start = %d, len = %d, fs = %s\n",
1940 ostart, olen, fs->fs_fsmnt);
1941 printf("offset=%d %ld\n",
1942 ufs_rw32(cgp->cg_freeoff, needswap),
1943 (long)blksfree - (long)cgp);
1944 printf("cg %d\n", cgp->cg_cgx);
1945 panic("ffs_alloccg: map corrupted");
1946 /* NOTREACHED */
1947 }
1948 }
1949 bno = (start + len - loc) * NBBY;
1950 cgp->cg_frotor = ufs_rw32(bno, needswap);
1951 /*
1952 * found the byte in the map
1953 * sift through the bits to find the selected frag
1954 */
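/*
 * Each iteration below examines one block's worth of fragments:
 * blkmap() gathers its fs_frag free bits and the shift adds a zero
 * guard bit below them, so around[allocsiz]/inside[allocsiz] match a
 * run of exactly allocsiz free fragments bounded by busy fragments
 * (or the block edge) on both sides.  For example (table values from
 * ffs_tables.c), with fs_frag == 8 and allocsiz == 2, around[2] == 0xf
 * and inside[2] == 0x6: a map byte of 0x34 (fragments 2, 4 and 5 of
 * that block free) shifts to 0x68 and matches only at pos == 4,
 * returning bno + 4 for the two-fragment run at fragments 4-5.
 */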
1955 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1956 blk = blkmap(fs, blksfree, bno);
1957 blk <<= 1;
1958 field = around[allocsiz];
1959 subfield = inside[allocsiz];
1960 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1961 if ((blk & field) == subfield)
1962 return (bno + pos);
1963 field <<= 1;
1964 subfield <<= 1;
1965 }
1966 }
1967 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1968 panic("ffs_alloccg: block not in map");
1969 /* return (-1); */
1970 }
1971
1972 /*
1973 * Update the cluster map because of an allocation or free.
1974 *
1975 * Cnt == 1 means free; cnt == -1 means allocating.
1976 */
1977 void
1978 ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
1979 {
1980 int32_t *sump;
1981 int32_t *lp;
1982 u_char *freemapp, *mapp;
1983 int i, start, end, forw, back, map, bit;
1984 #ifdef FFS_EI
1985 const int needswap = UFS_FSNEEDSWAP(fs);
1986 #endif
1987
1988 /* KASSERT(mutex_owned(&ump->um_lock)); */
1989
1990 if (fs->fs_contigsumsize <= 0)
1991 return;
1992 freemapp = cg_clustersfree(cgp, needswap);
1993 sump = cg_clustersum(cgp, needswap);
1994 /*
1995 * Set the block's own free bit when freeing (cnt > 0), clear it when allocating (cnt < 0).
1996 */
1997 if (cnt > 0)
1998 setbit(freemapp, blkno);
1999 else
2000 clrbit(freemapp, blkno);
2001 /*
2002 * Find the size of the cluster going forward.
2003 */
2004 start = blkno + 1;
2005 end = start + fs->fs_contigsumsize;
2006 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
2007 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
2008 mapp = &freemapp[start / NBBY];
2009 map = *mapp++;
2010 bit = 1 << (start % NBBY);
2011 for (i = start; i < end; i++) {
2012 if ((map & bit) == 0)
2013 break;
2014 if ((i & (NBBY - 1)) != (NBBY - 1)) {
2015 bit <<= 1;
2016 } else {
2017 map = *mapp++;
2018 bit = 1;
2019 }
2020 }
2021 forw = i - start;
2022 /*
2023 * Find the size of the cluster going backward.
2024 */
2025 start = blkno - 1;
2026 end = start - fs->fs_contigsumsize;
2027 if (end < 0)
2028 end = -1;
2029 mapp = &freemapp[start / NBBY];
2030 map = *mapp--;
2031 bit = 1 << (start % NBBY);
2032 for (i = start; i > end; i--) {
2033 if ((map & bit) == 0)
2034 break;
2035 if ((i & (NBBY - 1)) != 0) {
2036 bit >>= 1;
2037 } else {
2038 map = *mapp--;
2039 bit = 1 << (NBBY - 1);
2040 }
2041 }
2042 back = start - i;
2043 /*
2044 * Account for old cluster and the possibly new forward and
2045 * back clusters.
2046 */
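/*
 * On a free (cnt == 1) the back cluster, the block itself and the
 * forward cluster merge into one cluster of back + forw + 1 blocks
 * (capped at fs_contigsumsize), so that bucket gains an entry while
 * the buckets for the old back and forward clusters each lose one.
 * An allocation (cnt == -1) performs the inverse update.
 */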
2047 i = back + forw + 1;
2048 if (i > fs->fs_contigsumsize)
2049 i = fs->fs_contigsumsize;
2050 ufs_add32(sump[i], cnt, needswap);
2051 if (back > 0)
2052 ufs_add32(sump[back], -cnt, needswap);
2053 if (forw > 0)
2054 ufs_add32(sump[forw], -cnt, needswap);
2055
2056 /*
2057 * Update cluster summary information.
2058 */
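/*
 * fs_maxcluster[] caches, per cylinder group, the largest run length
 * (capped at fs_contigsumsize) for which at least one free cluster
 * remains, so the cluster allocator can skip groups that cannot
 * satisfy a request.
 */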
2059 lp = &sump[fs->fs_contigsumsize];
2060 for (i = fs->fs_contigsumsize; i > 0; i--)
2061 if (ufs_rw32(*lp--, needswap) > 0)
2062 break;
2063 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
2064 }
2065
2066 /*
2067 * Fserr prints the name of a file system with an error diagnostic.
2068 *
2069 * The form of the error message is:
2070 * uid UID, pid PID, command CMD, on FS: error message
2071 */
2072 static void
2073 ffs_fserr(struct fs *fs, u_int uid, const char *cp)
2074 {
2075
2076 log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
2077 uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
2078 }
2079