1 /*	$NetBSD: ffs_alloc.c,v 1.106 2008/01/21 23:36:26 pooka Exp $	*/
2
3 /*
4 * Copyright (c) 2002 Networks Associates Technology, Inc.
5 * All rights reserved.
6 *
7 * This software was developed for the FreeBSD Project by Marshall
8 * Kirk McKusick and Network Associates Laboratories, the Security
9 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11 * research program
12 *
13 * Copyright (c) 1982, 1986, 1989, 1993
14 * The Regents of the University of California. All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.106 2008/01/21 23:36:26 pooka Exp $");
45
46 #if defined(_KERNEL_OPT)
47 #include "opt_ffs.h"
48 #include "opt_quota.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/buf.h>
54 #include <sys/proc.h>
55 #include <sys/vnode.h>
56 #include <sys/mount.h>
57 #include <sys/kernel.h>
58 #include <sys/syslog.h>
59 #include <sys/kauth.h>
60
61 #include <miscfs/specfs/specdev.h>
62 #include <ufs/ufs/quota.h>
63 #include <ufs/ufs/ufsmount.h>
64 #include <ufs/ufs/inode.h>
65 #include <ufs/ufs/ufs_extern.h>
66 #include <ufs/ufs/ufs_bswap.h>
67
68 #include <ufs/ffs/fs.h>
69 #include <ufs/ffs/ffs_extern.h>
70
71 static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
72 static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t);
73 #ifdef XXXUBC
74 static daddr_t ffs_clusteralloc(struct inode *, int, daddr_t, int);
75 #endif
76 static ino_t ffs_dirpref(struct inode *);
77 static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
78 static void ffs_fserr(struct fs *, u_int, const char *);
79 static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
80 daddr_t (*)(struct inode *, int, daddr_t, int));
81 static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int);
82 static int32_t ffs_mapsearch(struct fs *, struct cg *,
83 daddr_t, int);
84 #if defined(DIAGNOSTIC) || defined(DEBUG)
85 #ifdef XXXUBC
86 static int ffs_checkblk(struct inode *, daddr_t, long size);
87 #endif
88 #endif
89
90 /* if 1, changes in optimization strategy are logged */
91 int ffs_log_changeopt = 0;
92
93 /* in ffs_tables.c */
94 extern const int inside[], around[];
95 extern const u_char * const fragtbl[];
96
97 /*
98 * Allocate a block in the file system.
99 *
100 * The size of the requested block is given, which must be some
101 * multiple of fs_fsize and <= fs_bsize.
102 * A preference may be optionally specified. If a preference is given
103 * the following hierarchy is used to allocate a block:
104 * 1) allocate the requested block.
105 * 2) allocate a rotationally optimal block in the same cylinder.
106 * 3) allocate a block in the same cylinder group.
107 * 4) quadratically rehash into other cylinder groups, until an
108 * available block is located.
109 * If no block preference is given the following hierarchy is used
110 * to allocate a block:
111 * 1) allocate a block in the cylinder group that contains the
112 * inode for the file.
113 * 2) quadratically rehash into other cylinder groups, until an
114 * available block is located.
115 *
116 * => called with um_lock held
117 * => releases um_lock before returning
118 */
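/*
 * Illustrative calling sketch (not part of this file): a hypothetical
 * caller such as the block-mapping code is expected to take um_lock,
 * compute a preference, and let ffs_alloc() drop the lock:
 *
 *	daddr_t newb;
 *	int error;
 *
 *	mutex_enter(&ump->um_lock);
 *	error = ffs_alloc(ip, lbn,
 *	    ffs_blkpref_ufs1(ip, lbn, indx, &ip->i_ffs1_db[0]),
 *	    fs->fs_bsize, cred, &newb);
 *	if (error)
 *		return error;
 */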
119 int
120 ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
121 kauth_cred_t cred, daddr_t *bnp)
122 {
123 struct ufsmount *ump;
124 struct fs *fs;
125 daddr_t bno;
126 int cg;
127 #ifdef QUOTA
128 int error;
129 #endif
130
131 fs = ip->i_fs;
132 ump = ip->i_ump;
133
134 KASSERT(mutex_owned(&ump->um_lock));
135
136 #ifdef UVM_PAGE_TRKOWN
137 if (ITOV(ip)->v_type == VREG &&
138 lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
139 struct vm_page *pg;
140 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
141 voff_t off = trunc_page(lblktosize(fs, lbn));
142 voff_t endoff = round_page(lblktosize(fs, lbn) + size);
143
144 mutex_enter(&uobj->vmobjlock);
145 while (off < endoff) {
146 pg = uvm_pagelookup(uobj, off);
147 KASSERT(pg != NULL);
148 KASSERT(pg->owner == curproc->p_pid);
149 off += PAGE_SIZE;
150 }
151 mutex_exit(&uobj->vmobjlock);
152 }
153 #endif
154
155 *bnp = 0;
156 #ifdef DIAGNOSTIC
157 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
158 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
159 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
160 panic("ffs_alloc: bad size");
161 }
162 if (cred == NOCRED)
163 panic("ffs_alloc: missing credential");
164 #endif /* DIAGNOSTIC */
165 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
166 goto nospace;
167 if (freespace(fs, fs->fs_minfree) <= 0 &&
168 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0)
169 goto nospace;
170 #ifdef QUOTA
171 mutex_exit(&ump->um_lock);
172 if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
173 return (error);
174 mutex_enter(&ump->um_lock);
175 #endif
176 if (bpref >= fs->fs_size)
177 bpref = 0;
178 if (bpref == 0)
179 cg = ino_to_cg(fs, ip->i_number);
180 else
181 cg = dtog(fs, bpref);
182 bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
183 if (bno > 0) {
184 DIP_ADD(ip, blocks, btodb(size));
185 ip->i_flag |= IN_CHANGE | IN_UPDATE;
186 *bnp = bno;
187 return (0);
188 }
189 #ifdef QUOTA
190 /*
191 * Restore user's disk quota because allocation failed.
192 */
193 (void) chkdq(ip, -btodb(size), cred, FORCE);
194 #endif
195 nospace:
196 mutex_exit(&ump->um_lock);
197 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
198 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
199 return (ENOSPC);
200 }
201
202 /*
203 * Reallocate a fragment to a bigger size
204 *
205 * The number and size of the old block are given, and a preference
206 * and new size are also specified. The allocator attempts to extend
207 * the original block. Failing that, the regular block allocator is
208 * invoked to get an appropriate block.
209 *
210 * => called with um_lock held
211 * => return with um_lock released
212 */
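/*
 * Illustrative example (assumed geometry, not taken from this file):
 * with fs_bsize = 16384 and fs_fsize = 2048, growing a partial last
 * block from 2 fragments to 3 looks like
 *
 *	osize = 4096;				(2 fragments in use)
 *	nsize = fragroundup(fs, 6144);		(3 fragments wanted)
 *	error = ffs_realloccg(ip, lbprev, bpref, osize, nsize,
 *	    cred, &bp, &blkno);
 *
 * ffs_fragextend() is tried first so the data need not move; only if
 * the neighbouring fragments are taken is a new location allocated
 * and the old fragments freed.
 */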
213 int
214 ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
215 int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
216 {
217 struct ufsmount *ump;
218 struct fs *fs;
219 struct buf *bp;
220 int cg, request, error;
221 daddr_t bprev, bno;
222
223 fs = ip->i_fs;
224 ump = ip->i_ump;
225
226 KASSERT(mutex_owned(&ump->um_lock));
227
228 #ifdef UVM_PAGE_TRKOWN
229 if (ITOV(ip)->v_type == VREG) {
230 struct vm_page *pg;
231 struct uvm_object *uobj = &ITOV(ip)->v_uobj;
232 voff_t off = trunc_page(lblktosize(fs, lbprev));
233 voff_t endoff = round_page(lblktosize(fs, lbprev) + osize);
234
235 mutex_enter(&uobj->vmobjlock);
236 while (off < endoff) {
237 pg = uvm_pagelookup(uobj, off);
238 KASSERT(pg != NULL);
239 KASSERT(pg->owner == curproc->p_pid);
240 KASSERT((pg->flags & PG_CLEAN) == 0);
241 off += PAGE_SIZE;
242 }
243 mutex_exit(&uobj->vmobjlock);
244 }
245 #endif
246
247 #ifdef DIAGNOSTIC
248 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
249 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
250 printf(
251 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
252 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
253 panic("ffs_realloccg: bad size");
254 }
255 if (cred == NOCRED)
256 panic("ffs_realloccg: missing credential");
257 #endif /* DIAGNOSTIC */
258 if (freespace(fs, fs->fs_minfree) <= 0 &&
259 kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) != 0) {
260 mutex_exit(&ump->um_lock);
261 goto nospace;
262 }
263 if (fs->fs_magic == FS_UFS2_MAGIC)
264 bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
265 else
266 bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));
267
268 if (bprev == 0) {
269 printf("dev = 0x%x, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
270 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
271 panic("ffs_realloccg: bad bprev");
272 }
273 mutex_exit(&ump->um_lock);
274
275 /*
276 * Allocate the extra space in the buffer.
277 */
278 if (bpp != NULL &&
279 (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) != 0) {
280 brelse(bp, 0);
281 return (error);
282 }
283 #ifdef QUOTA
284 if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
285 if (bpp != NULL) {
286 brelse(bp, 0);
287 }
288 return (error);
289 }
290 #endif
291 /*
292 * Check for extension in the existing location.
293 */
294 cg = dtog(fs, bprev);
295 mutex_enter(&ump->um_lock);
296 if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
297 DIP_ADD(ip, blocks, btodb(nsize - osize));
298 ip->i_flag |= IN_CHANGE | IN_UPDATE;
299
300 if (bpp != NULL) {
301 if (bp->b_blkno != fsbtodb(fs, bno))
302 panic("bad blockno");
303 allocbuf(bp, nsize, 1);
304 memset((char *)bp->b_data + osize, 0, nsize - osize);
305 mutex_enter(bp->b_objlock);
306 bp->b_oflags |= BO_DONE;
307 mutex_exit(bp->b_objlock);
308 *bpp = bp;
309 }
310 if (blknop != NULL) {
311 *blknop = bno;
312 }
313 return (0);
314 }
315 /*
316 * Allocate a new disk location.
317 */
318 if (bpref >= fs->fs_size)
319 bpref = 0;
320 switch ((int)fs->fs_optim) {
321 case FS_OPTSPACE:
322 /*
323 * Allocate an exact sized fragment. Although this makes
324 * best use of space, we will waste time relocating it if
325 * the file continues to grow. If the fragmentation is
326 * less than half of the minimum free reserve, we choose
327 * to begin optimizing for time.
328 */
329 request = nsize;
330 if (fs->fs_minfree < 5 ||
331 fs->fs_cstotal.cs_nffree >
332 fs->fs_dsize * fs->fs_minfree / (2 * 100))
333 break;
334
335 if (ffs_log_changeopt) {
336 log(LOG_NOTICE,
337 "%s: optimization changed from SPACE to TIME\n",
338 fs->fs_fsmnt);
339 }
340
341 fs->fs_optim = FS_OPTTIME;
342 break;
343 case FS_OPTTIME:
344 /*
345 * At this point we have discovered a file that is trying to
346 * grow a small fragment to a larger fragment. To save time,
347 * we allocate a full sized block, then free the unused portion.
348 * If the file continues to grow, the `ffs_fragextend' call
349 * above will be able to grow it in place without further
350 * copying. If aberrant programs cause disk fragmentation to
351 * grow within 2% of the free reserve, we choose to begin
352 * optimizing for space.
353 */
354 request = fs->fs_bsize;
355 if (fs->fs_cstotal.cs_nffree <
356 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
357 break;
358
359 if (ffs_log_changeopt) {
360 log(LOG_NOTICE,
361 "%s: optimization changed from TIME to SPACE\n",
362 fs->fs_fsmnt);
363 }
364
365 fs->fs_optim = FS_OPTSPACE;
366 break;
367 default:
368 printf("dev = 0x%x, optim = %d, fs = %s\n",
369 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
370 panic("ffs_realloccg: bad optim");
371 /* NOTREACHED */
372 }
373 bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
374 if (bno > 0) {
375 if (!DOINGSOFTDEP(ITOV(ip)))
376 ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
377 ip->i_number);
378 if (nsize < request)
379 ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize),
380 (long)(request - nsize), ip->i_number);
381 DIP_ADD(ip, blocks, btodb(nsize - osize));
382 ip->i_flag |= IN_CHANGE | IN_UPDATE;
383 if (bpp != NULL) {
384 bp->b_blkno = fsbtodb(fs, bno);
385 allocbuf(bp, nsize, 1);
386 memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
387 mutex_enter(bp->b_objlock);
388 bp->b_oflags |= BO_DONE;
389 mutex_exit(bp->b_objlock);
390 *bpp = bp;
391 }
392 if (blknop != NULL) {
393 *blknop = bno;
394 }
395 return (0);
396 }
397 mutex_exit(&ump->um_lock);
398
399 #ifdef QUOTA
400 /*
401 * Restore user's disk quota because allocation failed.
402 */
403 (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
404 #endif
405 if (bpp != NULL) {
406 brelse(bp, 0);
407 }
408
409 nospace:
410 /*
411 * no space available
412 */
413 ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
414 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
415 return (ENOSPC);
416 }
417
418 #if 0
419 /*
420 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
421 *
422 * The vnode and an array of buffer pointers for a range of sequential
423 * logical blocks to be made contiguous is given. The allocator attempts
424 * to find a range of sequential blocks starting as close as possible
425 * from the end of the allocation for the logical block immediately
426 * preceding the current range. If successful, the physical block numbers
427 * in the buffer pointers and in the inode are changed to reflect the new
428 * allocation. If unsuccessful, the allocation is left unchanged. The
429 * success in doing the reallocation is returned. Note that the error
430 * return is not reflected back to the user. Rather the previous block
431 * allocation will be used.
432
433 */
434 #ifdef XXXUBC
435 #ifdef DEBUG
436 #include <sys/sysctl.h>
437 int prtrealloc = 0;
438 struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
439 #endif
440 #endif
441
442 /*
443 * NOTE: when re-enabling this, it must be updated for UFS2.
444 */
445
446 int doasyncfree = 1;
447
448 int
449 ffs_reallocblks(void *v)
450 {
451 #ifdef XXXUBC
452 struct vop_reallocblks_args /* {
453 struct vnode *a_vp;
454 struct cluster_save *a_buflist;
455 } */ *ap = v;
456 struct fs *fs;
457 struct inode *ip;
458 struct vnode *vp;
459 struct buf *sbp, *ebp;
460 int32_t *bap, *ebap = NULL, *sbap; /* XXX ondisk32 */
461 struct cluster_save *buflist;
462 daddr_t start_lbn, end_lbn, soff, newblk, blkno;
463 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
464 int i, len, start_lvl, end_lvl, pref, ssize;
465 struct ufsmount *ump;
466 #endif /* XXXUBC */
467
468 /* XXXUBC don't reallocblks for now */
469 return ENOSPC;
470
471 #ifdef XXXUBC
472 vp = ap->a_vp;
473 ip = VTOI(vp);
474 fs = ip->i_fs;
475 ump = ip->i_ump;
476 if (fs->fs_contigsumsize <= 0)
477 return (ENOSPC);
478 buflist = ap->a_buflist;
479 len = buflist->bs_nchildren;
480 start_lbn = buflist->bs_children[0]->b_lblkno;
481 end_lbn = start_lbn + len - 1;
482 #ifdef DIAGNOSTIC
483 for (i = 0; i < len; i++)
484 if (!ffs_checkblk(ip,
485 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
486 panic("ffs_reallocblks: unallocated block 1");
487 for (i = 1; i < len; i++)
488 if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
489 panic("ffs_reallocblks: non-logical cluster");
490 blkno = buflist->bs_children[0]->b_blkno;
491 ssize = fsbtodb(fs, fs->fs_frag);
492 for (i = 1; i < len - 1; i++)
493 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
494 panic("ffs_reallocblks: non-physical cluster %d", i);
495 #endif
496 /*
497 * If the latest allocation is in a new cylinder group, assume that
498 * the filesystem has decided to move and do not force it back to
499 * the previous cylinder group.
500 */
501 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
502 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
503 return (ENOSPC);
504 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
505 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
506 return (ENOSPC);
507 /*
508 * Get the starting offset and block map for the first block.
509 */
510 if (start_lvl == 0) {
511 sbap = &ip->i_ffs1_db[0];
512 soff = start_lbn;
513 } else {
514 idp = &start_ap[start_lvl - 1];
515 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
516 brelse(sbp, 0);
517 return (ENOSPC);
518 }
519 sbap = (int32_t *)sbp->b_data;
520 soff = idp->in_off;
521 }
522 /*
523 * Find the preferred location for the cluster.
524 */
525 mutex_enter(&ump->um_lock);
526 pref = ffs_blkpref(ip, start_lbn, soff, sbap);
527 /*
528 * If the block range spans two block maps, get the second map.
529 */
530 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
531 ssize = len;
532 } else {
533 #ifdef DIAGNOSTIC
534 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
535 panic("ffs_reallocblk: start == end");
536 #endif
537 ssize = len - (idp->in_off + 1);
538 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
539 goto fail;
540 ebap = (int32_t *)ebp->b_data; /* XXX ondisk32 */
541 }
542 /*
543 * Search the block map looking for an allocation of the desired size.
544 */
545 if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
546 len, ffs_clusteralloc)) == 0) {
547 mutex_exit(&ump->um_lock);
548 goto fail;
549 }
550 /*
551 * We have found a new contiguous block.
552 *
553 * First we have to replace the old block pointers with the new
554 * block pointers in the inode and indirect blocks associated
555 * with the file.
556 */
557 #ifdef DEBUG
558 if (prtrealloc)
559 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
560 start_lbn, end_lbn);
561 #endif
562 blkno = newblk;
563 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
564 daddr_t ba;
565
566 if (i == ssize) {
567 bap = ebap;
568 soff = -i;
569 }
570 /* XXX ondisk32 */
571 ba = ufs_rw32(*bap, UFS_FSNEEDSWAP(fs));
572 #ifdef DIAGNOSTIC
573 if (!ffs_checkblk(ip,
574 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
575 panic("ffs_reallocblks: unallocated block 2");
576 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != ba)
577 panic("ffs_reallocblks: alloc mismatch");
578 #endif
579 #ifdef DEBUG
580 if (prtrealloc)
581 printf(" %d,", ba);
582 #endif
583 if (DOINGSOFTDEP(vp)) {
584 if (sbap == &ip->i_ffs1_db[0] && i < ssize)
585 softdep_setup_allocdirect(ip, start_lbn + i,
586 blkno, ba, fs->fs_bsize, fs->fs_bsize,
587 buflist->bs_children[i]);
588 else
589 softdep_setup_allocindir_page(ip, start_lbn + i,
590 i < ssize ? sbp : ebp, soff + i, blkno,
591 ba, buflist->bs_children[i]);
592 }
593 /* XXX ondisk32 */
594 *bap++ = ufs_rw32((u_int32_t)blkno, UFS_FSNEEDSWAP(fs));
595 }
596 /*
597 * Next we must write out the modified inode and indirect blocks.
598 * For strict correctness, the writes should be synchronous since
599 * the old block values may have been written to disk. In practice
600 * they are almost never written, but if we are concerned about
601 * strict correctness, the `doasyncfree' flag should be set to zero.
602 *
603 * The test on `doasyncfree' should be changed to test a flag
604 * that shows whether the associated buffers and inodes have
605 * been written. The flag should be set when the cluster is
606 * started and cleared whenever the buffer or inode is flushed.
607 * We can then check below to see if it is set, and do the
608 * synchronous write only when it has been cleared.
609 */
610 if (sbap != &ip->i_ffs1_db[0]) {
611 if (doasyncfree)
612 bdwrite(sbp);
613 else
614 bwrite(sbp);
615 } else {
616 ip->i_flag |= IN_CHANGE | IN_UPDATE;
617 if (!doasyncfree)
618 ffs_update(vp, NULL, NULL, 1);
619 }
620 if (ssize < len) {
621 if (doasyncfree)
622 bdwrite(ebp);
623 else
624 bwrite(ebp);
625 }
626 /*
627 * Last, free the old blocks and assign the new blocks to the buffers.
628 */
629 #ifdef DEBUG
630 if (prtrealloc)
631 printf("\n\tnew:");
632 #endif
633 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
634 if (!DOINGSOFTDEP(vp))
635 ffs_blkfree(fs, ip->i_devvp,
636 dbtofsb(fs, buflist->bs_children[i]->b_blkno),
637 fs->fs_bsize, ip->i_number);
638 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
639 #ifdef DEBUG
640 if (!ffs_checkblk(ip,
641 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
642 panic("ffs_reallocblks: unallocated block 3");
643 if (prtrealloc)
644 printf(" %d,", blkno);
645 #endif
646 }
647 #ifdef DEBUG
648 if (prtrealloc) {
649 prtrealloc--;
650 printf("\n");
651 }
652 #endif
653 return (0);
654
655 fail:
656 if (ssize < len)
657 brelse(ebp, 0);
658 if (sbap != &ip->i_ffs1_db[0])
659 brelse(sbp, 0);
660 return (ENOSPC);
661 #endif /* XXXUBC */
662 }
663 #endif /* 0 */
664
665 /*
666 * Allocate an inode in the file system.
667 *
668 * If allocating a directory, use ffs_dirpref to select the inode.
669 * If allocating in a directory, the following hierarchy is followed:
670 * 1) allocate the preferred inode.
671 * 2) allocate an inode in the same cylinder group.
672 * 3) quadratically rehash into other cylinder groups, until an
673 * available inode is located.
674 * If no inode preference is given the following hierarchy is used
675 * to allocate an inode:
676 * 1) allocate an inode in cylinder group 0.
677 * 2) quadratically rehash into other cylinder groups, until an
678 * available inode is located.
679 *
680 * => um_lock not held upon entry or return
681 */
682 int
683 ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
684 struct vnode **vpp)
685 {
686 struct ufsmount *ump;
687 struct inode *pip;
688 struct fs *fs;
689 struct inode *ip;
690 struct timespec ts;
691 ino_t ino, ipref;
692 int cg, error;
693
694 *vpp = NULL;
695 pip = VTOI(pvp);
696 fs = pip->i_fs;
697 ump = pip->i_ump;
698
699 mutex_enter(&ump->um_lock);
700 if (fs->fs_cstotal.cs_nifree == 0)
701 goto noinodes;
702
703 if ((mode & IFMT) == IFDIR)
704 ipref = ffs_dirpref(pip);
705 else
706 ipref = pip->i_number;
707 if (ipref >= fs->fs_ncg * fs->fs_ipg)
708 ipref = 0;
709 cg = ino_to_cg(fs, ipref);
710 /*
711 * Track the number of dirs created one after another
712 * in the same cg without intervening allocation of files.
713 */
714 if ((mode & IFMT) == IFDIR) {
715 if (fs->fs_contigdirs[cg] < 255)
716 fs->fs_contigdirs[cg]++;
717 } else {
718 if (fs->fs_contigdirs[cg] > 0)
719 fs->fs_contigdirs[cg]--;
720 }
721 ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
722 if (ino == 0)
723 goto noinodes;
724 error = VFS_VGET(pvp->v_mount, ino, vpp);
725 if (error) {
726 ffs_vfree(pvp, ino, mode);
727 return (error);
728 }
729 KASSERT((*vpp)->v_type == VNON);
730 ip = VTOI(*vpp);
731 if (ip->i_mode) {
732 #if 0
733 printf("mode = 0%o, inum = %d, fs = %s\n",
734 ip->i_mode, ip->i_number, fs->fs_fsmnt);
735 #else
736 printf("dmode %x mode %x dgen %x gen %x\n",
737 DIP(ip, mode), ip->i_mode,
738 DIP(ip, gen), ip->i_gen);
739 printf("size %llx blocks %llx\n",
740 (long long)DIP(ip, size), (long long)DIP(ip, blocks));
741 printf("ino %llu ipref %llu\n", (unsigned long long)ino,
742 (unsigned long long)ipref);
743 #if 0
744 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
745 (int)fs->fs_bsize, NOCRED, &bp);
746 #endif
747
748 #endif
749 panic("ffs_valloc: dup alloc");
750 }
751 if (DIP(ip, blocks)) { /* XXX */
752 printf("free inode %s/%llu had %" PRId64 " blocks\n",
753 fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks));
754 DIP_ASSIGN(ip, blocks, 0);
755 }
756 ip->i_flag &= ~IN_SPACECOUNTED;
757 ip->i_flags = 0;
758 DIP_ASSIGN(ip, flags, 0);
759 /*
760 * Set up a new generation number for this inode.
761 */
762 ip->i_gen++;
763 DIP_ASSIGN(ip, gen, ip->i_gen);
764 if (fs->fs_magic == FS_UFS2_MAGIC) {
765 vfs_timestamp(&ts);
766 ip->i_ffs2_birthtime = ts.tv_sec;
767 ip->i_ffs2_birthnsec = ts.tv_nsec;
768 }
769 return (0);
770 noinodes:
771 mutex_exit(&ump->um_lock);
772 ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
773 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
774 return (ENOSPC);
775 }
776
777 /*
778 * Find a cylinder group in which to place a directory.
779 *
780 * The policy implemented by this algorithm is to allocate a
781 * directory inode in the same cylinder group as its parent
782 * directory, but also to reserve space for its files' inodes
783 * and data. Restrict the number of directories which may be
784 * allocated one after another in the same cylinder group
785 * without intervening allocation of files.
786 *
787 * If we allocate a first level directory then force allocation
788 * in another cylinder group.
789 */
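/*
 * Worked example of the limits computed below (values assumed, typical
 * newfs defaults): with fs_avgfilesize = 16384 and fs_avgfpdir = 64,
 *
 *	dirsize = 16384 * 64 = 1 MB
 *
 * so a 47 MB cylinder group yields maxcontigdirs = 47, further capped
 * by fs_ipg / fs_avgfpdir so that a run of new directories cannot
 * exhaust a group's inodes before files are created in them.
 */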
790 static ino_t
791 ffs_dirpref(struct inode *pip)
792 {
793 register struct fs *fs;
794 int cg, prefcg;
795 int64_t dirsize, cgsize, curdsz;
796 int avgifree, avgbfree, avgndir;
797 int minifree, minbfree, maxndir;
798 int mincg, minndir;
799 int maxcontigdirs;
800
801 KASSERT(mutex_owned(&pip->i_ump->um_lock));
802
803 fs = pip->i_fs;
804
805 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
806 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
807 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
808
809 /*
810 * Force allocation in another cg if creating a first level dir.
811 */
812 if (ITOV(pip)->v_vflag & VV_ROOT) {
813 prefcg = random() % fs->fs_ncg;
814 mincg = prefcg;
815 minndir = fs->fs_ipg;
816 for (cg = prefcg; cg < fs->fs_ncg; cg++)
817 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
818 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
819 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
820 mincg = cg;
821 minndir = fs->fs_cs(fs, cg).cs_ndir;
822 }
823 for (cg = 0; cg < prefcg; cg++)
824 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
825 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
826 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
827 mincg = cg;
828 minndir = fs->fs_cs(fs, cg).cs_ndir;
829 }
830 return ((ino_t)(fs->fs_ipg * mincg));
831 }
832
833 /*
834 * Compute various limits used for
835 * optimal allocation of a directory inode.
836 */
837 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
838 minifree = avgifree - fs->fs_ipg / 4;
839 if (minifree < 0)
840 minifree = 0;
841 minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4;
842 if (minbfree < 0)
843 minbfree = 0;
844 cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
845 dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
846 if (avgndir != 0) {
847 curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
848 if (dirsize < curdsz)
849 dirsize = curdsz;
850 }
851 if (cgsize < dirsize * 255)
852 maxcontigdirs = cgsize / dirsize;
853 else
854 maxcontigdirs = 255;
855 if (fs->fs_avgfpdir > 0)
856 maxcontigdirs = min(maxcontigdirs,
857 fs->fs_ipg / fs->fs_avgfpdir);
858 if (maxcontigdirs == 0)
859 maxcontigdirs = 1;
860
861 /*
862 * Limit number of dirs in one cg and reserve space for
863 * regular files, but only if we have no deficit in
864 * inodes or space.
865 */
866 prefcg = ino_to_cg(fs, pip->i_number);
867 for (cg = prefcg; cg < fs->fs_ncg; cg++)
868 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
869 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
870 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
871 if (fs->fs_contigdirs[cg] < maxcontigdirs)
872 return ((ino_t)(fs->fs_ipg * cg));
873 }
874 for (cg = 0; cg < prefcg; cg++)
875 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
876 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
877 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
878 if (fs->fs_contigdirs[cg] < maxcontigdirs)
879 return ((ino_t)(fs->fs_ipg * cg));
880 }
881 /*
882 * This is a backstop when we are deficient in space.
883 */
884 for (cg = prefcg; cg < fs->fs_ncg; cg++)
885 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
886 return ((ino_t)(fs->fs_ipg * cg));
887 for (cg = 0; cg < prefcg; cg++)
888 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
889 break;
890 return ((ino_t)(fs->fs_ipg * cg));
891 }
892
893 /*
894 * Select the desired position for the next block in a file. The file is
895 * logically divided into sections. The first section is composed of the
896 * direct blocks. Each additional section contains fs_maxbpg blocks.
897 *
898 * If no blocks have been allocated in the first section, the policy is to
899 * request a block in the same cylinder group as the inode that describes
900 * the file. If no blocks have been allocated in any other section, the
901 * policy is to place the section in a cylinder group with a greater than
902 * average number of free blocks. An appropriate cylinder group is found
903 * by using a rotor that sweeps the cylinder groups. When a new group of
904 * blocks is needed, the sweep begins in the cylinder group following the
905 * cylinder group from which the previous allocation was made. The sweep
906 * continues until a cylinder group with greater than the average number
907 * of free blocks is found. If the allocation is for the first block in an
908 * indirect block, the information on the previous allocation is unavailable;
909 * here a best guess is made based upon the logical block number being
910 * allocated.
911 *
912 * If a section is already partially allocated, the policy is to
913 * contiguously allocate fs_maxcontig blocks. The end of one of these
914 * contiguous blocks and the beginning of the next is laid out
915 * contiguously if possible.
916 *
917 * => um_lock held on entry and exit
918 */
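/*
 * A minimal sketch of the two cases handled below (illustrative only):
 * inside a section the preference is simply the block following the
 * previous allocation,
 *
 *	pref = bap[indx - 1] + fs->fs_frag;	(in fragments)
 *
 * while at a section boundary with no previous block to follow, the
 * sweep starts at
 *
 *	startcg = ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg
 *
 * (modulo fs_ncg) and picks the first group with at least the average
 * number of free blocks.
 */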
919 daddr_t
920 ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx,
921 int32_t *bap /* XXX ondisk32 */)
922 {
923 struct fs *fs;
924 int cg;
925 int avgbfree, startcg;
926
927 KASSERT(mutex_owned(&ip->i_ump->um_lock));
928
929 fs = ip->i_fs;
930 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
931 if (lbn < NDADDR + NINDIR(fs)) {
932 cg = ino_to_cg(fs, ip->i_number);
933 return (fs->fs_fpg * cg + fs->fs_frag);
934 }
935 /*
936 * Find a cylinder with greater than average number of
937 * unused data blocks.
938 */
939 if (indx == 0 || bap[indx - 1] == 0)
940 startcg =
941 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
942 else
943 startcg = dtog(fs,
944 ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
945 startcg %= fs->fs_ncg;
946 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
947 for (cg = startcg; cg < fs->fs_ncg; cg++)
948 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
949 return (fs->fs_fpg * cg + fs->fs_frag);
950 }
951 for (cg = 0; cg < startcg; cg++)
952 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
953 return (fs->fs_fpg * cg + fs->fs_frag);
954 }
955 return (0);
956 }
957 /*
958 * We just always try to lay things out contiguously.
959 */
960 return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
961 }
962
963 daddr_t
964 ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
965 {
966 struct fs *fs;
967 int cg;
968 int avgbfree, startcg;
969
970 KASSERT(mutex_owned(&ip->i_ump->um_lock));
971
972 fs = ip->i_fs;
973 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
974 if (lbn < NDADDR + NINDIR(fs)) {
975 cg = ino_to_cg(fs, ip->i_number);
976 return (fs->fs_fpg * cg + fs->fs_frag);
977 }
978 /*
979 * Find a cylinder with greater than average number of
980 * unused data blocks.
981 */
982 if (indx == 0 || bap[indx - 1] == 0)
983 startcg =
984 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
985 else
986 startcg = dtog(fs,
987 ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
988 startcg %= fs->fs_ncg;
989 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
990 for (cg = startcg; cg < fs->fs_ncg; cg++)
991 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
992 return (fs->fs_fpg * cg + fs->fs_frag);
993 }
994 for (cg = 0; cg < startcg; cg++)
995 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
996 return (fs->fs_fpg * cg + fs->fs_frag);
997 }
998 return (0);
999 }
1000 /*
1001 * We just always try to lay things out contiguously.
1002 */
1003 return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
1004 }
1005
1006
1007 /*
1008 * Implement the cylinder overflow algorithm.
1009 *
1010 * The policy implemented by this algorithm is:
1011 * 1) allocate the block in its requested cylinder group.
1012 * 2) quadratically rehash on the cylinder group number.
1013 * 3) brute force search for a free block.
1014 *
1015 * => called with um_lock held
1016 * => returns with um_lock released on success, held on failure
1017 * (*allocator releases lock on success, retains lock on failure)
1018 */
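/*
 * Illustrative probe order (values assumed): for fs_ncg = 8 and a
 * preferred group of 3, the loops below try
 *
 *	pass 1:               cg 3
 *	pass 2 (i = 1,2,4):   cg 4, 6, 2
 *	pass 3 (brute force): cg 5, 6, 7, 0, 1, 2
 *
 * so in this example every cylinder group is visited at least once
 * before the allocation is given up as failed.
 */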
1019 /*VARARGS5*/
1020 static daddr_t
1021 ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
1022 int size /* size for data blocks, mode for inodes */,
1023 daddr_t (*allocator)(struct inode *, int, daddr_t, int))
1024 {
1025 struct fs *fs;
1026 daddr_t result;
1027 int i, icg = cg;
1028
1029 fs = ip->i_fs;
1030 /*
1031 * 1: preferred cylinder group
1032 */
1033 result = (*allocator)(ip, cg, pref, size);
1034 if (result)
1035 return (result);
1036 /*
1037 * 2: quadratic rehash
1038 */
1039 for (i = 1; i < fs->fs_ncg; i *= 2) {
1040 cg += i;
1041 if (cg >= fs->fs_ncg)
1042 cg -= fs->fs_ncg;
1043 result = (*allocator)(ip, cg, 0, size);
1044 if (result)
1045 return (result);
1046 }
1047 /*
1048 * 3: brute force search
1049 * Note that we start at i == 2, since 0 was checked initially,
1050 * and 1 is always checked in the quadratic rehash.
1051 */
1052 cg = (icg + 2) % fs->fs_ncg;
1053 for (i = 2; i < fs->fs_ncg; i++) {
1054 result = (*allocator)(ip, cg, 0, size);
1055 if (result)
1056 return (result);
1057 cg++;
1058 if (cg == fs->fs_ncg)
1059 cg = 0;
1060 }
1061 return (0);
1062 }
1063
1064 /*
1065 * Determine whether a fragment can be extended.
1066 *
1067 * Check to see if the necessary fragments are available, and
1068 * if they are, allocate them.
1069 *
1070 * => called with um_lock held
1071 * => returns with um_lock released on success, held on failure
1072 */
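/*
 * Illustrative example (assumed geometry): with fs_frag = 8, a
 * fragment starting at offset 6 within its block (bbase = 6) can be
 * extended to at most 2 fragments; for any larger request
 *
 *	fragnum(fs, bprev + frags - 1)
 *
 * wraps into the next block, the "bbase > fragnum(...)" test below
 * fires, and the extension is refused rather than allowed to span a
 * block boundary.
 */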
1073 static daddr_t
1074 ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
1075 {
1076 struct ufsmount *ump;
1077 struct fs *fs;
1078 struct cg *cgp;
1079 struct buf *bp;
1080 daddr_t bno;
1081 int frags, bbase;
1082 int i, error;
1083 u_int8_t *blksfree;
1084
1085 fs = ip->i_fs;
1086 ump = ip->i_ump;
1087
1088 KASSERT(mutex_owned(&ump->um_lock));
1089
1090 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
1091 return (0);
1092 frags = numfrags(fs, nsize);
1093 bbase = fragnum(fs, bprev);
1094 if (bbase > fragnum(fs, (bprev + frags - 1))) {
1095 /* cannot extend across a block boundary */
1096 return (0);
1097 }
1098 mutex_exit(&ump->um_lock);
1099 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1100 (int)fs->fs_cgsize, NOCRED, &bp);
1101 if (error)
1102 goto fail;
1103 cgp = (struct cg *)bp->b_data;
1104 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1105 goto fail;
1106 cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
1107 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1108 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1109 cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
1110 bno = dtogd(fs, bprev);
1111 blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
1112 for (i = numfrags(fs, osize); i < frags; i++)
1113 if (isclr(blksfree, bno + i))
1114 goto fail;
1115 /*
1116 * the current fragment can be extended
1117 * deduct the count on fragment being extended into
1118 * increase the count on the remaining fragment (if any)
1119 * allocate the extended piece
1120 */
1121 for (i = frags; i < fs->fs_frag - bbase; i++)
1122 if (isclr(blksfree, bno + i))
1123 break;
1124 ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
1125 if (i != frags)
1126 ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
1127 mutex_enter(&ump->um_lock);
1128 for (i = numfrags(fs, osize); i < frags; i++) {
1129 clrbit(blksfree, bno + i);
1130 ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
1131 fs->fs_cstotal.cs_nffree--;
1132 fs->fs_cs(fs, cg).cs_nffree--;
1133 }
1134 fs->fs_fmod = 1;
1135 ACTIVECG_CLR(fs, cg);
1136 mutex_exit(&ump->um_lock);
1137 if (DOINGSOFTDEP(ITOV(ip)))
1138 softdep_setup_blkmapdep(bp, fs, bprev);
1139 bdwrite(bp);
1140 return (bprev);
1141
1142 fail:
1143 brelse(bp, 0);
1144 mutex_enter(&ump->um_lock);
1145 return (0);
1146 }
1147
1148 /*
1149 * Determine whether a block can be allocated.
1150 *
1151 * Check to see if a block of the appropriate size is available,
1152 * and if it is, allocate it.
1153 */
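/*
 * Illustrative example (assumed geometry): with fs_frag = 8, a request
 * for 3 fragments in a group whose cg_frsum[] shows no free run of
 * 3..7 fragments falls into the "hack up a block" case below: a full
 * block is taken with ffs_alloccgblk(), its trailing 5 fragments are
 * marked free again, cs_nffree is credited with 5 and cg_frsum[5]
 * gains one entry.
 */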
1154 static daddr_t
1155 ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
1156 {
1157 struct ufsmount *ump;
1158 struct fs *fs = ip->i_fs;
1159 struct cg *cgp;
1160 struct buf *bp;
1161 int32_t bno;
1162 daddr_t blkno;
1163 int error, frags, allocsiz, i;
1164 u_int8_t *blksfree;
1165 #ifdef FFS_EI
1166 const int needswap = UFS_FSNEEDSWAP(fs);
1167 #endif
1168
1169 ump = ip->i_ump;
1170
1171 KASSERT(mutex_owned(&ump->um_lock));
1172
1173 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
1174 return (0);
1175 mutex_exit(&ump->um_lock);
1176 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1177 (int)fs->fs_cgsize, NOCRED, &bp);
1178 if (error)
1179 goto fail;
1180 cgp = (struct cg *)bp->b_data;
1181 if (!cg_chkmagic(cgp, needswap) ||
1182 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
1183 goto fail;
1184 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1185 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1186 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1187 cgp->cg_time = ufs_rw64(time_second, needswap);
1188 if (size == fs->fs_bsize) {
1189 mutex_enter(&ump->um_lock);
1190 blkno = ffs_alloccgblk(ip, bp, bpref);
1191 ACTIVECG_CLR(fs, cg);
1192 mutex_exit(&ump->um_lock);
1193 bdwrite(bp);
1194 return (blkno);
1195 }
1196 /*
1197 * check to see if any fragments are already available
1198 * allocsiz is the size which will be allocated, hacking
1199 * it down to a smaller size if necessary
1200 */
1201 blksfree = cg_blksfree(cgp, needswap);
1202 frags = numfrags(fs, size);
1203 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
1204 if (cgp->cg_frsum[allocsiz] != 0)
1205 break;
1206 if (allocsiz == fs->fs_frag) {
1207 /*
1208 * no fragments were available, so a block will be
1209 * allocated, and hacked up
1210 */
1211 if (cgp->cg_cs.cs_nbfree == 0)
1212 goto fail;
1213 mutex_enter(&ump->um_lock);
1214 blkno = ffs_alloccgblk(ip, bp, bpref);
1215 bno = dtogd(fs, blkno);
1216 for (i = frags; i < fs->fs_frag; i++)
1217 setbit(blksfree, bno + i);
1218 i = fs->fs_frag - frags;
1219 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1220 fs->fs_cstotal.cs_nffree += i;
1221 fs->fs_cs(fs, cg).cs_nffree += i;
1222 fs->fs_fmod = 1;
1223 ufs_add32(cgp->cg_frsum[i], 1, needswap);
1224 ACTIVECG_CLR(fs, cg);
1225 mutex_exit(&ump->um_lock);
1226 bdwrite(bp);
1227 return (blkno);
1228 }
1229 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
1230 #if 0
1231 /*
1232 * XXX fvdl mapsearch will panic, and never return -1
1233 * also: returning NULL as daddr_t ?
1234 */
1235 if (bno < 0)
1236 goto fail;
1237 #endif
1238 for (i = 0; i < frags; i++)
1239 clrbit(blksfree, bno + i);
1240 mutex_enter(&ump->um_lock);
1241 ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
1242 fs->fs_cstotal.cs_nffree -= frags;
1243 fs->fs_cs(fs, cg).cs_nffree -= frags;
1244 fs->fs_fmod = 1;
1245 ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
1246 if (frags != allocsiz)
1247 ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
1248 blkno = cg * fs->fs_fpg + bno;
1249 ACTIVECG_CLR(fs, cg);
1250 mutex_exit(&ump->um_lock);
1251 if (DOINGSOFTDEP(ITOV(ip)))
1252 softdep_setup_blkmapdep(bp, fs, blkno);
1253 bdwrite(bp);
1254 return blkno;
1255
1256 fail:
1257 brelse(bp, 0);
1258 mutex_enter(&ump->um_lock);
1259 return (0);
1260 }
1261
1262 /*
1263 * Allocate a block in a cylinder group.
1264 *
1265 * This algorithm implements the following policy:
1266 * 1) allocate the requested block.
1267 * 2) allocate a rotationally optimal block in the same cylinder.
1268 * 3) allocate the next available block on the block rotor for the
1269 * specified cylinder group.
1270 * Note that this routine only allocates fs_bsize blocks; these
1271 * blocks may be fragmented by the routine that allocates them.
1272 */
1273 static daddr_t
1274 ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref)
1275 {
1276 struct ufsmount *ump;
1277 struct fs *fs = ip->i_fs;
1278 struct cg *cgp;
1279 daddr_t blkno;
1280 int32_t bno;
1281 u_int8_t *blksfree;
1282 #ifdef FFS_EI
1283 const int needswap = UFS_FSNEEDSWAP(fs);
1284 #endif
1285
1286 ump = ip->i_ump;
1287
1288 KASSERT(mutex_owned(&ump->um_lock));
1289
1290 cgp = (struct cg *)bp->b_data;
1291 blksfree = cg_blksfree(cgp, needswap);
1292 if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
1293 bpref = ufs_rw32(cgp->cg_rotor, needswap);
1294 } else {
1295 bpref = blknum(fs, bpref);
1296 bno = dtogd(fs, bpref);
1297 /*
1298 * if the requested block is available, use it
1299 */
1300 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
1301 goto gotit;
1302 }
1303 /*
1304 * Take the next available block in this cylinder group.
1305 */
1306 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1307 if (bno < 0)
1308 return (0);
1309 cgp->cg_rotor = ufs_rw32(bno, needswap);
1310 gotit:
1311 blkno = fragstoblks(fs, bno);
1312 ffs_clrblock(fs, blksfree, blkno);
1313 ffs_clusteracct(fs, cgp, blkno, -1);
1314 ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
1315 fs->fs_cstotal.cs_nbfree--;
1316 fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
1317 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1318 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1319 int cylno;
1320 cylno = old_cbtocylno(fs, bno);
1321 KASSERT(cylno >= 0);
1322 KASSERT(cylno < fs->fs_old_ncyl);
1323 KASSERT(old_cbtorpos(fs, bno) >= 0);
1324 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
1325 ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
1326 needswap);
1327 ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
1328 }
1329 fs->fs_fmod = 1;
1330 blkno = ufs_rw32(cgp->cg_cgx, needswap) * fs->fs_fpg + bno;
1331 if (DOINGSOFTDEP(ITOV(ip))) {
1332 mutex_exit(&ump->um_lock);
1333 softdep_setup_blkmapdep(bp, fs, blkno);
1334 mutex_enter(&ump->um_lock);
1335 }
1336 return (blkno);
1337 }
1338
1339 #ifdef XXXUBC
1340 /*
1341 * Determine whether a cluster can be allocated.
1342 *
1343 * We do not currently check for optimal rotational layout if there
1344 * are multiple choices in the same cylinder group. Instead we just
1345 * take the first one that we find following bpref.
1346 */
1347
1348 /*
1349 * This function must be fixed for UFS2 if re-enabled.
1350 */
1351 static daddr_t
1352 ffs_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
1353 {
1354 struct ufsmount *ump;
1355 struct fs *fs;
1356 struct cg *cgp;
1357 struct buf *bp;
1358 int i, got, run, bno, bit, map;
1359 u_char *mapp;
1360 int32_t *lp;
1361
1362 fs = ip->i_fs;
1363 ump = ip->i_ump;
1364
1365 KASSERT(mutex_owned(&ump->um_lock));
1366 if (fs->fs_maxcluster[cg] < len)
1367 return (0);
1368 mutex_exit(&ump->um_lock);
1369 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1370 NOCRED, &bp))
1371 goto fail;
1372 cgp = (struct cg *)bp->b_data;
1373 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
1374 goto fail;
1375 /*
1376 * Check to see if a cluster of the needed size (or bigger) is
1377 * available in this cylinder group.
1378 */
1379 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len];
1380 for (i = len; i <= fs->fs_contigsumsize; i++)
1381 if (ufs_rw32(*lp++, UFS_FSNEEDSWAP(fs)) > 0)
1382 break;
1383 if (i > fs->fs_contigsumsize) {
1384 /*
1385 * This is the first time looking for a cluster in this
1386 * cylinder group. Update the cluster summary information
1387 * to reflect the true maximum sized cluster so that
1388 * future cluster allocation requests can avoid reading
1389 * the cylinder group map only to find no clusters.
1390 */
1391 lp = &cg_clustersum(cgp, UFS_FSNEEDSWAP(fs))[len - 1];
1392 for (i = len - 1; i > 0; i--)
1393 if (ufs_rw32(*lp--, UFS_FSNEEDSWAP(fs)) > 0)
1394 break;
1395 mutex_enter(&ump->um_lock);
1396 fs->fs_maxcluster[cg] = i;
1397 mutex_exit(&ump->um_lock);
1398 goto fail;
1399 }
1400 /*
1401 * Search the cluster map to find a big enough cluster.
1402 * We take the first one that we find, even if it is larger
1403 * than we need as we prefer to get one close to the previous
1404 * block allocation. We do not search before the current
1405 * preference point as we do not want to allocate a block
1406 * that is allocated before the previous one (as we will
1407 * then have to wait for another pass of the elevator
1408 * algorithm before it will be read). We prefer to fail and
1409 * be recalled to try an allocation in the next cylinder group.
1410 */
1411 if (dtog(fs, bpref) != cg)
1412 bpref = 0;
1413 else
1414 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1415 mapp = &cg_clustersfree(cgp, UFS_FSNEEDSWAP(fs))[bpref / NBBY];
1416 map = *mapp++;
1417 bit = 1 << (bpref % NBBY);
1418 for (run = 0, got = bpref;
1419 got < ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)); got++) {
1420 if ((map & bit) == 0) {
1421 run = 0;
1422 } else {
1423 run++;
1424 if (run == len)
1425 break;
1426 }
1427 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1428 bit <<= 1;
1429 } else {
1430 map = *mapp++;
1431 bit = 1;
1432 }
1433 }
1434 if (got == ufs_rw32(cgp->cg_nclusterblks, UFS_FSNEEDSWAP(fs)))
1435 goto fail;
1436 /*
1437 * Allocate the cluster that we have found.
1438 */
1439 #ifdef DIAGNOSTIC
1440 for (i = 1; i <= len; i++)
1441 if (!ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1442 got - run + i))
1443 panic("ffs_clusteralloc: map mismatch");
1444 #endif
1445 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1446 if (dtog(fs, bno) != cg)
1447 panic("ffs_clusteralloc: allocated out of group");
1448 len = blkstofrags(fs, len);
1449 mutex_enter(&ump->um_lock);
1450 for (i = 0; i < len; i += fs->fs_frag)
1451 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
1452 panic("ffs_clusteralloc: lost block");
1453 ACTIVECG_CLR(fs, cg);
1454 mutex_exit(&ump->um_lock);
1455 bdwrite(bp);
1456 return (bno);
1457
1458 fail:
1459 brelse(bp, 0);
1460 mutex_enter(&ump->um_lock);
1461 return (0);
1462 }
1463 #endif /* XXXUBC */
1464
1465 /*
1466 * Determine whether an inode can be allocated.
1467 *
1468 * Check to see if an inode is available, and if it is,
1469 * allocate it using the following policy:
1470 * 1) allocate the requested inode.
1471 * 2) allocate the next available inode after the requested
1472 * inode in the specified cylinder group.
1473 */
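/*
 * A minimal sketch of the bitmap scan below (descriptive only):
 * skpc(0xff, len, map) skips bytes in which every inode is allocated
 * and returns how many bytes were left unscanned, so
 *
 *	i = start + len - loc;
 *
 * indexes the first byte containing a clear bit; the inner loop then
 * walks that byte bit by bit to locate the free inode and advance the
 * rotor.
 */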
1474 static daddr_t
1475 ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1476 {
1477 struct ufsmount *ump = ip->i_ump;
1478 struct fs *fs = ip->i_fs;
1479 struct cg *cgp;
1480 struct buf *bp, *ibp;
1481 u_int8_t *inosused;
1482 int error, start, len, loc, map, i;
1483 int32_t initediblk;
1484 struct ufs2_dinode *dp2;
1485 #ifdef FFS_EI
1486 const int needswap = UFS_FSNEEDSWAP(fs);
1487 #endif
1488
1489 KASSERT(mutex_owned(&ump->um_lock));
1490
1491 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1492 return (0);
1493 mutex_exit(&ump->um_lock);
1494 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1495 (int)fs->fs_cgsize, NOCRED, &bp);
1496 if (error)
1497 goto fail;
1498 cgp = (struct cg *)bp->b_data;
1499 if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
1500 goto fail;
1501 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1502 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1503 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1504 cgp->cg_time = ufs_rw64(time_second, needswap);
1505 inosused = cg_inosused(cgp, needswap);
1506 if (ipref) {
1507 ipref %= fs->fs_ipg;
1508 if (isclr(inosused, ipref))
1509 goto gotit;
1510 }
1511 start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
1512 len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
1513 NBBY);
1514 loc = skpc(0xff, len, &inosused[start]);
1515 if (loc == 0) {
1516 len = start + 1;
1517 start = 0;
1518 loc = skpc(0xff, len, &inosused[0]);
1519 if (loc == 0) {
1520 printf("cg = %d, irotor = %d, fs = %s\n",
1521 cg, ufs_rw32(cgp->cg_irotor, needswap),
1522 fs->fs_fsmnt);
1523 panic("ffs_nodealloccg: map corrupted");
1524 /* NOTREACHED */
1525 }
1526 }
1527 i = start + len - loc;
1528 map = inosused[i];
1529 ipref = i * NBBY;
1530 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1531 if ((map & i) == 0) {
1532 cgp->cg_irotor = ufs_rw32(ipref, needswap);
1533 goto gotit;
1534 }
1535 }
1536 printf("fs = %s\n", fs->fs_fsmnt);
1537 panic("ffs_nodealloccg: block not in map");
1538 /* NOTREACHED */
1539 gotit:
1540 /*
1541 * Check to see if we need to initialize more inodes.
1542 */
1543 initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
1544 ibp = NULL;
1545 if (fs->fs_magic == FS_UFS2_MAGIC &&
1546 ipref + INOPB(fs) > initediblk &&
1547 initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
1548 ibp = getblk(ip->i_devvp, fsbtodb(fs,
1549 ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
1550 (int)fs->fs_bsize, 0, 0);
1551 memset(ibp->b_data, 0, fs->fs_bsize);
1552 dp2 = (struct ufs2_dinode *)(ibp->b_data);
1553 for (i = 0; i < INOPB(fs); i++) {
1554 /*
1555 * Don't bother to swap, it's supposed to be
1556 * random, after all.
1557 */
1558 dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
1559 dp2++;
1560 }
1561 initediblk += INOPB(fs);
1562 cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
1563 }
1564
1565 mutex_enter(&ump->um_lock);
1566 ACTIVECG_CLR(fs, cg);
1567 setbit(inosused, ipref);
1568 ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
1569 fs->fs_cstotal.cs_nifree--;
1570 fs->fs_cs(fs, cg).cs_nifree--;
1571 fs->fs_fmod = 1;
1572 if ((mode & IFMT) == IFDIR) {
1573 ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
1574 fs->fs_cstotal.cs_ndir++;
1575 fs->fs_cs(fs, cg).cs_ndir++;
1576 }
1577 mutex_exit(&ump->um_lock);
1578 if (DOINGSOFTDEP(ITOV(ip)))
1579 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
1580 bdwrite(bp);
1581 if (ibp != NULL)
1582 bawrite(ibp);
1583 return (cg * fs->fs_ipg + ipref);
1584 fail:
1585 brelse(bp, 0);
1586 mutex_enter(&ump->um_lock);
1587 return (0);
1588 }
1589
1590 /*
1591 * Free a block or fragment.
1592 *
1593 * The specified block or fragment is placed back in the
1594 * free map. If a fragment is deallocated, a possible
1595 * block reassembly is checked.
1596 *
1597 * => um_lock not held on entry or exit
1598 */
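/*
 * Illustrative example (assumed geometry): with fs_frag = 8, freeing
 * the last 3-fragment piece of a block whose other 5 fragments are
 * already free triggers the reassembly case below: the per-size
 * fragment counts are rolled back, cs_nffree is reduced by a full
 * block's worth of fragments, cs_nbfree is incremented and the
 * cluster map is credited with one new free block.
 */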
1599 void
1600 ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
1601 ino_t inum)
1602 {
1603 struct cg *cgp;
1604 struct buf *bp;
1605 struct ufsmount *ump;
1606 int32_t fragno, cgbno;
1607 daddr_t cgblkno;
1608 int i, error, cg, blk, frags, bbase;
1609 u_int8_t *blksfree;
1610 dev_t dev;
1611 const int needswap = UFS_FSNEEDSWAP(fs);
1612
1613 cg = dtog(fs, bno);
1614 if (devvp->v_type != VBLK) {
1615 /* devvp is a snapshot */
1616 dev = VTOI(devvp)->i_devvp->v_rdev;
1617 ump = VFSTOUFS(devvp->v_mount);
1618 cgblkno = fragstoblks(fs, cgtod(fs, cg));
1619 } else {
1620 dev = devvp->v_rdev;
1621 ump = VFSTOUFS(devvp->v_specmountpoint);
1622 cgblkno = fsbtodb(fs, cgtod(fs, cg));
1623 if (ffs_snapblkfree(fs, devvp, bno, size, inum))
1624 return;
1625 }
1626 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
1627 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
1628 printf("dev = 0x%x, bno = %" PRId64 " bsize = %d, "
1629 "size = %ld, fs = %s\n",
1630 dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
1631 panic("blkfree: bad size");
1632 }
1633
1634 if (bno >= fs->fs_size) {
1635 printf("bad block %" PRId64 ", ino %llu\n", bno,
1636 (unsigned long long)inum);
1637 ffs_fserr(fs, inum, "bad block");
1638 return;
1639 }
1640 error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp);
1641 if (error) {
1642 brelse(bp, 0);
1643 return;
1644 }
1645 cgp = (struct cg *)bp->b_data;
1646 if (!cg_chkmagic(cgp, needswap)) {
1647 brelse(bp, 0);
1648 return;
1649 }
1650 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1651 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1652 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1653 cgp->cg_time = ufs_rw64(time_second, needswap);
1654 cgbno = dtogd(fs, bno);
1655 blksfree = cg_blksfree(cgp, needswap);
1656 mutex_enter(&ump->um_lock);
1657 if (size == fs->fs_bsize) {
1658 fragno = fragstoblks(fs, cgbno);
1659 if (!ffs_isfreeblock(fs, blksfree, fragno)) {
1660 if (devvp->v_type != VBLK) {
1661 /* devvp is a snapshot */
1662 mutex_exit(&ump->um_lock);
1663 brelse(bp, 0);
1664 return;
1665 }
1666 printf("dev = 0x%x, block = %" PRId64 ", fs = %s\n",
1667 dev, bno, fs->fs_fsmnt);
1668 panic("blkfree: freeing free block");
1669 }
1670 ffs_setblock(fs, blksfree, fragno);
1671 ffs_clusteracct(fs, cgp, fragno, 1);
1672 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1673 fs->fs_cstotal.cs_nbfree++;
1674 fs->fs_cs(fs, cg).cs_nbfree++;
1675 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1676 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1677 i = old_cbtocylno(fs, cgbno);
1678 KASSERT(i >= 0);
1679 KASSERT(i < fs->fs_old_ncyl);
1680 KASSERT(old_cbtorpos(fs, cgbno) >= 0);
1681 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
1682 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
1683 needswap);
1684 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1685 }
1686 } else {
1687 bbase = cgbno - fragnum(fs, cgbno);
1688 /*
1689 * decrement the counts associated with the old frags
1690 */
1691 blk = blkmap(fs, blksfree, bbase);
1692 ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
1693 /*
1694 * deallocate the fragment
1695 */
1696 frags = numfrags(fs, size);
1697 for (i = 0; i < frags; i++) {
1698 if (isset(blksfree, cgbno + i)) {
1699 printf("dev = 0x%x, block = %" PRId64
1700 ", fs = %s\n",
1701 dev, bno + i, fs->fs_fsmnt);
1702 panic("blkfree: freeing free frag");
1703 }
1704 setbit(blksfree, cgbno + i);
1705 }
1706 ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
1707 fs->fs_cstotal.cs_nffree += i;
1708 fs->fs_cs(fs, cg).cs_nffree += i;
1709 /*
1710 * add back in counts associated with the new frags
1711 */
1712 blk = blkmap(fs, blksfree, bbase);
1713 ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
1714 /*
1715 * if a complete block has been reassembled, account for it
1716 */
1717 fragno = fragstoblks(fs, bbase);
1718 if (ffs_isblock(fs, blksfree, fragno)) {
1719 ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
1720 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1721 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1722 ffs_clusteracct(fs, cgp, fragno, 1);
1723 ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
1724 fs->fs_cstotal.cs_nbfree++;
1725 fs->fs_cs(fs, cg).cs_nbfree++;
1726 if ((fs->fs_magic == FS_UFS1_MAGIC) &&
1727 ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
1728 i = old_cbtocylno(fs, bbase);
1729 KASSERT(i >= 0);
1730 KASSERT(i < fs->fs_old_ncyl);
1731 KASSERT(old_cbtorpos(fs, bbase) >= 0);
1732 KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
1733 ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
1734 bbase)], 1, needswap);
1735 ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
1736 }
1737 }
1738 }
1739 fs->fs_fmod = 1;
1740 ACTIVECG_CLR(fs, cg);
1741 mutex_exit(&ump->um_lock);
1742 bdwrite(bp);
1743 }
1744
1745 #if defined(DIAGNOSTIC) || defined(DEBUG)
1746 #ifdef XXXUBC
1747 /*
1748 * Verify allocation of a block or fragment. Returns true if block or
1749 * fragment is allocated, false if it is free.
1750 */
1751 static int
1752 ffs_checkblk(struct inode *ip, daddr_t bno, long size)
1753 {
1754 struct fs *fs;
1755 struct cg *cgp;
1756 struct buf *bp;
1757 int i, error, frags, free;
1758
1759 fs = ip->i_fs;
1760 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1761 printf("bsize = %d, size = %ld, fs = %s\n",
1762 fs->fs_bsize, size, fs->fs_fsmnt);
1763 panic("checkblk: bad size");
1764 }
1765 if (bno >= fs->fs_size)
1766 panic("checkblk: bad block %d", bno);
1767 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1768 (int)fs->fs_cgsize, NOCRED, &bp);
1769 if (error) {
1770 brelse(bp, 0);
1771 return 0;
1772 }
1773 cgp = (struct cg *)bp->b_data;
1774 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1775 brelse(bp, 0);
1776 return 0;
1777 }
1778 bno = dtogd(fs, bno);
1779 if (size == fs->fs_bsize) {
1780 free = ffs_isblock(fs, cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)),
1781 fragstoblks(fs, bno));
1782 } else {
1783 frags = numfrags(fs, size);
1784 for (free = 0, i = 0; i < frags; i++)
1785 if (isset(cg_blksfree(cgp, UFS_FSNEEDSWAP(fs)), bno + i))
1786 free++;
1787 if (free != 0 && free != frags)
1788 panic("checkblk: partially free fragment");
1789 }
1790 brelse(bp, 0);
1791 return (!free);
1792 }
1793 #endif /* XXXUBC */
#endif /* DIAGNOSTIC || DEBUG */
1795
1796 /*
1797 * Free an inode.
1798 */
1799 int
1800 ffs_vfree(struct vnode *vp, ino_t ino, int mode)
1801 {
1802
1803 if (DOINGSOFTDEP(vp)) {
1804 softdep_freefile(vp, ino, mode);
1805 return (0);
1806 }
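	/* Without soft dependencies the inode is returned to the free map now. */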
1807 return ffs_freefile(VTOI(vp)->i_fs, VTOI(vp)->i_devvp, ino, mode);
1808 }
1809
1810 /*
1811 * Do the actual free operation.
1812 * The specified inode is placed back in the free map.
1813 */
1814 int
1815 ffs_freefile(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
1816 {
1817 struct ufsmount *ump;
1818 struct cg *cgp;
1819 struct buf *bp;
1820 int error, cg;
1821 daddr_t cgbno;
1822 u_int8_t *inosused;
1823 dev_t dev;
1824 #ifdef FFS_EI
1825 const int needswap = UFS_FSNEEDSWAP(fs);
1826 #endif
1827
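	/*
	 * Locate the cylinder group holding the inode.  When devvp is a
	 * snapshot vnode the cg block is addressed by its logical block
	 * number within the snapshot file; on the raw device it is
	 * addressed by its device block number.
	 */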
1828 cg = ino_to_cg(fs, ino);
1829 if (devvp->v_type != VBLK) {
1830 /* devvp is a snapshot */
1831 dev = VTOI(devvp)->i_devvp->v_rdev;
1832 ump = VFSTOUFS(devvp->v_mount);
1833 cgbno = fragstoblks(fs, cgtod(fs, cg));
1834 } else {
1835 dev = devvp->v_rdev;
1836 ump = VFSTOUFS(devvp->v_specmountpoint);
1837 cgbno = fsbtodb(fs, cgtod(fs, cg));
1838 }
1839 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1840 panic("ifree: range: dev = 0x%x, ino = %llu, fs = %s",
1841 dev, (unsigned long long)ino, fs->fs_fsmnt);
1842 error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp);
1843 if (error) {
1844 brelse(bp, 0);
1845 return (error);
1846 }
1847 cgp = (struct cg *)bp->b_data;
1848 if (!cg_chkmagic(cgp, needswap)) {
1849 brelse(bp, 0);
1850 return (0);
1851 }
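	/*
	 * Stamp the cg with the time of this change.  The 32-bit timestamp
	 * is always maintained; the 64-bit one is used by UFS2 and by UFS1
	 * file systems carrying the updated superblock flag.
	 */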
1852 cgp->cg_old_time = ufs_rw32(time_second, needswap);
1853 if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1854 (fs->fs_old_flags & FS_FLAGS_UPDATED))
1855 cgp->cg_time = ufs_rw64(time_second, needswap);
1856 inosused = cg_inosused(cgp, needswap);
1857 ino %= fs->fs_ipg;
1858 if (isclr(inosused, ino)) {
1859 printf("ifree: dev = 0x%x, ino = %llu, fs = %s\n",
1860 dev, (unsigned long long)ino + cg * fs->fs_ipg,
1861 fs->fs_fsmnt);
1862 if (fs->fs_ronly == 0)
1863 panic("ifree: freeing free inode");
1864 }
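	/*
	 * Mark the inode free and pull the allocation rotor back so the
	 * next search of this cg starts no later than the freed inode.
	 */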
1865 clrbit(inosused, ino);
1866 if (ino < ufs_rw32(cgp->cg_irotor, needswap))
1867 cgp->cg_irotor = ufs_rw32(ino, needswap);
1868 ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
1869 mutex_enter(&ump->um_lock);
1870 fs->fs_cstotal.cs_nifree++;
1871 fs->fs_cs(fs, cg).cs_nifree++;
1872 if ((mode & IFMT) == IFDIR) {
1873 ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
1874 fs->fs_cstotal.cs_ndir--;
1875 fs->fs_cs(fs, cg).cs_ndir--;
1876 }
1877 fs->fs_fmod = 1;
1878 ACTIVECG_CLR(fs, cg);
1879 mutex_exit(&ump->um_lock);
1880 bdwrite(bp);
1881 return (0);
1882 }
1883
/*
 * Check to see if a file is free.
 * Returns non-zero if the inode is free or cannot be checked,
 * zero if it is in use.
 */
1887 int
1888 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
1889 {
1890 struct cg *cgp;
1891 struct buf *bp;
1892 daddr_t cgbno;
1893 int ret, cg;
1894 u_int8_t *inosused;
1895
1896 cg = ino_to_cg(fs, ino);
1897 if (devvp->v_type != VBLK) {
1898 /* devvp is a snapshot */
1899 cgbno = fragstoblks(fs, cgtod(fs, cg));
1900 } else
1901 cgbno = fsbtodb(fs, cgtod(fs, cg));
1902 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1903 return 1;
1904 if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp)) {
1905 brelse(bp, 0);
1906 return 1;
1907 }
1908 cgp = (struct cg *)bp->b_data;
1909 if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
1910 brelse(bp, 0);
1911 return 1;
1912 }
1913 inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
1914 ino %= fs->fs_ipg;
1915 ret = isclr(inosused, ino);
1916 brelse(bp, 0);
1917 return ret;
1918 }
1919
1920 /*
1921 * Find a block of the specified size in the specified cylinder group.
1922 *
 * It is a panic if a request is made to find a block when none are
 * available.
1925 */
1926 static int32_t
1927 ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
1928 {
1929 int32_t bno;
1930 int start, len, loc, i;
1931 int blk, field, subfield, pos;
1932 int ostart, olen;
1933 u_int8_t *blksfree;
1934 #ifdef FFS_EI
1935 const int needswap = UFS_FSNEEDSWAP(fs);
1936 #endif
1937
1938 /* KASSERT(mutex_owned(&ump->um_lock)); */
1939
1940 /*
1941 * find the fragment by searching through the free block
1942 * map for an appropriate bit pattern
1943 */
1944 if (bpref)
1945 start = dtogd(fs, bpref) / NBBY;
1946 else
1947 start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
1948 blksfree = cg_blksfree(cgp, needswap);
1949 len = howmany(fs->fs_fpg, NBBY) - start;
1950 ostart = start;
1951 olen = len;
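	/*
	 * scanc() consults fragtbl[] to find a map byte containing a run
	 * of allocsiz free fragments.  Scan from the preferred position
	 * (or the rotor) to the end of the map, then wrap around and scan
	 * the remainder from the start.
	 */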
1952 loc = scanc((u_int)len,
1953 (const u_char *)&blksfree[start],
1954 (const u_char *)fragtbl[fs->fs_frag],
1955 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1956 if (loc == 0) {
1957 len = start + 1;
1958 start = 0;
1959 loc = scanc((u_int)len,
1960 (const u_char *)&blksfree[0],
1961 (const u_char *)fragtbl[fs->fs_frag],
1962 (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
1963 if (loc == 0) {
1964 printf("start = %d, len = %d, fs = %s\n",
1965 ostart, olen, fs->fs_fsmnt);
1966 printf("offset=%d %ld\n",
1967 ufs_rw32(cgp->cg_freeoff, needswap),
1968 (long)blksfree - (long)cgp);
			printf("cg %d\n", ufs_rw32(cgp->cg_cgx, needswap));
1970 panic("ffs_alloccg: map corrupted");
1971 /* NOTREACHED */
1972 }
1973 }
1974 bno = (start + len - loc) * NBBY;
1975 cgp->cg_frotor = ufs_rw32(bno, needswap);
1976 /*
1977 * found the byte in the map
1978 * sift through the bits to find the selected frag
1979 */
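	/*
	 * blkmap() extracts the free-fragment bits for one block at a
	 * time; the around[] and inside[] masks are slid across them to
	 * locate a run of allocsiz contiguous free fragments within the
	 * byte that matched.
	 */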
1980 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1981 blk = blkmap(fs, blksfree, bno);
1982 blk <<= 1;
1983 field = around[allocsiz];
1984 subfield = inside[allocsiz];
1985 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1986 if ((blk & field) == subfield)
1987 return (bno + pos);
1988 field <<= 1;
1989 subfield <<= 1;
1990 }
1991 }
1992 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1993 panic("ffs_alloccg: block not in map");
1994 /* return (-1); */
1995 }
1996
1997 /*
1998 * Update the cluster map because of an allocation or free.
1999 *
2000 * Cnt == 1 means free; cnt == -1 means allocating.
2001 */
2002 void
2003 ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
2004 {
2005 int32_t *sump;
2006 int32_t *lp;
2007 u_char *freemapp, *mapp;
2008 int i, start, end, forw, back, map, bit;
2009 #ifdef FFS_EI
2010 const int needswap = UFS_FSNEEDSWAP(fs);
2011 #endif
2012
2013 /* KASSERT(mutex_owned(&ump->um_lock)); */
2014
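	/*
	 * fs_contigsumsize is the largest cluster length summarized; when
	 * it is zero the file system keeps no cluster accounting at all.
	 */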
2015 if (fs->fs_contigsumsize <= 0)
2016 return;
2017 freemapp = cg_clustersfree(cgp, needswap);
2018 sump = cg_clustersum(cgp, needswap);
2019 /*
2020 * Allocate or clear the actual block.
2021 */
2022 if (cnt > 0)
2023 setbit(freemapp, blkno);
2024 else
2025 clrbit(freemapp, blkno);
2026 /*
2027 * Find the size of the cluster going forward.
2028 */
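	/*
	 * Walk the cluster free map bit by bit, refilling the working byte
	 * each time a byte boundary is crossed, and stop at the first
	 * allocated block or after fs_contigsumsize blocks.  The backward
	 * scan below mirrors this in the other direction.
	 */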
2029 start = blkno + 1;
2030 end = start + fs->fs_contigsumsize;
2031 if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
2032 end = ufs_rw32(cgp->cg_nclusterblks, needswap);
2033 mapp = &freemapp[start / NBBY];
2034 map = *mapp++;
2035 bit = 1 << (start % NBBY);
2036 for (i = start; i < end; i++) {
2037 if ((map & bit) == 0)
2038 break;
2039 if ((i & (NBBY - 1)) != (NBBY - 1)) {
2040 bit <<= 1;
2041 } else {
2042 map = *mapp++;
2043 bit = 1;
2044 }
2045 }
2046 forw = i - start;
2047 /*
2048 * Find the size of the cluster going backward.
2049 */
2050 start = blkno - 1;
2051 end = start - fs->fs_contigsumsize;
2052 if (end < 0)
2053 end = -1;
2054 mapp = &freemapp[start / NBBY];
2055 map = *mapp--;
2056 bit = 1 << (start % NBBY);
2057 for (i = start; i > end; i--) {
2058 if ((map & bit) == 0)
2059 break;
2060 if ((i & (NBBY - 1)) != 0) {
2061 bit >>= 1;
2062 } else {
2063 map = *mapp--;
2064 bit = 1 << (NBBY - 1);
2065 }
2066 }
2067 back = start - i;
2068 /*
2069 * Account for old cluster and the possibly new forward and
2070 * back clusters.
2071 */
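	/*
	 * sump[n] counts free clusters of length n (capped at
	 * fs_contigsumsize).  This block merges or splits the runs on
	 * either side of it, so the combined length gains cnt while the
	 * old forward and backward run lengths lose it.
	 */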
2072 i = back + forw + 1;
2073 if (i > fs->fs_contigsumsize)
2074 i = fs->fs_contigsumsize;
2075 ufs_add32(sump[i], cnt, needswap);
2076 if (back > 0)
2077 ufs_add32(sump[back], -cnt, needswap);
2078 if (forw > 0)
2079 ufs_add32(sump[forw], -cnt, needswap);
2080
2081 /*
2082 * Update cluster summary information.
2083 */
2084 lp = &sump[fs->fs_contigsumsize];
2085 for (i = fs->fs_contigsumsize; i > 0; i--)
2086 if (ufs_rw32(*lp--, needswap) > 0)
2087 break;
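	/*
	 * Remember the largest cluster length still available in this cg
	 * so the cluster allocator can skip groups that cannot satisfy a
	 * request of a given size.
	 */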
2088 fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
2089 }
2090
2091 /*
2092 * Fserr prints the name of a file system with an error diagnostic.
2093 *
 * The form of the error message is:
 *	uid %d, pid %d, command %s, on fs: error message
2096 */
2097 static void
2098 ffs_fserr(struct fs *fs, u_int uid, const char *cp)
2099 {
2100
2101 log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
2102 uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
2103 }
2104