lfs_alloc.c revision 1.145 1 /* $NetBSD: lfs_alloc.c,v 1.145 2025/09/21 14:19:14 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1991, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
60 */
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.145 2025/09/21 14:19:14 christos Exp $");
64
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
67 #endif
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/buf.h>
73 #include <sys/lock.h>
74 #include <sys/vnode.h>
75 #include <sys/syslog.h>
76 #include <sys/mount.h>
77 #include <sys/malloc.h>
78 #include <sys/pool.h>
79 #include <sys/proc.h>
80 #include <sys/kauth.h>
81
82 #include <ufs/lfs/ulfs_quotacommon.h>
83 #include <ufs/lfs/ulfs_inode.h>
84 #include <ufs/lfs/ulfsmount.h>
85 #include <ufs/lfs/ulfs_extern.h>
86
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_accessors.h>
89 #include <ufs/lfs/lfs_extern.h>
90 #include <ufs/lfs/lfs_kernel.h>
91
92 int lfs_do_check_freelist = 0;
93
94 /* Constants for inode free bitmap */
95 #define BMSHIFT 5 /* 2 ** 5 = 32 */
96 #define BMMASK ((1 << BMSHIFT) - 1)
97 #define SET_BITMAP_FREE(F, I) do { \
98 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \
99 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
100 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \
101 } while (0)
102 #define CLR_BITMAP_FREE(F, I) do { \
103 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \
104 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
105 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \
106 } while(0)
107
108 #define ISSET_BITMAP_FREE(F, I) \
109 ((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
110
111 /*
112 * Add a new block to the Ifile, to accommodate future file creations.
113 * Called with the segment lock held.
114 */
115 int
116 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
117 {
118 struct vnode *vp;
119 struct inode *ip;
120 IFILE64 *ifp64;
121 IFILE32 *ifp32;
122 IFILE_V1 *ifp_v1;
123 struct buf *bp, *cbp;
124 int error;
125 daddr_t i, blkno, xmax;
126 ino_t oldhead, maxino, tail;
127 CLEANERINFO *cip;
128
129 ASSERT_SEGLOCK(fs);
130
131 /* XXX should check or assert that we aren't readonly. */
132
133 /*
134 * Get a block and extend the ifile inode. Leave the buffer for
135 * the block in bp.
136 */
137
138 vp = fs->lfs_ivnode;
139 ip = VTOI(vp);
140 blkno = lfs_lblkno(fs, ip->i_size);
141 if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
142 &bp)) != 0) {
143 return (error);
144 }
145 ip->i_size += lfs_sb_getbsize(fs);
146 lfs_dino_setsize(fs, ip->i_din, ip->i_size);
147 uvm_vnp_setsize(vp, ip->i_size);
148
149 /*
150 * Compute the new number of inodes, and reallocate the in-memory
151 * inode freemap.
152 */
153
154 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
155 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
156 fs->lfs_ino_bitmap = (lfs_bm_t *)
157 realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
158 sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
159 KASSERT(fs->lfs_ino_bitmap != NULL);
160
161 /* first new inode number */
162 i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
163 lfs_sb_getifpb(fs);
164
165 /* inode number to stop at (XXX: why *x*max?) */
166 xmax = i + lfs_sb_getifpb(fs);
167
168 /*
169 * We insert the new inodes at the head of the free list.
170 * Under normal circumstances, the free list is empty here,
171 * so we are also incidentally placing them at the end (which
172 * we must do if we are to keep them in order).
173 */
174 LFS_GET_HEADFREE(fs, cip, cbp, &oldhead);
175 LFS_PUT_HEADFREE(fs, cip, cbp, i);
176 LFS_GET_TAILFREE(fs, cip, cbp, &tail);
177 DLOG((DLOG_ALLOC, "oldhead=%jd, i=%jd, xmax=%jd, oldtail=%jd\n",
178 (intmax_t)oldhead, (intmax_t)i, (intmax_t)xmax,
179 (intmax_t)tail));
180 if (tail == LFS_UNUSED_INUM) {
181 tail = xmax - 1;
182 LFS_PUT_TAILFREE(fs, cip, cbp, tail);
183 }
184 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
185 "inode 0 allocated [2]");
186
187
188 /*
189 * Initialize the ifile block.
190 *
191 * XXX: these loops should be restructured to use the accessor
192 * functions instead of using cutpaste polymorphism.
193 */
194
195 if (fs->lfs_is64) {
196 for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
197 SET_BITMAP_FREE(fs, i);
198 ifp64->if_version = 1;
199 ifp64->if_daddr = LFS_UNUSED_DADDR;
200 ifp64->if_nextfree = ++i;
201 }
202 ifp64--;
203 ifp64->if_nextfree = oldhead;
204 } else if (lfs_sb_getversion(fs) > 1) {
205 for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
206 SET_BITMAP_FREE(fs, i);
207 ifp32->if_version = 1;
208 ifp32->if_daddr = LFS_UNUSED_DADDR;
209 ifp32->if_nextfree = ++i;
210 }
211 ifp32--;
212 ifp32->if_nextfree = oldhead;
213 } else {
214 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
215 SET_BITMAP_FREE(fs, i);
216 ifp_v1->if_version = 1;
217 ifp_v1->if_daddr = LFS_UNUSED_DADDR;
218 ifp_v1->if_nextfree = ++i;
219 }
220 ifp_v1--;
221 ifp_v1->if_nextfree = oldhead;
222 }
223 DLOG((DLOG_ALLOC, " now head=%jd tail=%jd\n",
224 (intmax_t)xmax - lfs_sb_getifpb(fs), (intmax_t)tail));
225
226 /*
227 * Write out the new block.
228 */
229
230 (void) LFS_BWRITE_LOG(bp); /* Ifile */
231
232 return 0;
233 }
234
235 /*
236 * Allocate an inode for a new file.
237 *
238 * Takes the segment lock. Also (while holding it) takes lfs_lock
239 * to frob fs->lfs_fmod.
240 *
241 * XXX: the mode argument is unused; should just get rid of it.
242 */
243 /* ARGSUSED */
244 /* VOP_BWRITE 2i times */
245 int
246 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
247 ino_t *ino, int *gen)
248 {
249 struct lfs *fs;
250 struct buf *bp, *cbp;
251 IFILE *ifp;
252 int error;
253 CLEANERINFO *cip;
254
255 fs = VTOI(pvp)->i_lfs;
256 if (fs->lfs_ronly)
257 return EROFS;
258
259 if (!(fs->lfs_flags & LFS_NOTYET))
260 ASSERT_NO_SEGLOCK(fs);
261
262 DEBUG_CHECK_FREELIST(fs);
263
264 lfs_seglock(fs, SEGM_PROT);
265
266 /* Get the head of the freelist. */
267 LFS_GET_HEADFREE(fs, cip, cbp, ino);
268
269 /* paranoia */
270 KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
271 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
272 *ino));
273
274 /* Update the in-memory inode freemap */
275 CLR_BITMAP_FREE(fs, *ino);
276
277 /*
278 * Fetch the ifile entry and make sure the inode is really
279 * free.
280 */
281 LFS_IENTRY(ifp, fs, *ino, bp);
282 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
283 panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
284 *ino);
285
286 /* Update the inode freelist head in the superblock. */
287 LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
288 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
289 *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
290
291 /*
292 * Retrieve the version number from the ifile entry. It was
293 * bumped by vfree, so don't bump it again.
294 */
295 *gen = lfs_if_getversion(fs, ifp);
296
297 /* Done with ifile entry */
298
299 /*
300 * Note LFS_ILLEGAL_DADDR == LFS_UNUSED_DADDR
301 * unless DEBUG is set. In that case we
302 * use that address to track inodes that have been
303 * allocated (and so not on the free list) but do not
304 * have proper disk addresses yet.
305 */
306 lfs_if_setdaddr(fs, ifp, LFS_ILLEGAL_DADDR);
307 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
308 LFS_BWRITE_LOG(bp);
309
310 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
311 /*
312 * No more inodes; extend the ifile so that the next
313 * lfs_valloc will succeed.
314 */
315 if ((error = lfs_extend_ifile(fs, cred)) != 0) {
316 /* restore the freelist */
317 LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
318
319 /* unlock and return */
320 lfs_segunlock(fs);
321 return error;
322 }
323 }
324 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
325 "inode 0 allocated [3]");
326
327 /* Set superblock modified bit */
328 mutex_enter(&lfs_lock);
329 fs->lfs_fmod = 1;
330 mutex_exit(&lfs_lock);
331
332 /* increment file count */
333 lfs_sb_addnfiles(fs, 1);
334
335 /* done */
336 lfs_segunlock(fs);
337
338 DEBUG_CHECK_FREELIST(fs);
339 return 0;
340 }
341
342 /*
343 * Allocate an inode for a new file, with given inode number and
344 * version.
345 *
346 * Called in the same context as lfs_valloc and therefore shares the
347 * same locking assumptions.
348 */
349 int
350 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
351 {
352 IFILE *ifp;
353 struct buf *bp, *cbp;
354 ino_t headino, thisino, oldnext, tailino;
355 CLEANERINFO *cip;
356 int extended = 0;
357
358 if (fs->lfs_ronly)
359 return EROFS;
360
361 if (!(fs->lfs_flags & LFS_NOTYET))
362 ASSERT_NO_SEGLOCK(fs);
363
364 DEBUG_CHECK_FREELIST(fs);
365
366 lfs_seglock(fs, SEGM_PROT);
367
368 /*
369 * If the ifile is too short to contain this inum, extend it.
370 *
371 * XXX: lfs_extend_ifile should take a size instead of always
372 * doing just one block at time.
373 */
374 while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
375 lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
376 << lfs_sb_getbshift(fs)) {
377 DLOG((DLOG_ALLOC, "extend ifile to accommodate ino %jd\n",
378 (intmax_t)ino));
379 lfs_extend_ifile(fs, NOCRED);
380 extended = 1;
381 }
382
383 /*
384 * Get the inode freelist next pointer.
385 */
386 LFS_IENTRY(ifp, fs, ino, bp);
387 oldnext = lfs_if_getnextfree(fs, ifp);
388 brelse(bp, 0);
389
390 /* Get fail of inode freelist */
391 LFS_GET_TAILFREE(fs, cip, cbp, &tailino);
392
393 /* Get head of inode freelist */
394 LFS_GET_HEADFREE(fs, cip, cbp, &headino);
395 if (headino == ino) {
396 /* Easy case: the inode we wanted was at the head */
397 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
398 } else {
399 ino_t nextfree = 0, maxino, count; /* XXX: gcc */
400
401 /* Have to find the desired inode in the freelist... */
402 maxino = ((VTOI(fs->lfs_ivnode)->i_size >> lfs_sb_getbshift(fs))
403 - lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs))
404 * lfs_sb_getifpb(fs);
405 count = 0;
406
407 thisino = headino;
408 while (thisino != LFS_UNUSED_INUM) {
409 /* read this ifile entry */
410 LFS_IENTRY(ifp, fs, thisino, bp);
411 nextfree = lfs_if_getnextfree(fs, ifp);
412 /* stop if we find it or we hit the end */
413 if (nextfree == ino ||
414 nextfree == LFS_UNUSED_INUM)
415 break;
416 /* nope, keep going... */
417 thisino = nextfree;
418 brelse(bp, 0);
419 if (++count > maxino)
420 break;
421 }
422 if (count > maxino) {
423 panic("loop in free list");
424 lfs_segunlock(fs);
425 return ENOENT;
426 }
427 if (nextfree == LFS_UNUSED_INUM) {
428 /* hit the end -- this inode is not available */
429 brelse(bp, 0);
430 lfs_segunlock(fs);
431 if (extended)
432 panic("extended ifile to accommodate but inode not found");
433 return ENOENT;
434 }
435 /* found it; update the next pointer */
436 lfs_if_setnextfree(fs, ifp, oldnext);
437 /* write the ifile block */
438 LFS_BWRITE_LOG(bp);
439
440 /* If our inode was the tail, thisino is now the tail */
441 if (tailino == ino)
442 LFS_PUT_TAILFREE(fs, cip, cbp, thisino);
443 }
444
445 /* Clear nextfree, note daddr, and set generation number */
446 LFS_IENTRY(ifp, fs, ino, bp);
447 lfs_if_setversion(fs, ifp, vers);
448 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
449 /* See comment in lfs_valloc */
450 lfs_if_setdaddr(fs, ifp, LFS_ILLEGAL_DADDR);
451 LFS_BWRITE_LOG(bp);
452
453 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
454 int error;
455 /*
456 * No more inodes; extend the ifile so that the next
457 * lfs_valloc will succeed.
458 */
459 if ((error = lfs_extend_ifile(fs, NOCRED)) != 0) {
460 /* restore the freelist */
461 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
462
463 /* unlock and return */
464 lfs_segunlock(fs);
465 return error;
466 }
467 }
468 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
469 "inode 0 allocated [4]");
470
471 /* done */
472 lfs_segunlock(fs);
473
474 DEBUG_CHECK_FREELIST(fs);
475
476 return 0;
477 }
478
479 #if 0
480 /*
481 * Find the highest-numbered allocated inode.
482 * This will be used to shrink the Ifile.
483 */
484 static inline ino_t
485 lfs_last_alloc_ino(struct lfs *fs)
486 {
487 ino_t ino, maxino;
488
489 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
490 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
491 lfs_sb_getifpb(fs);
492 for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
493 if (ISSET_BITMAP_FREE(fs, ino) == 0)
494 break;
495 }
496 return ino;
497 }
498
499 /*
500 * Find the previous (next lowest numbered) free inode, if any.
501 * If there is none, return LFS_UNUSED_INUM.
502 *
503 * XXX: locking?
504 */
505 static inline ino_t
506 lfs_freelist_prev(struct lfs *fs, ino_t ino)
507 {
508 ino_t tino, bound, bb, freehdbb;
509
510 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
511 /* No free inodes at all */
512 return LFS_UNUSED_INUM;
513 }
514
515 /* Search our own word first */
516 bound = ino & ~BMMASK;
517 for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
518 if (ISSET_BITMAP_FREE(fs, tino))
519 return tino;
520 /* If there are no lower words to search, just return */
521 if (ino >> BMSHIFT == 0)
522 return LFS_UNUSED_INUM;
523
524 /*
525 * Find a word with a free inode in it. We have to be a bit
526 * careful here since ino_t is unsigned.
527 */
528 freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
529 for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
530 if (fs->lfs_ino_bitmap[bb])
531 break;
532 if (fs->lfs_ino_bitmap[bb] == 0)
533 return LFS_UNUSED_INUM;
534
535 /* Search the word we found */
536 for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
537 tino > LFS_UNUSED_INUM; tino--)
538 if (ISSET_BITMAP_FREE(fs, tino))
539 break;
540
541 /* Avoid returning reserved inode numbers */
542 if (tino <= LFS_IFILE_INUM)
543 tino = LFS_UNUSED_INUM;
544
545 return tino;
546 }
547 #endif
548
549 /*
550 * Free an inode.
551 *
552 * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
553 */
554 /* ARGUSED */
555 /* VOP_BWRITE 2i times */
556 int
557 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
558 {
559 SEGUSE *sup;
560 CLEANERINFO *cip;
561 struct buf *cbp, *bp;
562 IFILE *ifp;
563 struct inode *ip;
564 struct lfs *fs;
565 daddr_t old_iaddr;
566
567 /* Get the inode number and file system. */
568 ip = VTOI(vp);
569 fs = ip->i_lfs;
570 ino = ip->i_number;
571
572 /* XXX: assert not readonly */
573
574 ASSERT_NO_SEGLOCK(fs);
575 DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
576
577 /* Drain of pending writes */
578 mutex_enter(vp->v_interlock);
579 while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
580 cv_wait(&vp->v_cv, vp->v_interlock);
581 }
582 mutex_exit(vp->v_interlock);
583
584 lfs_seglock(fs, SEGM_PROT);
585
586 DEBUG_CHECK_FREELIST(fs);
587
588 /*
589 * If the inode was in a dirop, it isn't now.
590 *
591 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
592 * not updated together in one function? (and why do both exist,
593 * anyway?)
594 */
595 UNMARK_VNODE(vp);
596
597 mutex_enter(&lfs_lock);
598 if (vp->v_uflag & VU_DIROP) {
599 vp->v_uflag &= ~VU_DIROP;
600 --lfs_dirvcount;
601 --fs->lfs_dirvcount;
602 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
603 wakeup(&fs->lfs_dirvcount);
604 wakeup(&lfs_dirvcount);
605 mutex_exit(&lfs_lock);
606 vrele(vp);
607
608 /*
609 * If this inode is not going to be written any more, any
610 * segment accounting left over from its truncation needs
611 * to occur at the end of the next dirops flush. Attach
612 * them to the fs-wide list for that purpose.
613 */
614 if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
615 struct segdelta *sd;
616
617 while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
618 LIST_REMOVE(sd, list);
619 LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
620 }
621 }
622 } else {
623 /*
624 * If it's not a dirop, we can finalize right away.
625 */
626 mutex_exit(&lfs_lock);
627 lfs_finalize_ino_seguse(fs, ip);
628 }
629
630 /* it is no longer an unwritten inode, so update the counts */
631 mutex_enter(&lfs_lock);
632 LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
633 mutex_exit(&lfs_lock);
634
635 /* Turn off all inode modification flags */
636 ip->i_state &= ~IN_ALLMOD;
637
638 /* Mark it deleted */
639 ip->i_lfs_iflags |= LFSI_DELETED;
640
641 /* Mark it free in the in-memory inode freemap */
642 SET_BITMAP_FREE(fs, ino);
643
644 /*
645 * Set the ifile's inode entry to unused, increment its version number
646 * and link it onto the free chain.
647 */
648
649 /* fetch the ifile entry */
650 LFS_IENTRY(ifp, fs, ino, bp);
651
652 /* update the on-disk address (to "nowhere") */
653 old_iaddr = lfs_if_getdaddr(fs, ifp);
654 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
655
656 /* bump the version */
657 lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
658
659 #if 0
660 if (lfs_sb_getversion(fs) == 1) {
661 #endif
662 ino_t nextfree;
663
664 /* insert on freelist */
665 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
666 lfs_if_setnextfree(fs, ifp, nextfree);
667 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
668
669 /* write the ifile block */
670 (void) LFS_BWRITE_LOG(bp); /* Ifile */
671 #if 0
672 } else {
673 ino_t tino, onf, otail;
674
675 /*
676 * Clear the freelist next pointer and write the ifile
677 * block. XXX: why? I'm sure there must be a reason but
678 * it seems both silly and dangerous.
679 */
680 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
681 (void) LFS_BWRITE_LOG(bp); /* Ifile */
682
683 /*
684 * Insert on freelist in order.
685 */
686
687 /* Find the next lower (by number) free inode */
688 tino = lfs_freelist_prev(fs, ino);
689
690 if (tino == LFS_UNUSED_INUM) {
691 ino_t nextfree;
692
693 /*
694 * There isn't one; put us on the freelist head.
695 */
696
697 /* reload the ifile block */
698 LFS_IENTRY(ifp, fs, ino, bp);
699 /* update the list */
700 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
701 lfs_if_setnextfree(fs, ifp, nextfree);
702 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
703 DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
704 (long long)nextfree, (long long)ino));
705 /* write the ifile block */
706 LFS_BWRITE_LOG(bp); /* Ifile */
707
708 /* If the list was empty, set tail too */
709 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
710 if (otail == LFS_UNUSED_INUM) {
711 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
712 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
713 "-> %lld\n", (long long)otail,
714 (long long)ino));
715 }
716 } else {
717 /*
718 * Insert this inode into the list after tino.
719 * We hold the segment lock so we don't have to
720 * worry about blocks being written out of order.
721 */
722
723 DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
724 " after %lld\n", ino, tino));
725
726 /* load the previous inode's ifile block */
727 LFS_IENTRY(ifp, fs, tino, bp);
728 /* update the list pointer */
729 onf = lfs_if_getnextfree(fs, ifp);
730 lfs_if_setnextfree(fs, ifp, ino);
731 /* write the block */
732 LFS_BWRITE_LOG(bp); /* Ifile */
733
734 /* load this inode's ifile block */
735 LFS_IENTRY(ifp, fs, ino, bp);
736 /* update the list pointer */
737 lfs_if_setnextfree(fs, ifp, onf);
738 /* write the block */
739 LFS_BWRITE_LOG(bp); /* Ifile */
740
741 /* If we're last, put us on the tail */
742 if (onf == LFS_UNUSED_INUM) {
743 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
744 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
745 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
746 "-> %lld\n", (long long)otail,
747 (long long)ino));
748 }
749 }
750 }
751 #endif
752 /* XXX: shouldn't this check be further up *before* we trash the fs? */
753 KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
754
755 /*
756 * Update the segment summary for the segment where the on-disk
757 * copy used to be.
758 */
759 if (!DADDR_IS_BAD(old_iaddr)) {
760 /* load it */
761 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
762 /* the number of bytes in the segment should not become < 0 */
763 KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
764 "lfs_vfree: negative byte count"
765 " (segment %" PRIu32 " short by %d)\n",
766 lfs_dtosn(fs, old_iaddr),
767 (int)DINOSIZE(fs) - sup->su_nbytes);
768 /* update the number of bytes in the segment */
769 sup->su_nbytes -= DINOSIZE(fs);
770 /* write the segment entry */
771 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
772 }
773
774 /* Set superblock modified bit. */
775 mutex_enter(&lfs_lock);
776 fs->lfs_fmod = 1;
777 mutex_exit(&lfs_lock);
778
779 /* Decrement file count. */
780 lfs_sb_subnfiles(fs, 1);
781
782 lfs_segunlock(fs);
783
784 DEBUG_CHECK_FREELIST(fs);
785
786 return (0);
787 }
788
789 /*
790 * Sort the freelist and set up the free-inode bitmap.
791 * To be called by lfs_mountfs().
792 *
793 * Takes the segmenet lock.
794 */
795 void
796 lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp)
797 {
798 CLEANERINFO *cip;
799 IFILE *ifp = NULL;
800 struct buf *bp;
801 ino_t ino, firstino, lastino, maxino;
802 ino_t *orphan = NULL;
803 size_t norphan = 0;
804 size_t norphan_alloc = 0;
805
806 ASSERT_NO_SEGLOCK(fs);
807 lfs_seglock(fs, SEGM_PROT);
808
809 DEBUG_CHECK_FREELIST(fs);
810
811 /* largest inode on fs */
812 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
813 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
814
815 /* allocate the in-memory inode freemap */
816 /* XXX: assert that fs->lfs_ino_bitmap is null here */
817 fs->lfs_ino_bitmap =
818 malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
819 M_SEGMENT, M_WAITOK | M_ZERO);
820 KASSERT(fs->lfs_ino_bitmap != NULL);
821
822 /*
823 * Scan the ifile.
824 */
825
826 firstino = lastino = LFS_UNUSED_INUM;
827 for (ino = 0; ino < maxino; ino++) {
828 /* Load this inode's ifile entry. */
829 if (ino % lfs_sb_getifpb(fs) == 0)
830 LFS_IENTRY(ifp, fs, ino, bp);
831 else
832 LFS_IENTRY_NEXT(ifp, fs);
833
834 /* Don't put zero or ifile on the free list */
835 if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
836 continue;
837
838 /*
839 * Address orphaned files.
840 *
841 * The idea of this is to free inodes belonging to
842 * files that were unlinked but not reclaimed, I guess
843 * because if we're going to scan the whole ifile
844 * anyway it costs very little to do this. I don't
845 * immediately see any reason this should be disabled,
846 * but presumably it doesn't work... not sure what
847 * happens to such files currently. -- dholland 20160806
848 */
849 if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) {
850 if (orphan == NULL) {
851 norphan_alloc = 32; /* XXX pulled from arse */
852 orphan = kmem_zalloc(sizeof(orphan[0]) *
853 norphan_alloc, KM_SLEEP);
854 } else if (norphan == norphan_alloc) {
855 ino_t *orphan_new;
856 if (norphan_alloc >= 4096)
857 norphan_alloc += 4096;
858 else
859 norphan_alloc *= 2;
860 orphan_new = kmem_zalloc(sizeof(orphan[0]) *
861 norphan_alloc, KM_SLEEP);
862 memcpy(orphan_new, orphan, sizeof(orphan[0]) *
863 norphan);
864 kmem_free(orphan, sizeof(orphan[0]) * norphan);
865 orphan = orphan_new;
866 }
867 orphan[norphan++] = ino;
868 }
869
870 if (DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp))) {
871
872 /*
873 * This inode is free. Put it on the free list.
874 */
875
876 if (firstino == LFS_UNUSED_INUM) {
877 /* XXX: assert lastino == LFS_UNUSED_INUM? */
878 /* remember the first free inode */
879 firstino = ino;
880 } else {
881 /* release this inode's ifile entry */
882 brelse(bp, 0);
883
884 /* XXX: assert lastino != LFS_UNUSED_INUM? */
885
886 /* load lastino's ifile entry */
887 LFS_IENTRY(ifp, fs, lastino, bp);
888 /* set the list pointer */
889 lfs_if_setnextfree(fs, ifp, ino);
890 /* write the block */
891 LFS_BWRITE_LOG(bp);
892
893 /* reload this inode's ifile entry */
894 LFS_IENTRY(ifp, fs, ino, bp);
895 }
896 /* remember the last free inode seen so far */
897 lastino = ino;
898
899 /* Mark this inode free in the in-memory freemap */
900 SET_BITMAP_FREE(fs, ino);
901 }
902
903 /* If moving to the next ifile block, release the buffer. */
904 if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
905 brelse(bp, 0);
906 }
907
908 /* Write the freelist head and tail pointers */
909 /* XXX: do we need to mark the superblock dirty? */
910 LFS_PUT_HEADFREE(fs, cip, bp, firstino);
911 LFS_PUT_TAILFREE(fs, cip, bp, lastino);
912
913 /* done */
914 lfs_segunlock(fs);
915
916 /*
917 * Shrink the array of orphans so we don't have to carry around
918 * the allocation size.
919 */
920 if (norphan < norphan_alloc) {
921 ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan,
922 KM_SLEEP);
923 memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan);
924 kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc);
925 orphan = orphan_new;
926 norphan_alloc = norphan;
927 }
928
929 *orphanp = orphan;
930 *norphanp = norphan;
931
932 DEBUG_CHECK_FREELIST(fs);
933 }
934
935 /*
936 * Mark a file orphaned (unlinked but not yet reclaimed) by inode
937 * number. Do this with a magic freelist next pointer.
938 *
939 * XXX: howzabout some locking?
940 */
941 void
942 lfs_orphan(struct lfs *fs, ino_t ino)
943 {
944 IFILE *ifp;
945 struct buf *bp;
946
947 LFS_IENTRY(ifp, fs, ino, bp);
948 lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs));
949 LFS_BWRITE_LOG(bp);
950 }
951
952 /*
953 * Free orphans discovered during mount. This is a separate stage
954 * because it requires fs->lfs_suflags to be set up, which is not done
955 * by the time we run lfs_order_freelist. It's possible that we could
956 * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner)
957 * but that requires more thought than I can put into this at the
958 * moment.
959 */
960 void
961 lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan)
962 {
963 size_t i;
964
965 DEBUG_CHECK_FREELIST(fs);
966
967 for (i = 0; i < norphan; i++) {
968 ino_t ino = orphan[i];
969 unsigned segno;
970 struct vnode *vp;
971 struct inode *ip;
972 struct buf *bp;
973 IFILE *ifp;
974 SEGUSE *sup;
975 int error;
976
977 /* Get the segment the inode is in on disk. */
978 LFS_IENTRY(ifp, fs, ino, bp);
979 KASSERT(!DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp)));
980 segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
981 brelse(bp, 0);
982
983 /*
984 * Try to get the vnode. If we can't, tough -- hope
985 * you have backups!
986 */
987 error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE,
988 &vp);
989 if (error) {
990 printf("orphan %jd vget error %d\n", (intmax_t)ino,
991 error);
992 continue;
993 }
994
995 /*
996 * Sanity-check the inode.
997 *
998 * XXX What to do if it is still referenced?
999 */
1000 ip = VTOI(vp);
1001 if (ip->i_nlink != 0)
1002 printf("orphan %jd nlink %d\n", (intmax_t)ino,
1003 ip->i_nlink);
1004
1005 /*
1006 * Truncate the inode, to free any blocks allocated for
1007 * it, and release it, to free the inode number.
1008 *
1009 * XXX Isn't it redundant to truncate? Won't vput do
1010 * that for us?
1011 */
1012 error = lfs_truncate(vp, 0, 0, NOCRED);
1013 if (error)
1014 printf("orphan %jd truncate error %d", (intmax_t)ino,
1015 error);
1016 vput(vp);
1017
1018 /* Update the number of bytes in the segment summary. */
1019 LFS_SEGENTRY(sup, fs, segno, bp);
1020 KASSERT(sup->su_nbytes >= DINOSIZE(fs));
1021 sup->su_nbytes -= DINOSIZE(fs);
1022 LFS_WRITESEGENTRY(sup, fs, segno, bp);
1023
1024 /* Drop the on-disk address. */
1025 LFS_IENTRY(ifp, fs, ino, bp);
1026 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
1027 LFS_BWRITE_LOG(bp);
1028 }
1029
1030 if (orphan)
1031 kmem_free(orphan, sizeof(orphan[0]) * norphan);
1032
1033 DEBUG_CHECK_FREELIST(fs);
1034 }
1035
1036 #ifdef DEBUG
1037 static void dump_freelist(struct lfs *);
1038
1039 void
1040 lfs_check_freelist(struct lfs *fs, const char *func, int line)
1041 {
1042 ino_t i, headino, maxino, thisino, tailino, nextfree;
1043 int nfree, count;
1044 struct inode *ip;
1045 IFILE *ifp;
1046 CLEANERINFO *cip;
1047 struct buf *bp;
1048
1049 if (!lfs_do_check_freelist)
1050 return;
1051
1052 lfs_seglock(fs, SEGM_PROT);
1053
1054 ip = VTOI(fs->lfs_ivnode);
1055 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
1056 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
1057
1058 /*
1059 * First, check every node. Every inode that doesn't have a disk
1060 * address must have a nextfree pointer, the only exception
1061 * being the tail of the free list.
1062 */
1063 LFS_GET_TAILFREE(fs, cip, bp, &tailino);
1064 nfree = 0;
1065 for (i = LFS_IFILE_INUM + 1; i < maxino; ++i) {
1066 LFS_IENTRY(ifp, fs, i, bp);
1067 if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
1068 ++nfree;
1069 if (lfs_if_getnextfree(fs, ifp) == LFS_UNUSED_INUM
1070 && i != tailino) {
1071 brelse(bp, 0);
1072 dump_freelist(fs);
1073 printf("At %s:%d:\n", func, line);
1074 printf("tailino=%jd, but ino=%jd"
1075 " neither daddr nor nextfree\n",
1076 (intmax_t)tailino, (intmax_t)i);
1077 panic("Free list leak\n");
1078 }
1079 }
1080 if (i == tailino
1081 || (!INUM_IS_BAD(fs, lfs_if_getnextfree(fs, ifp)))) {
1082 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR) {
1083 brelse(bp, 0);
1084 dump_freelist(fs);
1085 printf("At %s:%d:\n", func, line);
1086 printf("with tailino=%jd, ino=%jd"
1087 " daddr=0x%jx, nextfree=0x%jx\n",
1088 (intmax_t)tailino,
1089 (intmax_t)i,
1090 (intmax_t)lfs_if_getdaddr(fs, ifp),
1091 (intmax_t)lfs_if_getnextfree(fs, ifp));
1092 panic("In use inode on free list\n");
1093 }
1094 }
1095 brelse(bp, 0);
1096 }
1097
1098 /*
1099 * Walk the free list from head to tail. We should end up with
1100 * the same number of free inodes as we counted above.
1101 */
1102
1103 /* Get head of inode freelist */
1104 LFS_GET_HEADFREE(fs, cip, bp, &headino);
1105 count = 0;
1106 thisino = headino;
1107 while (thisino != LFS_UNUSED_INUM) {
1108 if (++count > maxino)
1109 break;
1110 /* read this ifile entry */
1111 LFS_IENTRY(ifp, fs, thisino, bp);
1112 nextfree = lfs_if_getnextfree(fs, ifp);
1113 brelse(bp, 0);
1114 if (nextfree == LFS_UNUSED_INUM)
1115 break;
1116 thisino = nextfree;
1117 }
1118 if (count > maxino) {
1119 dump_freelist(fs);
1120 printf("At %s:%d:\n", func, line);
1121 printf("count=%jd, maxino=%jd:\n",
1122 (intmax_t)count, (intmax_t)maxino);
1123 panic("loop in free list");
1124 }
1125 if (count != nfree) {
1126 dump_freelist(fs);
1127 printf("At %s:%d:\n", func, line);
1128 printf("%d inodes without addresses, %d on free list\n",
1129 nfree, count);
1130 panic("Bad free list count");
1131 }
1132 if (thisino != tailino) {
1133 dump_freelist(fs);
1134 printf("At %s:%d:\n", func, line);
1135 printf("Last ino %jd but tail %jd\n",
1136 (intmax_t)thisino, (intmax_t)tailino);
1137 panic("Bad tail");
1138 }
1139 lfs_segunlock(fs);
1140 }
1141
1142 static void
1143 dump_freelist(struct lfs *fs)
1144 {
1145 ino_t i, ni, maxino, headino, tailino;
1146 struct inode *ip;
1147 IFILE *ifp;
1148 CLEANERINFO *cip;
1149 struct buf *bp;
1150 int count;
1151
1152 ip = VTOI(fs->lfs_ivnode);
1153 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
1154 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
1155
1156 LFS_GET_HEADFREE(fs, cip, bp, &headino);
1157 printf(" head: %jd\n", (intmax_t)headino);
1158 LFS_GET_TAILFREE(fs, cip, bp, &tailino);
1159 printf(" tail: %jd\n", (intmax_t)tailino);
1160 count = 0;
1161 for (i = LFS_IFILE_INUM + 1; i < maxino; ++i) {
1162 LFS_IENTRY(ifp, fs, i, bp);
1163 ni = lfs_if_getnextfree(fs, ifp);
1164 if (ni != LFS_UNUSED_DADDR) {
1165 printf("%jd -> %jd\n",
1166 (intmax_t)i, (intmax_t)ni);
1167 if (++count > 30) {
1168 printf("...\n");
1169 i = maxino; /* terminate loop */
1170 }
1171 }
1172 brelse(bp, 0);
1173 }
1174 }
1175 #endif /* DEBUG */
1176