lfs_alloc.c revision 1.143 1 /* $NetBSD: lfs_alloc.c,v 1.143 2025/09/15 18:49:16 perseant Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * Copyright (c) 1991, 1993
33 * The Regents of the University of California. All rights reserved.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 3. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
60 */
61
62 #include <sys/cdefs.h>
63 __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.143 2025/09/15 18:49:16 perseant Exp $");
64
65 #if defined(_KERNEL_OPT)
66 #include "opt_quota.h"
67 #endif
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/kernel.h>
72 #include <sys/buf.h>
73 #include <sys/lock.h>
74 #include <sys/vnode.h>
75 #include <sys/syslog.h>
76 #include <sys/mount.h>
77 #include <sys/malloc.h>
78 #include <sys/pool.h>
79 #include <sys/proc.h>
80 #include <sys/kauth.h>
81
82 #include <ufs/lfs/ulfs_quotacommon.h>
83 #include <ufs/lfs/ulfs_inode.h>
84 #include <ufs/lfs/ulfsmount.h>
85 #include <ufs/lfs/ulfs_extern.h>
86
87 #include <ufs/lfs/lfs.h>
88 #include <ufs/lfs/lfs_accessors.h>
89 #include <ufs/lfs/lfs_extern.h>
90 #include <ufs/lfs/lfs_kernel.h>
91
92 /* Constants for inode free bitmap */
93 #define BMSHIFT 5 /* 2 ** 5 = 32 */
94 #define BMMASK ((1 << BMSHIFT) - 1)
95 #define SET_BITMAP_FREE(F, I) do { \
96 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \
97 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
98 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \
99 } while (0)
100 #define CLR_BITMAP_FREE(F, I) do { \
101 DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \
102 (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
103 (F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \
104 } while(0)
105
106 #define ISSET_BITMAP_FREE(F, I) \
107 ((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108
109 /*
110 * Add a new block to the Ifile, to accommodate future file creations.
111 * Called with the segment lock held.
112 */
113 int
114 lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115 {
116 struct vnode *vp;
117 struct inode *ip;
118 IFILE64 *ifp64;
119 IFILE32 *ifp32;
120 IFILE_V1 *ifp_v1;
121 struct buf *bp, *cbp;
122 int error;
123 daddr_t i, blkno, xmax;
124 ino_t oldhead, maxino, tail;
125 CLEANERINFO *cip;
126
127 ASSERT_SEGLOCK(fs);
128
129 /* XXX should check or assert that we aren't readonly. */
130
131 /*
132 * Get a block and extend the ifile inode. Leave the buffer for
133 * the block in bp.
134 */
135
136 vp = fs->lfs_ivnode;
137 ip = VTOI(vp);
138 blkno = lfs_lblkno(fs, ip->i_size);
139 if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140 &bp)) != 0) {
141 return (error);
142 }
143 ip->i_size += lfs_sb_getbsize(fs);
144 lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145 uvm_vnp_setsize(vp, ip->i_size);
146
147 /*
148 * Compute the new number of inodes, and reallocate the in-memory
149 * inode freemap.
150 */
151
152 maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153 lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154 fs->lfs_ino_bitmap = (lfs_bm_t *)
155 realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156 sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157 KASSERT(fs->lfs_ino_bitmap != NULL);
158
159 /* first new inode number */
160 i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161 lfs_sb_getifpb(fs);
162
163 /* inode number to stop at (XXX: why *x*max?) */
164 xmax = i + lfs_sb_getifpb(fs);
165
166 /*
167 * We insert the new inodes at the head of the free list.
168 * Under normal circumstances, the free list is empty here,
169 * so we are also incidentally placing them at the end (which
170 * we must do if we are to keep them in order).
171 */
172 LFS_GET_HEADFREE(fs, cip, cbp, &oldhead);
173 LFS_PUT_HEADFREE(fs, cip, cbp, i);
174 LFS_GET_TAILFREE(fs, cip, cbp, &tail);
175 if (tail == LFS_UNUSED_INUM) {
176 tail = xmax - 1;
177 LFS_PUT_TAILFREE(fs, cip, cbp, tail);
178 }
179 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
180 "inode 0 allocated [2]");
181
182
183 /*
184 * Initialize the ifile block.
185 *
186 * XXX: these loops should be restructured to use the accessor
187 * functions instead of using cutpaste polymorphism.
188 */
189
190 if (fs->lfs_is64) {
191 for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
192 SET_BITMAP_FREE(fs, i);
193 ifp64->if_version = 1;
194 ifp64->if_daddr = LFS_UNUSED_DADDR;
195 ifp64->if_nextfree = ++i;
196 }
197 ifp64--;
198 ifp64->if_nextfree = oldhead;
199 } else if (lfs_sb_getversion(fs) > 1) {
200 for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
201 SET_BITMAP_FREE(fs, i);
202 ifp32->if_version = 1;
203 ifp32->if_daddr = LFS_UNUSED_DADDR;
204 ifp32->if_nextfree = ++i;
205 }
206 ifp32--;
207 ifp32->if_nextfree = oldhead;
208 } else {
209 for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
210 SET_BITMAP_FREE(fs, i);
211 ifp_v1->if_version = 1;
212 ifp_v1->if_daddr = LFS_UNUSED_DADDR;
213 ifp_v1->if_nextfree = ++i;
214 }
215 ifp_v1--;
216 ifp_v1->if_nextfree = oldhead;
217 }
218
219 /*
220 * Write out the new block.
221 */
222
223 (void) LFS_BWRITE_LOG(bp); /* Ifile */
224
225 return 0;
226 }
227
228 /*
229 * Allocate an inode for a new file.
230 *
231 * Takes the segment lock. Also (while holding it) takes lfs_lock
232 * to frob fs->lfs_fmod.
233 *
234 * XXX: the mode argument is unused; should just get rid of it.
235 */
236 /* ARGSUSED */
237 /* VOP_BWRITE 2i times */
238 int
239 lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
240 ino_t *ino, int *gen)
241 {
242 struct lfs *fs;
243 struct buf *bp, *cbp;
244 IFILE *ifp;
245 int error;
246 CLEANERINFO *cip;
247
248 fs = VTOI(pvp)->i_lfs;
249 if (fs->lfs_ronly)
250 return EROFS;
251
252 if (!(fs->lfs_flags & LFS_NOTYET))
253 ASSERT_NO_SEGLOCK(fs);
254
255 lfs_seglock(fs, SEGM_PROT);
256
257 /* Get the head of the freelist. */
258 LFS_GET_HEADFREE(fs, cip, cbp, ino);
259
260 /* paranoia */
261 KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
262 DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
263 *ino));
264
265 /* Update the in-memory inode freemap */
266 CLR_BITMAP_FREE(fs, *ino);
267
268 /*
269 * Fetch the ifile entry and make sure the inode is really
270 * free.
271 */
272 LFS_IENTRY(ifp, fs, *ino, bp);
273 if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
274 panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
275 *ino);
276
277 /* Update the inode freelist head in the superblock. */
278 LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
279 DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
280 *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
281
282 /*
283 * Retrieve the version number from the ifile entry. It was
284 * bumped by vfree, so don't bump it again.
285 */
286 *gen = lfs_if_getversion(fs, ifp);
287
288 /* Done with ifile entry */
289 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
290 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
291 LFS_BWRITE_LOG(bp);
292
293 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
294 /*
295 * No more inodes; extend the ifile so that the next
296 * lfs_valloc will succeed.
297 */
298 if ((error = lfs_extend_ifile(fs, cred)) != 0) {
299 /* restore the freelist */
300 LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
301
302 /* unlock and return */
303 lfs_segunlock(fs);
304 return error;
305 }
306 }
307 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
308 "inode 0 allocated [3]");
309
310 /* Set superblock modified bit */
311 mutex_enter(&lfs_lock);
312 fs->lfs_fmod = 1;
313 mutex_exit(&lfs_lock);
314
315 /* increment file count */
316 lfs_sb_addnfiles(fs, 1);
317
318 /* done */
319 lfs_segunlock(fs);
320 return 0;
321 }
322
323 /*
324 * Allocate an inode for a new file, with given inode number and
325 * version.
326 *
327 * Called in the same context as lfs_valloc and therefore shares the
328 * same locking assumptions.
329 */
330 int
331 lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
332 {
333 IFILE *ifp;
334 struct buf *bp, *cbp;
335 ino_t headino, thisino, oldnext, tailino;
336 CLEANERINFO *cip;
337 int extended = 0;
338
339 if (fs->lfs_ronly)
340 return EROFS;
341
342 if (!(fs->lfs_flags & LFS_NOTYET))
343 ASSERT_NO_SEGLOCK(fs);
344
345
346 lfs_seglock(fs, SEGM_PROT);
347
348 /*
349 * If the ifile is too short to contain this inum, extend it.
350 *
351 * XXX: lfs_extend_ifile should take a size instead of always
352 * doing just one block at time.
353 */
354 while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
355 lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
356 << lfs_sb_getbshift(fs)) {
357 DLOG((DLOG_ALLOC, "extend ifile to accommodate ino %jd\n",
358 (intmax_t)ino));
359 lfs_extend_ifile(fs, NOCRED);
360 extended = 1;
361 }
362
363 /*
364 * Get the inode freelist next pointer.
365 */
366 LFS_IENTRY(ifp, fs, ino, bp);
367 oldnext = lfs_if_getnextfree(fs, ifp);
368 brelse(bp, 0);
369
370 /* Get fail of inode freelist */
371 LFS_GET_TAILFREE(fs, cip, cbp, &tailino);
372
373 /* Get head of inode freelist */
374 LFS_GET_HEADFREE(fs, cip, cbp, &headino);
375 if (headino == ino) {
376 /* Easy case: the inode we wanted was at the head */
377 LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
378 } else {
379 ino_t nextfree = 0, maxino, count; /* XXX: gcc */
380
381 /* Have to find the desired inode in the freelist... */
382 maxino = ((VTOI(fs->lfs_ivnode)->i_size >> lfs_sb_getbshift(fs))
383 - lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs))
384 * lfs_sb_getifpb(fs);
385 count = 0;
386
387 thisino = headino;
388 while (thisino != LFS_UNUSED_INUM) {
389 /* read this ifile entry */
390 LFS_IENTRY(ifp, fs, thisino, bp);
391 nextfree = lfs_if_getnextfree(fs, ifp);
392 /* stop if we find it or we hit the end */
393 if (nextfree == ino ||
394 nextfree == LFS_UNUSED_INUM)
395 break;
396 /* nope, keep going... */
397 thisino = nextfree;
398 brelse(bp, 0);
399 if (++count > maxino)
400 break;
401 }
402 if (count > maxino) {
403 panic("loop in free list");
404 lfs_segunlock(fs);
405 return ENOENT;
406 }
407 if (nextfree == LFS_UNUSED_INUM) {
408 /* hit the end -- this inode is not available */
409 brelse(bp, 0);
410 lfs_segunlock(fs);
411 if (extended)
412 panic("extended ifile to accommodate but inode not found");
413 return ENOENT;
414 }
415 /* found it; update the next pointer */
416 lfs_if_setnextfree(fs, ifp, oldnext);
417 /* write the ifile block */
418 LFS_BWRITE_LOG(bp);
419
420 /* If our inode was the tail, thisino is now the tail */
421 if (tailino == ino)
422 LFS_PUT_TAILFREE(fs, cip, cbp, thisino);
423 }
424
425 /* Clear nextfree, note daddr, and set generation number */
426 LFS_IENTRY(ifp, fs, ino, bp);
427 lfs_if_setversion(fs, ifp, vers);
428 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
429 LFS_BWRITE_LOG(bp);
430
431 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
432 int error;
433 /*
434 * No more inodes; extend the ifile so that the next
435 * lfs_valloc will succeed.
436 */
437 if ((error = lfs_extend_ifile(fs, NOCRED)) != 0) {
438 /* restore the freelist */
439 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
440
441 /* unlock and return */
442 lfs_segunlock(fs);
443 return error;
444 }
445 }
446 KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
447 "inode 0 allocated [4]");
448
449 /* done */
450 lfs_segunlock(fs);
451 return 0;
452 }
453
454 #if 0
455 /*
456 * Find the highest-numbered allocated inode.
457 * This will be used to shrink the Ifile.
458 */
459 static inline ino_t
460 lfs_last_alloc_ino(struct lfs *fs)
461 {
462 ino_t ino, maxino;
463
464 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
465 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
466 lfs_sb_getifpb(fs);
467 for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
468 if (ISSET_BITMAP_FREE(fs, ino) == 0)
469 break;
470 }
471 return ino;
472 }
473 #endif
474
475 /*
476 * Find the previous (next lowest numbered) free inode, if any.
477 * If there is none, return LFS_UNUSED_INUM.
478 *
479 * XXX: locking?
480 */
481 static inline ino_t
482 lfs_freelist_prev(struct lfs *fs, ino_t ino)
483 {
484 ino_t tino, bound, bb, freehdbb;
485
486 if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
487 /* No free inodes at all */
488 return LFS_UNUSED_INUM;
489 }
490
491 /* Search our own word first */
492 bound = ino & ~BMMASK;
493 for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
494 if (ISSET_BITMAP_FREE(fs, tino))
495 return tino;
496 /* If there are no lower words to search, just return */
497 if (ino >> BMSHIFT == 0)
498 return LFS_UNUSED_INUM;
499
500 /*
501 * Find a word with a free inode in it. We have to be a bit
502 * careful here since ino_t is unsigned.
503 */
504 freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
505 for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
506 if (fs->lfs_ino_bitmap[bb])
507 break;
508 if (fs->lfs_ino_bitmap[bb] == 0)
509 return LFS_UNUSED_INUM;
510
511 /* Search the word we found */
512 for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
513 tino > LFS_UNUSED_INUM; tino--)
514 if (ISSET_BITMAP_FREE(fs, tino))
515 break;
516
517 /* Avoid returning reserved inode numbers */
518 if (tino <= LFS_IFILE_INUM)
519 tino = LFS_UNUSED_INUM;
520
521 return tino;
522 }
523
524 /*
525 * Free an inode.
526 *
527 * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
528 */
529 /* ARGUSED */
530 /* VOP_BWRITE 2i times */
531 int
532 lfs_vfree(struct vnode *vp, ino_t ino, int mode)
533 {
534 SEGUSE *sup;
535 CLEANERINFO *cip;
536 struct buf *cbp, *bp;
537 IFILE *ifp;
538 struct inode *ip;
539 struct lfs *fs;
540 daddr_t old_iaddr;
541 ino_t otail;
542
543 /* Get the inode number and file system. */
544 ip = VTOI(vp);
545 fs = ip->i_lfs;
546 ino = ip->i_number;
547
548 /* XXX: assert not readonly */
549
550 ASSERT_NO_SEGLOCK(fs);
551 DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
552
553 /* Drain of pending writes */
554 mutex_enter(vp->v_interlock);
555 while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
556 cv_wait(&vp->v_cv, vp->v_interlock);
557 }
558 mutex_exit(vp->v_interlock);
559
560 lfs_seglock(fs, SEGM_PROT);
561
562 /*
563 * If the inode was in a dirop, it isn't now.
564 *
565 * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
566 * not updated together in one function? (and why do both exist,
567 * anyway?)
568 */
569 UNMARK_VNODE(vp);
570
571 mutex_enter(&lfs_lock);
572 if (vp->v_uflag & VU_DIROP) {
573 vp->v_uflag &= ~VU_DIROP;
574 --lfs_dirvcount;
575 --fs->lfs_dirvcount;
576 TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
577 wakeup(&fs->lfs_dirvcount);
578 wakeup(&lfs_dirvcount);
579 mutex_exit(&lfs_lock);
580 vrele(vp);
581
582 /*
583 * If this inode is not going to be written any more, any
584 * segment accounting left over from its truncation needs
585 * to occur at the end of the next dirops flush. Attach
586 * them to the fs-wide list for that purpose.
587 */
588 if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
589 struct segdelta *sd;
590
591 while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
592 LIST_REMOVE(sd, list);
593 LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
594 }
595 }
596 } else {
597 /*
598 * If it's not a dirop, we can finalize right away.
599 */
600 mutex_exit(&lfs_lock);
601 lfs_finalize_ino_seguse(fs, ip);
602 }
603
604 /* it is no longer an unwritten inode, so update the counts */
605 mutex_enter(&lfs_lock);
606 LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
607 mutex_exit(&lfs_lock);
608
609 /* Turn off all inode modification flags */
610 ip->i_state &= ~IN_ALLMOD;
611
612 /* Mark it deleted */
613 ip->i_lfs_iflags |= LFSI_DELETED;
614
615 /* Mark it free in the in-memory inode freemap */
616 SET_BITMAP_FREE(fs, ino);
617
618 /*
619 * Set the ifile's inode entry to unused, increment its version number
620 * and link it onto the free chain.
621 */
622
623 /* fetch the ifile entry */
624 LFS_IENTRY(ifp, fs, ino, bp);
625
626 /* update the on-disk address (to "nowhere") */
627 old_iaddr = lfs_if_getdaddr(fs, ifp);
628 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
629
630 /* bump the version */
631 lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
632
633 if (lfs_sb_getversion(fs) == 1) {
634 ino_t nextfree;
635
636 /* insert on freelist */
637 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
638 lfs_if_setnextfree(fs, ifp, nextfree);
639 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
640
641 /* write the ifile block */
642 (void) LFS_BWRITE_LOG(bp); /* Ifile */
643 } else {
644 ino_t tino, onf;
645
646 /*
647 * Clear the freelist next pointer and write the ifile
648 * block. XXX: why? I'm sure there must be a reason but
649 * it seems both silly and dangerous.
650 */
651 lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
652 (void) LFS_BWRITE_LOG(bp); /* Ifile */
653
654 /*
655 * Insert on freelist in order.
656 */
657
658 /* Find the next lower (by number) free inode */
659 tino = lfs_freelist_prev(fs, ino);
660
661 if (tino == LFS_UNUSED_INUM) {
662 ino_t nextfree;
663
664 /*
665 * There isn't one; put us on the freelist head.
666 */
667
668 /* reload the ifile block */
669 LFS_IENTRY(ifp, fs, ino, bp);
670 /* update the list */
671 LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
672 lfs_if_setnextfree(fs, ifp, nextfree);
673 LFS_PUT_HEADFREE(fs, cip, cbp, ino);
674 DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
675 (long long)nextfree, (long long)ino));
676 /* write the ifile block */
677 LFS_BWRITE_LOG(bp); /* Ifile */
678
679 /* If the list was empty, set tail too */
680 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
681 if (otail == LFS_UNUSED_INUM) {
682 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
683 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
684 "-> %lld\n", (long long)otail,
685 (long long)ino));
686 }
687 } else {
688 /*
689 * Insert this inode into the list after tino.
690 * We hold the segment lock so we don't have to
691 * worry about blocks being written out of order.
692 */
693
694 DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
695 " after %lld\n", ino, tino));
696
697 /* load the previous inode's ifile block */
698 LFS_IENTRY(ifp, fs, tino, bp);
699 /* update the list pointer */
700 onf = lfs_if_getnextfree(fs, ifp);
701 lfs_if_setnextfree(fs, ifp, ino);
702 /* write the block */
703 LFS_BWRITE_LOG(bp); /* Ifile */
704
705 /* load this inode's ifile block */
706 LFS_IENTRY(ifp, fs, ino, bp);
707 /* update the list pointer */
708 lfs_if_setnextfree(fs, ifp, onf);
709 /* write the block */
710 LFS_BWRITE_LOG(bp); /* Ifile */
711
712 /* If we're last, put us on the tail */
713 if (onf == LFS_UNUSED_INUM) {
714 LFS_GET_TAILFREE(fs, cip, cbp, &otail);
715 LFS_PUT_TAILFREE(fs, cip, cbp, ino);
716 DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
717 "-> %lld\n", (long long)otail,
718 (long long)ino));
719 }
720 }
721 }
722 /* XXX: shouldn't this check be further up *before* we trash the fs? */
723 KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
724
725 /*
726 * Update the segment summary for the segment where the on-disk
727 * copy used to be.
728 */
729 if (!DADDR_IS_BAD(old_iaddr)) {
730 /* load it */
731 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
732 /* the number of bytes in the segment should not become < 0 */
733 KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
734 "lfs_vfree: negative byte count"
735 " (segment %" PRIu32 " short by %d)\n",
736 lfs_dtosn(fs, old_iaddr),
737 (int)DINOSIZE(fs) - sup->su_nbytes);
738 /* update the number of bytes in the segment */
739 sup->su_nbytes -= DINOSIZE(fs);
740 /* write the segment entry */
741 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
742 }
743
744 /* Set superblock modified bit. */
745 mutex_enter(&lfs_lock);
746 fs->lfs_fmod = 1;
747 mutex_exit(&lfs_lock);
748
749 /* Decrement file count. */
750 lfs_sb_subnfiles(fs, 1);
751
752 lfs_segunlock(fs);
753
754 return (0);
755 }
756
757 /*
758 * Sort the freelist and set up the free-inode bitmap.
759 * To be called by lfs_mountfs().
760 *
761 * Takes the segmenet lock.
762 */
763 void
764 lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp)
765 {
766 CLEANERINFO *cip;
767 IFILE *ifp = NULL;
768 struct buf *bp;
769 ino_t ino, firstino, lastino, maxino;
770 ino_t *orphan = NULL;
771 size_t norphan = 0;
772 size_t norphan_alloc = 0;
773
774 ASSERT_NO_SEGLOCK(fs);
775 lfs_seglock(fs, SEGM_PROT);
776
777 /* largest inode on fs */
778 maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
779 lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
780
781 /* allocate the in-memory inode freemap */
782 /* XXX: assert that fs->lfs_ino_bitmap is null here */
783 fs->lfs_ino_bitmap =
784 malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
785 M_SEGMENT, M_WAITOK | M_ZERO);
786 KASSERT(fs->lfs_ino_bitmap != NULL);
787
788 /*
789 * Scan the ifile.
790 */
791
792 firstino = lastino = LFS_UNUSED_INUM;
793 for (ino = 0; ino < maxino; ino++) {
794 /* Load this inode's ifile entry. */
795 if (ino % lfs_sb_getifpb(fs) == 0)
796 LFS_IENTRY(ifp, fs, ino, bp);
797 else
798 LFS_IENTRY_NEXT(ifp, fs);
799
800 /* Don't put zero or ifile on the free list */
801 if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
802 continue;
803
804 /*
805 * Address orphaned files.
806 *
807 * The idea of this is to free inodes belonging to
808 * files that were unlinked but not reclaimed, I guess
809 * because if we're going to scan the whole ifile
810 * anyway it costs very little to do this. I don't
811 * immediately see any reason this should be disabled,
812 * but presumably it doesn't work... not sure what
813 * happens to such files currently. -- dholland 20160806
814 */
815 if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) {
816 if (orphan == NULL) {
817 norphan_alloc = 32; /* XXX pulled from arse */
818 orphan = kmem_zalloc(sizeof(orphan[0]) *
819 norphan_alloc, KM_SLEEP);
820 } else if (norphan == norphan_alloc) {
821 ino_t *orphan_new;
822 if (norphan_alloc >= 4096)
823 norphan_alloc += 4096;
824 else
825 norphan_alloc *= 2;
826 orphan_new = kmem_zalloc(sizeof(orphan[0]) *
827 norphan_alloc, KM_SLEEP);
828 memcpy(orphan_new, orphan, sizeof(orphan[0]) *
829 norphan);
830 kmem_free(orphan, sizeof(orphan[0]) * norphan);
831 orphan = orphan_new;
832 }
833 orphan[norphan++] = ino;
834 }
835
836 if (DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp))) {
837
838 /*
839 * This inode is free. Put it on the free list.
840 */
841
842 if (firstino == LFS_UNUSED_INUM) {
843 /* XXX: assert lastino == LFS_UNUSED_INUM? */
844 /* remember the first free inode */
845 firstino = ino;
846 } else {
847 /* release this inode's ifile entry */
848 brelse(bp, 0);
849
850 /* XXX: assert lastino != LFS_UNUSED_INUM? */
851
852 /* load lastino's ifile entry */
853 LFS_IENTRY(ifp, fs, lastino, bp);
854 /* set the list pointer */
855 lfs_if_setnextfree(fs, ifp, ino);
856 /* write the block */
857 LFS_BWRITE_LOG(bp);
858
859 /* reload this inode's ifile entry */
860 LFS_IENTRY(ifp, fs, ino, bp);
861 }
862 /* remember the last free inode seen so far */
863 lastino = ino;
864
865 /* Mark this inode free in the in-memory freemap */
866 SET_BITMAP_FREE(fs, ino);
867 }
868
869 /* If moving to the next ifile block, release the buffer. */
870 if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
871 brelse(bp, 0);
872 }
873
874 /* Write the freelist head and tail pointers */
875 /* XXX: do we need to mark the superblock dirty? */
876 LFS_PUT_HEADFREE(fs, cip, bp, firstino);
877 LFS_PUT_TAILFREE(fs, cip, bp, lastino);
878
879 /* done */
880 lfs_segunlock(fs);
881
882 /*
883 * Shrink the array of orphans so we don't have to carry around
884 * the allocation size.
885 */
886 if (norphan < norphan_alloc) {
887 ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan,
888 KM_SLEEP);
889 memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan);
890 kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc);
891 orphan = orphan_new;
892 norphan_alloc = norphan;
893 }
894
895 *orphanp = orphan;
896 *norphanp = norphan;
897 }
898
899 /*
900 * Mark a file orphaned (unlinked but not yet reclaimed) by inode
901 * number. Do this with a magic freelist next pointer.
902 *
903 * XXX: howzabout some locking?
904 */
905 void
906 lfs_orphan(struct lfs *fs, ino_t ino)
907 {
908 IFILE *ifp;
909 struct buf *bp;
910
911 LFS_IENTRY(ifp, fs, ino, bp);
912 lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs));
913 LFS_BWRITE_LOG(bp);
914 }
915
916 /*
917 * Free orphans discovered during mount. This is a separate stage
918 * because it requires fs->lfs_suflags to be set up, which is not done
919 * by the time we run lfs_order_freelist. It's possible that we could
920 * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner)
921 * but that requires more thought than I can put into this at the
922 * moment.
923 */
924 void
925 lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan)
926 {
927 size_t i;
928
929 for (i = 0; i < norphan; i++) {
930 ino_t ino = orphan[i];
931 unsigned segno;
932 struct vnode *vp;
933 struct inode *ip;
934 struct buf *bp;
935 IFILE *ifp;
936 SEGUSE *sup;
937 int error;
938
939 /* Get the segment the inode is in on disk. */
940 LFS_IENTRY(ifp, fs, ino, bp);
941 KASSERT(!DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp)));
942 segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
943 brelse(bp, 0);
944
945 /*
946 * Try to get the vnode. If we can't, tough -- hope
947 * you have backups!
948 */
949 error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE,
950 &vp);
951 if (error) {
952 printf("orphan %jd vget error %d\n", (intmax_t)ino,
953 error);
954 continue;
955 }
956
957 /*
958 * Sanity-check the inode.
959 *
960 * XXX What to do if it is still referenced?
961 */
962 ip = VTOI(vp);
963 if (ip->i_nlink != 0)
964 printf("orphan %jd nlink %d\n", (intmax_t)ino,
965 ip->i_nlink);
966
967 /*
968 * Truncate the inode, to free any blocks allocated for
969 * it, and release it, to free the inode number.
970 *
971 * XXX Isn't it redundant to truncate? Won't vput do
972 * that for us?
973 */
974 error = lfs_truncate(vp, 0, 0, NOCRED);
975 if (error)
976 printf("orphan %jd truncate error %d", (intmax_t)ino,
977 error);
978 vput(vp);
979
980 /* Update the number of bytes in the segment summary. */
981 LFS_SEGENTRY(sup, fs, segno, bp);
982 KASSERT(sup->su_nbytes >= DINOSIZE(fs));
983 sup->su_nbytes -= DINOSIZE(fs);
984 LFS_WRITESEGENTRY(sup, fs, segno, bp);
985
986 /* Drop the on-disk address. */
987 LFS_IENTRY(ifp, fs, ino, bp);
988 lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
989 LFS_BWRITE_LOG(bp);
990 }
991
992 if (orphan)
993 kmem_free(orphan, sizeof(orphan[0]) * norphan);
994 }
995