lfs_alloc.c revision 1.144 1 1.144 perseant /* $NetBSD: lfs_alloc.c,v 1.144 2025/09/17 03:50:38 perseant Exp $ */
2 1.2 cgd
3 1.17 perseant /*-
4 1.100 ad * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 1.17 perseant * All rights reserved.
6 1.17 perseant *
7 1.17 perseant * This code is derived from software contributed to The NetBSD Foundation
8 1.17 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 1.17 perseant *
10 1.17 perseant * Redistribution and use in source and binary forms, with or without
11 1.17 perseant * modification, are permitted provided that the following conditions
12 1.17 perseant * are met:
13 1.17 perseant * 1. Redistributions of source code must retain the above copyright
14 1.17 perseant * notice, this list of conditions and the following disclaimer.
15 1.17 perseant * 2. Redistributions in binary form must reproduce the above copyright
16 1.17 perseant * notice, this list of conditions and the following disclaimer in the
17 1.17 perseant * documentation and/or other materials provided with the distribution.
18 1.17 perseant *
19 1.17 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.17 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.17 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.17 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.17 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.17 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.17 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.17 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.17 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.17 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.17 perseant * POSSIBILITY OF SUCH DAMAGE.
30 1.17 perseant */
31 1.1 mycroft /*
32 1.1 mycroft * Copyright (c) 1991, 1993
33 1.1 mycroft * The Regents of the University of California. All rights reserved.
34 1.1 mycroft *
35 1.1 mycroft * Redistribution and use in source and binary forms, with or without
36 1.1 mycroft * modification, are permitted provided that the following conditions
37 1.1 mycroft * are met:
38 1.1 mycroft * 1. Redistributions of source code must retain the above copyright
39 1.1 mycroft * notice, this list of conditions and the following disclaimer.
40 1.1 mycroft * 2. Redistributions in binary form must reproduce the above copyright
41 1.1 mycroft * notice, this list of conditions and the following disclaimer in the
42 1.1 mycroft * documentation and/or other materials provided with the distribution.
43 1.71 agc * 3. Neither the name of the University nor the names of its contributors
44 1.1 mycroft * may be used to endorse or promote products derived from this software
45 1.1 mycroft * without specific prior written permission.
46 1.1 mycroft *
47 1.1 mycroft * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 1.1 mycroft * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 1.1 mycroft * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 1.1 mycroft * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 1.1 mycroft * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 1.1 mycroft * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 1.1 mycroft * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 1.1 mycroft * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 1.1 mycroft * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 1.1 mycroft * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 1.1 mycroft * SUCH DAMAGE.
58 1.1 mycroft *
59 1.2 cgd * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
60 1.1 mycroft */
61 1.52 lukem
62 1.52 lukem #include <sys/cdefs.h>
63 1.144 perseant __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.144 2025/09/17 03:50:38 perseant Exp $");
64 1.12 scottr
65 1.47 mrg #if defined(_KERNEL_OPT)
66 1.12 scottr #include "opt_quota.h"
67 1.13 scottr #endif
68 1.1 mycroft
69 1.1 mycroft #include <sys/param.h>
70 1.3 christos #include <sys/systm.h>
71 1.1 mycroft #include <sys/kernel.h>
72 1.1 mycroft #include <sys/buf.h>
73 1.56 perseant #include <sys/lock.h>
74 1.1 mycroft #include <sys/vnode.h>
75 1.1 mycroft #include <sys/syslog.h>
76 1.1 mycroft #include <sys/mount.h>
77 1.87 perseant #include <sys/malloc.h>
78 1.15 thorpej #include <sys/pool.h>
79 1.50 chs #include <sys/proc.h>
80 1.94 elad #include <sys/kauth.h>
81 1.1 mycroft
82 1.114 dholland #include <ufs/lfs/ulfs_quotacommon.h>
83 1.114 dholland #include <ufs/lfs/ulfs_inode.h>
84 1.114 dholland #include <ufs/lfs/ulfsmount.h>
85 1.114 dholland #include <ufs/lfs/ulfs_extern.h>
86 1.1 mycroft
87 1.1 mycroft #include <ufs/lfs/lfs.h>
88 1.124 dholland #include <ufs/lfs/lfs_accessors.h>
89 1.1 mycroft #include <ufs/lfs/lfs_extern.h>
90 1.118 dholland #include <ufs/lfs/lfs_kernel.h>
91 1.1 mycroft
92 1.144 perseant int lfs_do_check_freelist = 0;
93 1.144 perseant
94 1.87 perseant /* Constants for inode free bitmap */
95 1.88 perseant #define BMSHIFT 5 /* 2 ** 5 = 32 */
96 1.88 perseant #define BMMASK ((1 << BMSHIFT) - 1)
97 1.88 perseant #define SET_BITMAP_FREE(F, I) do { \
98 1.88 perseant DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \
99 1.88 perseant (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
100 1.139 kamil (F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \
101 1.88 perseant } while (0)
102 1.88 perseant #define CLR_BITMAP_FREE(F, I) do { \
103 1.88 perseant DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \
104 1.88 perseant (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
105 1.139 kamil (F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \
106 1.88 perseant } while(0)
107 1.88 perseant
108 1.87 perseant #define ISSET_BITMAP_FREE(F, I) \
109 1.139 kamil ((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
110 1.87 perseant
111 1.44 perseant /*
112 1.65 perseant * Add a new block to the Ifile, to accommodate future file creations.
113 1.65 perseant * Called with the segment lock held.
114 1.56 perseant */
115 1.96 perseant int
116 1.96 perseant lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
117 1.44 perseant {
118 1.44 perseant struct vnode *vp;
119 1.44 perseant struct inode *ip;
120 1.126 dholland IFILE64 *ifp64;
121 1.126 dholland IFILE32 *ifp32;
122 1.48 perseant IFILE_V1 *ifp_v1;
123 1.48 perseant struct buf *bp, *cbp;
124 1.44 perseant int error;
125 1.83 christos daddr_t i, blkno, xmax;
126 1.142 perseant ino_t oldhead, maxino, tail;
127 1.48 perseant CLEANERINFO *cip;
128 1.44 perseant
129 1.78 perseant ASSERT_SEGLOCK(fs);
130 1.78 perseant
131 1.132 dholland /* XXX should check or assert that we aren't readonly. */
132 1.132 dholland
133 1.132 dholland /*
134 1.132 dholland * Get a block and extend the ifile inode. Leave the buffer for
135 1.132 dholland * the block in bp.
136 1.132 dholland */
137 1.132 dholland
138 1.44 perseant vp = fs->lfs_ivnode;
139 1.44 perseant ip = VTOI(vp);
140 1.117 christos blkno = lfs_lblkno(fs, ip->i_size);
141 1.122 dholland if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
142 1.44 perseant &bp)) != 0) {
143 1.44 perseant return (error);
144 1.44 perseant }
145 1.122 dholland ip->i_size += lfs_sb_getbsize(fs);
146 1.129 dholland lfs_dino_setsize(fs, ip->i_din, ip->i_size);
147 1.66 fvdl uvm_vnp_setsize(vp, ip->i_size);
148 1.75 perry
149 1.132 dholland /*
150 1.132 dholland * Compute the new number of inodes, and reallocate the in-memory
151 1.132 dholland * inode freemap.
152 1.132 dholland */
153 1.132 dholland
154 1.123 dholland maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
155 1.122 dholland lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
156 1.88 perseant fs->lfs_ino_bitmap = (lfs_bm_t *)
157 1.88 perseant realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
158 1.88 perseant sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
159 1.88 perseant KASSERT(fs->lfs_ino_bitmap != NULL);
160 1.87 perseant
161 1.132 dholland /* first new inode number */
162 1.122 dholland i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
163 1.122 dholland lfs_sb_getifpb(fs);
164 1.87 perseant
165 1.142 perseant /* inode number to stop at (XXX: why *x*max?) */
166 1.142 perseant xmax = i + lfs_sb_getifpb(fs);
167 1.142 perseant
168 1.87 perseant /*
169 1.87 perseant * We insert the new inodes at the head of the free list.
170 1.87 perseant * Under normal circumstances, the free list is empty here,
171 1.87 perseant * so we are also incidentally placing them at the end (which
172 1.87 perseant * we must do if we are to keep them in order).
173 1.87 perseant */
174 1.142 perseant LFS_GET_HEADFREE(fs, cip, cbp, &oldhead);
175 1.48 perseant LFS_PUT_HEADFREE(fs, cip, cbp, i);
176 1.142 perseant LFS_GET_TAILFREE(fs, cip, cbp, &tail);
177 1.144 perseant DLOG((DLOG_ALLOC, "oldhead=%jd, i=%jd, xmax=%jd, oldtail=%jd\n",
178 1.144 perseant (intmax_t)oldhead, (intmax_t)i, (intmax_t)xmax,
179 1.144 perseant (intmax_t)tail));
180 1.142 perseant if (tail == LFS_UNUSED_INUM) {
181 1.142 perseant tail = xmax - 1;
182 1.142 perseant LFS_PUT_TAILFREE(fs, cip, cbp, tail);
183 1.142 perseant }
184 1.134 riastrad KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
185 1.134 riastrad "inode 0 allocated [2]");
186 1.132 dholland
187 1.142 perseant
188 1.132 dholland /*
189 1.132 dholland * Initialize the ifile block.
190 1.132 dholland *
191 1.132 dholland * XXX: these loops should be restructured to use the accessor
192 1.132 dholland * functions instead of using cutpaste polymorphism.
193 1.132 dholland */
194 1.132 dholland
195 1.126 dholland if (fs->lfs_is64) {
196 1.126 dholland for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
197 1.126 dholland SET_BITMAP_FREE(fs, i);
198 1.126 dholland ifp64->if_version = 1;
199 1.126 dholland ifp64->if_daddr = LFS_UNUSED_DADDR;
200 1.126 dholland ifp64->if_nextfree = ++i;
201 1.126 dholland }
202 1.126 dholland ifp64--;
203 1.142 perseant ifp64->if_nextfree = oldhead;
204 1.126 dholland } else if (lfs_sb_getversion(fs) > 1) {
205 1.126 dholland for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
206 1.126 dholland SET_BITMAP_FREE(fs, i);
207 1.126 dholland ifp32->if_version = 1;
208 1.126 dholland ifp32->if_daddr = LFS_UNUSED_DADDR;
209 1.126 dholland ifp32->if_nextfree = ++i;
210 1.126 dholland }
211 1.126 dholland ifp32--;
212 1.142 perseant ifp32->if_nextfree = oldhead;
213 1.126 dholland } else {
214 1.83 christos for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
215 1.89 perseant SET_BITMAP_FREE(fs, i);
216 1.48 perseant ifp_v1->if_version = 1;
217 1.48 perseant ifp_v1->if_daddr = LFS_UNUSED_DADDR;
218 1.48 perseant ifp_v1->if_nextfree = ++i;
219 1.48 perseant }
220 1.48 perseant ifp_v1--;
221 1.142 perseant ifp_v1->if_nextfree = oldhead;
222 1.44 perseant }
223 1.144 perseant DLOG((DLOG_ALLOC, " now head=%jd tail=%jd\n",
224 1.144 perseant (intmax_t)xmax - lfs_sb_getifpb(fs), (intmax_t)tail));
225 1.48 perseant
226 1.132 dholland /*
227 1.132 dholland * Write out the new block.
228 1.132 dholland */
229 1.132 dholland
230 1.56 perseant (void) LFS_BWRITE_LOG(bp); /* Ifile */
231 1.44 perseant
232 1.44 perseant return 0;
233 1.44 perseant }
234 1.44 perseant
235 1.132 dholland /*
236 1.132 dholland * Allocate an inode for a new file.
237 1.132 dholland *
238 1.132 dholland * Takes the segment lock. Also (while holding it) takes lfs_lock
239 1.132 dholland * to frob fs->lfs_fmod.
240 1.132 dholland *
241 1.132 dholland * XXX: the mode argument is unused; should just get rid of it.
242 1.132 dholland */
243 1.1 mycroft /* ARGSUSED */
244 1.43 perseant /* VOP_BWRITE 2i times */
245 1.1 mycroft int
246 1.99 christos lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
247 1.120 hannken ino_t *ino, int *gen)
248 1.3 christos {
249 1.1 mycroft struct lfs *fs;
250 1.48 perseant struct buf *bp, *cbp;
251 1.126 dholland IFILE *ifp;
252 1.1 mycroft int error;
253 1.48 perseant CLEANERINFO *cip;
254 1.1 mycroft
255 1.85 yamt fs = VTOI(pvp)->i_lfs;
256 1.38 perseant if (fs->lfs_ronly)
257 1.38 perseant return EROFS;
258 1.75 perry
259 1.142 perseant if (!(fs->lfs_flags & LFS_NOTYET))
260 1.142 perseant ASSERT_NO_SEGLOCK(fs);
261 1.78 perseant
262 1.144 perseant DEBUG_CHECK_FREELIST(fs);
263 1.144 perseant
264 1.56 perseant lfs_seglock(fs, SEGM_PROT);
265 1.17 perseant
266 1.1 mycroft /* Get the head of the freelist. */
267 1.120 hannken LFS_GET_HEADFREE(fs, cip, cbp, ino);
268 1.132 dholland
269 1.132 dholland /* paranoia */
270 1.120 hannken KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
271 1.120 hannken DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
272 1.120 hannken *ino));
273 1.75 perry
274 1.132 dholland /* Update the in-memory inode freemap */
275 1.132 dholland CLR_BITMAP_FREE(fs, *ino);
276 1.132 dholland
277 1.1 mycroft /*
278 1.132 dholland * Fetch the ifile entry and make sure the inode is really
279 1.132 dholland * free.
280 1.1 mycroft */
281 1.120 hannken LFS_IENTRY(ifp, fs, *ino, bp);
282 1.126 dholland if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
283 1.120 hannken panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
284 1.120 hannken *ino);
285 1.132 dholland
286 1.132 dholland /* Update the inode freelist head in the superblock. */
287 1.126 dholland LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
288 1.126 dholland DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
289 1.126 dholland *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
290 1.48 perseant
291 1.132 dholland /*
292 1.132 dholland * Retrieve the version number from the ifile entry. It was
293 1.132 dholland * bumped by vfree, so don't bump it again.
294 1.132 dholland */
295 1.126 dholland *gen = lfs_if_getversion(fs, ifp);
296 1.132 dholland
297 1.132 dholland /* Done with ifile entry */
298 1.144 perseant
299 1.144 perseant /*
300 1.144 perseant * Note LFS_ILLEGAL_DADDR == LFS_UNUSED_DADDR
301 1.144 perseant * unless DEBUG is set. In that case we
302 1.144 perseant * use that address to track inodes that have been
303 1.144 perseant * allocated (and so not on the free list) but do not
304 1.144 perseant * have proper disk addresses yet.
305 1.144 perseant */
306 1.144 perseant lfs_if_setdaddr(fs, ifp, LFS_ILLEGAL_DADDR);
307 1.142 perseant lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
308 1.142 perseant LFS_BWRITE_LOG(bp);
309 1.30 perseant
310 1.122 dholland if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
311 1.132 dholland /*
312 1.132 dholland * No more inodes; extend the ifile so that the next
313 1.132 dholland * lfs_valloc will succeed.
314 1.132 dholland */
315 1.96 perseant if ((error = lfs_extend_ifile(fs, cred)) != 0) {
316 1.132 dholland /* restore the freelist */
317 1.120 hannken LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
318 1.132 dholland
319 1.132 dholland /* unlock and return */
320 1.56 perseant lfs_segunlock(fs);
321 1.44 perseant return error;
322 1.1 mycroft }
323 1.1 mycroft }
324 1.134 riastrad KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
325 1.134 riastrad "inode 0 allocated [3]");
326 1.48 perseant
327 1.132 dholland /* Set superblock modified bit */
328 1.105 ad mutex_enter(&lfs_lock);
329 1.78 perseant fs->lfs_fmod = 1;
330 1.105 ad mutex_exit(&lfs_lock);
331 1.132 dholland
332 1.132 dholland /* increment file count */
333 1.122 dholland lfs_sb_addnfiles(fs, 1);
334 1.78 perseant
335 1.132 dholland /* done */
336 1.56 perseant lfs_segunlock(fs);
337 1.144 perseant
338 1.144 perseant DEBUG_CHECK_FREELIST(fs);
339 1.120 hannken return 0;
340 1.44 perseant }
341 1.44 perseant
342 1.65 perseant /*
343 1.132 dholland * Allocate an inode for a new file, with given inode number and
344 1.132 dholland * version.
345 1.132 dholland *
346 1.132 dholland * Called in the same context as lfs_valloc and therefore shares the
347 1.132 dholland * same locking assumptions.
348 1.65 perseant */
349 1.96 perseant int
350 1.120 hannken lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
351 1.44 perseant {
352 1.120 hannken IFILE *ifp;
353 1.120 hannken struct buf *bp, *cbp;
354 1.142 perseant ino_t headino, thisino, oldnext, tailino;
355 1.120 hannken CLEANERINFO *cip;
356 1.142 perseant int extended = 0;
357 1.40 fvdl
358 1.135 maya if (fs->lfs_ronly)
359 1.135 maya return EROFS;
360 1.135 maya
361 1.142 perseant if (!(fs->lfs_flags & LFS_NOTYET))
362 1.142 perseant ASSERT_NO_SEGLOCK(fs);
363 1.142 perseant
364 1.144 perseant DEBUG_CHECK_FREELIST(fs);
365 1.135 maya
366 1.135 maya lfs_seglock(fs, SEGM_PROT);
367 1.132 dholland
368 1.132 dholland /*
369 1.132 dholland * If the ifile is too short to contain this inum, extend it.
370 1.132 dholland *
371 1.132 dholland * XXX: lfs_extend_ifile should take a size instead of always
372 1.132 dholland * doing just one block at time.
373 1.132 dholland */
374 1.120 hannken while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
375 1.122 dholland lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
376 1.123 dholland << lfs_sb_getbshift(fs)) {
377 1.142 perseant DLOG((DLOG_ALLOC, "extend ifile to accommodate ino %jd\n",
378 1.142 perseant (intmax_t)ino));
379 1.120 hannken lfs_extend_ifile(fs, NOCRED);
380 1.142 perseant extended = 1;
381 1.120 hannken }
382 1.78 perseant
383 1.132 dholland /*
384 1.142 perseant * Get the inode freelist next pointer.
385 1.132 dholland */
386 1.120 hannken LFS_IENTRY(ifp, fs, ino, bp);
387 1.126 dholland oldnext = lfs_if_getnextfree(fs, ifp);
388 1.120 hannken brelse(bp, 0);
389 1.51 chs
390 1.142 perseant /* Get fail of inode freelist */
391 1.142 perseant LFS_GET_TAILFREE(fs, cip, cbp, &tailino);
392 1.142 perseant
393 1.132 dholland /* Get head of inode freelist */
394 1.130 dholland LFS_GET_HEADFREE(fs, cip, cbp, &headino);
395 1.130 dholland if (headino == ino) {
396 1.132 dholland /* Easy case: the inode we wanted was at the head */
397 1.120 hannken LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
398 1.120 hannken } else {
399 1.143 perseant ino_t nextfree = 0, maxino, count; /* XXX: gcc */
400 1.126 dholland
401 1.132 dholland /* Have to find the desired inode in the freelist... */
402 1.142 perseant maxino = ((VTOI(fs->lfs_ivnode)->i_size >> lfs_sb_getbshift(fs))
403 1.142 perseant - lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs))
404 1.142 perseant * lfs_sb_getifpb(fs);
405 1.142 perseant count = 0;
406 1.132 dholland
407 1.130 dholland thisino = headino;
408 1.142 perseant while (thisino != LFS_UNUSED_INUM) {
409 1.132 dholland /* read this ifile entry */
410 1.130 dholland LFS_IENTRY(ifp, fs, thisino, bp);
411 1.126 dholland nextfree = lfs_if_getnextfree(fs, ifp);
412 1.132 dholland /* stop if we find it or we hit the end */
413 1.126 dholland if (nextfree == ino ||
414 1.126 dholland nextfree == LFS_UNUSED_INUM)
415 1.120 hannken break;
416 1.132 dholland /* nope, keep going... */
417 1.130 dholland thisino = nextfree;
418 1.120 hannken brelse(bp, 0);
419 1.142 perseant if (++count > maxino)
420 1.142 perseant break;
421 1.142 perseant }
422 1.142 perseant if (count > maxino) {
423 1.142 perseant panic("loop in free list");
424 1.142 perseant lfs_segunlock(fs);
425 1.142 perseant return ENOENT;
426 1.120 hannken }
427 1.126 dholland if (nextfree == LFS_UNUSED_INUM) {
428 1.132 dholland /* hit the end -- this inode is not available */
429 1.120 hannken brelse(bp, 0);
430 1.135 maya lfs_segunlock(fs);
431 1.142 perseant if (extended)
432 1.142 perseant panic("extended ifile to accommodate but inode not found");
433 1.120 hannken return ENOENT;
434 1.120 hannken }
435 1.132 dholland /* found it; update the next pointer */
436 1.126 dholland lfs_if_setnextfree(fs, ifp, oldnext);
437 1.132 dholland /* write the ifile block */
438 1.120 hannken LFS_BWRITE_LOG(bp);
439 1.142 perseant
440 1.142 perseant /* If our inode was the tail, thisino is now the tail */
441 1.142 perseant if (tailino == ino)
442 1.142 perseant LFS_PUT_TAILFREE(fs, cip, cbp, thisino);
443 1.142 perseant }
444 1.142 perseant
445 1.142 perseant /* Clear nextfree, note daddr, and set generation number */
446 1.142 perseant LFS_IENTRY(ifp, fs, ino, bp);
447 1.142 perseant lfs_if_setversion(fs, ifp, vers);
448 1.142 perseant lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
449 1.144 perseant /* See comment in lfs_valloc */
450 1.144 perseant lfs_if_setdaddr(fs, ifp, LFS_ILLEGAL_DADDR);
451 1.142 perseant LFS_BWRITE_LOG(bp);
452 1.142 perseant
453 1.142 perseant if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
454 1.142 perseant int error;
455 1.142 perseant /*
456 1.142 perseant * No more inodes; extend the ifile so that the next
457 1.142 perseant * lfs_valloc will succeed.
458 1.142 perseant */
459 1.142 perseant if ((error = lfs_extend_ifile(fs, NOCRED)) != 0) {
460 1.142 perseant /* restore the freelist */
461 1.142 perseant LFS_PUT_HEADFREE(fs, cip, cbp, ino);
462 1.142 perseant
463 1.142 perseant /* unlock and return */
464 1.142 perseant lfs_segunlock(fs);
465 1.142 perseant return error;
466 1.142 perseant }
467 1.66 fvdl }
468 1.142 perseant KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
469 1.142 perseant "inode 0 allocated [4]");
470 1.51 chs
471 1.132 dholland /* done */
472 1.135 maya lfs_segunlock(fs);
473 1.144 perseant
474 1.144 perseant DEBUG_CHECK_FREELIST(fs);
475 1.144 perseant
476 1.120 hannken return 0;
477 1.1 mycroft }
478 1.1 mycroft
479 1.87 perseant #if 0
480 1.87 perseant /*
481 1.87 perseant * Find the highest-numbered allocated inode.
482 1.87 perseant * This will be used to shrink the Ifile.
483 1.87 perseant */
484 1.87 perseant static inline ino_t
485 1.87 perseant lfs_last_alloc_ino(struct lfs *fs)
486 1.87 perseant {
487 1.87 perseant ino_t ino, maxino;
488 1.87 perseant
489 1.123 dholland maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
490 1.131 dholland lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
491 1.131 dholland lfs_sb_getifpb(fs);
492 1.87 perseant for (ino = maxino - 1; ino > LFS_UNUSED_INUM; --ino) {
493 1.87 perseant if (ISSET_BITMAP_FREE(fs, ino) == 0)
494 1.87 perseant break;
495 1.87 perseant }
496 1.87 perseant return ino;
497 1.87 perseant }
498 1.87 perseant #endif
499 1.87 perseant
500 1.87 perseant /*
501 1.87 perseant * Find the previous (next lowest numbered) free inode, if any.
502 1.87 perseant * If there is none, return LFS_UNUSED_INUM.
503 1.132 dholland *
504 1.132 dholland * XXX: locking?
505 1.87 perseant */
506 1.87 perseant static inline ino_t
507 1.87 perseant lfs_freelist_prev(struct lfs *fs, ino_t ino)
508 1.87 perseant {
509 1.88 perseant ino_t tino, bound, bb, freehdbb;
510 1.88 perseant
511 1.132 dholland if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
512 1.132 dholland /* No free inodes at all */
513 1.88 perseant return LFS_UNUSED_INUM;
514 1.132 dholland }
515 1.88 perseant
516 1.88 perseant /* Search our own word first */
517 1.88 perseant bound = ino & ~BMMASK;
518 1.89 perseant for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
519 1.88 perseant if (ISSET_BITMAP_FREE(fs, tino))
520 1.88 perseant return tino;
521 1.88 perseant /* If there are no lower words to search, just return */
522 1.88 perseant if (ino >> BMSHIFT == 0)
523 1.88 perseant return LFS_UNUSED_INUM;
524 1.88 perseant
525 1.88 perseant /*
526 1.88 perseant * Find a word with a free inode in it. We have to be a bit
527 1.88 perseant * careful here since ino_t is unsigned.
528 1.88 perseant */
529 1.122 dholland freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
530 1.88 perseant for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
531 1.88 perseant if (fs->lfs_ino_bitmap[bb])
532 1.88 perseant break;
533 1.88 perseant if (fs->lfs_ino_bitmap[bb] == 0)
534 1.88 perseant return LFS_UNUSED_INUM;
535 1.88 perseant
536 1.88 perseant /* Search the word we found */
537 1.89 perseant for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
538 1.89 perseant tino > LFS_UNUSED_INUM; tino--)
539 1.88 perseant if (ISSET_BITMAP_FREE(fs, tino))
540 1.88 perseant break;
541 1.87 perseant
542 1.132 dholland /* Avoid returning reserved inode numbers */
543 1.87 perseant if (tino <= LFS_IFILE_INUM)
544 1.87 perseant tino = LFS_UNUSED_INUM;
545 1.87 perseant
546 1.87 perseant return tino;
547 1.87 perseant }
548 1.87 perseant
549 1.132 dholland /*
550 1.132 dholland * Free an inode.
551 1.132 dholland *
552 1.132 dholland * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
553 1.132 dholland */
554 1.1 mycroft /* ARGUSED */
555 1.43 perseant /* VOP_BWRITE 2i times */
556 1.1 mycroft int
557 1.99 christos lfs_vfree(struct vnode *vp, ino_t ino, int mode)
558 1.3 christos {
559 1.1 mycroft SEGUSE *sup;
560 1.48 perseant CLEANERINFO *cip;
561 1.48 perseant struct buf *cbp, *bp;
562 1.126 dholland IFILE *ifp;
563 1.1 mycroft struct inode *ip;
564 1.1 mycroft struct lfs *fs;
565 1.59 fvdl daddr_t old_iaddr;
566 1.75 perry
567 1.1 mycroft /* Get the inode number and file system. */
568 1.30 perseant ip = VTOI(vp);
569 1.1 mycroft fs = ip->i_lfs;
570 1.1 mycroft ino = ip->i_number;
571 1.34 perseant
572 1.132 dholland /* XXX: assert not readonly */
573 1.132 dholland
574 1.78 perseant ASSERT_NO_SEGLOCK(fs);
575 1.88 perseant DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
576 1.78 perseant
577 1.48 perseant /* Drain of pending writes */
578 1.111 rmind mutex_enter(vp->v_interlock);
579 1.125 dholland while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
580 1.111 rmind cv_wait(&vp->v_cv, vp->v_interlock);
581 1.105 ad }
582 1.111 rmind mutex_exit(vp->v_interlock);
583 1.48 perseant
584 1.63 perseant lfs_seglock(fs, SEGM_PROT);
585 1.75 perry
586 1.144 perseant DEBUG_CHECK_FREELIST(fs);
587 1.144 perseant
588 1.132 dholland /*
589 1.132 dholland * If the inode was in a dirop, it isn't now.
590 1.132 dholland *
591 1.136 maya * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
592 1.132 dholland * not updated together in one function? (and why do both exist,
593 1.132 dholland * anyway?)
594 1.132 dholland */
595 1.137 maya UNMARK_VNODE(vp);
596 1.132 dholland
597 1.105 ad mutex_enter(&lfs_lock);
598 1.103 ad if (vp->v_uflag & VU_DIROP) {
599 1.103 ad vp->v_uflag &= ~VU_DIROP;
600 1.30 perseant --lfs_dirvcount;
601 1.92 perseant --fs->lfs_dirvcount;
602 1.63 perseant TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
603 1.92 perseant wakeup(&fs->lfs_dirvcount);
604 1.30 perseant wakeup(&lfs_dirvcount);
605 1.105 ad mutex_exit(&lfs_lock);
606 1.120 hannken vrele(vp);
607 1.90 perseant
608 1.90 perseant /*
609 1.90 perseant * If this inode is not going to be written any more, any
610 1.90 perseant * segment accounting left over from its truncation needs
611 1.90 perseant * to occur at the end of the next dirops flush. Attach
612 1.90 perseant * them to the fs-wide list for that purpose.
613 1.90 perseant */
614 1.90 perseant if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
615 1.90 perseant struct segdelta *sd;
616 1.90 perseant
617 1.90 perseant while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
618 1.90 perseant LIST_REMOVE(sd, list);
619 1.90 perseant LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
620 1.90 perseant }
621 1.90 perseant }
622 1.90 perseant } else {
623 1.90 perseant /*
624 1.90 perseant * If it's not a dirop, we can finalize right away.
625 1.90 perseant */
626 1.105 ad mutex_exit(&lfs_lock);
627 1.90 perseant lfs_finalize_ino_seguse(fs, ip);
628 1.38 perseant }
629 1.30 perseant
630 1.132 dholland /* it is no longer an unwritten inode, so update the counts */
631 1.105 ad mutex_enter(&lfs_lock);
632 1.42 perseant LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
633 1.105 ad mutex_exit(&lfs_lock);
634 1.132 dholland
635 1.132 dholland /* Turn off all inode modification flags */
636 1.136 maya ip->i_state &= ~IN_ALLMOD;
637 1.132 dholland
638 1.132 dholland /* Mark it deleted */
639 1.93 perseant ip->i_lfs_iflags |= LFSI_DELETED;
640 1.93 perseant
641 1.132 dholland /* Mark it free in the in-memory inode freemap */
642 1.132 dholland SET_BITMAP_FREE(fs, ino);
643 1.132 dholland
644 1.1 mycroft /*
645 1.1 mycroft * Set the ifile's inode entry to unused, increment its version number
646 1.48 perseant * and link it onto the free chain.
647 1.1 mycroft */
648 1.132 dholland
649 1.132 dholland /* fetch the ifile entry */
650 1.1 mycroft LFS_IENTRY(ifp, fs, ino, bp);
651 1.132 dholland
652 1.132 dholland /* update the on-disk address (to "nowhere") */
653 1.126 dholland old_iaddr = lfs_if_getdaddr(fs, ifp);
654 1.126 dholland lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
655 1.132 dholland
656 1.132 dholland /* bump the version */
657 1.126 dholland lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
658 1.132 dholland
659 1.144 perseant #if 0
660 1.125 dholland if (lfs_sb_getversion(fs) == 1) {
661 1.144 perseant #endif
662 1.126 dholland ino_t nextfree;
663 1.126 dholland
664 1.132 dholland /* insert on freelist */
665 1.126 dholland LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
666 1.126 dholland lfs_if_setnextfree(fs, ifp, nextfree);
667 1.48 perseant LFS_PUT_HEADFREE(fs, cip, cbp, ino);
668 1.132 dholland
669 1.132 dholland /* write the ifile block */
670 1.56 perseant (void) LFS_BWRITE_LOG(bp); /* Ifile */
671 1.144 perseant #if 0
672 1.48 perseant } else {
673 1.144 perseant ino_t tino, onf, otail;
674 1.87 perseant
675 1.132 dholland /*
676 1.132 dholland * Clear the freelist next pointer and write the ifile
677 1.132 dholland * block. XXX: why? I'm sure there must be a reason but
678 1.132 dholland * it seems both silly and dangerous.
679 1.132 dholland */
680 1.126 dholland lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
681 1.56 perseant (void) LFS_BWRITE_LOG(bp); /* Ifile */
682 1.87 perseant
683 1.132 dholland /*
684 1.132 dholland * Insert on freelist in order.
685 1.132 dholland */
686 1.132 dholland
687 1.132 dholland /* Find the next lower (by number) free inode */
688 1.87 perseant tino = lfs_freelist_prev(fs, ino);
689 1.132 dholland
690 1.87 perseant if (tino == LFS_UNUSED_INUM) {
691 1.126 dholland ino_t nextfree;
692 1.126 dholland
693 1.132 dholland /*
694 1.132 dholland * There isn't one; put us on the freelist head.
695 1.132 dholland */
696 1.132 dholland
697 1.132 dholland /* reload the ifile block */
698 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
699 1.132 dholland /* update the list */
700 1.126 dholland LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
701 1.126 dholland lfs_if_setnextfree(fs, ifp, nextfree);
702 1.87 perseant LFS_PUT_HEADFREE(fs, cip, cbp, ino);
703 1.88 perseant DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
704 1.126 dholland (long long)nextfree, (long long)ino));
705 1.132 dholland /* write the ifile block */
706 1.87 perseant LFS_BWRITE_LOG(bp); /* Ifile */
707 1.87 perseant
708 1.87 perseant /* If the list was empty, set tail too */
709 1.87 perseant LFS_GET_TAILFREE(fs, cip, cbp, &otail);
710 1.87 perseant if (otail == LFS_UNUSED_INUM) {
711 1.87 perseant LFS_PUT_TAILFREE(fs, cip, cbp, ino);
712 1.87 perseant DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
713 1.87 perseant "-> %lld\n", (long long)otail,
714 1.87 perseant (long long)ino));
715 1.87 perseant }
716 1.87 perseant } else {
717 1.87 perseant /*
718 1.87 perseant * Insert this inode into the list after tino.
719 1.87 perseant * We hold the segment lock so we don't have to
720 1.87 perseant * worry about blocks being written out of order.
721 1.87 perseant */
722 1.132 dholland
723 1.87 perseant DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
724 1.87 perseant " after %lld\n", ino, tino));
725 1.87 perseant
726 1.132 dholland /* load the previous inode's ifile block */
727 1.87 perseant LFS_IENTRY(ifp, fs, tino, bp);
728 1.132 dholland /* update the list pointer */
729 1.126 dholland onf = lfs_if_getnextfree(fs, ifp);
730 1.126 dholland lfs_if_setnextfree(fs, ifp, ino);
731 1.132 dholland /* write the block */
732 1.87 perseant LFS_BWRITE_LOG(bp); /* Ifile */
733 1.87 perseant
734 1.132 dholland /* load this inode's ifile block */
735 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
736 1.132 dholland /* update the list pointer */
737 1.126 dholland lfs_if_setnextfree(fs, ifp, onf);
738 1.132 dholland /* write the block */
739 1.87 perseant LFS_BWRITE_LOG(bp); /* Ifile */
740 1.87 perseant
741 1.87 perseant /* If we're last, put us on the tail */
742 1.87 perseant if (onf == LFS_UNUSED_INUM) {
743 1.87 perseant LFS_GET_TAILFREE(fs, cip, cbp, &otail);
744 1.87 perseant LFS_PUT_TAILFREE(fs, cip, cbp, ino);
745 1.87 perseant DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
746 1.87 perseant "-> %lld\n", (long long)otail,
747 1.87 perseant (long long)ino));
748 1.87 perseant }
749 1.87 perseant }
750 1.48 perseant }
751 1.144 perseant #endif
752 1.132 dholland /* XXX: shouldn't this check be further up *before* we trash the fs? */
753 1.134 riastrad KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
754 1.132 dholland
755 1.132 dholland /*
756 1.132 dholland * Update the segment summary for the segment where the on-disk
757 1.132 dholland * copy used to be.
758 1.132 dholland */
759 1.142 perseant if (!DADDR_IS_BAD(old_iaddr)) {
760 1.132 dholland /* load it */
761 1.117 christos LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
762 1.132 dholland /* the number of bytes in the segment should not become < 0 */
763 1.134 riastrad KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
764 1.134 riastrad "lfs_vfree: negative byte count"
765 1.134 riastrad " (segment %" PRIu32 " short by %d)\n",
766 1.134 riastrad lfs_dtosn(fs, old_iaddr),
767 1.134 riastrad (int)DINOSIZE(fs) - sup->su_nbytes);
768 1.132 dholland /* update the number of bytes in the segment */
769 1.127 dholland sup->su_nbytes -= DINOSIZE(fs);
770 1.132 dholland /* write the segment entry */
771 1.117 christos LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
772 1.1 mycroft }
773 1.75 perry
774 1.132 dholland /* Set superblock modified bit. */
775 1.105 ad mutex_enter(&lfs_lock);
776 1.1 mycroft fs->lfs_fmod = 1;
777 1.105 ad mutex_exit(&lfs_lock);
778 1.132 dholland
779 1.132 dholland /* Decrement file count. */
780 1.122 dholland lfs_sb_subnfiles(fs, 1);
781 1.75 perry
782 1.56 perseant lfs_segunlock(fs);
783 1.63 perseant
784 1.144 perseant DEBUG_CHECK_FREELIST(fs);
785 1.144 perseant
786 1.1 mycroft return (0);
787 1.1 mycroft }
788 1.87 perseant
789 1.87 perseant /*
790 1.87 perseant * Sort the freelist and set up the free-inode bitmap.
791 1.87 perseant * To be called by lfs_mountfs().
792 1.132 dholland *
793 1.132 dholland * Takes the segmenet lock.
794 1.87 perseant */
795 1.87 perseant void
796 1.141 riastrad lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp)
797 1.87 perseant {
798 1.87 perseant CLEANERINFO *cip;
799 1.87 perseant IFILE *ifp = NULL;
800 1.87 perseant struct buf *bp;
801 1.87 perseant ino_t ino, firstino, lastino, maxino;
802 1.141 riastrad ino_t *orphan = NULL;
803 1.141 riastrad size_t norphan = 0;
804 1.141 riastrad size_t norphan_alloc = 0;
805 1.141 riastrad
806 1.95 perseant ASSERT_NO_SEGLOCK(fs);
807 1.95 perseant lfs_seglock(fs, SEGM_PROT);
808 1.95 perseant
809 1.144 perseant DEBUG_CHECK_FREELIST(fs);
810 1.144 perseant
811 1.132 dholland /* largest inode on fs */
812 1.123 dholland maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
813 1.122 dholland lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
814 1.132 dholland
815 1.132 dholland /* allocate the in-memory inode freemap */
816 1.132 dholland /* XXX: assert that fs->lfs_ino_bitmap is null here */
817 1.121 dholland fs->lfs_ino_bitmap =
818 1.88 perseant malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
819 1.88 perseant M_SEGMENT, M_WAITOK | M_ZERO);
820 1.88 perseant KASSERT(fs->lfs_ino_bitmap != NULL);
821 1.87 perseant
822 1.132 dholland /*
823 1.132 dholland * Scan the ifile.
824 1.132 dholland */
825 1.132 dholland
826 1.87 perseant firstino = lastino = LFS_UNUSED_INUM;
827 1.87 perseant for (ino = 0; ino < maxino; ino++) {
828 1.132 dholland /* Load this inode's ifile entry. */
829 1.122 dholland if (ino % lfs_sb_getifpb(fs) == 0)
830 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
831 1.87 perseant else
832 1.128 mlelstv LFS_IENTRY_NEXT(ifp, fs);
833 1.87 perseant
834 1.87 perseant /* Don't put zero or ifile on the free list */
835 1.87 perseant if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
836 1.87 perseant continue;
837 1.87 perseant
838 1.132 dholland /*
839 1.132 dholland * Address orphaned files.
840 1.132 dholland *
841 1.132 dholland * The idea of this is to free inodes belonging to
842 1.132 dholland * files that were unlinked but not reclaimed, I guess
843 1.132 dholland * because if we're going to scan the whole ifile
844 1.132 dholland * anyway it costs very little to do this. I don't
845 1.132 dholland * immediately see any reason this should be disabled,
846 1.132 dholland * but presumably it doesn't work... not sure what
847 1.132 dholland * happens to such files currently. -- dholland 20160806
848 1.132 dholland */
849 1.141 riastrad if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) {
850 1.141 riastrad if (orphan == NULL) {
851 1.141 riastrad norphan_alloc = 32; /* XXX pulled from arse */
852 1.141 riastrad orphan = kmem_zalloc(sizeof(orphan[0]) *
853 1.141 riastrad norphan_alloc, KM_SLEEP);
854 1.141 riastrad } else if (norphan == norphan_alloc) {
855 1.141 riastrad ino_t *orphan_new;
856 1.141 riastrad if (norphan_alloc >= 4096)
857 1.141 riastrad norphan_alloc += 4096;
858 1.141 riastrad else
859 1.141 riastrad norphan_alloc *= 2;
860 1.141 riastrad orphan_new = kmem_zalloc(sizeof(orphan[0]) *
861 1.141 riastrad norphan_alloc, KM_SLEEP);
862 1.141 riastrad memcpy(orphan_new, orphan, sizeof(orphan[0]) *
863 1.141 riastrad norphan);
864 1.141 riastrad kmem_free(orphan, sizeof(orphan[0]) * norphan);
865 1.141 riastrad orphan = orphan_new;
866 1.141 riastrad }
867 1.141 riastrad orphan[norphan++] = ino;
868 1.97 perseant }
869 1.97 perseant
870 1.142 perseant if (DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp))) {
871 1.132 dholland
872 1.132 dholland /*
873 1.132 dholland * This inode is free. Put it on the free list.
874 1.132 dholland */
875 1.132 dholland
876 1.132 dholland if (firstino == LFS_UNUSED_INUM) {
877 1.132 dholland /* XXX: assert lastino == LFS_UNUSED_INUM? */
878 1.132 dholland /* remember the first free inode */
879 1.87 perseant firstino = ino;
880 1.132 dholland } else {
881 1.132 dholland /* release this inode's ifile entry */
882 1.102 ad brelse(bp, 0);
883 1.87 perseant
884 1.132 dholland /* XXX: assert lastino != LFS_UNUSED_INUM? */
885 1.132 dholland
886 1.132 dholland /* load lastino's ifile entry */
887 1.87 perseant LFS_IENTRY(ifp, fs, lastino, bp);
888 1.132 dholland /* set the list pointer */
889 1.126 dholland lfs_if_setnextfree(fs, ifp, ino);
890 1.132 dholland /* write the block */
891 1.87 perseant LFS_BWRITE_LOG(bp);
892 1.132 dholland
893 1.132 dholland /* reload this inode's ifile entry */
894 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
895 1.87 perseant }
896 1.132 dholland /* remember the last free inode seen so far */
897 1.87 perseant lastino = ino;
898 1.87 perseant
899 1.132 dholland /* Mark this inode free in the in-memory freemap */
900 1.87 perseant SET_BITMAP_FREE(fs, ino);
901 1.87 perseant }
902 1.87 perseant
903 1.132 dholland /* If moving to the next ifile block, release the buffer. */
904 1.122 dholland if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
905 1.102 ad brelse(bp, 0);
906 1.87 perseant }
907 1.87 perseant
908 1.132 dholland /* Write the freelist head and tail pointers */
909 1.132 dholland /* XXX: do we need to mark the superblock dirty? */
910 1.87 perseant LFS_PUT_HEADFREE(fs, cip, bp, firstino);
911 1.87 perseant LFS_PUT_TAILFREE(fs, cip, bp, lastino);
912 1.95 perseant
913 1.132 dholland /* done */
914 1.95 perseant lfs_segunlock(fs);
915 1.141 riastrad
916 1.141 riastrad /*
917 1.141 riastrad * Shrink the array of orphans so we don't have to carry around
918 1.141 riastrad * the allocation size.
919 1.141 riastrad */
920 1.141 riastrad if (norphan < norphan_alloc) {
921 1.141 riastrad ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan,
922 1.141 riastrad KM_SLEEP);
923 1.141 riastrad memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan);
924 1.141 riastrad kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc);
925 1.141 riastrad orphan = orphan_new;
926 1.141 riastrad norphan_alloc = norphan;
927 1.141 riastrad }
928 1.141 riastrad
929 1.141 riastrad *orphanp = orphan;
930 1.141 riastrad *norphanp = norphan;
931 1.144 perseant
932 1.144 perseant DEBUG_CHECK_FREELIST(fs);
933 1.87 perseant }
934 1.97 perseant
935 1.132 dholland /*
936 1.132 dholland * Mark a file orphaned (unlinked but not yet reclaimed) by inode
937 1.132 dholland * number. Do this with a magic freelist next pointer.
938 1.132 dholland *
939 1.132 dholland * XXX: howzabout some locking?
940 1.132 dholland */
941 1.97 perseant void
942 1.97 perseant lfs_orphan(struct lfs *fs, ino_t ino)
943 1.97 perseant {
944 1.97 perseant IFILE *ifp;
945 1.97 perseant struct buf *bp;
946 1.97 perseant
947 1.97 perseant LFS_IENTRY(ifp, fs, ino, bp);
948 1.140 riastrad lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs));
949 1.97 perseant LFS_BWRITE_LOG(bp);
950 1.97 perseant }
951 1.141 riastrad
952 1.141 riastrad /*
953 1.141 riastrad * Free orphans discovered during mount. This is a separate stage
954 1.141 riastrad * because it requires fs->lfs_suflags to be set up, which is not done
955 1.141 riastrad * by the time we run lfs_order_freelist. It's possible that we could
956 1.141 riastrad * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner)
957 1.141 riastrad * but that requires more thought than I can put into this at the
958 1.141 riastrad * moment.
959 1.141 riastrad */
960 1.141 riastrad void
961 1.141 riastrad lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan)
962 1.141 riastrad {
963 1.141 riastrad size_t i;
964 1.141 riastrad
965 1.144 perseant DEBUG_CHECK_FREELIST(fs);
966 1.144 perseant
967 1.141 riastrad for (i = 0; i < norphan; i++) {
968 1.141 riastrad ino_t ino = orphan[i];
969 1.141 riastrad unsigned segno;
970 1.141 riastrad struct vnode *vp;
971 1.141 riastrad struct inode *ip;
972 1.141 riastrad struct buf *bp;
973 1.141 riastrad IFILE *ifp;
974 1.141 riastrad SEGUSE *sup;
975 1.141 riastrad int error;
976 1.141 riastrad
977 1.141 riastrad /* Get the segment the inode is in on disk. */
978 1.141 riastrad LFS_IENTRY(ifp, fs, ino, bp);
979 1.142 perseant KASSERT(!DADDR_IS_BAD(lfs_if_getdaddr(fs, ifp)));
980 1.141 riastrad segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
981 1.141 riastrad brelse(bp, 0);
982 1.141 riastrad
983 1.141 riastrad /*
984 1.141 riastrad * Try to get the vnode. If we can't, tough -- hope
985 1.141 riastrad * you have backups!
986 1.141 riastrad */
987 1.141 riastrad error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, LK_EXCLUSIVE,
988 1.141 riastrad &vp);
989 1.141 riastrad if (error) {
990 1.141 riastrad printf("orphan %jd vget error %d\n", (intmax_t)ino,
991 1.141 riastrad error);
992 1.141 riastrad continue;
993 1.141 riastrad }
994 1.141 riastrad
995 1.141 riastrad /*
996 1.141 riastrad * Sanity-check the inode.
997 1.141 riastrad *
998 1.141 riastrad * XXX What to do if it is still referenced?
999 1.141 riastrad */
1000 1.141 riastrad ip = VTOI(vp);
1001 1.141 riastrad if (ip->i_nlink != 0)
1002 1.141 riastrad printf("orphan %jd nlink %d\n", (intmax_t)ino,
1003 1.141 riastrad ip->i_nlink);
1004 1.141 riastrad
1005 1.141 riastrad /*
1006 1.141 riastrad * Truncate the inode, to free any blocks allocated for
1007 1.141 riastrad * it, and release it, to free the inode number.
1008 1.141 riastrad *
1009 1.141 riastrad * XXX Isn't it redundant to truncate? Won't vput do
1010 1.141 riastrad * that for us?
1011 1.141 riastrad */
1012 1.141 riastrad error = lfs_truncate(vp, 0, 0, NOCRED);
1013 1.141 riastrad if (error)
1014 1.141 riastrad printf("orphan %jd truncate error %d", (intmax_t)ino,
1015 1.141 riastrad error);
1016 1.141 riastrad vput(vp);
1017 1.141 riastrad
1018 1.141 riastrad /* Update the number of bytes in the segment summary. */
1019 1.141 riastrad LFS_SEGENTRY(sup, fs, segno, bp);
1020 1.141 riastrad KASSERT(sup->su_nbytes >= DINOSIZE(fs));
1021 1.141 riastrad sup->su_nbytes -= DINOSIZE(fs);
1022 1.141 riastrad LFS_WRITESEGENTRY(sup, fs, segno, bp);
1023 1.141 riastrad
1024 1.141 riastrad /* Drop the on-disk address. */
1025 1.141 riastrad LFS_IENTRY(ifp, fs, ino, bp);
1026 1.141 riastrad lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
1027 1.141 riastrad LFS_BWRITE_LOG(bp);
1028 1.141 riastrad }
1029 1.141 riastrad
1030 1.141 riastrad if (orphan)
1031 1.141 riastrad kmem_free(orphan, sizeof(orphan[0]) * norphan);
1032 1.144 perseant
1033 1.144 perseant DEBUG_CHECK_FREELIST(fs);
1034 1.144 perseant }
1035 1.144 perseant
1036 1.144 perseant #ifdef DEBUG
1037 1.144 perseant static void dump_freelist(struct lfs *);
1038 1.144 perseant
1039 1.144 perseant void
1040 1.144 perseant lfs_check_freelist(struct lfs *fs, const char *func, int line)
1041 1.144 perseant {
1042 1.144 perseant ino_t i, headino, maxino, thisino, tailino, nextfree;
1043 1.144 perseant int nfree, count;
1044 1.144 perseant struct inode *ip;
1045 1.144 perseant IFILE *ifp;
1046 1.144 perseant CLEANERINFO *cip;
1047 1.144 perseant struct buf *bp;
1048 1.144 perseant
1049 1.144 perseant if (!lfs_do_check_freelist)
1050 1.144 perseant return;
1051 1.144 perseant
1052 1.144 perseant lfs_seglock(fs, SEGM_PROT);
1053 1.144 perseant
1054 1.144 perseant ip = VTOI(fs->lfs_ivnode);
1055 1.144 perseant maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
1056 1.144 perseant lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
1057 1.144 perseant
1058 1.144 perseant /*
1059 1.144 perseant * First, check every node. Every inode that doesn't have a disk
1060 1.144 perseant * address must have a nextfree pointer, the only exception
1061 1.144 perseant * being the tail of the free list.
1062 1.144 perseant */
1063 1.144 perseant LFS_GET_TAILFREE(fs, cip, bp, &tailino);
1064 1.144 perseant nfree = 0;
1065 1.144 perseant for (i = LFS_IFILE_INUM + 1; i < maxino; ++i) {
1066 1.144 perseant LFS_IENTRY(ifp, fs, i, bp);
1067 1.144 perseant if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
1068 1.144 perseant ++nfree;
1069 1.144 perseant if (lfs_if_getnextfree(fs, ifp) == LFS_UNUSED_INUM
1070 1.144 perseant && i != tailino) {
1071 1.144 perseant brelse(bp, 0);
1072 1.144 perseant dump_freelist(fs);
1073 1.144 perseant printf("At %s:%d:\n", func, line);
1074 1.144 perseant printf("tailino=%jd, but ino=%jd"
1075 1.144 perseant " neither daddr nor nextfree\n",
1076 1.144 perseant (intmax_t)tailino, (intmax_t)i);
1077 1.144 perseant panic("Free list leak\n");
1078 1.144 perseant }
1079 1.144 perseant }
1080 1.144 perseant if (i == tailino
1081 1.144 perseant || (!INUM_IS_BAD(fs, lfs_if_getnextfree(fs, ifp)))) {
1082 1.144 perseant if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR) {
1083 1.144 perseant brelse(bp, 0);
1084 1.144 perseant dump_freelist(fs);
1085 1.144 perseant printf("At %s:%d:\n", func, line);
1086 1.144 perseant printf("with tailino=%jd, ino=%jd"
1087 1.144 perseant " daddr=0x%jx, nextfree=0x%jx\n",
1088 1.144 perseant (intmax_t)tailino,
1089 1.144 perseant (intmax_t)i,
1090 1.144 perseant (intmax_t)lfs_if_getdaddr(fs, ifp),
1091 1.144 perseant (intmax_t)lfs_if_getnextfree(fs, ifp));
1092 1.144 perseant panic("In use inode on free list\n");
1093 1.144 perseant }
1094 1.144 perseant }
1095 1.144 perseant brelse(bp, 0);
1096 1.144 perseant }
1097 1.144 perseant
1098 1.144 perseant /*
1099 1.144 perseant * Walk the free list from head to tail. We should end up with
1100 1.144 perseant * the same number of free inodes as we counted above.
1101 1.144 perseant */
1102 1.144 perseant
1103 1.144 perseant /* Get head of inode freelist */
1104 1.144 perseant LFS_GET_HEADFREE(fs, cip, bp, &headino);
1105 1.144 perseant count = 0;
1106 1.144 perseant thisino = headino;
1107 1.144 perseant while (thisino != LFS_UNUSED_INUM) {
1108 1.144 perseant if (++count > maxino)
1109 1.144 perseant break;
1110 1.144 perseant /* read this ifile entry */
1111 1.144 perseant LFS_IENTRY(ifp, fs, thisino, bp);
1112 1.144 perseant nextfree = lfs_if_getnextfree(fs, ifp);
1113 1.144 perseant brelse(bp, 0);
1114 1.144 perseant if (nextfree == LFS_UNUSED_INUM)
1115 1.144 perseant break;
1116 1.144 perseant thisino = nextfree;
1117 1.144 perseant }
1118 1.144 perseant if (count > maxino) {
1119 1.144 perseant dump_freelist(fs);
1120 1.144 perseant printf("At %s:%d:\n", func, line);
1121 1.144 perseant printf("count=%jd, maxino=%jd:\n",
1122 1.144 perseant (intmax_t)count, (intmax_t)maxino);
1123 1.144 perseant panic("loop in free list");
1124 1.144 perseant }
1125 1.144 perseant if (count != nfree) {
1126 1.144 perseant dump_freelist(fs);
1127 1.144 perseant printf("At %s:%d:\n", func, line);
1128 1.144 perseant printf("%d inodes without addresses, %d on free list\n",
1129 1.144 perseant nfree, count);
1130 1.144 perseant panic("Bad free list count");
1131 1.144 perseant }
1132 1.144 perseant if (thisino != tailino) {
1133 1.144 perseant dump_freelist(fs);
1134 1.144 perseant printf("At %s:%d:\n", func, line);
1135 1.144 perseant printf("Last ino %jd but tail %jd\n",
1136 1.144 perseant (intmax_t)thisino, (intmax_t)tailino);
1137 1.144 perseant panic("Bad tail");
1138 1.144 perseant }
1139 1.144 perseant lfs_segunlock(fs);
1140 1.144 perseant }
1141 1.144 perseant
1142 1.144 perseant static void
1143 1.144 perseant dump_freelist(struct lfs *fs)
1144 1.144 perseant {
1145 1.144 perseant ino_t i, ni, maxino, headino, tailino;
1146 1.144 perseant struct inode *ip;
1147 1.144 perseant IFILE *ifp;
1148 1.144 perseant CLEANERINFO *cip;
1149 1.144 perseant struct buf *bp;
1150 1.144 perseant int count;
1151 1.144 perseant
1152 1.144 perseant ip = VTOI(fs->lfs_ivnode);
1153 1.144 perseant maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
1154 1.144 perseant lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
1155 1.144 perseant
1156 1.144 perseant LFS_GET_HEADFREE(fs, cip, bp, &headino);
1157 1.144 perseant printf(" head: %jd\n", (intmax_t)headino);
1158 1.144 perseant LFS_GET_TAILFREE(fs, cip, bp, &tailino);
1159 1.144 perseant printf(" tail: %jd\n", (intmax_t)tailino);
1160 1.144 perseant count = 0;
1161 1.144 perseant for (i = LFS_IFILE_INUM + 1; i < maxino; ++i) {
1162 1.144 perseant LFS_IENTRY(ifp, fs, i, bp);
1163 1.144 perseant ni = lfs_if_getnextfree(fs, ifp);
1164 1.144 perseant if (ni != LFS_UNUSED_DADDR) {
1165 1.144 perseant printf("%jd -> %jd\n",
1166 1.144 perseant (intmax_t)i, (intmax_t)ni);
1167 1.144 perseant if (++count > 30) {
1168 1.144 perseant printf("...\n");
1169 1.144 perseant i = maxino; /* terminate loop */
1170 1.144 perseant }
1171 1.144 perseant }
1172 1.144 perseant brelse(bp, 0);
1173 1.144 perseant }
1174 1.141 riastrad }
1175 1.144 perseant #endif /* DEBUG */
1176