lfs_alloc.c revision 1.136 1 1.136 maya /* $NetBSD: lfs_alloc.c,v 1.136 2017/06/10 05:29:36 maya Exp $ */
2 1.2 cgd
3 1.17 perseant /*-
4 1.100 ad * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007 The NetBSD Foundation, Inc.
5 1.17 perseant * All rights reserved.
6 1.17 perseant *
7 1.17 perseant * This code is derived from software contributed to The NetBSD Foundation
8 1.17 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>.
9 1.17 perseant *
10 1.17 perseant * Redistribution and use in source and binary forms, with or without
11 1.17 perseant * modification, are permitted provided that the following conditions
12 1.17 perseant * are met:
13 1.17 perseant * 1. Redistributions of source code must retain the above copyright
14 1.17 perseant * notice, this list of conditions and the following disclaimer.
15 1.17 perseant * 2. Redistributions in binary form must reproduce the above copyright
16 1.17 perseant * notice, this list of conditions and the following disclaimer in the
17 1.17 perseant * documentation and/or other materials provided with the distribution.
18 1.17 perseant *
19 1.17 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.17 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.17 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.17 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.17 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.17 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.17 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.17 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.17 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.17 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.17 perseant * POSSIBILITY OF SUCH DAMAGE.
30 1.17 perseant */
31 1.1 mycroft /*
32 1.1 mycroft * Copyright (c) 1991, 1993
33 1.1 mycroft * The Regents of the University of California. All rights reserved.
34 1.1 mycroft *
35 1.1 mycroft * Redistribution and use in source and binary forms, with or without
36 1.1 mycroft * modification, are permitted provided that the following conditions
37 1.1 mycroft * are met:
38 1.1 mycroft * 1. Redistributions of source code must retain the above copyright
39 1.1 mycroft * notice, this list of conditions and the following disclaimer.
40 1.1 mycroft * 2. Redistributions in binary form must reproduce the above copyright
41 1.1 mycroft * notice, this list of conditions and the following disclaimer in the
42 1.1 mycroft * documentation and/or other materials provided with the distribution.
43 1.71 agc * 3. Neither the name of the University nor the names of its contributors
44 1.1 mycroft * may be used to endorse or promote products derived from this software
45 1.1 mycroft * without specific prior written permission.
46 1.1 mycroft *
47 1.1 mycroft * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 1.1 mycroft * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 1.1 mycroft * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 1.1 mycroft * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 1.1 mycroft * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 1.1 mycroft * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 1.1 mycroft * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 1.1 mycroft * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 1.1 mycroft * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 1.1 mycroft * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 1.1 mycroft * SUCH DAMAGE.
58 1.1 mycroft *
59 1.2 cgd * @(#)lfs_alloc.c 8.4 (Berkeley) 1/4/94
60 1.1 mycroft */
61 1.52 lukem
62 1.52 lukem #include <sys/cdefs.h>
63 1.136 maya __KERNEL_RCSID(0, "$NetBSD: lfs_alloc.c,v 1.136 2017/06/10 05:29:36 maya Exp $");
64 1.12 scottr
65 1.47 mrg #if defined(_KERNEL_OPT)
66 1.12 scottr #include "opt_quota.h"
67 1.13 scottr #endif
68 1.1 mycroft
69 1.1 mycroft #include <sys/param.h>
70 1.3 christos #include <sys/systm.h>
71 1.1 mycroft #include <sys/kernel.h>
72 1.1 mycroft #include <sys/buf.h>
73 1.56 perseant #include <sys/lock.h>
74 1.1 mycroft #include <sys/vnode.h>
75 1.1 mycroft #include <sys/syslog.h>
76 1.1 mycroft #include <sys/mount.h>
77 1.87 perseant #include <sys/malloc.h>
78 1.15 thorpej #include <sys/pool.h>
79 1.50 chs #include <sys/proc.h>
80 1.94 elad #include <sys/kauth.h>
81 1.1 mycroft
82 1.114 dholland #include <ufs/lfs/ulfs_quotacommon.h>
83 1.114 dholland #include <ufs/lfs/ulfs_inode.h>
84 1.114 dholland #include <ufs/lfs/ulfsmount.h>
85 1.114 dholland #include <ufs/lfs/ulfs_extern.h>
86 1.1 mycroft
87 1.1 mycroft #include <ufs/lfs/lfs.h>
88 1.124 dholland #include <ufs/lfs/lfs_accessors.h>
89 1.1 mycroft #include <ufs/lfs/lfs_extern.h>
90 1.118 dholland #include <ufs/lfs/lfs_kernel.h>
91 1.1 mycroft
/*
 * Constants and helpers for the in-memory inode free bitmap.
 *
 * The bitmap is an array of 32-bit words; inode I lives in word
 * (I >> BMSHIFT), bit (I & BMMASK).  A set bit means "inode is free".
 *
 * The single-bit masks are built from an unsigned constant: with a
 * plain int, selecting bit 31 would shift into the sign bit, which is
 * undefined behaviour in C.
 *
 * BMMASK itself deliberately stays a signed int: code in this file
 * computes (ino & ~BMMASK) on a wider unsigned type and relies on
 * ~BMMASK being sign-extended.
 */
#define BMSHIFT 5	/* 2 ** 5 = 32 */
#define BMMASK	((1 << BMSHIFT) - 1)

/* Mark inode I as free in the bitmap of filesystem F. */
#define SET_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d set\n", (int)(I), \
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] |= (1U << ((I) & BMMASK)); \
} while (0)

/* Mark inode I as allocated (not free) in the bitmap of filesystem F. */
#define CLR_BITMAP_FREE(F, I) do { \
	DLOG((DLOG_ALLOC, "lfs: ino %d wrd %d bit %d clr\n", (int)(I), \
	     (int)((I) >> BMSHIFT), (int)((I) & BMMASK))); \
	(F)->lfs_ino_bitmap[(I) >> BMSHIFT] &= ~(1U << ((I) & BMMASK)); \
} while (0)

/* Nonzero iff inode I is marked free in the bitmap of filesystem F. */
#define ISSET_BITMAP_FREE(F, I) \
	((F)->lfs_ino_bitmap[(I) >> BMSHIFT] & (1U << ((I) & BMMASK)))
108 1.87 perseant
109 1.44 perseant /*
110 1.65 perseant * Add a new block to the Ifile, to accommodate future file creations.
111 1.65 perseant * Called with the segment lock held.
112 1.56 perseant */
113 1.96 perseant int
114 1.96 perseant lfs_extend_ifile(struct lfs *fs, kauth_cred_t cred)
115 1.44 perseant {
116 1.44 perseant struct vnode *vp;
117 1.44 perseant struct inode *ip;
118 1.126 dholland IFILE64 *ifp64;
119 1.126 dholland IFILE32 *ifp32;
120 1.48 perseant IFILE_V1 *ifp_v1;
121 1.48 perseant struct buf *bp, *cbp;
122 1.44 perseant int error;
123 1.83 christos daddr_t i, blkno, xmax;
124 1.87 perseant ino_t oldlast, maxino;
125 1.48 perseant CLEANERINFO *cip;
126 1.44 perseant
127 1.78 perseant ASSERT_SEGLOCK(fs);
128 1.78 perseant
129 1.132 dholland /* XXX should check or assert that we aren't readonly. */
130 1.132 dholland
131 1.132 dholland /*
132 1.132 dholland * Get a block and extend the ifile inode. Leave the buffer for
133 1.132 dholland * the block in bp.
134 1.132 dholland */
135 1.132 dholland
136 1.44 perseant vp = fs->lfs_ivnode;
137 1.44 perseant ip = VTOI(vp);
138 1.117 christos blkno = lfs_lblkno(fs, ip->i_size);
139 1.122 dholland if ((error = lfs_balloc(vp, ip->i_size, lfs_sb_getbsize(fs), cred, 0,
140 1.44 perseant &bp)) != 0) {
141 1.44 perseant return (error);
142 1.44 perseant }
143 1.122 dholland ip->i_size += lfs_sb_getbsize(fs);
144 1.129 dholland lfs_dino_setsize(fs, ip->i_din, ip->i_size);
145 1.66 fvdl uvm_vnp_setsize(vp, ip->i_size);
146 1.75 perry
147 1.132 dholland /*
148 1.132 dholland * Compute the new number of inodes, and reallocate the in-memory
149 1.132 dholland * inode freemap.
150 1.132 dholland */
151 1.132 dholland
152 1.123 dholland maxino = ((ip->i_size >> lfs_sb_getbshift(fs)) - lfs_sb_getcleansz(fs) -
153 1.122 dholland lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
154 1.88 perseant fs->lfs_ino_bitmap = (lfs_bm_t *)
155 1.88 perseant realloc(fs->lfs_ino_bitmap, ((maxino + BMMASK) >> BMSHIFT) *
156 1.88 perseant sizeof(lfs_bm_t), M_SEGMENT, M_WAITOK);
157 1.88 perseant KASSERT(fs->lfs_ino_bitmap != NULL);
158 1.87 perseant
159 1.132 dholland /* first new inode number */
160 1.122 dholland i = (blkno - lfs_sb_getsegtabsz(fs) - lfs_sb_getcleansz(fs)) *
161 1.122 dholland lfs_sb_getifpb(fs);
162 1.87 perseant
163 1.87 perseant /*
164 1.87 perseant * We insert the new inodes at the head of the free list.
165 1.87 perseant * Under normal circumstances, the free list is empty here,
166 1.87 perseant * so we are also incidentally placing them at the end (which
167 1.87 perseant * we must do if we are to keep them in order).
168 1.87 perseant */
169 1.48 perseant LFS_GET_HEADFREE(fs, cip, cbp, &oldlast);
170 1.48 perseant LFS_PUT_HEADFREE(fs, cip, cbp, i);
171 1.134 riastrad KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
172 1.134 riastrad "inode 0 allocated [2]");
173 1.132 dholland
174 1.132 dholland /* inode number to stop at (XXX: why *x*max?) */
175 1.122 dholland xmax = i + lfs_sb_getifpb(fs);
176 1.48 perseant
177 1.132 dholland /*
178 1.132 dholland * Initialize the ifile block.
179 1.132 dholland *
180 1.132 dholland * XXX: these loops should be restructured to use the accessor
181 1.132 dholland * functions instead of using cutpaste polymorphism.
182 1.132 dholland */
183 1.132 dholland
184 1.126 dholland if (fs->lfs_is64) {
185 1.126 dholland for (ifp64 = (IFILE64 *)bp->b_data; i < xmax; ++ifp64) {
186 1.126 dholland SET_BITMAP_FREE(fs, i);
187 1.126 dholland ifp64->if_version = 1;
188 1.126 dholland ifp64->if_daddr = LFS_UNUSED_DADDR;
189 1.126 dholland ifp64->if_nextfree = ++i;
190 1.126 dholland }
191 1.126 dholland ifp64--;
192 1.126 dholland ifp64->if_nextfree = oldlast;
193 1.126 dholland } else if (lfs_sb_getversion(fs) > 1) {
194 1.126 dholland for (ifp32 = (IFILE32 *)bp->b_data; i < xmax; ++ifp32) {
195 1.126 dholland SET_BITMAP_FREE(fs, i);
196 1.126 dholland ifp32->if_version = 1;
197 1.126 dholland ifp32->if_daddr = LFS_UNUSED_DADDR;
198 1.126 dholland ifp32->if_nextfree = ++i;
199 1.126 dholland }
200 1.126 dholland ifp32--;
201 1.126 dholland ifp32->if_nextfree = oldlast;
202 1.126 dholland } else {
203 1.83 christos for (ifp_v1 = (IFILE_V1 *)bp->b_data; i < xmax; ++ifp_v1) {
204 1.89 perseant SET_BITMAP_FREE(fs, i);
205 1.48 perseant ifp_v1->if_version = 1;
206 1.48 perseant ifp_v1->if_daddr = LFS_UNUSED_DADDR;
207 1.48 perseant ifp_v1->if_nextfree = ++i;
208 1.48 perseant }
209 1.48 perseant ifp_v1--;
210 1.48 perseant ifp_v1->if_nextfree = oldlast;
211 1.44 perseant }
212 1.83 christos LFS_PUT_TAILFREE(fs, cip, cbp, xmax - 1);
213 1.48 perseant
214 1.132 dholland /*
215 1.132 dholland * Write out the new block.
216 1.132 dholland */
217 1.132 dholland
218 1.56 perseant (void) LFS_BWRITE_LOG(bp); /* Ifile */
219 1.44 perseant
220 1.44 perseant return 0;
221 1.44 perseant }
222 1.44 perseant
223 1.132 dholland /*
224 1.132 dholland * Allocate an inode for a new file.
225 1.132 dholland *
226 1.132 dholland * Takes the segment lock. Also (while holding it) takes lfs_lock
227 1.132 dholland * to frob fs->lfs_fmod.
228 1.132 dholland *
229 1.132 dholland * XXX: the mode argument is unused; should just get rid of it.
230 1.132 dholland */
231 1.1 mycroft /* ARGSUSED */
232 1.43 perseant /* VOP_BWRITE 2i times */
233 1.1 mycroft int
234 1.99 christos lfs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
235 1.120 hannken ino_t *ino, int *gen)
236 1.3 christos {
237 1.1 mycroft struct lfs *fs;
238 1.48 perseant struct buf *bp, *cbp;
239 1.126 dholland IFILE *ifp;
240 1.1 mycroft int error;
241 1.48 perseant CLEANERINFO *cip;
242 1.1 mycroft
243 1.85 yamt fs = VTOI(pvp)->i_lfs;
244 1.38 perseant if (fs->lfs_ronly)
245 1.38 perseant return EROFS;
246 1.75 perry
247 1.78 perseant ASSERT_NO_SEGLOCK(fs);
248 1.78 perseant
249 1.56 perseant lfs_seglock(fs, SEGM_PROT);
250 1.17 perseant
251 1.1 mycroft /* Get the head of the freelist. */
252 1.120 hannken LFS_GET_HEADFREE(fs, cip, cbp, ino);
253 1.132 dholland
254 1.132 dholland /* paranoia */
255 1.120 hannken KASSERT(*ino != LFS_UNUSED_INUM && *ino != LFS_IFILE_INUM);
256 1.120 hannken DLOG((DLOG_ALLOC, "lfs_valloc: allocate inode %" PRId64 "\n",
257 1.120 hannken *ino));
258 1.75 perry
259 1.132 dholland /* Update the in-memory inode freemap */
260 1.132 dholland CLR_BITMAP_FREE(fs, *ino);
261 1.132 dholland
262 1.1 mycroft /*
263 1.132 dholland * Fetch the ifile entry and make sure the inode is really
264 1.132 dholland * free.
265 1.1 mycroft */
266 1.120 hannken LFS_IENTRY(ifp, fs, *ino, bp);
267 1.126 dholland if (lfs_if_getdaddr(fs, ifp) != LFS_UNUSED_DADDR)
268 1.120 hannken panic("lfs_valloc: inuse inode %" PRId64 " on the free list",
269 1.120 hannken *ino);
270 1.132 dholland
271 1.132 dholland /* Update the inode freelist head in the superblock. */
272 1.126 dholland LFS_PUT_HEADFREE(fs, cip, cbp, lfs_if_getnextfree(fs, ifp));
273 1.126 dholland DLOG((DLOG_ALLOC, "lfs_valloc: headfree %" PRId64 " -> %ju\n",
274 1.126 dholland *ino, (uintmax_t)lfs_if_getnextfree(fs, ifp)));
275 1.48 perseant
276 1.132 dholland /*
277 1.132 dholland * Retrieve the version number from the ifile entry. It was
278 1.132 dholland * bumped by vfree, so don't bump it again.
279 1.132 dholland */
280 1.126 dholland *gen = lfs_if_getversion(fs, ifp);
281 1.132 dholland
282 1.132 dholland /* Done with ifile entry */
283 1.102 ad brelse(bp, 0);
284 1.30 perseant
285 1.122 dholland if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
286 1.132 dholland /*
287 1.132 dholland * No more inodes; extend the ifile so that the next
288 1.132 dholland * lfs_valloc will succeed.
289 1.132 dholland */
290 1.96 perseant if ((error = lfs_extend_ifile(fs, cred)) != 0) {
291 1.132 dholland /* restore the freelist */
292 1.120 hannken LFS_PUT_HEADFREE(fs, cip, cbp, *ino);
293 1.132 dholland
294 1.132 dholland /* unlock and return */
295 1.56 perseant lfs_segunlock(fs);
296 1.44 perseant return error;
297 1.1 mycroft }
298 1.1 mycroft }
299 1.134 riastrad KASSERTMSG((lfs_sb_getfreehd(fs) != LFS_UNUSED_INUM),
300 1.134 riastrad "inode 0 allocated [3]");
301 1.48 perseant
302 1.132 dholland /* Set superblock modified bit */
303 1.105 ad mutex_enter(&lfs_lock);
304 1.78 perseant fs->lfs_fmod = 1;
305 1.105 ad mutex_exit(&lfs_lock);
306 1.132 dholland
307 1.132 dholland /* increment file count */
308 1.122 dholland lfs_sb_addnfiles(fs, 1);
309 1.78 perseant
310 1.132 dholland /* done */
311 1.56 perseant lfs_segunlock(fs);
312 1.120 hannken return 0;
313 1.44 perseant }
314 1.44 perseant
315 1.65 perseant /*
316 1.132 dholland * Allocate an inode for a new file, with given inode number and
317 1.132 dholland * version.
318 1.132 dholland *
319 1.132 dholland * Called in the same context as lfs_valloc and therefore shares the
320 1.132 dholland * same locking assumptions.
321 1.65 perseant */
322 1.96 perseant int
323 1.120 hannken lfs_valloc_fixed(struct lfs *fs, ino_t ino, int vers)
324 1.44 perseant {
325 1.120 hannken IFILE *ifp;
326 1.120 hannken struct buf *bp, *cbp;
327 1.130 dholland ino_t headino, thisino, oldnext;
328 1.120 hannken CLEANERINFO *cip;
329 1.40 fvdl
330 1.135 maya if (fs->lfs_ronly)
331 1.135 maya return EROFS;
332 1.135 maya
333 1.135 maya ASSERT_NO_SEGLOCK(fs);
334 1.135 maya
335 1.135 maya lfs_seglock(fs, SEGM_PROT);
336 1.132 dholland
337 1.132 dholland /*
338 1.132 dholland * If the ifile is too short to contain this inum, extend it.
339 1.132 dholland *
340 1.132 dholland * XXX: lfs_extend_ifile should take a size instead of always
341 1.132 dholland * doing just one block at time.
342 1.132 dholland */
343 1.120 hannken while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
344 1.122 dholland lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + lfs_sb_getsegtabsz(fs))
345 1.123 dholland << lfs_sb_getbshift(fs)) {
346 1.120 hannken lfs_extend_ifile(fs, NOCRED);
347 1.120 hannken }
348 1.78 perseant
349 1.132 dholland /*
350 1.132 dholland * fetch the ifile entry; get the inode freelist next pointer,
351 1.132 dholland * and set the version as directed.
352 1.132 dholland */
353 1.120 hannken LFS_IENTRY(ifp, fs, ino, bp);
354 1.126 dholland oldnext = lfs_if_getnextfree(fs, ifp);
355 1.126 dholland lfs_if_setversion(fs, ifp, vers);
356 1.120 hannken brelse(bp, 0);
357 1.51 chs
358 1.132 dholland /* Get head of inode freelist */
359 1.130 dholland LFS_GET_HEADFREE(fs, cip, cbp, &headino);
360 1.130 dholland if (headino == ino) {
361 1.132 dholland /* Easy case: the inode we wanted was at the head */
362 1.120 hannken LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
363 1.120 hannken } else {
364 1.126 dholland ino_t nextfree;
365 1.126 dholland
366 1.132 dholland /* Have to find the desired inode in the freelist... */
367 1.132 dholland
368 1.130 dholland thisino = headino;
369 1.120 hannken while (1) {
370 1.132 dholland /* read this ifile entry */
371 1.130 dholland LFS_IENTRY(ifp, fs, thisino, bp);
372 1.126 dholland nextfree = lfs_if_getnextfree(fs, ifp);
373 1.132 dholland /* stop if we find it or we hit the end */
374 1.126 dholland if (nextfree == ino ||
375 1.126 dholland nextfree == LFS_UNUSED_INUM)
376 1.120 hannken break;
377 1.132 dholland /* nope, keep going... */
378 1.130 dholland thisino = nextfree;
379 1.120 hannken brelse(bp, 0);
380 1.120 hannken }
381 1.126 dholland if (nextfree == LFS_UNUSED_INUM) {
382 1.132 dholland /* hit the end -- this inode is not available */
383 1.120 hannken brelse(bp, 0);
384 1.135 maya lfs_segunlock(fs);
385 1.120 hannken return ENOENT;
386 1.120 hannken }
387 1.132 dholland /* found it; update the next pointer */
388 1.126 dholland lfs_if_setnextfree(fs, ifp, oldnext);
389 1.132 dholland /* write the ifile block */
390 1.120 hannken LFS_BWRITE_LOG(bp);
391 1.66 fvdl }
392 1.51 chs
393 1.132 dholland /* done */
394 1.135 maya lfs_segunlock(fs);
395 1.120 hannken return 0;
396 1.1 mycroft }
397 1.1 mycroft
#if 0
/*
 * Return the highest inode number that is not marked free in the
 * in-memory bitmap (LFS_UNUSED_INUM if the scan reaches the bottom).
 * Intended for shrinking the Ifile; currently compiled out.
 */
static inline ino_t
lfs_last_alloc_ino(struct lfs *fs)
{
	ino_t ino, maxino;

	/* number of inodes the ifile currently describes */
	maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
	    lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) *
	    lfs_sb_getifpb(fs);

	/* walk downwards past every inode that is marked free */
	ino = maxino - 1;
	while (ino > LFS_UNUSED_INUM && ISSET_BITMAP_FREE(fs, ino) != 0)
		--ino;

	return ino;
}
#endif
418 1.87 perseant
419 1.87 perseant /*
420 1.87 perseant * Find the previous (next lowest numbered) free inode, if any.
421 1.87 perseant * If there is none, return LFS_UNUSED_INUM.
422 1.132 dholland *
423 1.132 dholland * XXX: locking?
424 1.87 perseant */
425 1.87 perseant static inline ino_t
426 1.87 perseant lfs_freelist_prev(struct lfs *fs, ino_t ino)
427 1.87 perseant {
428 1.88 perseant ino_t tino, bound, bb, freehdbb;
429 1.88 perseant
430 1.132 dholland if (lfs_sb_getfreehd(fs) == LFS_UNUSED_INUM) {
431 1.132 dholland /* No free inodes at all */
432 1.88 perseant return LFS_UNUSED_INUM;
433 1.132 dholland }
434 1.88 perseant
435 1.88 perseant /* Search our own word first */
436 1.88 perseant bound = ino & ~BMMASK;
437 1.89 perseant for (tino = ino - 1; tino >= bound && tino > LFS_UNUSED_INUM; tino--)
438 1.88 perseant if (ISSET_BITMAP_FREE(fs, tino))
439 1.88 perseant return tino;
440 1.88 perseant /* If there are no lower words to search, just return */
441 1.88 perseant if (ino >> BMSHIFT == 0)
442 1.88 perseant return LFS_UNUSED_INUM;
443 1.88 perseant
444 1.88 perseant /*
445 1.88 perseant * Find a word with a free inode in it. We have to be a bit
446 1.88 perseant * careful here since ino_t is unsigned.
447 1.88 perseant */
448 1.122 dholland freehdbb = (lfs_sb_getfreehd(fs) >> BMSHIFT);
449 1.88 perseant for (bb = (ino >> BMSHIFT) - 1; bb >= freehdbb && bb > 0; --bb)
450 1.88 perseant if (fs->lfs_ino_bitmap[bb])
451 1.88 perseant break;
452 1.88 perseant if (fs->lfs_ino_bitmap[bb] == 0)
453 1.88 perseant return LFS_UNUSED_INUM;
454 1.88 perseant
455 1.88 perseant /* Search the word we found */
456 1.89 perseant for (tino = (bb << BMSHIFT) | BMMASK; tino >= (bb << BMSHIFT) &&
457 1.89 perseant tino > LFS_UNUSED_INUM; tino--)
458 1.88 perseant if (ISSET_BITMAP_FREE(fs, tino))
459 1.88 perseant break;
460 1.87 perseant
461 1.132 dholland /* Avoid returning reserved inode numbers */
462 1.87 perseant if (tino <= LFS_IFILE_INUM)
463 1.87 perseant tino = LFS_UNUSED_INUM;
464 1.87 perseant
465 1.87 perseant return tino;
466 1.87 perseant }
467 1.87 perseant
468 1.132 dholland /*
469 1.132 dholland * Free an inode.
470 1.132 dholland *
471 1.132 dholland * Takes lfs_seglock. Also (independently) takes vp->v_interlock.
472 1.132 dholland */
473 1.1 mycroft /* ARGUSED */
474 1.43 perseant /* VOP_BWRITE 2i times */
475 1.1 mycroft int
476 1.99 christos lfs_vfree(struct vnode *vp, ino_t ino, int mode)
477 1.3 christos {
478 1.1 mycroft SEGUSE *sup;
479 1.48 perseant CLEANERINFO *cip;
480 1.48 perseant struct buf *cbp, *bp;
481 1.126 dholland IFILE *ifp;
482 1.1 mycroft struct inode *ip;
483 1.1 mycroft struct lfs *fs;
484 1.59 fvdl daddr_t old_iaddr;
485 1.85 yamt ino_t otail;
486 1.75 perry
487 1.1 mycroft /* Get the inode number and file system. */
488 1.30 perseant ip = VTOI(vp);
489 1.1 mycroft fs = ip->i_lfs;
490 1.1 mycroft ino = ip->i_number;
491 1.34 perseant
492 1.132 dholland /* XXX: assert not readonly */
493 1.132 dholland
494 1.78 perseant ASSERT_NO_SEGLOCK(fs);
495 1.88 perseant DLOG((DLOG_ALLOC, "lfs_vfree: free ino %lld\n", (long long)ino));
496 1.78 perseant
497 1.48 perseant /* Drain of pending writes */
498 1.111 rmind mutex_enter(vp->v_interlock);
499 1.125 dholland while (lfs_sb_getversion(fs) > 1 && WRITEINPROG(vp)) {
500 1.111 rmind cv_wait(&vp->v_cv, vp->v_interlock);
501 1.105 ad }
502 1.111 rmind mutex_exit(vp->v_interlock);
503 1.48 perseant
504 1.63 perseant lfs_seglock(fs, SEGM_PROT);
505 1.75 perry
506 1.132 dholland /*
507 1.132 dholland * If the inode was in a dirop, it isn't now.
508 1.132 dholland *
509 1.136 maya * XXX: why are (v_uflag & VU_DIROP) and (ip->i_state & IN_ADIROP)
510 1.132 dholland * not updated together in one function? (and why do both exist,
511 1.132 dholland * anyway?)
512 1.132 dholland */
513 1.72 yamt lfs_unmark_vnode(vp);
514 1.132 dholland
515 1.105 ad mutex_enter(&lfs_lock);
516 1.103 ad if (vp->v_uflag & VU_DIROP) {
517 1.103 ad vp->v_uflag &= ~VU_DIROP;
518 1.30 perseant --lfs_dirvcount;
519 1.92 perseant --fs->lfs_dirvcount;
520 1.63 perseant TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
521 1.92 perseant wakeup(&fs->lfs_dirvcount);
522 1.30 perseant wakeup(&lfs_dirvcount);
523 1.105 ad mutex_exit(&lfs_lock);
524 1.120 hannken vrele(vp);
525 1.90 perseant
526 1.90 perseant /*
527 1.90 perseant * If this inode is not going to be written any more, any
528 1.90 perseant * segment accounting left over from its truncation needs
529 1.90 perseant * to occur at the end of the next dirops flush. Attach
530 1.90 perseant * them to the fs-wide list for that purpose.
531 1.90 perseant */
532 1.90 perseant if (LIST_FIRST(&ip->i_lfs_segdhd) != NULL) {
533 1.90 perseant struct segdelta *sd;
534 1.90 perseant
535 1.90 perseant while((sd = LIST_FIRST(&ip->i_lfs_segdhd)) != NULL) {
536 1.90 perseant LIST_REMOVE(sd, list);
537 1.90 perseant LIST_INSERT_HEAD(&fs->lfs_segdhd, sd, list);
538 1.90 perseant }
539 1.90 perseant }
540 1.90 perseant } else {
541 1.90 perseant /*
542 1.90 perseant * If it's not a dirop, we can finalize right away.
543 1.90 perseant */
544 1.105 ad mutex_exit(&lfs_lock);
545 1.90 perseant lfs_finalize_ino_seguse(fs, ip);
546 1.38 perseant }
547 1.30 perseant
548 1.132 dholland /* it is no longer an unwritten inode, so update the counts */
549 1.105 ad mutex_enter(&lfs_lock);
550 1.42 perseant LFS_CLR_UINO(ip, IN_ACCESSED|IN_CLEANING|IN_MODIFIED);
551 1.105 ad mutex_exit(&lfs_lock);
552 1.132 dholland
553 1.132 dholland /* Turn off all inode modification flags */
554 1.136 maya ip->i_state &= ~IN_ALLMOD;
555 1.132 dholland
556 1.132 dholland /* Mark it deleted */
557 1.93 perseant ip->i_lfs_iflags |= LFSI_DELETED;
558 1.93 perseant
559 1.132 dholland /* Mark it free in the in-memory inode freemap */
560 1.132 dholland SET_BITMAP_FREE(fs, ino);
561 1.132 dholland
562 1.1 mycroft /*
563 1.1 mycroft * Set the ifile's inode entry to unused, increment its version number
564 1.48 perseant * and link it onto the free chain.
565 1.1 mycroft */
566 1.132 dholland
567 1.132 dholland /* fetch the ifile entry */
568 1.1 mycroft LFS_IENTRY(ifp, fs, ino, bp);
569 1.132 dholland
570 1.132 dholland /* update the on-disk address (to "nowhere") */
571 1.126 dholland old_iaddr = lfs_if_getdaddr(fs, ifp);
572 1.126 dholland lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
573 1.132 dholland
574 1.132 dholland /* bump the version */
575 1.126 dholland lfs_if_setversion(fs, ifp, lfs_if_getversion(fs, ifp) + 1);
576 1.132 dholland
577 1.125 dholland if (lfs_sb_getversion(fs) == 1) {
578 1.126 dholland ino_t nextfree;
579 1.126 dholland
580 1.132 dholland /* insert on freelist */
581 1.126 dholland LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
582 1.126 dholland lfs_if_setnextfree(fs, ifp, nextfree);
583 1.48 perseant LFS_PUT_HEADFREE(fs, cip, cbp, ino);
584 1.132 dholland
585 1.132 dholland /* write the ifile block */
586 1.56 perseant (void) LFS_BWRITE_LOG(bp); /* Ifile */
587 1.48 perseant } else {
588 1.87 perseant ino_t tino, onf;
589 1.87 perseant
590 1.132 dholland /*
591 1.132 dholland * Clear the freelist next pointer and write the ifile
592 1.132 dholland * block. XXX: why? I'm sure there must be a reason but
593 1.132 dholland * it seems both silly and dangerous.
594 1.132 dholland */
595 1.126 dholland lfs_if_setnextfree(fs, ifp, LFS_UNUSED_INUM);
596 1.56 perseant (void) LFS_BWRITE_LOG(bp); /* Ifile */
597 1.87 perseant
598 1.132 dholland /*
599 1.132 dholland * Insert on freelist in order.
600 1.132 dholland */
601 1.132 dholland
602 1.132 dholland /* Find the next lower (by number) free inode */
603 1.87 perseant tino = lfs_freelist_prev(fs, ino);
604 1.132 dholland
605 1.87 perseant if (tino == LFS_UNUSED_INUM) {
606 1.126 dholland ino_t nextfree;
607 1.126 dholland
608 1.132 dholland /*
609 1.132 dholland * There isn't one; put us on the freelist head.
610 1.132 dholland */
611 1.132 dholland
612 1.132 dholland /* reload the ifile block */
613 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
614 1.132 dholland /* update the list */
615 1.126 dholland LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
616 1.126 dholland lfs_if_setnextfree(fs, ifp, nextfree);
617 1.87 perseant LFS_PUT_HEADFREE(fs, cip, cbp, ino);
618 1.88 perseant DLOG((DLOG_ALLOC, "lfs_vfree: headfree %lld -> %lld\n",
619 1.126 dholland (long long)nextfree, (long long)ino));
620 1.132 dholland /* write the ifile block */
621 1.87 perseant LFS_BWRITE_LOG(bp); /* Ifile */
622 1.87 perseant
623 1.87 perseant /* If the list was empty, set tail too */
624 1.87 perseant LFS_GET_TAILFREE(fs, cip, cbp, &otail);
625 1.87 perseant if (otail == LFS_UNUSED_INUM) {
626 1.87 perseant LFS_PUT_TAILFREE(fs, cip, cbp, ino);
627 1.87 perseant DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
628 1.87 perseant "-> %lld\n", (long long)otail,
629 1.87 perseant (long long)ino));
630 1.87 perseant }
631 1.87 perseant } else {
632 1.87 perseant /*
633 1.87 perseant * Insert this inode into the list after tino.
634 1.87 perseant * We hold the segment lock so we don't have to
635 1.87 perseant * worry about blocks being written out of order.
636 1.87 perseant */
637 1.132 dholland
638 1.87 perseant DLOG((DLOG_ALLOC, "lfs_vfree: insert ino %lld "
639 1.87 perseant " after %lld\n", ino, tino));
640 1.87 perseant
641 1.132 dholland /* load the previous inode's ifile block */
642 1.87 perseant LFS_IENTRY(ifp, fs, tino, bp);
643 1.132 dholland /* update the list pointer */
644 1.126 dholland onf = lfs_if_getnextfree(fs, ifp);
645 1.126 dholland lfs_if_setnextfree(fs, ifp, ino);
646 1.132 dholland /* write the block */
647 1.87 perseant LFS_BWRITE_LOG(bp); /* Ifile */
648 1.87 perseant
649 1.132 dholland /* load this inode's ifile block */
650 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
651 1.132 dholland /* update the list pointer */
652 1.126 dholland lfs_if_setnextfree(fs, ifp, onf);
653 1.132 dholland /* write the block */
654 1.87 perseant LFS_BWRITE_LOG(bp); /* Ifile */
655 1.87 perseant
656 1.87 perseant /* If we're last, put us on the tail */
657 1.87 perseant if (onf == LFS_UNUSED_INUM) {
658 1.87 perseant LFS_GET_TAILFREE(fs, cip, cbp, &otail);
659 1.87 perseant LFS_PUT_TAILFREE(fs, cip, cbp, ino);
660 1.87 perseant DLOG((DLOG_ALLOC, "lfs_vfree: tailfree %lld "
661 1.87 perseant "-> %lld\n", (long long)otail,
662 1.87 perseant (long long)ino));
663 1.87 perseant }
664 1.87 perseant }
665 1.48 perseant }
666 1.132 dholland /* XXX: shouldn't this check be further up *before* we trash the fs? */
667 1.134 riastrad KASSERTMSG((ino != LFS_UNUSED_INUM), "inode 0 freed");
668 1.132 dholland
669 1.132 dholland /*
670 1.132 dholland * Update the segment summary for the segment where the on-disk
671 1.132 dholland * copy used to be.
672 1.132 dholland */
673 1.1 mycroft if (old_iaddr != LFS_UNUSED_DADDR) {
674 1.132 dholland /* load it */
675 1.117 christos LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp);
676 1.132 dholland /* the number of bytes in the segment should not become < 0 */
677 1.134 riastrad KASSERTMSG((sup->su_nbytes >= DINOSIZE(fs)),
678 1.134 riastrad "lfs_vfree: negative byte count"
679 1.134 riastrad " (segment %" PRIu32 " short by %d)\n",
680 1.134 riastrad lfs_dtosn(fs, old_iaddr),
681 1.134 riastrad (int)DINOSIZE(fs) - sup->su_nbytes);
682 1.132 dholland /* update the number of bytes in the segment */
683 1.127 dholland sup->su_nbytes -= DINOSIZE(fs);
684 1.132 dholland /* write the segment entry */
685 1.117 christos LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, old_iaddr), bp); /* Ifile */
686 1.1 mycroft }
687 1.75 perry
688 1.132 dholland /* Set superblock modified bit. */
689 1.105 ad mutex_enter(&lfs_lock);
690 1.1 mycroft fs->lfs_fmod = 1;
691 1.105 ad mutex_exit(&lfs_lock);
692 1.132 dholland
693 1.132 dholland /* Decrement file count. */
694 1.122 dholland lfs_sb_subnfiles(fs, 1);
695 1.75 perry
696 1.56 perseant lfs_segunlock(fs);
697 1.63 perseant
698 1.1 mycroft return (0);
699 1.1 mycroft }
700 1.87 perseant
701 1.87 perseant /*
702 1.87 perseant * Sort the freelist and set up the free-inode bitmap.
703 1.87 perseant * To be called by lfs_mountfs().
704 1.132 dholland *
705 1.132 dholland * Takes the segment lock.
706 1.87 perseant */
707 1.87 perseant void
708 1.87 perseant lfs_order_freelist(struct lfs *fs)
709 1.87 perseant {
710 1.87 perseant CLEANERINFO *cip;
711 1.87 perseant IFILE *ifp = NULL;
712 1.87 perseant struct buf *bp;
713 1.87 perseant ino_t ino, firstino, lastino, maxino;
714 1.97 perseant #ifdef notyet
715 1.97 perseant struct vnode *vp;
716 1.97 perseant #endif
717 1.87 perseant
718 1.95 perseant ASSERT_NO_SEGLOCK(fs);
719 1.95 perseant lfs_seglock(fs, SEGM_PROT);
720 1.95 perseant
721 1.132 dholland /* largest inode on fs */
722 1.123 dholland maxino = ((fs->lfs_ivnode->v_size >> lfs_sb_getbshift(fs)) -
723 1.122 dholland lfs_sb_getcleansz(fs) - lfs_sb_getsegtabsz(fs)) * lfs_sb_getifpb(fs);
724 1.132 dholland
725 1.132 dholland /* allocate the in-memory inode freemap */
726 1.132 dholland /* XXX: assert that fs->lfs_ino_bitmap is null here */
727 1.121 dholland fs->lfs_ino_bitmap =
728 1.88 perseant malloc(((maxino + BMMASK) >> BMSHIFT) * sizeof(lfs_bm_t),
729 1.88 perseant M_SEGMENT, M_WAITOK | M_ZERO);
730 1.88 perseant KASSERT(fs->lfs_ino_bitmap != NULL);
731 1.87 perseant
732 1.132 dholland /*
733 1.132 dholland * Scan the ifile.
734 1.132 dholland */
735 1.132 dholland
736 1.87 perseant firstino = lastino = LFS_UNUSED_INUM;
737 1.87 perseant for (ino = 0; ino < maxino; ino++) {
738 1.132 dholland /* Load this inode's ifile entry. */
739 1.122 dholland if (ino % lfs_sb_getifpb(fs) == 0)
740 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
741 1.87 perseant else
742 1.128 mlelstv LFS_IENTRY_NEXT(ifp, fs);
743 1.87 perseant
744 1.87 perseant /* Don't put zero or ifile on the free list */
745 1.87 perseant if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM)
746 1.87 perseant continue;
747 1.87 perseant
748 1.97 perseant #ifdef notyet
749 1.132 dholland /*
750 1.132 dholland * Address orphaned files.
751 1.132 dholland *
752 1.132 dholland * The idea of this is to free inodes belonging to
753 1.132 dholland * files that were unlinked but not reclaimed, I guess
754 1.132 dholland * because if we're going to scan the whole ifile
755 1.132 dholland * anyway it costs very little to do this. I don't
756 1.132 dholland * immediately see any reason this should be disabled,
757 1.132 dholland * but presumably it doesn't work... not sure what
758 1.132 dholland * happens to such files currently. -- dholland 20160806
759 1.132 dholland */
760 1.126 dholland if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE &&
761 1.97 perseant VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp) == 0) {
762 1.126 dholland unsigned segno;
763 1.126 dholland
764 1.132 dholland /* get the segment the inode in on disk */
765 1.126 dholland segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp));
766 1.132 dholland
767 1.132 dholland /* truncate the inode */
768 1.104 he lfs_truncate(vp, 0, 0, NOCRED);
769 1.97 perseant vput(vp);
770 1.132 dholland
771 1.132 dholland /* load the segment summary */
772 1.126 dholland LFS_SEGENTRY(sup, fs, segno, bp);
773 1.132 dholland /* update the number of bytes in the segment */
774 1.131 dholland KASSERT(sup->su_nbytes >= DINOSIZE(fs));
775 1.131 dholland sup->su_nbytes -= DINOSIZE(fs);
776 1.132 dholland /* write the segment summary */
777 1.126 dholland LFS_WRITESEGENTRY(sup, fs, segno, bp);
778 1.97 perseant
779 1.132 dholland /* Drop the on-disk address */
780 1.126 dholland lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR);
781 1.132 dholland /* write the ifile entry */
782 1.97 perseant LFS_BWRITE_LOG(bp);
783 1.132 dholland
784 1.132 dholland /*
785 1.132 dholland * and reload it (XXX: why? I guess
786 1.132 dholland * LFS_BWRITE_LOG drops it...)
787 1.132 dholland */
788 1.97 perseant LFS_IENTRY(ifp, fs, ino, bp);
789 1.132 dholland
790 1.132 dholland /* Fall through to next if block */
791 1.97 perseant }
792 1.97 perseant #endif
793 1.97 perseant
794 1.126 dholland if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) {
795 1.132 dholland
796 1.132 dholland /*
797 1.132 dholland * This inode is free. Put it on the free list.
798 1.132 dholland */
799 1.132 dholland
800 1.132 dholland if (firstino == LFS_UNUSED_INUM) {
801 1.132 dholland /* XXX: assert lastino == LFS_UNUSED_INUM? */
802 1.132 dholland /* remember the first free inode */
803 1.87 perseant firstino = ino;
804 1.132 dholland } else {
805 1.132 dholland /* release this inode's ifile entry */
806 1.102 ad brelse(bp, 0);
807 1.87 perseant
808 1.132 dholland /* XXX: assert lastino != LFS_UNUSED_INUM? */
809 1.132 dholland
810 1.132 dholland /* load lastino's ifile entry */
811 1.87 perseant LFS_IENTRY(ifp, fs, lastino, bp);
812 1.132 dholland /* set the list pointer */
813 1.126 dholland lfs_if_setnextfree(fs, ifp, ino);
814 1.132 dholland /* write the block */
815 1.87 perseant LFS_BWRITE_LOG(bp);
816 1.132 dholland
817 1.132 dholland /* reload this inode's ifile entry */
818 1.87 perseant LFS_IENTRY(ifp, fs, ino, bp);
819 1.87 perseant }
820 1.132 dholland /* remember the last free inode seen so far */
821 1.87 perseant lastino = ino;
822 1.87 perseant
823 1.132 dholland /* Mark this inode free in the in-memory freemap */
824 1.87 perseant SET_BITMAP_FREE(fs, ino);
825 1.87 perseant }
826 1.87 perseant
827 1.132 dholland /* If moving to the next ifile block, release the buffer. */
828 1.122 dholland if ((ino + 1) % lfs_sb_getifpb(fs) == 0)
829 1.102 ad brelse(bp, 0);
830 1.87 perseant }
831 1.87 perseant
832 1.132 dholland /* Write the freelist head and tail pointers */
833 1.132 dholland /* XXX: do we need to mark the superblock dirty? */
834 1.87 perseant LFS_PUT_HEADFREE(fs, cip, bp, firstino);
835 1.87 perseant LFS_PUT_TAILFREE(fs, cip, bp, lastino);
836 1.95 perseant
837 1.132 dholland /* done */
838 1.95 perseant lfs_segunlock(fs);
839 1.87 perseant }
840 1.97 perseant
841 1.132 dholland /*
842 1.132 dholland * Mark a file orphaned (unlinked but not yet reclaimed) by inode
843 1.132 dholland * number. Do this by storing the magic value LFS_ORPHAN_NEXTFREE in the freelist next pointer of the inode's ifile entry and writing that entry out.
844 1.132 dholland *
845 1.132 dholland * XXX: howzabout some locking?
846 1.132 dholland */
847 1.97 perseant void
848 1.97 perseant lfs_orphan(struct lfs *fs, ino_t ino)
849 1.97 perseant {
850 1.97 perseant IFILE *ifp;
851 1.97 perseant struct buf *bp;
852 1.97 perseant
853 1.97 perseant LFS_IENTRY(ifp, fs, ino, bp); /* load the ifile entry for ino */
854 1.126 dholland lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE); /* tag the entry as orphaned */
855 1.97 perseant LFS_BWRITE_LOG(bp); /* write the modified ifile block */
856 1.97 perseant }
857