1 1.60 brad /* $NetBSD: lfs_cleanerd.c,v 1.60 2019/08/30 23:37:23 brad Exp $ */ 2 1.1 perseant 3 1.1 perseant /*- 4 1.1 perseant * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 1.1 perseant * All rights reserved. 6 1.1 perseant * 7 1.1 perseant * This code is derived from software contributed to The NetBSD Foundation 8 1.1 perseant * by Konrad E. Schroder <perseant (at) hhhh.org>. 9 1.1 perseant * 10 1.1 perseant * Redistribution and use in source and binary forms, with or without 11 1.1 perseant * modification, are permitted provided that the following conditions 12 1.1 perseant * are met: 13 1.1 perseant * 1. Redistributions of source code must retain the above copyright 14 1.1 perseant * notice, this list of conditions and the following disclaimer. 15 1.1 perseant * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 perseant * notice, this list of conditions and the following disclaimer in the 17 1.1 perseant * documentation and/or other materials provided with the distribution. 18 1.1 perseant * 19 1.1 perseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 perseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 perseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 perseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 perseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 perseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 perseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 perseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 perseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 perseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 perseant * POSSIBILITY OF SUCH DAMAGE. 30 1.1 perseant */ 31 1.1 perseant 32 1.1 perseant /* 33 1.1 perseant * The cleaner daemon for the NetBSD Log-structured File System. 34 1.1 perseant * Only tested for use with version 2 LFSs. 35 1.1 perseant */ 36 1.1 perseant 37 1.1 perseant #include <sys/syslog.h> 38 1.1 perseant #include <sys/param.h> 39 1.1 perseant #include <sys/mount.h> 40 1.1 perseant #include <sys/stat.h> 41 1.1 perseant #include <ufs/lfs/lfs.h> 42 1.1 perseant 43 1.1 perseant #include <assert.h> 44 1.1 perseant #include <err.h> 45 1.1 perseant #include <errno.h> 46 1.1 perseant #include <fcntl.h> 47 1.24 pooka #include <semaphore.h> 48 1.56 riastrad #include <stdbool.h> 49 1.1 perseant #include <stdio.h> 50 1.1 perseant #include <stdlib.h> 51 1.1 perseant #include <string.h> 52 1.1 perseant #include <unistd.h> 53 1.1 perseant #include <time.h> 54 1.1 perseant #include <util.h> 55 1.1 perseant 56 1.1 perseant #include "bufcache.h" 57 1.1 perseant #include "vnode.h" 58 1.1 perseant #include "lfs_user.h" 59 1.1 perseant #include "fdfs.h" 60 1.1 perseant #include "cleaner.h" 61 1.21 pooka #include "kernelops.h" 62 1.21 pooka #include "mount_lfs.h" 63 1.1 perseant 64 1.1 perseant /* 65 1.1 perseant * Global variables. 66 1.1 perseant */ 67 1.1 perseant /* XXX these top few should really be fs-specific */ 68 1.1 perseant int use_fs_idle; /* Use fs idle rather than cpu idle time */ 69 1.1 perseant int use_bytes; /* Use bytes written rather than segments cleaned */ 70 1.31 joerg double load_threshold; /* How idle is idle (CPU idle) */ 71 1.1 perseant int atatime; /* How many segments (bytes) to clean at a time */ 72 1.1 perseant 73 1.1 perseant int nfss; /* Number of filesystems monitored by this cleanerd */ 74 1.1 perseant struct clfs **fsp; /* Array of extended filesystem structures */ 75 1.1 perseant int segwait_timeout; /* Time to wait in lfs_segwait() */ 76 1.1 perseant int do_quit; /* Quit after one cleaning loop */ 77 1.1 perseant int do_coalesce; /* Coalesce filesystem */ 78 1.1 perseant int do_small; /* Use small writes through markv */ 79 1.60 brad char *do_asdevice; /* Use this as the raw device */ 80 1.1 perseant char *copylog_filename; /* File to use for fs debugging analysis */ 81 1.1 perseant int inval_segment; /* Segment to invalidate */ 82 1.1 perseant int stat_report; /* Report statistics for this period of cycles */ 83 1.1 perseant int debug; /* Turn on debugging */ 84 1.1 perseant struct cleaner_stats { 85 1.1 perseant double util_tot; 86 1.1 perseant double util_sos; 87 1.1 perseant off_t bytes_read; 88 1.1 perseant off_t bytes_written; 89 1.1 perseant off_t segs_cleaned; 90 1.1 perseant off_t segs_empty; 91 1.1 perseant off_t segs_error; 92 1.1 perseant } cleaner_stats; 93 1.1 perseant 94 1.1 perseant extern u_int32_t cksum(void *, size_t); 95 1.1 perseant extern u_int32_t lfs_sb_cksum(struct dlfs *); 96 1.1 perseant extern u_int32_t lfs_cksum_part(void *, size_t, u_int32_t); 97 1.33 dholland extern int ulfs_getlbns(struct lfs *, struct uvnode *, daddr_t, struct indir *, int *); 98 1.1 perseant 99 1.44 dholland /* Ugh */ 100 1.44 dholland #define FSMNT_SIZE MAX(sizeof(((struct dlfs *)0)->dlfs_fsmnt), \ 101 1.44 dholland sizeof(((struct dlfs64 *)0)->dlfs_fsmnt)) 102 1.44 dholland 103 1.44 dholland 104 1.1 perseant /* Compat */ 105 1.1 perseant void pwarn(const char *unused, ...) { /* Does nothing */ }; 106 1.1 perseant 107 1.1 perseant /* 108 1.1 perseant * Log a message if debugging is turned on. 109 1.1 perseant */ 110 1.1 perseant void 111 1.17 lukem dlog(const char *fmt, ...) 112 1.1 perseant { 113 1.1 perseant va_list ap; 114 1.1 perseant 115 1.1 perseant if (debug == 0) 116 1.1 perseant return; 117 1.1 perseant 118 1.1 perseant va_start(ap, fmt); 119 1.1 perseant vsyslog(LOG_DEBUG, fmt, ap); 120 1.1 perseant va_end(ap); 121 1.1 perseant } 122 1.1 perseant 123 1.1 perseant /* 124 1.1 perseant * Remove the specified filesystem from the list, due to its having 125 1.1 perseant * become unmounted or other error condition. 126 1.1 perseant */ 127 1.1 perseant void 128 1.16 lukem handle_error(struct clfs **cfsp, int n) 129 1.1 perseant { 130 1.40 dholland syslog(LOG_NOTICE, "%s: detaching cleaner", lfs_sb_getfsmnt(cfsp[n])); 131 1.16 lukem free(cfsp[n]); 132 1.1 perseant if (n != nfss - 1) 133 1.16 lukem cfsp[n] = cfsp[nfss - 1]; 134 1.1 perseant --nfss; 135 1.1 perseant } 136 1.1 perseant 137 1.1 perseant /* 138 1.1 perseant * Reinitialize a filesystem if, e.g., its size changed. 139 1.1 perseant */ 140 1.1 perseant int 141 1.1 perseant reinit_fs(struct clfs *fs) 142 1.1 perseant { 143 1.44 dholland char fsname[FSMNT_SIZE]; 144 1.40 dholland 145 1.44 dholland memcpy(fsname, lfs_sb_getfsmnt(fs), sizeof(fsname)); 146 1.40 dholland fsname[sizeof(fsname) - 1] = '\0'; 147 1.1 perseant 148 1.21 pooka kops.ko_close(fs->clfs_ifilefd); 149 1.21 pooka kops.ko_close(fs->clfs_devfd); 150 1.1 perseant fd_reclaim(fs->clfs_devvp); 151 1.1 perseant fd_reclaim(fs->lfs_ivnode); 152 1.1 perseant free(fs->clfs_dev); 153 1.1 perseant free(fs->clfs_segtab); 154 1.1 perseant free(fs->clfs_segtabp); 155 1.1 perseant 156 1.1 perseant return init_fs(fs, fsname); 157 1.1 perseant } 158 1.1 perseant 159 1.1 perseant #ifdef REPAIR_ZERO_FINFO 160 1.1 perseant /* 161 1.1 perseant * Use fsck's lfs routines to load the Ifile from an unmounted fs. 162 1.1 perseant * We interpret "fsname" as the name of the raw disk device. 163 1.1 perseant */ 164 1.1 perseant int 165 1.1 perseant init_unmounted_fs(struct clfs *fs, char *fsname) 166 1.1 perseant { 167 1.1 perseant struct lfs *disc_fs; 168 1.1 perseant int i; 169 1.60 brad 170 1.1 perseant fs->clfs_dev = fsname; 171 1.21 pooka if ((fs->clfs_devfd = kops.ko_open(fs->clfs_dev, O_RDWR)) < 0) { 172 1.1 perseant syslog(LOG_ERR, "couldn't open device %s read/write", 173 1.1 perseant fs->clfs_dev); 174 1.1 perseant return -1; 175 1.1 perseant } 176 1.1 perseant 177 1.1 perseant disc_fs = lfs_init(fs->clfs_devfd, 0, 0, 0, 0); 178 1.1 perseant 179 1.1 perseant fs->lfs_dlfs = disc_fs->lfs_dlfs; /* Structure copy */ 180 1.1 perseant strncpy(fs->lfs_fsmnt, fsname, MNAMELEN); 181 1.1 perseant fs->lfs_ivnode = (struct uvnode *)disc_fs->lfs_ivnode; 182 1.1 perseant fs->clfs_devvp = fd_vget(fs->clfs_devfd, fs->lfs_fsize, fs->lfs_ssize, 183 1.1 perseant atatime); 184 1.1 perseant 185 1.1 perseant /* Allocate and clear segtab */ 186 1.40 dholland fs->clfs_segtab = (struct clfs_seguse *)malloc(lfs_sb_getnseg(fs) * 187 1.1 perseant sizeof(*fs->clfs_segtab)); 188 1.40 dholland fs->clfs_segtabp = (struct clfs_seguse **)malloc(lfs_sb_getnseg(fs) * 189 1.1 perseant sizeof(*fs->clfs_segtabp)); 190 1.40 dholland for (i = 0; i < lfs_sb_getnseg(fs); i++) { 191 1.1 perseant fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]); 192 1.1 perseant fs->clfs_segtab[i].flags = 0x0; 193 1.1 perseant } 194 1.1 perseant syslog(LOG_NOTICE, "%s: unmounted cleaner starting", fsname); 195 1.1 perseant 196 1.1 perseant return 0; 197 1.1 perseant } 198 1.1 perseant #endif 199 1.1 perseant 200 1.1 perseant /* 201 1.1 perseant * Set up the file descriptors, including the Ifile descriptor. 202 1.1 perseant * If we can't get the Ifile, this is not an LFS (or the kernel is 203 1.1 perseant * too old to support the fcntl). 204 1.1 perseant * XXX Merge this and init_unmounted_fs, switching on whether 205 1.1 perseant * XXX "fsname" is a dir or a char special device. Should 206 1.1 perseant * XXX also be able to read unmounted devices out of fstab, the way 207 1.1 perseant * XXX fsck does. 208 1.1 perseant */ 209 1.1 perseant int 210 1.1 perseant init_fs(struct clfs *fs, char *fsname) 211 1.1 perseant { 212 1.44 dholland char mnttmp[FSMNT_SIZE]; 213 1.1 perseant struct statvfs sf; 214 1.1 perseant int rootfd; 215 1.1 perseant int i; 216 1.23 mlelstv void *sbuf; 217 1.59 brad size_t mlen; 218 1.1 perseant 219 1.60 brad if (do_asdevice != NULL) { 220 1.60 brad fs->clfs_dev = strndup(do_asdevice,strlen(do_asdevice) + 2); 221 1.60 brad if (fs->clfs_dev == NULL) { 222 1.60 brad syslog(LOG_ERR, "couldn't malloc device name string: %m"); 223 1.60 brad return -1; 224 1.60 brad } 225 1.60 brad } else { 226 1.60 brad /* 227 1.60 brad * Get the raw device from the block device. 228 1.60 brad * XXX this is ugly. Is there a way to discover the raw device 229 1.60 brad * XXX for a given mount point? 230 1.60 brad */ 231 1.60 brad if (kops.ko_statvfs(fsname, &sf, ST_WAIT) < 0) 232 1.60 brad return -1; 233 1.60 brad mlen = strlen(sf.f_mntfromname) + 2; 234 1.60 brad fs->clfs_dev = malloc(mlen); 235 1.60 brad if (fs->clfs_dev == NULL) { 236 1.60 brad syslog(LOG_ERR, "couldn't malloc device name string: %m"); 237 1.60 brad return -1; 238 1.60 brad } 239 1.60 brad if (getdiskrawname(fs->clfs_dev, mlen, sf.f_mntfromname) == NULL) { 240 1.60 brad syslog(LOG_ERR, "couldn't convert '%s' to raw name: %m", 241 1.60 brad sf.f_mntfromname); 242 1.60 brad return -1; 243 1.60 brad } 244 1.59 brad } 245 1.21 pooka if ((fs->clfs_devfd = kops.ko_open(fs->clfs_dev, O_RDONLY, 0)) < 0) { 246 1.60 brad syslog(LOG_ERR, "couldn't open device %s for reading: %m", 247 1.1 perseant fs->clfs_dev); 248 1.1 perseant return -1; 249 1.1 perseant } 250 1.1 perseant 251 1.1 perseant /* Find the Ifile and open it */ 252 1.21 pooka if ((rootfd = kops.ko_open(fsname, O_RDONLY, 0)) < 0) 253 1.1 perseant return -2; 254 1.21 pooka if (kops.ko_fcntl(rootfd, LFCNIFILEFH, &fs->clfs_ifilefh) < 0) 255 1.1 perseant return -3; 256 1.21 pooka if ((fs->clfs_ifilefd = kops.ko_fhopen(&fs->clfs_ifilefh, 257 1.10 martin sizeof(fs->clfs_ifilefh), O_RDONLY)) < 0) 258 1.1 perseant return -4; 259 1.21 pooka kops.ko_close(rootfd); 260 1.1 perseant 261 1.23 mlelstv sbuf = malloc(LFS_SBPAD); 262 1.23 mlelstv if (sbuf == NULL) { 263 1.23 mlelstv syslog(LOG_ERR, "couldn't malloc superblock buffer"); 264 1.23 mlelstv return -1; 265 1.23 mlelstv } 266 1.23 mlelstv 267 1.1 perseant /* Load in the superblock */ 268 1.23 mlelstv if (kops.ko_pread(fs->clfs_devfd, sbuf, LFS_SBPAD, LFS_LABELPAD) < 0) { 269 1.23 mlelstv free(sbuf); 270 1.1 perseant return -1; 271 1.23 mlelstv } 272 1.23 mlelstv 273 1.44 dholland __CTASSERT(sizeof(struct dlfs) == sizeof(struct dlfs64)); 274 1.44 dholland memcpy(&fs->lfs_dlfs_u, sbuf, sizeof(struct dlfs)); 275 1.23 mlelstv free(sbuf); 276 1.1 perseant 277 1.44 dholland /* If it is not LFS, complain and exit! */ 278 1.55 dholland switch (fs->lfs_dlfs_u.u_32.dlfs_magic) { 279 1.55 dholland case LFS_MAGIC: 280 1.55 dholland fs->lfs_is64 = false; 281 1.55 dholland fs->lfs_dobyteswap = false; 282 1.55 dholland break; 283 1.55 dholland case LFS_MAGIC_SWAPPED: 284 1.55 dholland fs->lfs_is64 = false; 285 1.55 dholland fs->lfs_dobyteswap = true; 286 1.55 dholland break; 287 1.55 dholland case LFS64_MAGIC: 288 1.55 dholland fs->lfs_is64 = true; 289 1.55 dholland fs->lfs_dobyteswap = false; 290 1.55 dholland break; 291 1.55 dholland case LFS64_MAGIC_SWAPPED: 292 1.55 dholland fs->lfs_is64 = true; 293 1.55 dholland fs->lfs_dobyteswap = true; 294 1.55 dholland break; 295 1.55 dholland default: 296 1.44 dholland syslog(LOG_ERR, "%s: not LFS", fsname); 297 1.44 dholland return -1; 298 1.44 dholland } 299 1.53 dholland /* XXX: can this ever need to be set? does the cleaner even care? */ 300 1.53 dholland fs->lfs_hasolddirfmt = 0; 301 1.44 dholland 302 1.1 perseant /* If this is not a version 2 filesystem, complain and exit */ 303 1.43 dholland if (lfs_sb_getversion(fs) != 2) { 304 1.1 perseant syslog(LOG_ERR, "%s: not a version 2 LFS", fsname); 305 1.1 perseant return -1; 306 1.1 perseant } 307 1.1 perseant 308 1.1 perseant /* Assume fsname is the mounted name */ 309 1.40 dholland strncpy(mnttmp, fsname, sizeof(mnttmp)); 310 1.40 dholland mnttmp[sizeof(mnttmp) - 1] = '\0'; 311 1.44 dholland lfs_sb_setfsmnt(fs, mnttmp); 312 1.1 perseant 313 1.1 perseant /* Set up vnodes for Ifile and raw device */ 314 1.41 dholland fs->lfs_ivnode = fd_vget(fs->clfs_ifilefd, lfs_sb_getbsize(fs), 0, 0); 315 1.41 dholland fs->clfs_devvp = fd_vget(fs->clfs_devfd, lfs_sb_getfsize(fs), lfs_sb_getssize(fs), 316 1.1 perseant atatime); 317 1.1 perseant 318 1.1 perseant /* Allocate and clear segtab */ 319 1.40 dholland fs->clfs_segtab = (struct clfs_seguse *)malloc(lfs_sb_getnseg(fs) * 320 1.1 perseant sizeof(*fs->clfs_segtab)); 321 1.40 dholland fs->clfs_segtabp = (struct clfs_seguse **)malloc(lfs_sb_getnseg(fs) * 322 1.1 perseant sizeof(*fs->clfs_segtabp)); 323 1.7 perseant if (fs->clfs_segtab == NULL || fs->clfs_segtabp == NULL) { 324 1.7 perseant syslog(LOG_ERR, "%s: couldn't malloc segment table: %m", 325 1.7 perseant fs->clfs_dev); 326 1.7 perseant return -1; 327 1.7 perseant } 328 1.7 perseant 329 1.40 dholland for (i = 0; i < lfs_sb_getnseg(fs); i++) { 330 1.1 perseant fs->clfs_segtabp[i] = &(fs->clfs_segtab[i]); 331 1.1 perseant fs->clfs_segtab[i].flags = 0x0; 332 1.1 perseant } 333 1.1 perseant 334 1.1 perseant syslog(LOG_NOTICE, "%s: attaching cleaner", fsname); 335 1.1 perseant return 0; 336 1.1 perseant } 337 1.1 perseant 338 1.1 perseant /* 339 1.1 perseant * Invalidate all the currently held Ifile blocks so they will be 340 1.1 perseant * reread when we clean. Check the size while we're at it, and 341 1.1 perseant * resize the buffer cache if necessary. 342 1.1 perseant */ 343 1.1 perseant void 344 1.1 perseant reload_ifile(struct clfs *fs) 345 1.1 perseant { 346 1.1 perseant struct ubuf *bp; 347 1.1 perseant struct stat st; 348 1.1 perseant int ohashmax; 349 1.1 perseant extern int hashmax; 350 1.1 perseant 351 1.2 christos while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_dirtyblkhd)) != NULL) { 352 1.1 perseant bremfree(bp); 353 1.1 perseant buf_destroy(bp); 354 1.1 perseant } 355 1.2 christos while ((bp = LIST_FIRST(&fs->lfs_ivnode->v_cleanblkhd)) != NULL) { 356 1.1 perseant bremfree(bp); 357 1.1 perseant buf_destroy(bp); 358 1.1 perseant } 359 1.1 perseant 360 1.1 perseant /* If Ifile is larger than buffer cache, rehash */ 361 1.1 perseant fstat(fs->clfs_ifilefd, &st); 362 1.41 dholland if (st.st_size / lfs_sb_getbsize(fs) > hashmax) { 363 1.1 perseant ohashmax = hashmax; 364 1.41 dholland bufrehash(st.st_size / lfs_sb_getbsize(fs)); 365 1.1 perseant dlog("%s: resized buffer hash from %d to %d", 366 1.40 dholland lfs_sb_getfsmnt(fs), ohashmax, hashmax); 367 1.1 perseant } 368 1.1 perseant } 369 1.1 perseant 370 1.1 perseant /* 371 1.1 perseant * Get IFILE entry for the given inode, store in ifpp. The buffer 372 1.1 perseant * which contains that data is returned in bpp, and must be brelse()d 373 1.1 perseant * by the caller. 374 1.47 dholland * 375 1.47 dholland * XXX this is cutpaste of LFS_IENTRY from lfs.h; unify the two. 376 1.1 perseant */ 377 1.1 perseant void 378 1.1 perseant lfs_ientry(IFILE **ifpp, struct clfs *fs, ino_t ino, struct ubuf **bpp) 379 1.1 perseant { 380 1.47 dholland IFILE64 *ifp64; 381 1.47 dholland IFILE32 *ifp32; 382 1.47 dholland IFILE_V1 *ifp_v1; 383 1.1 perseant int error; 384 1.1 perseant 385 1.39 dholland error = bread(fs->lfs_ivnode, 386 1.39 dholland ino / lfs_sb_getifpb(fs) + lfs_sb_getcleansz(fs) + 387 1.41 dholland lfs_sb_getsegtabsz(fs), lfs_sb_getbsize(fs), 0, bpp); 388 1.8 perseant if (error) 389 1.8 perseant syslog(LOG_ERR, "%s: ientry failed for ino %d", 390 1.40 dholland lfs_sb_getfsmnt(fs), (int)ino); 391 1.47 dholland if (fs->lfs_is64) { 392 1.47 dholland ifp64 = (IFILE64 *)(*bpp)->b_data; 393 1.47 dholland ifp64 += ino % lfs_sb_getifpb(fs); 394 1.47 dholland *ifpp = (IFILE *)ifp64; 395 1.47 dholland } else if (lfs_sb_getversion(fs) > 1) { 396 1.47 dholland ifp32 = (IFILE32 *)(*bpp)->b_data; 397 1.47 dholland ifp32 += ino % lfs_sb_getifpb(fs); 398 1.47 dholland *ifpp = (IFILE *)ifp32; 399 1.47 dholland } else { 400 1.47 dholland ifp_v1 = (IFILE_V1 *)(*bpp)->b_data; 401 1.47 dholland ifp_v1 += ino % lfs_sb_getifpb(fs); 402 1.47 dholland *ifpp = (IFILE *)ifp_v1; 403 1.47 dholland } 404 1.1 perseant return; 405 1.1 perseant } 406 1.1 perseant 407 1.1 perseant #ifdef TEST_PATTERN 408 1.1 perseant /* 409 1.33 dholland * Check ULFS_ROOTINO for file data. The assumption is that we are running 410 1.1 perseant * the "twofiles" test with the rest of the filesystem empty. Files 411 1.33 dholland * created by "twofiles" match the test pattern, but ULFS_ROOTINO and the 412 1.1 perseant * executable itself (assumed to be inode 3) should not match. 413 1.1 perseant */ 414 1.1 perseant static void 415 1.1 perseant check_test_pattern(BLOCK_INFO *bip) 416 1.1 perseant { 417 1.1 perseant int j; 418 1.1 perseant unsigned char *cp = bip->bi_bp; 419 1.1 perseant 420 1.1 perseant /* Check inode sanity */ 421 1.1 perseant if (bip->bi_lbn == LFS_UNUSED_LBN) { 422 1.33 dholland assert(((struct ulfs1_dinode *)bip->bi_bp)->di_inumber == 423 1.1 perseant bip->bi_inode); 424 1.1 perseant } 425 1.1 perseant 426 1.1 perseant /* These can have the test pattern and it's all good */ 427 1.1 perseant if (bip->bi_inode > 3) 428 1.1 perseant return; 429 1.1 perseant 430 1.1 perseant for (j = 0; j < bip->bi_size; j++) { 431 1.1 perseant if (cp[j] != (j & 0xff)) 432 1.1 perseant break; 433 1.1 perseant } 434 1.1 perseant assert(j < bip->bi_size); 435 1.1 perseant } 436 1.1 perseant #endif /* TEST_PATTERN */ 437 1.1 perseant 438 1.1 perseant /* 439 1.1 perseant * Parse the partial segment at daddr, adding its information to 440 1.1 perseant * bip. Return the address of the next partial segment to read. 441 1.1 perseant */ 442 1.42 dholland static daddr_t 443 1.1 perseant parse_pseg(struct clfs *fs, daddr_t daddr, BLOCK_INFO **bipp, int *bic) 444 1.1 perseant { 445 1.1 perseant SEGSUM *ssp; 446 1.1 perseant IFILE *ifp; 447 1.3 perseant BLOCK_INFO *bip, *nbip; 448 1.42 dholland daddr_t idaddr, odaddr; 449 1.1 perseant FINFO *fip; 450 1.54 dholland IINFO *iip; 451 1.1 perseant struct ubuf *ifbp; 452 1.50 dholland union lfs_dinode *dip; 453 1.1 perseant u_int32_t ck, vers; 454 1.1 perseant int fic, inoc, obic; 455 1.48 dholland size_t sumstart; 456 1.1 perseant int i; 457 1.6 mrg char *cp; 458 1.1 perseant 459 1.1 perseant odaddr = daddr; 460 1.1 perseant obic = *bic; 461 1.1 perseant bip = *bipp; 462 1.1 perseant 463 1.1 perseant /* 464 1.1 perseant * Retrieve the segment header, set up the SEGSUM pointer 465 1.1 perseant * as well as the first FINFO and inode address pointer. 466 1.1 perseant */ 467 1.1 perseant cp = fd_ptrget(fs->clfs_devvp, daddr); 468 1.1 perseant ssp = (SEGSUM *)cp; 469 1.54 dholland iip = SEGSUM_IINFOSTART(fs, cp); 470 1.48 dholland fip = SEGSUM_FINFOBASE(fs, cp); 471 1.1 perseant 472 1.1 perseant /* 473 1.1 perseant * Check segment header magic and checksum 474 1.1 perseant */ 475 1.48 dholland if (lfs_ss_getmagic(fs, ssp) != SS_MAGIC) { 476 1.42 dholland syslog(LOG_WARNING, "%s: sumsum magic number bad at 0x%jx:" 477 1.40 dholland " read 0x%x, expected 0x%x", lfs_sb_getfsmnt(fs), 478 1.48 dholland (intmax_t)daddr, lfs_ss_getmagic(fs, ssp), SS_MAGIC); 479 1.1 perseant return 0x0; 480 1.1 perseant } 481 1.48 dholland sumstart = lfs_ss_getsumstart(fs); 482 1.48 dholland ck = cksum((char *)ssp + sumstart, lfs_sb_getsumsize(fs) - sumstart); 483 1.48 dholland if (ck != lfs_ss_getsumsum(fs, ssp)) { 484 1.42 dholland syslog(LOG_WARNING, "%s: sumsum checksum mismatch at 0x%jx:" 485 1.40 dholland " read 0x%x, computed 0x%x", lfs_sb_getfsmnt(fs), 486 1.48 dholland (intmax_t)daddr, lfs_ss_getsumsum(fs, ssp), ck); 487 1.1 perseant return 0x0; 488 1.1 perseant } 489 1.1 perseant 490 1.1 perseant /* Initialize data sum */ 491 1.1 perseant ck = 0; 492 1.1 perseant 493 1.1 perseant /* Point daddr at next block after segment summary */ 494 1.1 perseant ++daddr; 495 1.1 perseant 496 1.1 perseant /* 497 1.1 perseant * Loop over file info and inode pointers. We always move daddr 498 1.1 perseant * forward here because we are also computing the data checksum 499 1.1 perseant * as we go. 500 1.1 perseant */ 501 1.1 perseant fic = inoc = 0; 502 1.48 dholland while (fic < lfs_ss_getnfinfo(fs, ssp) || inoc < lfs_ss_getninos(fs, ssp)) { 503 1.1 perseant /* 504 1.3 perseant * We must have either a file block or an inode block. 505 1.3 perseant * If we don't have either one, it's an error. 506 1.3 perseant */ 507 1.54 dholland if (fic >= lfs_ss_getnfinfo(fs, ssp) && lfs_ii_getblock(fs, iip) != daddr) { 508 1.42 dholland syslog(LOG_WARNING, "%s: bad pseg at %jx (seg %d)", 509 1.42 dholland lfs_sb_getfsmnt(fs), (intmax_t)odaddr, lfs_dtosn(fs, odaddr)); 510 1.3 perseant *bipp = bip; 511 1.3 perseant return 0x0; 512 1.3 perseant } 513 1.3 perseant 514 1.3 perseant /* 515 1.1 perseant * Note each inode from the inode blocks 516 1.1 perseant */ 517 1.54 dholland if (inoc < lfs_ss_getninos(fs, ssp) && lfs_ii_getblock(fs, iip) == daddr) { 518 1.1 perseant cp = fd_ptrget(fs->clfs_devvp, daddr); 519 1.1 perseant ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); 520 1.40 dholland for (i = 0; i < lfs_sb_getinopb(fs); i++) { 521 1.50 dholland dip = DINO_IN_BLOCK(fs, cp, i); 522 1.50 dholland if (lfs_dino_getinumber(fs, dip) == 0) 523 1.1 perseant break; 524 1.1 perseant 525 1.1 perseant /* 526 1.1 perseant * Check currency before adding it 527 1.1 perseant */ 528 1.1 perseant #ifndef REPAIR_ZERO_FINFO 529 1.50 dholland lfs_ientry(&ifp, fs, lfs_dino_getinumber(fs, dip), &ifbp); 530 1.47 dholland idaddr = lfs_if_getdaddr(fs, ifp); 531 1.13 ad brelse(ifbp, 0); 532 1.1 perseant if (idaddr != daddr) 533 1.1 perseant #endif 534 1.1 perseant continue; 535 1.1 perseant 536 1.1 perseant /* 537 1.1 perseant * A current inode. Add it. 538 1.1 perseant */ 539 1.1 perseant ++*bic; 540 1.3 perseant nbip = (BLOCK_INFO *)realloc(bip, *bic * 541 1.3 perseant sizeof(*bip)); 542 1.3 perseant if (nbip) 543 1.3 perseant bip = nbip; 544 1.3 perseant else { 545 1.3 perseant --*bic; 546 1.3 perseant *bipp = bip; 547 1.3 perseant return 0x0; 548 1.3 perseant } 549 1.50 dholland bip[*bic - 1].bi_inode = lfs_dino_getinumber(fs, dip); 550 1.1 perseant bip[*bic - 1].bi_lbn = LFS_UNUSED_LBN; 551 1.1 perseant bip[*bic - 1].bi_daddr = daddr; 552 1.48 dholland bip[*bic - 1].bi_segcreate = lfs_ss_getcreate(fs, ssp); 553 1.50 dholland bip[*bic - 1].bi_version = lfs_dino_getgen(fs, dip); 554 1.50 dholland bip[*bic - 1].bi_bp = dip; 555 1.50 dholland bip[*bic - 1].bi_size = DINOSIZE(fs); 556 1.1 perseant } 557 1.1 perseant inoc += i; 558 1.40 dholland daddr += lfs_btofsb(fs, lfs_sb_getibsize(fs)); 559 1.54 dholland iip = NEXTLOWER_IINFO(fs, iip); 560 1.1 perseant continue; 561 1.1 perseant } 562 1.1 perseant 563 1.1 perseant /* 564 1.1 perseant * Note each file block from the finfo blocks 565 1.1 perseant */ 566 1.48 dholland if (fic >= lfs_ss_getnfinfo(fs, ssp)) 567 1.1 perseant continue; 568 1.1 perseant 569 1.1 perseant /* Count this finfo, whether or not we use it */ 570 1.1 perseant ++fic; 571 1.1 perseant 572 1.1 perseant /* 573 1.1 perseant * If this finfo has nblocks==0, it was written wrong. 574 1.1 perseant * Kernels with this problem always wrote this zero-sized 575 1.1 perseant * finfo last, so just ignore it. 576 1.1 perseant */ 577 1.49 dholland if (lfs_fi_getnblocks(fs, fip) == 0) { 578 1.1 perseant #ifdef REPAIR_ZERO_FINFO 579 1.1 perseant struct ubuf *nbp; 580 1.1 perseant SEGSUM *nssp; 581 1.1 perseant 582 1.42 dholland syslog(LOG_WARNING, "fixing short FINFO at %jx (seg %d)", 583 1.42 dholland (intmax_t)odaddr, lfs_dtosn(fs, odaddr)); 584 1.41 dholland bread(fs->clfs_devvp, odaddr, lfs_sb_getfsize(fs), 585 1.38 chopps 0, &nbp); 586 1.1 perseant nssp = (SEGSUM *)nbp->b_data; 587 1.1 perseant --nssp->ss_nfinfo; 588 1.1 perseant nssp->ss_sumsum = cksum(&nssp->ss_datasum, 589 1.40 dholland lfs_sb_getsumsize(fs) - sizeof(nssp->ss_sumsum)); 590 1.1 perseant bwrite(nbp); 591 1.1 perseant #endif 592 1.42 dholland syslog(LOG_WARNING, "zero-length FINFO at %jx (seg %d)", 593 1.42 dholland (intmax_t)odaddr, lfs_dtosn(fs, odaddr)); 594 1.1 perseant continue; 595 1.1 perseant } 596 1.1 perseant 597 1.1 perseant /* 598 1.1 perseant * Check currency before adding blocks 599 1.1 perseant */ 600 1.1 perseant #ifdef REPAIR_ZERO_FINFO 601 1.1 perseant vers = -1; 602 1.1 perseant #else 603 1.49 dholland lfs_ientry(&ifp, fs, lfs_fi_getino(fs, fip), &ifbp); 604 1.47 dholland vers = lfs_if_getversion(fs, ifp); 605 1.13 ad brelse(ifbp, 0); 606 1.1 perseant #endif 607 1.49 dholland if (vers != lfs_fi_getversion(fs, fip)) { 608 1.1 perseant size_t size; 609 1.1 perseant 610 1.1 perseant /* Read all the blocks from the data summary */ 611 1.49 dholland for (i = 0; i < lfs_fi_getnblocks(fs, fip); i++) { 612 1.49 dholland size = (i == lfs_fi_getnblocks(fs, fip) - 1) ? 613 1.49 dholland lfs_fi_getlastlength(fs, fip) : lfs_sb_getbsize(fs); 614 1.1 perseant cp = fd_ptrget(fs->clfs_devvp, daddr); 615 1.1 perseant ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); 616 1.36 christos daddr += lfs_btofsb(fs, size); 617 1.1 perseant } 618 1.49 dholland fip = NEXT_FINFO(fs, fip); 619 1.1 perseant continue; 620 1.1 perseant } 621 1.1 perseant 622 1.1 perseant /* Add all the blocks from the finfos (current or not) */ 623 1.49 dholland nbip = (BLOCK_INFO *)realloc(bip, (*bic + lfs_fi_getnblocks(fs, fip)) * 624 1.3 perseant sizeof(*bip)); 625 1.3 perseant if (nbip) 626 1.3 perseant bip = nbip; 627 1.3 perseant else { 628 1.3 perseant *bipp = bip; 629 1.3 perseant return 0x0; 630 1.3 perseant } 631 1.3 perseant 632 1.49 dholland for (i = 0; i < lfs_fi_getnblocks(fs, fip); i++) { 633 1.49 dholland bip[*bic + i].bi_inode = lfs_fi_getino(fs, fip); 634 1.49 dholland bip[*bic + i].bi_lbn = lfs_fi_getblock(fs, fip, i); 635 1.1 perseant bip[*bic + i].bi_daddr = daddr; 636 1.48 dholland bip[*bic + i].bi_segcreate = lfs_ss_getcreate(fs, ssp); 637 1.49 dholland bip[*bic + i].bi_version = lfs_fi_getversion(fs, fip); 638 1.49 dholland bip[*bic + i].bi_size = (i == lfs_fi_getnblocks(fs, fip) - 1) ? 639 1.49 dholland lfs_fi_getlastlength(fs, fip) : lfs_sb_getbsize(fs); 640 1.1 perseant cp = fd_ptrget(fs->clfs_devvp, daddr); 641 1.1 perseant ck = lfs_cksum_part(cp, sizeof(u_int32_t), ck); 642 1.1 perseant bip[*bic + i].bi_bp = cp; 643 1.36 christos daddr += lfs_btofsb(fs, bip[*bic + i].bi_size); 644 1.1 perseant 645 1.1 perseant #ifdef TEST_PATTERN 646 1.1 perseant check_test_pattern(bip + *bic + i); /* XXXDEBUG */ 647 1.1 perseant #endif 648 1.1 perseant } 649 1.49 dholland *bic += lfs_fi_getnblocks(fs, fip); 650 1.48 dholland fip = NEXT_FINFO(fs, fip); 651 1.1 perseant } 652 1.1 perseant 653 1.1 perseant #ifndef REPAIR_ZERO_FINFO 654 1.48 dholland if (lfs_ss_getdatasum(fs, ssp) != ck) { 655 1.42 dholland syslog(LOG_WARNING, "%s: data checksum bad at 0x%jx:" 656 1.42 dholland " read 0x%x, computed 0x%x", lfs_sb_getfsmnt(fs), 657 1.42 dholland (intmax_t)odaddr, 658 1.48 dholland lfs_ss_getdatasum(fs, ssp), ck); 659 1.1 perseant *bic = obic; 660 1.1 perseant return 0x0; 661 1.1 perseant } 662 1.1 perseant #endif 663 1.1 perseant 664 1.1 perseant *bipp = bip; 665 1.1 perseant return daddr; 666 1.1 perseant } 667 1.1 perseant 668 1.1 perseant static void 669 1.1 perseant log_segment_read(struct clfs *fs, int sn) 670 1.1 perseant { 671 1.1 perseant FILE *fp; 672 1.1 perseant char *cp; 673 1.1 perseant 674 1.1 perseant /* 675 1.1 perseant * Write the segment read, and its contents, into a log file in 676 1.1 perseant * the current directory. We don't need to log the location of 677 1.1 perseant * the segment, since that can be inferred from the segments up 678 1.1 perseant * to this point (ss_nextseg field of the previously written segment). 679 1.1 perseant * 680 1.1 perseant * We can use this info later to reconstruct the filesystem at any 681 1.1 perseant * given point in time for analysis, by replaying the log forward 682 1.1 perseant * indexed by the segment serial numbers; but it is not suitable 683 1.1 perseant * for everyday use since the copylog will be simply enormous. 684 1.1 perseant */ 685 1.36 christos cp = fd_ptrget(fs->clfs_devvp, lfs_sntod(fs, sn)); 686 1.1 perseant 687 1.1 perseant fp = fopen(copylog_filename, "ab"); 688 1.1 perseant if (fp != NULL) { 689 1.41 dholland if (fwrite(cp, (size_t)lfs_sb_getssize(fs), 1, fp) != 1) { 690 1.1 perseant perror("writing segment to copy log"); 691 1.1 perseant } 692 1.1 perseant } 693 1.1 perseant fclose(fp); 694 1.1 perseant } 695 1.1 perseant 696 1.1 perseant /* 697 1.1 perseant * Read a segment to populate the BLOCK_INFO structures. 698 1.1 perseant * Return the number of partial segments read and parsed. 699 1.1 perseant */ 700 1.1 perseant int 701 1.1 perseant load_segment(struct clfs *fs, int sn, BLOCK_INFO **bipp, int *bic) 702 1.1 perseant { 703 1.42 dholland daddr_t daddr; 704 1.1 perseant int i, npseg; 705 1.1 perseant 706 1.36 christos daddr = lfs_sntod(fs, sn); 707 1.36 christos if (daddr < lfs_btofsb(fs, LFS_LABELPAD)) 708 1.36 christos daddr = lfs_btofsb(fs, LFS_LABELPAD); 709 1.1 perseant for (i = 0; i < LFS_MAXNUMSB; i++) { 710 1.40 dholland if (lfs_sb_getsboff(fs, i) == daddr) { 711 1.36 christos daddr += lfs_btofsb(fs, LFS_SBPAD); 712 1.1 perseant break; 713 1.1 perseant } 714 1.1 perseant } 715 1.1 perseant 716 1.1 perseant /* Preload the segment buffer */ 717 1.36 christos if (fd_preload(fs->clfs_devvp, lfs_sntod(fs, sn)) < 0) 718 1.1 perseant return -1; 719 1.1 perseant 720 1.1 perseant if (copylog_filename) 721 1.1 perseant log_segment_read(fs, sn); 722 1.1 perseant 723 1.1 perseant /* Note bytes read for stats */ 724 1.1 perseant cleaner_stats.segs_cleaned++; 725 1.41 dholland cleaner_stats.bytes_read += lfs_sb_getssize(fs); 726 1.1 perseant ++fs->clfs_nactive; 727 1.1 perseant 728 1.1 perseant npseg = 0; 729 1.36 christos while(lfs_dtosn(fs, daddr) == sn && 730 1.41 dholland lfs_dtosn(fs, daddr + lfs_btofsb(fs, lfs_sb_getbsize(fs))) == sn) { 731 1.1 perseant daddr = parse_pseg(fs, daddr, bipp, bic); 732 1.1 perseant if (daddr == 0x0) { 733 1.1 perseant ++cleaner_stats.segs_error; 734 1.1 perseant break; 735 1.1 perseant } 736 1.1 perseant ++npseg; 737 1.1 perseant } 738 1.1 perseant 739 1.1 perseant return npseg; 740 1.1 perseant } 741 1.1 perseant 742 1.1 perseant void 743 1.1 perseant calc_cb(struct clfs *fs, int sn, struct clfs_seguse *t) 744 1.1 perseant { 745 1.1 perseant time_t now; 746 1.1 perseant int64_t age, benefit, cost; 747 1.1 perseant 748 1.1 perseant time(&now); 749 1.1 perseant age = (now < t->lastmod ? 0 : now - t->lastmod); 750 1.1 perseant 751 1.1 perseant /* Under no circumstances clean active or already-clean segments */ 752 1.1 perseant if ((t->flags & SEGUSE_ACTIVE) || !(t->flags & SEGUSE_DIRTY)) { 753 1.1 perseant t->priority = 0; 754 1.1 perseant return; 755 1.1 perseant } 756 1.1 perseant 757 1.1 perseant /* 758 1.1 perseant * If the segment is empty, there is no reason to clean it. 759 1.1 perseant * Clear its error condition, if any, since we are never going to 760 1.1 perseant * try to parse this one. 761 1.1 perseant */ 762 1.1 perseant if (t->nbytes == 0) { 763 1.1 perseant t->flags &= ~SEGUSE_ERROR; /* Strip error once empty */ 764 1.1 perseant t->priority = 0; 765 1.1 perseant return; 766 1.1 perseant } 767 1.1 perseant 768 1.1 perseant if (t->flags & SEGUSE_ERROR) { /* No good if not already empty */ 769 1.1 perseant /* No benefit */ 770 1.1 perseant t->priority = 0; 771 1.1 perseant return; 772 1.1 perseant } 773 1.1 perseant 774 1.41 dholland if (t->nbytes > lfs_sb_getssize(fs)) { 775 1.1 perseant /* Another type of error */ 776 1.1 perseant syslog(LOG_WARNING, "segment %d: bad seguse count %d", 777 1.1 perseant sn, t->nbytes); 778 1.1 perseant t->flags |= SEGUSE_ERROR; 779 1.1 perseant t->priority = 0; 780 1.1 perseant return; 781 1.1 perseant } 782 1.1 perseant 783 1.1 perseant /* 784 1.1 perseant * The non-degenerate case. Use Rosenblum's cost-benefit algorithm. 785 1.1 perseant * Calculate the benefit from cleaning this segment (one segment, 786 1.1 perseant * minus fragmentation, dirty blocks and a segment summary block) 787 1.1 perseant * and weigh that against the cost (bytes read plus bytes written). 788 1.1 perseant * We count the summary headers as "dirty" to avoid cleaning very 789 1.1 perseant * old and very full segments. 790 1.1 perseant */ 791 1.41 dholland benefit = (int64_t)lfs_sb_getssize(fs) - t->nbytes - 792 1.41 dholland (t->nsums + 1) * lfs_sb_getfsize(fs); 793 1.41 dholland if (lfs_sb_getbsize(fs) > lfs_sb_getfsize(fs)) /* fragmentation */ 794 1.41 dholland benefit -= (lfs_sb_getbsize(fs) / 2); 795 1.1 perseant if (benefit <= 0) { 796 1.1 perseant t->priority = 0; 797 1.1 perseant return; 798 1.1 perseant } 799 1.1 perseant 800 1.41 dholland cost = lfs_sb_getssize(fs) + t->nbytes; 801 1.1 perseant t->priority = (256 * benefit * age) / cost; 802 1.1 perseant 803 1.1 perseant return; 804 1.1 perseant } 805 1.1 perseant 806 1.1 perseant /* 807 1.1 perseant * Comparator for BLOCK_INFO structures. Anything not in one of the segments 808 1.1 perseant * we're looking at sorts higher; after that we sort first by inode number 809 1.1 perseant * and then by block number (unsigned, i.e., negative sorts higher) *but* 810 1.1 perseant * sort inodes before data blocks. 811 1.1 perseant */ 812 1.1 perseant static int 813 1.1 perseant bi_comparator(const void *va, const void *vb) 814 1.1 perseant { 815 1.17 lukem const BLOCK_INFO *a, *b; 816 1.1 perseant 817 1.17 lukem a = (const BLOCK_INFO *)va; 818 1.17 lukem b = (const BLOCK_INFO *)vb; 819 1.1 perseant 820 1.1 perseant /* Check for out-of-place block */ 821 1.1 perseant if (a->bi_segcreate == a->bi_daddr && 822 1.1 perseant b->bi_segcreate != b->bi_daddr) 823 1.1 perseant return -1; 824 1.1 perseant if (a->bi_segcreate != a->bi_daddr && 825 1.1 perseant b->bi_segcreate == b->bi_daddr) 826 1.1 perseant return 1; 827 1.1 perseant if (a->bi_size <= 0 && b->bi_size > 0) 828 1.1 perseant return 1; 829 1.1 perseant if (b->bi_size <= 0 && a->bi_size > 0) 830 1.1 perseant return -1; 831 1.1 perseant 832 1.1 perseant /* Check inode number */ 833 1.1 perseant if (a->bi_inode != b->bi_inode) 834 1.1 perseant return a->bi_inode - b->bi_inode; 835 1.1 perseant 836 1.1 perseant /* Check lbn */ 837 1.1 perseant if (a->bi_lbn == LFS_UNUSED_LBN) /* Inodes sort lower than blocks */ 838 1.1 perseant return -1; 839 1.1 perseant if (b->bi_lbn == LFS_UNUSED_LBN) 840 1.1 perseant return 1; 841 1.45 dholland if ((u_int64_t)a->bi_lbn > (u_int64_t)b->bi_lbn) 842 1.1 perseant return 1; 843 1.1 perseant else 844 1.1 perseant return -1; 845 1.8 perseant 846 1.8 perseant return 0; 847 1.1 perseant } 848 1.1 perseant 849 1.1 perseant /* 850 1.1 perseant * Comparator for sort_segments: cost-benefit equation. 851 1.1 perseant */ 852 1.1 perseant static int 853 1.1 perseant cb_comparator(const void *va, const void *vb) 854 1.1 perseant { 855 1.17 lukem const struct clfs_seguse *a, *b; 856 1.1 perseant 857 1.17 lukem a = *(const struct clfs_seguse * const *)va; 858 1.17 lukem b = *(const struct clfs_seguse * const *)vb; 859 1.1 perseant return a->priority > b->priority ? -1 : 1; 860 1.1 perseant } 861 1.1 perseant 862 1.1 perseant void 863 1.45 dholland toss_old_blocks(struct clfs *fs, BLOCK_INFO **bipp, blkcnt_t *bic, int *sizep) 864 1.1 perseant { 865 1.45 dholland blkcnt_t i; 866 1.45 dholland int r; 867 1.1 perseant BLOCK_INFO *bip = *bipp; 868 1.1 perseant struct lfs_fcntl_markv /* { 869 1.1 perseant BLOCK_INFO *blkiov; 870 1.1 perseant int blkcnt; 871 1.1 perseant } */ lim; 872 1.1 perseant 873 1.3 perseant if (bic == 0 || bip == NULL) 874 1.3 perseant return; 875 1.3 perseant 876 1.1 perseant /* 877 1.1 perseant * Kludge: Store the disk address in segcreate so we know which 878 1.1 perseant * ones to toss. 879 1.1 perseant */ 880 1.1 perseant for (i = 0; i < *bic; i++) 881 1.1 perseant bip[i].bi_segcreate = bip[i].bi_daddr; 882 1.1 perseant 883 1.45 dholland /* 884 1.45 dholland * XXX: blkcnt_t is 64 bits, so *bic might overflow size_t 885 1.45 dholland * (the argument type of heapsort's number argument) on a 886 1.45 dholland * 32-bit platform. However, if so we won't have got this far 887 1.45 dholland * because we'll have failed trying to allocate the array. So 888 1.45 dholland * while *bic here might cause a 64->32 truncation, it's safe. 889 1.45 dholland */ 890 1.1 perseant /* Sort the blocks */ 891 1.1 perseant heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); 892 1.1 perseant 893 1.1 perseant /* Use bmapv to locate the blocks */ 894 1.1 perseant lim.blkiov = bip; 895 1.1 perseant lim.blkcnt = *bic; 896 1.21 pooka if ((r = kops.ko_fcntl(fs->clfs_ifilefd, LFCNBMAPV, &lim)) < 0) { 897 1.1 perseant syslog(LOG_WARNING, "%s: bmapv returned %d (%m)", 898 1.40 dholland lfs_sb_getfsmnt(fs), r); 899 1.1 perseant return; 900 1.1 perseant } 901 1.1 perseant 902 1.1 perseant /* Toss blocks not in this segment */ 903 1.1 perseant heapsort(bip, *bic, sizeof(BLOCK_INFO), bi_comparator); 904 1.1 perseant 905 1.1 perseant /* Get rid of stale blocks */ 906 1.7 perseant if (sizep) 907 1.7 perseant *sizep = 0; 908 1.7 perseant for (i = 0; i < *bic; i++) { 909 1.1 perseant if (bip[i].bi_segcreate != bip[i].bi_daddr) 910 1.1 perseant break; 911 1.7 perseant if (sizep) 912 1.7 perseant *sizep += bip[i].bi_size; 913 1.7 perseant } 914 1.28 perseant *bic = i; /* XXX should we shrink bip? */ 915 1.1 perseant *bipp = bip; 916 1.1 perseant 917 1.1 perseant return; 918 1.1 perseant } 919 1.1 perseant 920 1.1 perseant /* 921 1.1 perseant * Clean a segment and mark it invalid. 922 1.1 perseant */ 923 1.1 perseant int 924 1.1 perseant invalidate_segment(struct clfs *fs, int sn) 925 1.1 perseant { 926 1.1 perseant BLOCK_INFO *bip; 927 1.1 perseant int i, r, bic; 928 1.45 dholland blkcnt_t widebic; 929 1.1 perseant off_t nb; 930 1.1 perseant double util; 931 1.1 perseant struct lfs_fcntl_markv /* { 932 1.1 perseant BLOCK_INFO *blkiov; 933 1.1 perseant int blkcnt; 934 1.1 perseant } */ lim; 935 1.1 perseant 936 1.40 dholland dlog("%s: inval seg %d", lfs_sb_getfsmnt(fs), sn); 937 1.1 perseant 938 1.1 perseant bip = NULL; 939 1.1 perseant bic = 0; 940 1.1 perseant fs->clfs_nactive = 0; 941 1.7 perseant if (load_segment(fs, sn, &bip, &bic) <= 0) 942 1.7 perseant return -1; 943 1.45 dholland widebic = bic; 944 1.45 dholland toss_old_blocks(fs, &bip, &widebic, NULL); 945 1.45 dholland bic = widebic; 946 1.1 perseant 947 1.1 perseant /* Record statistics */ 948 1.1 perseant for (i = nb = 0; i < bic; i++) 949 1.1 perseant nb += bip[i].bi_size; 950 1.41 dholland util = ((double)nb) / (fs->clfs_nactive * lfs_sb_getssize(fs)); 951 1.1 perseant cleaner_stats.util_tot += util; 952 1.1 perseant cleaner_stats.util_sos += util * util; 953 1.1 perseant cleaner_stats.bytes_written += nb; 954 1.1 perseant 955 1.1 perseant /* 956 1.1 perseant * Use markv to move the blocks. 957 1.1 perseant */ 958 1.1 perseant lim.blkiov = bip; 959 1.1 perseant lim.blkcnt = bic; 960 1.21 pooka if ((r = kops.ko_fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim)) < 0) { 961 1.1 perseant syslog(LOG_WARNING, "%s: markv returned %d (%m) " 962 1.40 dholland "for seg %d", lfs_sb_getfsmnt(fs), r, sn); 963 1.1 perseant return r; 964 1.1 perseant } 965 1.1 perseant 966 1.1 perseant /* 967 1.1 perseant * Finally call invalidate to invalidate the segment. 968 1.1 perseant */ 969 1.21 pooka if ((r = kops.ko_fcntl(fs->clfs_ifilefd, LFCNINVAL, &sn)) < 0) { 970 1.1 perseant syslog(LOG_WARNING, "%s: inval returned %d (%m) " 971 1.40 dholland "for seg %d", lfs_sb_getfsmnt(fs), r, sn); 972 1.1 perseant return r; 973 1.1 perseant } 974 1.1 perseant 975 1.1 perseant return 0; 976 1.1 perseant } 977 1.1 perseant 978 1.1 perseant /* 979 1.1 perseant * Check to see if the given ino/lbn pair is represented in the BLOCK_INFO 980 1.1 perseant * array we are sending to the kernel, or if the kernel will have to add it. 981 1.1 perseant * The kernel will only add each such pair once, though, so keep track of 982 1.1 perseant * previous requests in a separate "extra" BLOCK_INFO array. Returns 1 983 1.1 perseant * if the block needs to be added, 0 if it is already represented. 984 1.1 perseant */ 985 1.1 perseant static int 986 1.45 dholland check_or_add(ino_t ino, daddr_t lbn, BLOCK_INFO *bip, int bic, BLOCK_INFO **ebipp, int *ebicp) 987 1.1 perseant { 988 1.1 perseant BLOCK_INFO *t, *ebip = *ebipp; 989 1.1 perseant int ebic = *ebicp; 990 1.1 perseant int k; 991 1.1 perseant 992 1.1 perseant for (k = 0; k < bic; k++) { 993 1.1 perseant if (bip[k].bi_inode != ino) 994 1.1 perseant break; 995 1.1 perseant if (bip[k].bi_lbn == lbn) { 996 1.1 perseant return 0; 997 1.1 perseant } 998 1.1 perseant } 999 1.1 perseant 1000 1.1 perseant /* Look on the list of extra blocks, too */ 1001 1.1 perseant for (k = 0; k < ebic; k++) { 1002 1.1 perseant if (ebip[k].bi_inode == ino && ebip[k].bi_lbn == lbn) { 1003 1.1 perseant return 0; 1004 1.1 perseant } 1005 1.1 perseant } 1006 1.1 perseant 1007 1.1 perseant ++ebic; 1008 1.1 perseant t = realloc(ebip, ebic * sizeof(BLOCK_INFO)); 1009 1.1 perseant if (t == NULL) 1010 1.28 perseant return 1; /* Note *ebicp is unchanged */ 1011 1.1 perseant 1012 1.1 perseant ebip = t; 1013 1.1 perseant ebip[ebic - 1].bi_inode = ino; 1014 1.1 perseant ebip[ebic - 1].bi_lbn = lbn; 1015 1.1 perseant 1016 1.1 perseant *ebipp = ebip; 1017 1.1 perseant *ebicp = ebic; 1018 1.1 perseant return 1; 1019 1.1 perseant } 1020 1.1 perseant 1021 1.1 perseant /* 1022 1.1 perseant * Look for indirect blocks we will have to write which are not 1023 1.1 perseant * contained in this collection of blocks. This constitutes 1024 1.1 perseant * a hidden cleaning cost, since we are unaware of it until we 1025 1.1 perseant * have already read the segments. Return the total cost, and fill 1026 1.1 perseant * in *ifc with the part of that cost due to rewriting the Ifile. 1027 1.1 perseant */ 1028 1.1 perseant static off_t 1029 1.1 perseant check_hidden_cost(struct clfs *fs, BLOCK_INFO *bip, int bic, off_t *ifc) 1030 1.1 perseant { 1031 1.1 perseant int start; 1032 1.33 dholland struct indir in[ULFS_NIADDR + 1]; 1033 1.1 perseant int num; 1034 1.1 perseant int i, j, ebic; 1035 1.1 perseant BLOCK_INFO *ebip; 1036 1.45 dholland daddr_t lbn; 1037 1.1 perseant 1038 1.1 perseant start = 0; 1039 1.1 perseant ebip = NULL; 1040 1.1 perseant ebic = 0; 1041 1.1 perseant for (i = 0; i < bic; i++) { 1042 1.1 perseant if (i == 0 || bip[i].bi_inode != bip[start].bi_inode) { 1043 1.1 perseant start = i; 1044 1.1 perseant /* 1045 1.1 perseant * Look for IFILE blocks, unless this is the Ifile. 1046 1.1 perseant */ 1047 1.52 dholland if (bip[i].bi_inode != LFS_IFILE_INUM) { 1048 1.39 dholland lbn = lfs_sb_getcleansz(fs) + bip[i].bi_inode / 1049 1.39 dholland lfs_sb_getifpb(fs); 1050 1.52 dholland *ifc += check_or_add(LFS_IFILE_INUM, lbn, 1051 1.1 perseant bip, bic, &ebip, &ebic); 1052 1.1 perseant } 1053 1.1 perseant } 1054 1.1 perseant if (bip[i].bi_lbn == LFS_UNUSED_LBN) 1055 1.1 perseant continue; 1056 1.33 dholland if (bip[i].bi_lbn < ULFS_NDADDR) 1057 1.5 perseant continue; 1058 1.5 perseant 1059 1.45 dholland /* XXX the struct lfs cast is completely wrong/unsafe */ 1060 1.33 dholland ulfs_getlbns((struct lfs *)fs, NULL, (daddr_t)bip[i].bi_lbn, in, &num); 1061 1.1 perseant for (j = 0; j < num; j++) { 1062 1.1 perseant check_or_add(bip[i].bi_inode, in[j].in_lbn, 1063 1.1 perseant bip + start, bic - start, &ebip, &ebic); 1064 1.1 perseant } 1065 1.1 perseant } 1066 1.1 perseant return ebic; 1067 1.1 perseant } 1068 1.1 perseant 1069 1.1 perseant /* 1070 1.1 perseant * Select segments to clean, add blocks from these segments to a cleaning 1071 1.1 perseant * list, and send this list through lfs_markv() to move them to new 1072 1.1 perseant * locations on disk. 1073 1.1 perseant */ 1074 1.46 dholland static int 1075 1.46 dholland clean_fs(struct clfs *fs, const CLEANERINFO64 *cip) 1076 1.1 perseant { 1077 1.7 perseant int i, j, ngood, sn, bic, r, npos; 1078 1.45 dholland blkcnt_t widebic; 1079 1.7 perseant int bytes, totbytes; 1080 1.1 perseant struct ubuf *bp; 1081 1.1 perseant SEGUSE *sup; 1082 1.1 perseant static BLOCK_INFO *bip; 1083 1.1 perseant struct lfs_fcntl_markv /* { 1084 1.1 perseant BLOCK_INFO *blkiov; 1085 1.1 perseant int blkcnt; 1086 1.1 perseant } */ lim; 1087 1.1 perseant int mc; 1088 1.1 perseant BLOCK_INFO *mbip; 1089 1.1 perseant int inc; 1090 1.1 perseant off_t nb; 1091 1.1 perseant off_t goal; 1092 1.1 perseant off_t extra, if_extra; 1093 1.1 perseant double util; 1094 1.1 perseant 1095 1.1 perseant /* Read the segment table into our private structure */ 1096 1.7 perseant npos = 0; 1097 1.39 dholland for (i = 0; i < lfs_sb_getnseg(fs); i+= lfs_sb_getsepb(fs)) { 1098 1.39 dholland bread(fs->lfs_ivnode, 1099 1.39 dholland lfs_sb_getcleansz(fs) + i / lfs_sb_getsepb(fs), 1100 1.41 dholland lfs_sb_getbsize(fs), 0, &bp); 1101 1.39 dholland for (j = 0; j < lfs_sb_getsepb(fs) && i + j < lfs_sb_getnseg(fs); j++) { 1102 1.1 perseant sup = ((SEGUSE *)bp->b_data) + j; 1103 1.1 perseant fs->clfs_segtab[i + j].nbytes = sup->su_nbytes; 1104 1.1 perseant fs->clfs_segtab[i + j].nsums = sup->su_nsums; 1105 1.1 perseant fs->clfs_segtab[i + j].lastmod = sup->su_lastmod; 1106 1.1 perseant /* Keep error status but renew other flags */ 1107 1.1 perseant fs->clfs_segtab[i + j].flags &= SEGUSE_ERROR; 1108 1.1 perseant fs->clfs_segtab[i + j].flags |= sup->su_flags; 1109 1.1 perseant 1110 1.1 perseant /* Compute cost-benefit coefficient */ 1111 1.1 perseant calc_cb(fs, i + j, fs->clfs_segtab + i + j); 1112 1.7 perseant if (fs->clfs_segtab[i + j].priority > 0) 1113 1.7 perseant ++npos; 1114 1.1 perseant } 1115 1.13 ad brelse(bp, 0); 1116 1.1 perseant } 1117 1.1 perseant 1118 1.1 perseant /* Sort segments based on cleanliness, fulness, and condition */ 1119 1.40 dholland heapsort(fs->clfs_segtabp, lfs_sb_getnseg(fs), sizeof(struct clfs_seguse *), 1120 1.1 perseant cb_comparator); 1121 1.1 perseant 1122 1.1 perseant /* If no segment is cleanable, just return */ 1123 1.1 perseant if (fs->clfs_segtabp[0]->priority == 0) { 1124 1.40 dholland dlog("%s: no segment cleanable", lfs_sb_getfsmnt(fs)); 1125 1.1 perseant return 0; 1126 1.1 perseant } 1127 1.1 perseant 1128 1.1 perseant /* Load some segments' blocks into bip */ 1129 1.1 perseant bic = 0; 1130 1.1 perseant fs->clfs_nactive = 0; 1131 1.1 perseant ngood = 0; 1132 1.1 perseant if (use_bytes) { 1133 1.1 perseant /* Set attainable goal */ 1134 1.41 dholland goal = lfs_sb_getssize(fs) * atatime; 1135 1.41 dholland if (goal > (cip->clean - 1) * lfs_sb_getssize(fs) / 2) 1136 1.41 dholland goal = MAX((cip->clean - 1) * lfs_sb_getssize(fs), 1137 1.41 dholland lfs_sb_getssize(fs)) / 2; 1138 1.1 perseant 1139 1.7 perseant dlog("%s: cleaning with goal %" PRId64 1140 1.7 perseant " bytes (%d segs clean, %d cleanable)", 1141 1.40 dholland lfs_sb_getfsmnt(fs), goal, cip->clean, npos); 1142 1.7 perseant syslog(LOG_INFO, "%s: cleaning with goal %" PRId64 1143 1.7 perseant " bytes (%d segs clean, %d cleanable)", 1144 1.40 dholland lfs_sb_getfsmnt(fs), goal, cip->clean, npos); 1145 1.7 perseant totbytes = 0; 1146 1.40 dholland for (i = 0; i < lfs_sb_getnseg(fs) && totbytes < goal; i++) { 1147 1.1 perseant if (fs->clfs_segtabp[i]->priority == 0) 1148 1.1 perseant break; 1149 1.8 perseant /* Upper bound on number of segments at once */ 1150 1.41 dholland if (ngood * lfs_sb_getssize(fs) > 4 * goal) 1151 1.8 perseant break; 1152 1.1 perseant sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); 1153 1.1 perseant dlog("%s: add seg %d prio %" PRIu64 1154 1.1 perseant " containing %ld bytes", 1155 1.40 dholland lfs_sb_getfsmnt(fs), sn, fs->clfs_segtabp[i]->priority, 1156 1.1 perseant fs->clfs_segtabp[i]->nbytes); 1157 1.7 perseant if ((r = load_segment(fs, sn, &bip, &bic)) > 0) { 1158 1.1 perseant ++ngood; 1159 1.45 dholland widebic = bic; 1160 1.45 dholland toss_old_blocks(fs, &bip, &widebic, &bytes); 1161 1.45 dholland bic = widebic; 1162 1.7 perseant totbytes += bytes; 1163 1.7 perseant } else if (r == 0) 1164 1.7 perseant fd_release(fs->clfs_devvp); 1165 1.1 perseant else 1166 1.1 perseant break; 1167 1.1 perseant } 1168 1.1 perseant } else { 1169 1.1 perseant /* Set attainable goal */ 1170 1.1 perseant goal = atatime; 1171 1.1 perseant if (goal > cip->clean - 1) 1172 1.1 perseant goal = MAX(cip->clean - 1, 1); 1173 1.1 perseant 1174 1.7 perseant dlog("%s: cleaning with goal %d segments (%d clean, %d cleanable)", 1175 1.40 dholland lfs_sb_getfsmnt(fs), (int)goal, cip->clean, npos); 1176 1.40 dholland for (i = 0; i < lfs_sb_getnseg(fs) && ngood < goal; i++) { 1177 1.1 perseant if (fs->clfs_segtabp[i]->priority == 0) 1178 1.1 perseant break; 1179 1.1 perseant sn = (fs->clfs_segtabp[i] - fs->clfs_segtab); 1180 1.1 perseant dlog("%s: add seg %d prio %" PRIu64, 1181 1.40 dholland lfs_sb_getfsmnt(fs), sn, fs->clfs_segtabp[i]->priority); 1182 1.1 perseant if ((r = load_segment(fs, sn, &bip, &bic)) > 0) 1183 1.1 perseant ++ngood; 1184 1.7 perseant else if (r == 0) 1185 1.7 perseant fd_release(fs->clfs_devvp); 1186 1.7 perseant else 1187 1.1 perseant break; 1188 1.1 perseant } 1189 1.45 dholland widebic = bic; 1190 1.45 dholland toss_old_blocks(fs, &bip, &widebic, NULL); 1191 1.45 dholland bic = widebic; 1192 1.1 perseant } 1193 1.1 perseant 1194 1.1 perseant /* If there is nothing to do, try again later. */ 1195 1.1 perseant if (bic == 0) { 1196 1.3 perseant dlog("%s: no blocks to clean in %d cleanable segments", 1197 1.40 dholland lfs_sb_getfsmnt(fs), (int)ngood); 1198 1.1 perseant fd_release_all(fs->clfs_devvp); 1199 1.1 perseant return 0; 1200 1.1 perseant } 1201 1.1 perseant 1202 1.1 perseant /* Record statistics */ 1203 1.1 perseant for (i = nb = 0; i < bic; i++) 1204 1.1 perseant nb += bip[i].bi_size; 1205 1.41 dholland util = ((double)nb) / (fs->clfs_nactive * lfs_sb_getssize(fs)); 1206 1.1 perseant cleaner_stats.util_tot += util; 1207 1.1 perseant cleaner_stats.util_sos += util * util; 1208 1.1 perseant cleaner_stats.bytes_written += nb; 1209 1.1 perseant 1210 1.1 perseant /* 1211 1.1 perseant * Check out our blocks to see if there are hidden cleaning costs. 1212 1.1 perseant * If there are, we might be cleaning ourselves deeper into a hole 1213 1.1 perseant * rather than doing anything useful. 1214 1.1 perseant * XXX do something about this. 1215 1.1 perseant */ 1216 1.1 perseant if_extra = 0; 1217 1.41 dholland extra = lfs_sb_getbsize(fs) * (off_t)check_hidden_cost(fs, bip, bic, &if_extra); 1218 1.41 dholland if_extra *= lfs_sb_getbsize(fs); 1219 1.1 perseant 1220 1.1 perseant /* 1221 1.1 perseant * Use markv to move the blocks. 1222 1.1 perseant */ 1223 1.1 perseant if (do_small) 1224 1.41 dholland inc = MAXPHYS / lfs_sb_getbsize(fs) - 1; 1225 1.1 perseant else 1226 1.1 perseant inc = LFS_MARKV_MAXBLKCNT / 2; 1227 1.1 perseant for (mc = 0, mbip = bip; mc < bic; mc += inc, mbip += inc) { 1228 1.1 perseant lim.blkiov = mbip; 1229 1.1 perseant lim.blkcnt = (bic - mc > inc ? inc : bic - mc); 1230 1.1 perseant #ifdef TEST_PATTERN 1231 1.1 perseant dlog("checking blocks %d-%d", mc, mc + lim.blkcnt - 1); 1232 1.1 perseant for (i = 0; i < lim.blkcnt; i++) { 1233 1.1 perseant check_test_pattern(mbip + i); 1234 1.1 perseant } 1235 1.1 perseant #endif /* TEST_PATTERN */ 1236 1.1 perseant dlog("sending blocks %d-%d", mc, mc + lim.blkcnt - 1); 1237 1.21 pooka if ((r = kops.ko_fcntl(fs->clfs_ifilefd, LFCNMARKV, &lim))<0) { 1238 1.28 perseant int oerrno = errno; 1239 1.28 perseant syslog(LOG_WARNING, "%s: markv returned %d (errno %d, %m)", 1240 1.40 dholland lfs_sb_getfsmnt(fs), r, errno); 1241 1.28 perseant if (oerrno != EAGAIN && oerrno != ESHUTDOWN) { 1242 1.28 perseant syslog(LOG_DEBUG, "%s: errno %d, returning", 1243 1.40 dholland lfs_sb_getfsmnt(fs), oerrno); 1244 1.1 perseant fd_release_all(fs->clfs_devvp); 1245 1.1 perseant return r; 1246 1.1 perseant } 1247 1.29 perseant if (oerrno == ESHUTDOWN) { 1248 1.29 perseant syslog(LOG_NOTICE, "%s: filesystem unmounted", 1249 1.40 dholland lfs_sb_getfsmnt(fs)); 1250 1.29 perseant fd_release_all(fs->clfs_devvp); 1251 1.29 perseant return r; 1252 1.29 perseant } 1253 1.1 perseant } 1254 1.1 perseant } 1255 1.1 perseant 1256 1.1 perseant /* 1257 1.1 perseant * Report progress (or lack thereof) 1258 1.1 perseant */ 1259 1.1 perseant syslog(LOG_INFO, "%s: wrote %" PRId64 " dirty + %" 1260 1.1 perseant PRId64 " supporting indirect + %" 1261 1.1 perseant PRId64 " supporting Ifile = %" 1262 1.1 perseant PRId64 " bytes to clean %d segs (%" PRId64 "%% recovery)", 1263 1.40 dholland lfs_sb_getfsmnt(fs), (int64_t)nb, (int64_t)(extra - if_extra), 1264 1.3 perseant (int64_t)if_extra, (int64_t)(nb + extra), ngood, 1265 1.3 perseant (ngood ? (int64_t)(100 - (100 * (nb + extra)) / 1266 1.41 dholland (ngood * lfs_sb_getssize(fs))) : 1267 1.3 perseant (int64_t)0)); 1268 1.41 dholland if (nb + extra >= ngood * lfs_sb_getssize(fs)) 1269 1.1 perseant syslog(LOG_WARNING, "%s: cleaner not making forward progress", 1270 1.40 dholland lfs_sb_getfsmnt(fs)); 1271 1.1 perseant 1272 1.1 perseant /* 1273 1.1 perseant * Finally call reclaim to prompt cleaning of the segments. 1274 1.1 perseant */ 1275 1.21 pooka kops.ko_fcntl(fs->clfs_ifilefd, LFCNRECLAIM, NULL); 1276 1.1 perseant 1277 1.1 perseant fd_release_all(fs->clfs_devvp); 1278 1.1 perseant return 0; 1279 1.1 perseant } 1280 1.1 perseant 1281 1.1 perseant /* 1282 1.1 perseant * Read the cleanerinfo block and apply cleaning policy to determine whether 1283 1.1 perseant * the given filesystem needs to be cleaned. Returns 1 if it does, 0 if it 1284 1.1 perseant * does not, or -1 on error. 1285 1.1 perseant */ 1286 1.46 dholland static int 1287 1.46 dholland needs_cleaning(struct clfs *fs, CLEANERINFO64 *cip) 1288 1.1 perseant { 1289 1.46 dholland CLEANERINFO *cipu; 1290 1.1 perseant struct ubuf *bp; 1291 1.1 perseant struct stat st; 1292 1.1 perseant daddr_t fsb_per_seg, max_free_segs; 1293 1.1 perseant time_t now; 1294 1.1 perseant double loadavg; 1295 1.1 perseant 1296 1.1 perseant /* If this fs is "on hold", don't clean it. */ 1297 1.58 mrg if (fs->clfs_onhold) { 1298 1.58 mrg #if defined(__GNUC__) && \ 1299 1.58 mrg (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && \ 1300 1.58 mrg defined(__OPTIMIZE_SIZE__) 1301 1.58 mrg /* 1302 1.58 mrg * XXX: Work around apparent bug with GCC >= 4.8 and -Os: it 1303 1.58 mrg * claims that ci.clean is uninitialized in clean_fs (at one 1304 1.58 mrg * of the several uses of it, which is neither the first nor 1305 1.58 mrg * last use) -- this doesn't happen with plain -O2. 1306 1.58 mrg * 1307 1.58 mrg * Hopefully in the future further rearrangements will allow 1308 1.58 mrg * removing this hack. 1309 1.58 mrg */ 1310 1.58 mrg cip->clean = 0; 1311 1.58 mrg #endif 1312 1.1 perseant return 0; 1313 1.58 mrg } 1314 1.1 perseant 1315 1.1 perseant /* 1316 1.1 perseant * Read the cleanerinfo block from the Ifile. We don't want 1317 1.1 perseant * the cached information, so invalidate the buffer before 1318 1.1 perseant * handing it back. 1319 1.1 perseant */ 1320 1.41 dholland if (bread(fs->lfs_ivnode, 0, lfs_sb_getbsize(fs), 0, &bp)) { 1321 1.40 dholland syslog(LOG_ERR, "%s: can't read inode", lfs_sb_getfsmnt(fs)); 1322 1.1 perseant return -1; 1323 1.1 perseant } 1324 1.46 dholland cipu = (CLEANERINFO *)bp->b_data; 1325 1.46 dholland if (fs->lfs_is64) { 1326 1.46 dholland /* Structure copy */ 1327 1.46 dholland *cip = cipu->u_64; 1328 1.46 dholland } else { 1329 1.46 dholland /* Copy the fields and promote to 64 bit */ 1330 1.46 dholland cip->clean = cipu->u_32.clean; 1331 1.46 dholland cip->dirty = cipu->u_32.dirty; 1332 1.46 dholland cip->bfree = cipu->u_32.bfree; 1333 1.46 dholland cip->avail = cipu->u_32.avail; 1334 1.46 dholland cip->free_head = cipu->u_32.free_head; 1335 1.46 dholland cip->free_tail = cipu->u_32.free_tail; 1336 1.46 dholland cip->flags = cipu->u_32.flags; 1337 1.46 dholland } 1338 1.13 ad brelse(bp, B_INVAL); 1339 1.41 dholland cleaner_stats.bytes_read += lfs_sb_getbsize(fs); 1340 1.1 perseant 1341 1.1 perseant /* 1342 1.1 perseant * If the number of segments changed under us, reinit. 1343 1.1 perseant * We don't have to start over from scratch, however, 1344 1.1 perseant * since we don't hold any buffers. 1345 1.1 perseant */ 1346 1.40 dholland if (lfs_sb_getnseg(fs) != cip->clean + cip->dirty) { 1347 1.1 perseant if (reinit_fs(fs) < 0) { 1348 1.1 perseant /* The normal case for unmount */ 1349 1.40 dholland syslog(LOG_NOTICE, "%s: filesystem unmounted", lfs_sb_getfsmnt(fs)); 1350 1.1 perseant return -1; 1351 1.1 perseant } 1352 1.40 dholland syslog(LOG_NOTICE, "%s: nsegs changed", lfs_sb_getfsmnt(fs)); 1353 1.1 perseant } 1354 1.1 perseant 1355 1.1 perseant /* Compute theoretical "free segments" maximum based on usage */ 1356 1.36 christos fsb_per_seg = lfs_segtod(fs, 1); 1357 1.40 dholland max_free_segs = MAX(cip->bfree, 0) / fsb_per_seg + lfs_sb_getminfreeseg(fs); 1358 1.1 perseant 1359 1.1 perseant dlog("%s: bfree = %d, avail = %d, clean = %d/%d", 1360 1.40 dholland lfs_sb_getfsmnt(fs), cip->bfree, cip->avail, cip->clean, 1361 1.40 dholland lfs_sb_getnseg(fs)); 1362 1.1 perseant 1363 1.1 perseant /* If the writer is waiting on us, clean it */ 1364 1.40 dholland if (cip->clean <= lfs_sb_getminfreeseg(fs) || 1365 1.11 perseant (cip->flags & LFS_CLEANER_MUST_CLEAN)) 1366 1.1 perseant return 1; 1367 1.1 perseant 1368 1.1 perseant /* If there are enough segments, don't clean it */ 1369 1.1 perseant if (cip->bfree - cip->avail <= fsb_per_seg && 1370 1.1 perseant cip->avail > fsb_per_seg) 1371 1.1 perseant return 0; 1372 1.1 perseant 1373 1.1 perseant /* If we are in dire straits, clean it */ 1374 1.1 perseant if (cip->bfree - cip->avail > fsb_per_seg && 1375 1.1 perseant cip->avail <= fsb_per_seg) 1376 1.1 perseant return 1; 1377 1.1 perseant 1378 1.1 perseant /* If under busy threshold, clean regardless of load */ 1379 1.1 perseant if (cip->clean < max_free_segs * BUSY_LIM) 1380 1.1 perseant return 1; 1381 1.1 perseant 1382 1.1 perseant /* Check busy status; clean if idle and under idle limit */ 1383 1.1 perseant if (use_fs_idle) { 1384 1.1 perseant /* Filesystem idle */ 1385 1.1 perseant time(&now); 1386 1.1 perseant if (fstat(fs->clfs_ifilefd, &st) < 0) { 1387 1.1 perseant syslog(LOG_ERR, "%s: failed to stat ifile", 1388 1.40 dholland lfs_sb_getfsmnt(fs)); 1389 1.1 perseant return -1; 1390 1.1 perseant } 1391 1.1 perseant if (now - st.st_mtime > segwait_timeout && 1392 1.1 perseant cip->clean < max_free_segs * IDLE_LIM) 1393 1.1 perseant return 1; 1394 1.1 perseant } else { 1395 1.1 perseant /* CPU idle - use one-minute load avg */ 1396 1.1 perseant if (getloadavg(&loadavg, 1) == -1) { 1397 1.1 perseant syslog(LOG_ERR, "%s: failed to get load avg", 1398 1.40 dholland lfs_sb_getfsmnt(fs)); 1399 1.1 perseant return -1; 1400 1.1 perseant } 1401 1.1 perseant if (loadavg < load_threshold && 1402 1.1 perseant cip->clean < max_free_segs * IDLE_LIM) 1403 1.1 perseant return 1; 1404 1.1 perseant } 1405 1.1 perseant 1406 1.1 perseant return 0; 1407 1.1 perseant } 1408 1.1 perseant 1409 1.1 perseant /* 1410 1.1 perseant * Report statistics. If the signal was SIGUSR2, clear the statistics too. 1411 1.1 perseant * If the signal was SIGINT, exit. 1412 1.1 perseant */ 1413 1.1 perseant static void 1414 1.1 perseant sig_report(int sig) 1415 1.1 perseant { 1416 1.1 perseant double avg = 0.0, stddev; 1417 1.1 perseant 1418 1.1 perseant avg = cleaner_stats.util_tot / MAX(cleaner_stats.segs_cleaned, 1.0); 1419 1.1 perseant stddev = cleaner_stats.util_sos / MAX(cleaner_stats.segs_cleaned - 1420 1.1 perseant avg * avg, 1.0); 1421 1.1 perseant syslog(LOG_INFO, "bytes read: %" PRId64, cleaner_stats.bytes_read); 1422 1.1 perseant syslog(LOG_INFO, "bytes written: %" PRId64, cleaner_stats.bytes_written); 1423 1.1 perseant syslog(LOG_INFO, "segments cleaned: %" PRId64, cleaner_stats.segs_cleaned); 1424 1.1 perseant #if 0 1425 1.1 perseant /* "Empty segments" is meaningless, since the kernel handles those */ 1426 1.1 perseant syslog(LOG_INFO, "empty segments: %" PRId64, cleaner_stats.segs_empty); 1427 1.1 perseant #endif 1428 1.1 perseant syslog(LOG_INFO, "error segments: %" PRId64, cleaner_stats.segs_error); 1429 1.1 perseant syslog(LOG_INFO, "utilization total: %g", cleaner_stats.util_tot); 1430 1.1 perseant syslog(LOG_INFO, "utilization sos: %g", cleaner_stats.util_sos); 1431 1.1 perseant syslog(LOG_INFO, "utilization avg: %4.2f", avg); 1432 1.1 perseant syslog(LOG_INFO, "utilization sdev: %9.6f", stddev); 1433 1.1 perseant 1434 1.1 perseant if (debug) 1435 1.1 perseant bufstats(); 1436 1.1 perseant 1437 1.1 perseant if (sig == SIGUSR2) 1438 1.1 perseant memset(&cleaner_stats, 0, sizeof(cleaner_stats)); 1439 1.1 perseant if (sig == SIGINT) 1440 1.1 perseant exit(0); 1441 1.1 perseant } 1442 1.1 perseant 1443 1.1 perseant static void 1444 1.1 perseant sig_exit(int sig) 1445 1.1 perseant { 1446 1.1 perseant exit(0); 1447 1.1 perseant } 1448 1.1 perseant 1449 1.1 perseant static void 1450 1.1 perseant usage(void) 1451 1.1 perseant { 1452 1.60 brad fprintf(stderr, "usage: lfs_cleanerd [-bcdfmqsJ] [-i segnum] [-l load] " 1453 1.1 perseant "[-n nsegs] [-r report_freq] [-t timeout] fs_name ..."); 1454 1.1 perseant } 1455 1.1 perseant 1456 1.21 pooka #ifndef LFS_CLEANER_AS_LIB 1457 1.1 perseant /* 1458 1.1 perseant * Main. 1459 1.1 perseant */ 1460 1.1 perseant int 1461 1.1 perseant main(int argc, char **argv) 1462 1.1 perseant { 1463 1.21 pooka 1464 1.21 pooka return lfs_cleaner_main(argc, argv); 1465 1.21 pooka } 1466 1.21 pooka #endif 1467 1.21 pooka 1468 1.21 pooka int 1469 1.21 pooka lfs_cleaner_main(int argc, char **argv) 1470 1.21 pooka { 1471 1.19 pooka int i, opt, error, r, loopcount, nodetach; 1472 1.1 perseant struct timeval tv; 1473 1.37 christos #ifdef LFS_CLEANER_AS_LIB 1474 1.24 pooka sem_t *semaddr = NULL; 1475 1.37 christos #endif 1476 1.46 dholland CLEANERINFO64 ci; 1477 1.1 perseant #ifndef USE_CLIENT_SERVER 1478 1.1 perseant char *cp, *pidname; 1479 1.1 perseant #endif 1480 1.1 perseant 1481 1.1 perseant /* 1482 1.1 perseant * Set up defaults 1483 1.1 perseant */ 1484 1.1 perseant atatime = 1; 1485 1.1 perseant segwait_timeout = 300; /* Five minutes */ 1486 1.31 joerg load_threshold = 0.2; 1487 1.1 perseant stat_report = 0; 1488 1.1 perseant inval_segment = -1; 1489 1.1 perseant copylog_filename = NULL; 1490 1.19 pooka nodetach = 0; 1491 1.60 brad do_asdevice = NULL; 1492 1.1 perseant 1493 1.1 perseant /* 1494 1.1 perseant * Parse command-line arguments 1495 1.1 perseant */ 1496 1.60 brad while ((opt = getopt(argc, argv, "bC:cdDfi:J:l:mn:qr:sS:t:")) != -1) { 1497 1.1 perseant switch (opt) { 1498 1.1 perseant case 'b': /* Use bytes written, not segments read */ 1499 1.1 perseant use_bytes = 1; 1500 1.1 perseant break; 1501 1.1 perseant case 'C': /* copy log */ 1502 1.1 perseant copylog_filename = optarg; 1503 1.1 perseant break; 1504 1.1 perseant case 'c': /* Coalesce files */ 1505 1.1 perseant do_coalesce++; 1506 1.1 perseant break; 1507 1.1 perseant case 'd': /* Debug mode. */ 1508 1.19 pooka nodetach++; 1509 1.1 perseant debug++; 1510 1.1 perseant break; 1511 1.19 pooka case 'D': /* stay-on-foreground */ 1512 1.19 pooka nodetach++; 1513 1.19 pooka break; 1514 1.1 perseant case 'f': /* Use fs idle time rather than cpu idle */ 1515 1.1 perseant use_fs_idle = 1; 1516 1.1 perseant break; 1517 1.1 perseant case 'i': /* Invalidate this segment */ 1518 1.1 perseant inval_segment = atoi(optarg); 1519 1.1 perseant break; 1520 1.1 perseant case 'l': /* Load below which to clean */ 1521 1.1 perseant load_threshold = atof(optarg); 1522 1.1 perseant break; 1523 1.1 perseant case 'm': /* [compat only] */ 1524 1.1 perseant break; 1525 1.1 perseant case 'n': /* How many segs to clean at once */ 1526 1.1 perseant atatime = atoi(optarg); 1527 1.1 perseant break; 1528 1.1 perseant case 'q': /* Quit after one run */ 1529 1.1 perseant do_quit = 1; 1530 1.1 perseant break; 1531 1.1 perseant case 'r': /* Report every stat_report segments */ 1532 1.1 perseant stat_report = atoi(optarg); 1533 1.1 perseant break; 1534 1.1 perseant case 's': /* Small writes */ 1535 1.1 perseant do_small = 1; 1536 1.1 perseant break; 1537 1.37 christos #ifdef LFS_CLEANER_AS_LIB 1538 1.24 pooka case 'S': /* semaphore */ 1539 1.24 pooka semaddr = (void*)(uintptr_t)strtoull(optarg,NULL,0); 1540 1.24 pooka break; 1541 1.37 christos #endif 1542 1.1 perseant case 't': /* timeout */ 1543 1.1 perseant segwait_timeout = atoi(optarg); 1544 1.1 perseant break; 1545 1.60 brad case 'J': /* do as a device */ 1546 1.60 brad do_asdevice = optarg; 1547 1.60 brad break; 1548 1.1 perseant default: 1549 1.1 perseant usage(); 1550 1.1 perseant /* NOTREACHED */ 1551 1.1 perseant } 1552 1.1 perseant } 1553 1.1 perseant argc -= optind; 1554 1.1 perseant argv += optind; 1555 1.1 perseant 1556 1.1 perseant if (argc < 1) 1557 1.1 perseant usage(); 1558 1.1 perseant if (inval_segment >= 0 && argc != 1) { 1559 1.1 perseant errx(1, "lfs_cleanerd: may only specify one filesystem when " 1560 1.1 perseant "using -i flag"); 1561 1.1 perseant } 1562 1.1 perseant 1563 1.12 tls if (do_coalesce) { 1564 1.12 tls errx(1, "lfs_cleanerd: -c disabled due to reports of file " 1565 1.12 tls "corruption; you may re-enable it by rebuilding the " 1566 1.12 tls "cleaner"); 1567 1.12 tls } 1568 1.12 tls 1569 1.1 perseant /* 1570 1.19 pooka * Set up daemon mode or foreground mode 1571 1.1 perseant */ 1572 1.19 pooka if (nodetach) { 1573 1.1 perseant openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID | LOG_PERROR, 1574 1.1 perseant LOG_DAEMON); 1575 1.1 perseant signal(SIGINT, sig_report); 1576 1.1 perseant } else { 1577 1.1 perseant if (daemon(0, 0) == -1) 1578 1.1 perseant err(1, "lfs_cleanerd: couldn't become a daemon!"); 1579 1.1 perseant openlog("lfs_cleanerd", LOG_NDELAY | LOG_PID, LOG_DAEMON); 1580 1.1 perseant signal(SIGINT, sig_exit); 1581 1.1 perseant } 1582 1.1 perseant 1583 1.1 perseant /* 1584 1.1 perseant * Look for an already-running master daemon. If there is one, 1585 1.1 perseant * send it our filesystems to add to its list and exit. 1586 1.1 perseant * If there is none, become the master. 1587 1.1 perseant */ 1588 1.1 perseant #ifdef USE_CLIENT_SERVER 1589 1.1 perseant try_to_become_master(argc, argv); 1590 1.1 perseant #else 1591 1.1 perseant /* XXX think about this */ 1592 1.1 perseant asprintf(&pidname, "lfs_cleanerd:m:%s", argv[0]); 1593 1.1 perseant if (pidname == NULL) { 1594 1.1 perseant syslog(LOG_ERR, "malloc failed: %m"); 1595 1.1 perseant exit(1); 1596 1.1 perseant } 1597 1.1 perseant for (cp = pidname; cp != NULL; cp = strchr(cp, '/')) 1598 1.1 perseant *cp = '|'; 1599 1.1 perseant pidfile(pidname); 1600 1.1 perseant #endif 1601 1.1 perseant 1602 1.1 perseant /* 1603 1.1 perseant * Signals mean daemon should report its statistics 1604 1.1 perseant */ 1605 1.1 perseant memset(&cleaner_stats, 0, sizeof(cleaner_stats)); 1606 1.1 perseant signal(SIGUSR1, sig_report); 1607 1.1 perseant signal(SIGUSR2, sig_report); 1608 1.1 perseant 1609 1.1 perseant /* 1610 1.1 perseant * Start up buffer cache. We only use this for the Ifile, 1611 1.1 perseant * and we will resize it if necessary, so it can start small. 1612 1.1 perseant */ 1613 1.1 perseant bufinit(4); 1614 1.1 perseant 1615 1.1 perseant #ifdef REPAIR_ZERO_FINFO 1616 1.1 perseant { 1617 1.1 perseant BLOCK_INFO *bip = NULL; 1618 1.1 perseant int bic = 0; 1619 1.1 perseant 1620 1.1 perseant nfss = 1; 1621 1.1 perseant fsp = (struct clfs **)malloc(sizeof(*fsp)); 1622 1.1 perseant fsp[0] = (struct clfs *)calloc(1, sizeof(**fsp)); 1623 1.1 perseant 1624 1.1 perseant if (init_unmounted_fs(fsp[0], argv[0]) < 0) { 1625 1.1 perseant err(1, "init_unmounted_fs"); 1626 1.1 perseant } 1627 1.1 perseant dlog("Filesystem has %d segments", fsp[0]->lfs_nseg); 1628 1.1 perseant for (i = 0; i < fsp[0]->lfs_nseg; i++) { 1629 1.1 perseant load_segment(fsp[0], i, &bip, &bic); 1630 1.1 perseant bic = 0; 1631 1.1 perseant } 1632 1.1 perseant exit(0); 1633 1.1 perseant } 1634 1.1 perseant #endif 1635 1.1 perseant 1636 1.1 perseant /* 1637 1.1 perseant * Initialize cleaning structures, open devices, etc. 1638 1.1 perseant */ 1639 1.1 perseant nfss = argc; 1640 1.1 perseant fsp = (struct clfs **)malloc(nfss * sizeof(*fsp)); 1641 1.7 perseant if (fsp == NULL) { 1642 1.7 perseant syslog(LOG_ERR, "couldn't allocate fs table: %m"); 1643 1.7 perseant exit(1); 1644 1.7 perseant } 1645 1.1 perseant for (i = 0; i < nfss; i++) { 1646 1.1 perseant fsp[i] = (struct clfs *)calloc(1, sizeof(**fsp)); 1647 1.1 perseant if ((r = init_fs(fsp[i], argv[i])) < 0) { 1648 1.1 perseant syslog(LOG_ERR, "%s: couldn't init: error code %d", 1649 1.1 perseant argv[i], r); 1650 1.1 perseant handle_error(fsp, i); 1651 1.1 perseant --i; /* Do the new #i over again */ 1652 1.1 perseant } 1653 1.1 perseant } 1654 1.1 perseant 1655 1.1 perseant /* 1656 1.1 perseant * If asked to coalesce, do so and exit. 1657 1.1 perseant */ 1658 1.1 perseant if (do_coalesce) { 1659 1.1 perseant for (i = 0; i < nfss; i++) 1660 1.1 perseant clean_all_inodes(fsp[i]); 1661 1.1 perseant exit(0); 1662 1.1 perseant } 1663 1.1 perseant 1664 1.1 perseant /* 1665 1.1 perseant * If asked to invalidate a segment, do that and exit. 1666 1.1 perseant */ 1667 1.1 perseant if (inval_segment >= 0) { 1668 1.1 perseant invalidate_segment(fsp[0], inval_segment); 1669 1.1 perseant exit(0); 1670 1.1 perseant } 1671 1.1 perseant 1672 1.1 perseant /* 1673 1.1 perseant * Main cleaning loop. 1674 1.1 perseant */ 1675 1.1 perseant loopcount = 0; 1676 1.25 pooka #ifdef LFS_CLEANER_AS_LIB 1677 1.24 pooka if (semaddr) 1678 1.24 pooka sem_post(semaddr); 1679 1.25 pooka #endif 1680 1.26 pooka error = 0; 1681 1.1 perseant while (nfss > 0) { 1682 1.1 perseant int cleaned_one; 1683 1.1 perseant do { 1684 1.1 perseant #ifdef USE_CLIENT_SERVER 1685 1.1 perseant check_control_socket(); 1686 1.1 perseant #endif 1687 1.1 perseant cleaned_one = 0; 1688 1.1 perseant for (i = 0; i < nfss; i++) { 1689 1.1 perseant if ((error = needs_cleaning(fsp[i], &ci)) < 0) { 1690 1.28 perseant syslog(LOG_DEBUG, "%s: needs_cleaning returned %d", 1691 1.28 perseant getprogname(), error); 1692 1.1 perseant handle_error(fsp, i); 1693 1.1 perseant continue; 1694 1.1 perseant } 1695 1.1 perseant if (error == 0) /* No need to clean */ 1696 1.1 perseant continue; 1697 1.1 perseant 1698 1.1 perseant reload_ifile(fsp[i]); 1699 1.28 perseant if ((error = clean_fs(fsp[i], &ci)) < 0) { 1700 1.28 perseant syslog(LOG_DEBUG, "%s: clean_fs returned %d", 1701 1.28 perseant getprogname(), error); 1702 1.1 perseant handle_error(fsp, i); 1703 1.1 perseant continue; 1704 1.1 perseant } 1705 1.1 perseant ++cleaned_one; 1706 1.1 perseant } 1707 1.1 perseant ++loopcount; 1708 1.1 perseant if (stat_report && loopcount % stat_report == 0) 1709 1.1 perseant sig_report(0); 1710 1.1 perseant if (do_quit) 1711 1.1 perseant exit(0); 1712 1.1 perseant } while(cleaned_one); 1713 1.1 perseant tv.tv_sec = segwait_timeout; 1714 1.1 perseant tv.tv_usec = 0; 1715 1.22 pooka /* XXX: why couldn't others work if fsp socket is shutdown? */ 1716 1.21 pooka error = kops.ko_fcntl(fsp[0]->clfs_ifilefd,LFCNSEGWAITALL,&tv); 1717 1.22 pooka if (error) { 1718 1.22 pooka if (errno == ESHUTDOWN) { 1719 1.22 pooka for (i = 0; i < nfss; i++) { 1720 1.28 perseant syslog(LOG_INFO, "%s: shutdown", 1721 1.28 perseant getprogname()); 1722 1.22 pooka handle_error(fsp, i); 1723 1.22 pooka assert(nfss == 0); 1724 1.22 pooka } 1725 1.26 pooka } else { 1726 1.26 pooka #ifdef LFS_CLEANER_AS_LIB 1727 1.26 pooka error = ESHUTDOWN; 1728 1.26 pooka break; 1729 1.26 pooka #else 1730 1.22 pooka err(1, "LFCNSEGWAITALL"); 1731 1.26 pooka #endif 1732 1.26 pooka } 1733 1.22 pooka } 1734 1.1 perseant } 1735 1.1 perseant 1736 1.1 perseant /* NOTREACHED */ 1737 1.26 pooka return error; 1738 1.1 perseant } 1739