1 1.7 wiz /* Id: mandocdb.c,v 1.269 2021/08/19 16:55:31 schwarze Exp */ 2 1.1 joerg /* 3 1.7 wiz * Copyright (c) 2011-2020 Ingo Schwarze <schwarze (at) openbsd.org> 4 1.2 christos * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps (at) bsd.lv> 5 1.3 christos * Copyright (c) 2016 Ed Maste <emaste (at) freebsd.org> 6 1.1 joerg * 7 1.1 joerg * Permission to use, copy, modify, and distribute this software for any 8 1.1 joerg * purpose with or without fee is hereby granted, provided that the above 9 1.1 joerg * copyright notice and this permission notice appear in all copies. 10 1.1 joerg * 11 1.2 christos * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 1.1 joerg * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 1.2 christos * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 1.1 joerg * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 1.1 joerg * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 1.1 joerg * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 1.1 joerg * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 1.7 wiz * 19 1.7 wiz * Implementation of the makewhatis(8) program. 
20 1.1 joerg */ 21 1.1 joerg #include "config.h" 22 1.1 joerg 23 1.2 christos #include <sys/types.h> 24 1.4 christos #include <sys/mman.h> 25 1.2 christos #include <sys/stat.h> 26 1.1 joerg 27 1.1 joerg #include <assert.h> 28 1.2 christos #include <ctype.h> 29 1.2 christos #if HAVE_ERR 30 1.2 christos #include <err.h> 31 1.2 christos #endif 32 1.2 christos #include <errno.h> 33 1.1 joerg #include <fcntl.h> 34 1.2 christos #if HAVE_FTS 35 1.2 christos #include <fts.h> 36 1.2 christos #else 37 1.2 christos #include "compat_fts.h" 38 1.2 christos #endif 39 1.2 christos #include <limits.h> 40 1.2 christos #if HAVE_SANDBOX_INIT 41 1.2 christos #include <sandbox.h> 42 1.2 christos #endif 43 1.3 christos #include <stdarg.h> 44 1.2 christos #include <stddef.h> 45 1.1 joerg #include <stdio.h> 46 1.1 joerg #include <stdint.h> 47 1.1 joerg #include <stdlib.h> 48 1.1 joerg #include <string.h> 49 1.2 christos #include <unistd.h> 50 1.1 joerg 51 1.2 christos #include "mandoc_aux.h" 52 1.2 christos #include "mandoc_ohash.h" 53 1.2 christos #include "mandoc.h" 54 1.2 christos #include "roff.h" 55 1.2 christos #include "mdoc.h" 56 1.2 christos #include "main.h" 57 1.1 joerg #include "man.h" 58 1.5 christos #include "mandoc_parse.h" 59 1.2 christos #include "manconf.h" 60 1.2 christos #include "mansearch.h" 61 1.3 christos #include "dba_array.h" 62 1.3 christos #include "dba.h" 63 1.2 christos 64 1.2 christos extern const char *const mansearch_keynames[]; 65 1.2 christos /* Operating mode; mandocdb() starts from OP_DEFAULT and switches it according to the -C, -d, -u and -t options. */ 66 1.2 christos enum op { 67 1.2 christos OP_DEFAULT = 0, /* new dbs from dir list or default config */ 68 1.2 christos OP_CONFFILE, /* new databases from custom config file */ 69 1.2 christos OP_UPDATE, /* delete/add entries in existing database */ 70 1.2 christos OP_DELETE, /* delete entries from existing database */ 71 1.2 christos OP_TEST /* change no databases, report potential problems */ 72 1.2 christos }; 73 1.2 christos /* A rendered key string with its bitmask; the text is stored in the trailing flexible array member. */ 74 1.2 christos struct str { 75 1.2 christos const struct mpage *mpage; /* if set, the 
owning parse */ 76 1.2 christos uint64_t mask; /* bitmask in sequence */ 77 1.2 christos char key[]; /* rendered text */ 78 1.2 christos }; 79 1.2 christos /* Inode/device number pair; the mpages table is initialised with the offset of this member inside struct mpage as its key offset. */ 80 1.2 christos struct inodev { 81 1.2 christos ino_t st_ino; 82 1.2 christos dev_t st_dev; 83 1.2 christos }; 84 1.1 joerg /* One distinct manual page together with the list of directory entries (mlinks) that refer to it. */ 85 1.2 christos struct mpage { 86 1.2 christos struct inodev inodev; /* used for hashing routine */ 87 1.3 christos struct dba_array *dba; 88 1.2 christos char *sec; /* section from file content */ 89 1.2 christos char *arch; /* architecture from file content */ 90 1.2 christos char *title; /* title from file content */ 91 1.2 christos char *desc; /* description from file content */ 92 1.3 christos struct mpage *next; /* singly linked list */ 93 1.2 christos struct mlink *mlinks; /* singly linked list */ 94 1.2 christos int name_head_done; 95 1.3 christos enum form form; /* format from file content */ 96 1.1 joerg }; 97 1.1 joerg /* One directory entry naming a manual page; the mlinks table is initialised with the offset of the file member as its key offset. */ 98 1.2 christos struct mlink { 99 1.2 christos char file[PATH_MAX]; /* filename rel. 
to manpath */ 100 1.2 christos char *dsec; /* section from directory */ 101 1.2 christos char *arch; /* architecture from directory */ 102 1.2 christos char *name; /* name from file name (not empty) */ 103 1.2 christos char *fsec; /* section from file name suffix */ 104 1.2 christos struct mlink *next; /* singly linked list */ 105 1.2 christos struct mpage *mpage; /* parent */ 106 1.2 christos int gzip; /* filename has a .gz suffix */ 107 1.3 christos enum form dform; /* format from directory */ 108 1.3 christos enum form fform; /* format from file name suffix */ 109 1.1 joerg }; 110 1.1 joerg /* Signature of the handler functions that can be stored in the fp member of struct mdoc_handler. */ 111 1.2 christos typedef int (*mdoc_fp)(struct mpage *, const struct roff_meta *, 112 1.2 christos const struct roff_node *); 113 1.1 joerg /* One entry of the mdoc_handlers[] table: optional callback, key bitmask, and node flags that rule the node out. */ 114 1.2 christos struct mdoc_handler { 115 1.2 christos mdoc_fp fp; /* optional handler */ 116 1.2 christos uint64_t mask; /* set unless handler returns 0 */ 117 1.3 christos int taboo; /* node flags that must not be set */ 118 1.1 joerg }; 119 1.1 joerg 120 1.3 christos 121 1.3 christos int mandocdb(int, char *[]); 122 1.3 christos 123 1.3 christos static void dbadd(struct dba *, struct mpage *); 124 1.7 wiz static void dbadd_mlink(const struct mlink *); 125 1.3 christos static void dbprune(struct dba *); 126 1.3 christos static void dbwrite(struct dba *); 127 1.2 christos static void filescan(const char *); 128 1.3 christos #if HAVE_FTS_COMPARE_CONST 129 1.3 christos static int fts_compare(const FTSENT *const *, const FTSENT *const *); 130 1.3 christos #else 131 1.3 christos static int fts_compare(const FTSENT **, const FTSENT **); 132 1.3 christos #endif 133 1.2 christos static void mlink_add(struct mlink *, const struct stat *); 134 1.2 christos static void mlink_check(struct mpage *, struct mlink *); 135 1.2 christos static void mlink_free(struct mlink *); 136 1.2 christos static void mlinks_undupe(struct mpage *); 137 1.2 christos static void mpages_free(void); 138 1.3 christos static void mpages_merge(struct dba *, struct mparse 
*); 139 1.2 christos static void parse_cat(struct mpage *, int); 140 1.2 christos static void parse_man(struct mpage *, const struct roff_meta *, 141 1.2 christos const struct roff_node *); 142 1.2 christos static void parse_mdoc(struct mpage *, const struct roff_meta *, 143 1.2 christos const struct roff_node *); 144 1.2 christos static int parse_mdoc_head(struct mpage *, const struct roff_meta *, 145 1.2 christos const struct roff_node *); 146 1.4 christos static int parse_mdoc_Fa(struct mpage *, const struct roff_meta *, 147 1.4 christos const struct roff_node *); 148 1.2 christos static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *, 149 1.2 christos const struct roff_node *); 150 1.2 christos static void parse_mdoc_fname(struct mpage *, const struct roff_node *); 151 1.2 christos static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *, 152 1.2 christos const struct roff_node *); 153 1.2 christos static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *, 154 1.2 christos const struct roff_node *); 155 1.2 christos static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *, 156 1.2 christos const struct roff_node *); 157 1.2 christos static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *, 158 1.2 christos const struct roff_node *); 159 1.2 christos static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *, 160 1.2 christos const struct roff_node *); 161 1.2 christos static int parse_mdoc_Va(struct mpage *, const struct roff_meta *, 162 1.2 christos const struct roff_node *); 163 1.2 christos static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *, 164 1.2 christos const struct roff_node *); 165 1.2 christos static void putkey(const struct mpage *, char *, uint64_t); 166 1.2 christos static void putkeys(const struct mpage *, char *, size_t, uint64_t); 167 1.2 christos static void putmdockey(const struct mpage *, 168 1.3 christos const struct roff_node *, uint64_t, int); 169 1.7 wiz #ifdef 
READ_ALLOWED_PATH 170 1.7 wiz static int read_allowed(const char *); 171 1.7 wiz #endif 172 1.2 christos static int render_string(char **, size_t *); 173 1.3 christos static void say(const char *, const char *, ...) 174 1.3 christos __attribute__((__format__ (__printf__, 2, 3))); 175 1.2 christos static int set_basedir(const char *, int); 176 1.2 christos static int treescan(void); 177 1.2 christos static size_t utf8(unsigned int, char [7]); 178 1.2 christos 179 1.2 christos static int nodb; /* no database changes */ 180 1.2 christos static int mparse_options; /* abort the parse early */ 181 1.2 christos static int use_all; /* use all found files */ 182 1.2 christos static int debug; /* print what we're doing */ 183 1.2 christos static int warnings; /* warn about crap */ 184 1.2 christos static int write_utf8; /* write UTF-8 output; else ASCII */ 185 1.2 christos static int exitcode; /* to be returned by main */ 186 1.2 christos static enum op op; /* operational mode */ 187 1.2 christos static char basedir[PATH_MAX]; /* current base directory */ 188 1.7 wiz static size_t basedir_len; /* strlen(basedir) */ 189 1.3 christos static struct mpage *mpage_head; /* list of distinct manual pages */ 190 1.2 christos static struct ohash mpages; /* table of distinct manual pages */ 191 1.2 christos static struct ohash mlinks; /* table of directory entries */ 192 1.2 christos static struct ohash names; /* table of all names */ 193 1.2 christos static struct ohash strings; /* table of all strings */ 194 1.2 christos static uint64_t name_mask; 195 1.2 christos /* Per-macro handling table with MDOC_MAX - MDOC_Dd entries, listed from Dd onwards in the order named by the trailing comments. NOTE(review): presumably indexed by (macro token - MDOC_Dd) -- confirm at the use site. */ 196 1.5 christos static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = { 197 1.3 christos { NULL, 0, NODE_NOPRT }, /* Dd */ 198 1.3 christos { NULL, 0, NODE_NOPRT }, /* Dt */ 199 1.3 christos { NULL, 0, NODE_NOPRT }, /* Os */ 200 1.3 christos { parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */ 201 1.3 christos { parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */ 202 1.3 christos { NULL, 0, 0 }, /* Pp */ 203 1.3 christos { NULL, 0, 
0 }, /* D1 */ 204 1.3 christos { NULL, 0, 0 }, /* Dl */ 205 1.3 christos { NULL, 0, 0 }, /* Bd */ 206 1.3 christos { NULL, 0, 0 }, /* Ed */ 207 1.3 christos { NULL, 0, 0 }, /* Bl */ 208 1.3 christos { NULL, 0, 0 }, /* El */ 209 1.3 christos { NULL, 0, 0 }, /* It */ 210 1.3 christos { NULL, 0, 0 }, /* Ad */ 211 1.3 christos { NULL, TYPE_An, 0 }, /* An */ 212 1.4 christos { NULL, 0, 0 }, /* Ap */ 213 1.3 christos { NULL, TYPE_Ar, 0 }, /* Ar */ 214 1.3 christos { NULL, TYPE_Cd, 0 }, /* Cd */ 215 1.3 christos { NULL, TYPE_Cm, 0 }, /* Cm */ 216 1.3 christos { NULL, TYPE_Dv, 0 }, /* Dv */ 217 1.3 christos { NULL, TYPE_Er, 0 }, /* Er */ 218 1.3 christos { NULL, TYPE_Ev, 0 }, /* Ev */ 219 1.3 christos { NULL, 0, 0 }, /* Ex */ 220 1.4 christos { parse_mdoc_Fa, 0, 0 }, /* Fa */ 221 1.3 christos { parse_mdoc_Fd, 0, 0 }, /* Fd */ 222 1.3 christos { NULL, TYPE_Fl, 0 }, /* Fl */ 223 1.3 christos { parse_mdoc_Fn, 0, 0 }, /* Fn */ 224 1.4 christos { NULL, TYPE_Ft | TYPE_Vt, 0 }, /* Ft */ 225 1.3 christos { NULL, TYPE_Ic, 0 }, /* Ic */ 226 1.3 christos { NULL, TYPE_In, 0 }, /* In */ 227 1.3 christos { NULL, TYPE_Li, 0 }, /* Li */ 228 1.3 christos { parse_mdoc_Nd, 0, 0 }, /* Nd */ 229 1.3 christos { parse_mdoc_Nm, 0, 0 }, /* Nm */ 230 1.3 christos { NULL, 0, 0 }, /* Op */ 231 1.3 christos { NULL, 0, 0 }, /* Ot */ 232 1.3 christos { NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */ 233 1.3 christos { NULL, 0, 0 }, /* Rv */ 234 1.3 christos { NULL, TYPE_St, 0 }, /* St */ 235 1.3 christos { parse_mdoc_Va, TYPE_Va, 0 }, /* Va */ 236 1.3 christos { parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */ 237 1.3 christos { parse_mdoc_Xr, 0, 0 }, /* Xr */ 238 1.3 christos { NULL, 0, 0 }, /* %A */ 239 1.3 christos { NULL, 0, 0 }, /* %B */ 240 1.3 christos { NULL, 0, 0 }, /* %D */ 241 1.3 christos { NULL, 0, 0 }, /* %I */ 242 1.3 christos { NULL, 0, 0 }, /* %J */ 243 1.3 christos { NULL, 0, 0 }, /* %N */ 244 1.3 christos { NULL, 0, 0 }, /* %O */ 245 1.3 christos { NULL, 0, 0 }, /* %P */ 246 1.3 christos { NULL, 0, 0 }, 
/* %R */ 247 1.3 christos { NULL, 0, 0 }, /* %T */ 248 1.3 christos { NULL, 0, 0 }, /* %V */ 249 1.3 christos { NULL, 0, 0 }, /* Ac */ 250 1.3 christos { NULL, 0, 0 }, /* Ao */ 251 1.3 christos { NULL, 0, 0 }, /* Aq */ 252 1.3 christos { NULL, TYPE_At, 0 }, /* At */ 253 1.3 christos { NULL, 0, 0 }, /* Bc */ 254 1.3 christos { NULL, 0, 0 }, /* Bf */ 255 1.3 christos { NULL, 0, 0 }, /* Bo */ 256 1.3 christos { NULL, 0, 0 }, /* Bq */ 257 1.3 christos { NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */ 258 1.3 christos { NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */ 259 1.3 christos { NULL, 0, 0 }, /* Db */ 260 1.3 christos { NULL, 0, 0 }, /* Dc */ 261 1.3 christos { NULL, 0, 0 }, /* Do */ 262 1.3 christos { NULL, 0, 0 }, /* Dq */ 263 1.3 christos { NULL, 0, 0 }, /* Ec */ 264 1.3 christos { NULL, 0, 0 }, /* Ef */ 265 1.3 christos { NULL, TYPE_Em, 0 }, /* Em */ 266 1.3 christos { NULL, 0, 0 }, /* Eo */ 267 1.3 christos { NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */ 268 1.3 christos { NULL, TYPE_Ms, 0 }, /* Ms */ 269 1.3 christos { NULL, 0, 0 }, /* No */ 270 1.3 christos { NULL, 0, 0 }, /* Ns */ 271 1.3 christos { NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */ 272 1.3 christos { NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */ 273 1.3 christos { NULL, 0, 0 }, /* Pc */ 274 1.3 christos { NULL, 0, 0 }, /* Pf */ 275 1.3 christos { NULL, 0, 0 }, /* Po */ 276 1.3 christos { NULL, 0, 0 }, /* Pq */ 277 1.3 christos { NULL, 0, 0 }, /* Qc */ 278 1.3 christos { NULL, 0, 0 }, /* Ql */ 279 1.3 christos { NULL, 0, 0 }, /* Qo */ 280 1.3 christos { NULL, 0, 0 }, /* Qq */ 281 1.3 christos { NULL, 0, 0 }, /* Re */ 282 1.3 christos { NULL, 0, 0 }, /* Rs */ 283 1.3 christos { NULL, 0, 0 }, /* Sc */ 284 1.3 christos { NULL, 0, 0 }, /* So */ 285 1.3 christos { NULL, 0, 0 }, /* Sq */ 286 1.3 christos { NULL, 0, 0 }, /* Sm */ 287 1.3 christos { NULL, 0, 0 }, /* Sx */ 288 1.3 christos { NULL, TYPE_Sy, 0 }, /* Sy */ 289 1.3 christos { NULL, TYPE_Tn, 0 }, /* Tn */ 290 1.3 christos { NULL, 0, NODE_NOSRC }, /* Ux */ 291 1.3 christos { NULL, 
0, 0 }, /* Xc */ 292 1.3 christos { NULL, 0, 0 }, /* Xo */ 293 1.3 christos { parse_mdoc_Fo, 0, 0 }, /* Fo */ 294 1.3 christos { NULL, 0, 0 }, /* Fc */ 295 1.3 christos { NULL, 0, 0 }, /* Oo */ 296 1.3 christos { NULL, 0, 0 }, /* Oc */ 297 1.3 christos { NULL, 0, 0 }, /* Bk */ 298 1.3 christos { NULL, 0, 0 }, /* Ek */ 299 1.3 christos { NULL, 0, 0 }, /* Bt */ 300 1.3 christos { NULL, 0, 0 }, /* Hf */ 301 1.3 christos { NULL, 0, 0 }, /* Fr */ 302 1.3 christos { NULL, 0, 0 }, /* Ud */ 303 1.3 christos { NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */ 304 1.3 christos { NULL, 0, 0 }, /* Lp */ 305 1.3 christos { NULL, TYPE_Lk, 0 }, /* Lk */ 306 1.3 christos { NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */ 307 1.3 christos { NULL, 0, 0 }, /* Brq */ 308 1.3 christos { NULL, 0, 0 }, /* Bro */ 309 1.3 christos { NULL, 0, 0 }, /* Brc */ 310 1.3 christos { NULL, 0, 0 }, /* %C */ 311 1.3 christos { NULL, 0, 0 }, /* Es */ 312 1.3 christos { NULL, 0, 0 }, /* En */ 313 1.3 christos { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */ 314 1.3 christos { NULL, 0, 0 }, /* %Q */ 315 1.3 christos { NULL, 0, 0 }, /* %U */ 316 1.3 christos { NULL, 0, 0 }, /* Ta */ 317 1.1 joerg }; 318 1.1 joerg 319 1.1 joerg 320 1.1 joerg int 321 1.2 christos mandocdb(int argc, char *argv[]) 322 1.1 joerg { 323 1.2 christos struct manconf conf; 324 1.2 christos struct mparse *mp; 325 1.3 christos struct dba *dba; 326 1.2 christos const char *path_arg, *progname; 327 1.2 christos size_t j, sz; 328 1.2 christos int ch, i; 329 1.2 christos 330 1.2 christos #if HAVE_PLEDGE 331 1.4 christos if (pledge("stdio rpath wpath cpath", NULL) == -1) { 332 1.2 christos warn("pledge"); 333 1.2 christos return (int)MANDOCLEVEL_SYSERR; 334 1.2 christos } 335 1.2 christos #endif 336 1.2 christos 337 1.2 christos #if HAVE_SANDBOX_INIT 338 1.2 christos if (sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, NULL) == -1) { 339 1.2 christos warnx("sandbox_init"); 340 1.2 christos return (int)MANDOCLEVEL_SYSERR; 341 1.2 christos } 342 1.2 christos #endif 343 
1.2 christos 344 1.2 christos memset(&conf, 0, sizeof(conf)); 345 1.2 christos 346 1.2 christos /* 347 1.2 christos * We accept a few different invocations. 348 1.2 christos * The CHECKOP macro makes sure that invocation styles don't 349 1.2 christos * clobber each other. 350 1.2 christos */ 351 1.2 christos #define CHECKOP(_op, _ch) do \ 352 1.7 wiz if ((_op) != OP_DEFAULT) { \ 353 1.2 christos warnx("-%c: Conflicting option", (_ch)); \ 354 1.2 christos goto usage; \ 355 1.2 christos } while (/*CONSTCOND*/0) 356 1.1 joerg 357 1.5 christos mparse_options = MPARSE_VALIDATE; 358 1.2 christos path_arg = NULL; 359 1.2 christos op = OP_DEFAULT; 360 1.1 joerg 361 1.7 wiz while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1) 362 1.1 joerg switch (ch) { 363 1.2 christos case 'a': 364 1.2 christos use_all = 1; 365 1.2 christos break; 366 1.2 christos case 'C': 367 1.2 christos CHECKOP(op, ch); 368 1.2 christos path_arg = optarg; 369 1.2 christos op = OP_CONFFILE; 370 1.2 christos break; 371 1.2 christos case 'D': 372 1.2 christos debug++; 373 1.2 christos break; 374 1.2 christos case 'd': 375 1.2 christos CHECKOP(op, ch); 376 1.2 christos path_arg = optarg; 377 1.1 joerg op = OP_UPDATE; 378 1.1 joerg break; 379 1.2 christos case 'n': 380 1.2 christos nodb = 1; 381 1.2 christos break; 382 1.2 christos case 'p': 383 1.2 christos warnings = 1; 384 1.2 christos break; 385 1.2 christos case 'Q': 386 1.2 christos mparse_options |= MPARSE_QUICK; 387 1.2 christos break; 388 1.2 christos case 'T': 389 1.7 wiz if (strcmp(optarg, "utf8") != 0) { 390 1.2 christos warnx("-T%s: Unsupported output format", 391 1.2 christos optarg); 392 1.2 christos goto usage; 393 1.2 christos } 394 1.2 christos write_utf8 = 1; 395 1.2 christos break; 396 1.2 christos case 't': 397 1.2 christos CHECKOP(op, ch); 398 1.2 christos dup2(STDOUT_FILENO, STDERR_FILENO); 399 1.2 christos op = OP_TEST; 400 1.2 christos nodb = warnings = 1; 401 1.2 christos break; 402 1.2 christos case 'u': 403 1.2 christos 
CHECKOP(op, ch); 404 1.2 christos path_arg = optarg; 405 1.1 joerg op = OP_DELETE; 406 1.1 joerg break; 407 1.2 christos case 'v': 408 1.2 christos /* Compatibility with espie@'s makewhatis. */ 409 1.1 joerg break; 410 1.1 joerg default: 411 1.2 christos goto usage; 412 1.1 joerg } 413 1.1 joerg 414 1.1 joerg argc -= optind; 415 1.1 joerg argv += optind; 416 1.1 joerg 417 1.2 christos #if HAVE_PLEDGE 418 1.2 christos if (nodb) { 419 1.2 christos if (pledge("stdio rpath", NULL) == -1) { 420 1.2 christos warn("pledge"); 421 1.2 christos return (int)MANDOCLEVEL_SYSERR; 422 1.2 christos } 423 1.2 christos } 424 1.2 christos #endif 425 1.2 christos 426 1.7 wiz if (op == OP_CONFFILE && argc > 0) { 427 1.2 christos warnx("-C: Too many arguments"); 428 1.2 christos goto usage; 429 1.2 christos } 430 1.1 joerg 431 1.2 christos exitcode = (int)MANDOCLEVEL_OK; 432 1.2 christos mchars_alloc(); 433 1.5 christos mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL); 434 1.2 christos mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev)); 435 1.2 christos mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file)); 436 1.1 joerg 437 1.7 wiz if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) { 438 1.1 joerg 439 1.2 christos /* 440 1.2 christos * Most of these deal with a specific directory. 441 1.2 christos * Jump into that directory first. 442 1.2 christos */ 443 1.7 wiz if (op != OP_TEST && set_basedir(path_arg, 1) == 0) 444 1.2 christos goto out; 445 1.1 joerg 446 1.3 christos dba = nodb ? dba_new(128) : dba_read(MANDOC_DB); 447 1.3 christos if (dba != NULL) { 448 1.2 christos /* 449 1.2 christos * The existing database is usable. Process 450 1.2 christos * all files specified on the command-line. 451 1.2 christos */ 452 1.2 christos use_all = 1; 453 1.2 christos for (i = 0; i < argc; i++) 454 1.2 christos filescan(argv[i]); 455 1.3 christos if (nodb == 0) 456 1.3 christos dbprune(dba); 457 1.2 christos } else { 458 1.3 christos /* Database missing or corrupt. 
*/ 459 1.3 christos if (op != OP_UPDATE || errno != ENOENT) 460 1.3 christos say(MANDOC_DB, "%s: Automatically recreating" 461 1.3 christos " from scratch", strerror(errno)); 462 1.2 christos exitcode = (int)MANDOCLEVEL_OK; 463 1.2 christos op = OP_DEFAULT; 464 1.7 wiz if (treescan() == 0) 465 1.2 christos goto out; 466 1.3 christos dba = dba_new(128); 467 1.2 christos } 468 1.7 wiz if (op != OP_DELETE) 469 1.3 christos mpages_merge(dba, mp); 470 1.3 christos if (nodb == 0) 471 1.3 christos dbwrite(dba); 472 1.3 christos dba_free(dba); 473 1.2 christos } else { 474 1.2 christos /* 475 1.2 christos * If we have arguments, use them as our manpaths. 476 1.3 christos * If we don't, use man.conf(5). 477 1.2 christos */ 478 1.2 christos if (argc > 0) { 479 1.2 christos conf.manpath.paths = mandoc_reallocarray(NULL, 480 1.2 christos argc, sizeof(char *)); 481 1.2 christos conf.manpath.sz = (size_t)argc; 482 1.2 christos for (i = 0; i < argc; i++) 483 1.2 christos conf.manpath.paths[i] = mandoc_strdup(argv[i]); 484 1.2 christos } else 485 1.2 christos manconf_parse(&conf, path_arg, NULL, NULL); 486 1.1 joerg 487 1.2 christos if (conf.manpath.sz == 0) { 488 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 489 1.2 christos say("", "Empty manpath"); 490 1.2 christos } 491 1.1 joerg 492 1.2 christos /* 493 1.2 christos * First scan the tree rooted at a base directory, then 494 1.2 christos * build a new database and finally move it into place. 495 1.2 christos * Ignore zero-length directories and strip trailing 496 1.2 christos * slashes. 
497 1.2 christos */ 498 1.2 christos for (j = 0; j < conf.manpath.sz; j++) { 499 1.2 christos sz = strlen(conf.manpath.paths[j]); 500 1.2 christos if (sz && conf.manpath.paths[j][sz - 1] == '/') 501 1.2 christos conf.manpath.paths[j][--sz] = '\0'; 502 1.7 wiz if (sz == 0) 503 1.2 christos continue; 504 1.1 joerg 505 1.2 christos if (j) { 506 1.2 christos mandoc_ohash_init(&mpages, 6, 507 1.2 christos offsetof(struct mpage, inodev)); 508 1.2 christos mandoc_ohash_init(&mlinks, 6, 509 1.2 christos offsetof(struct mlink, file)); 510 1.2 christos } 511 1.1 joerg 512 1.7 wiz if (set_basedir(conf.manpath.paths[j], argc > 0) == 0) 513 1.2 christos continue; 514 1.7 wiz if (treescan() == 0) 515 1.2 christos continue; 516 1.3 christos dba = dba_new(128); 517 1.3 christos mpages_merge(dba, mp); 518 1.3 christos if (nodb == 0) 519 1.3 christos dbwrite(dba); 520 1.3 christos dba_free(dba); 521 1.2 christos 522 1.2 christos if (j + 1 < conf.manpath.sz) { 523 1.2 christos mpages_free(); 524 1.2 christos ohash_delete(&mpages); 525 1.2 christos ohash_delete(&mlinks); 526 1.2 christos } 527 1.1 joerg } 528 1.2 christos } 529 1.2 christos out: 530 1.2 christos manconf_free(&conf); 531 1.2 christos mparse_free(mp); 532 1.2 christos mchars_free(); 533 1.2 christos mpages_free(); 534 1.2 christos ohash_delete(&mpages); 535 1.2 christos ohash_delete(&mlinks); 536 1.2 christos return exitcode; 537 1.2 christos usage: 538 1.2 christos progname = getprogname(); 539 1.2 christos fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n" 540 1.2 christos " %s [-aDnpQ] [-Tutf8] dir ...\n" 541 1.2 christos " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n" 542 1.2 christos " %s [-Dnp] -u dir [file ...]\n" 543 1.2 christos " %s [-Q] -t file ...\n", 544 1.2 christos progname, progname, progname, progname, progname); 545 1.2 christos 546 1.2 christos return (int)MANDOCLEVEL_BADARG; 547 1.2 christos } 548 1.2 christos 549 1.2 christos /* 550 1.3 christos * To get a singly linked list in alpha order while 
inserting entries 551 1.3 christos * at the beginning, process directory entries in reverse alpha order. 552 1.3 christos */ 553 1.3 christos static int 554 1.3 christos #if HAVE_FTS_COMPARE_CONST 555 1.3 christos fts_compare(const FTSENT *const *a, const FTSENT *const *b) 556 1.3 christos #else 557 1.3 christos fts_compare(const FTSENT **a, const FTSENT **b) 558 1.3 christos #endif 559 1.3 christos { 560 1.3 christos return -strcmp((*a)->fts_name, (*b)->fts_name); 561 1.3 christos } 562 1.3 christos 563 1.3 christos /* 564 1.2 christos * Scan a directory tree rooted at "basedir" for manpages. 565 1.2 christos * We use fts(), scanning directory parts along the way for clues to our 566 1.2 christos * section and architecture. 567 1.2 christos * 568 1.2 christos * If use_all has been specified, grok all files. 569 1.2 christos * If not, sanitise paths to the following: 570 1.2 christos * 571 1.2 christos * [./]man*[/<arch>]/<name>.<section> 572 1.2 christos * or 573 1.2 christos * [./]cat<section>[/<arch>]/<name>.0 574 1.2 christos * 575 1.2 christos * TODO: accommodate for multi-language directories. 
576 1.2 christos */ 577 1.2 christos static int 578 1.2 christos treescan(void) 579 1.2 christos { 580 1.2 christos char buf[PATH_MAX]; 581 1.2 christos FTS *f; 582 1.2 christos FTSENT *ff; 583 1.2 christos struct mlink *mlink; 584 1.3 christos int gzip; 585 1.3 christos enum form dform; 586 1.2 christos char *dsec, *arch, *fsec, *cp; 587 1.2 christos const char *path; 588 1.2 christos const char *argv[2]; 589 1.2 christos 590 1.2 christos argv[0] = "."; 591 1.2 christos argv[1] = NULL; 592 1.2 christos 593 1.6 christos f = fts_open((char * const *)__UNCONST(*argv), 594 1.6 christos FTS_PHYSICAL | FTS_NOCHDIR, fts_compare); 595 1.2 christos if (f == NULL) { 596 1.2 christos exitcode = (int)MANDOCLEVEL_SYSERR; 597 1.2 christos say("", "&fts_open"); 598 1.2 christos return 0; 599 1.2 christos } 600 1.2 christos 601 1.2 christos dsec = arch = NULL; 602 1.2 christos dform = FORM_NONE; 603 1.1 joerg 604 1.2 christos while ((ff = fts_read(f)) != NULL) { 605 1.2 christos path = ff->fts_path + 2; 606 1.2 christos switch (ff->fts_info) { 607 1.1 joerg 608 1.2 christos /* 609 1.2 christos * Symbolic links require various sanity checks, 610 1.2 christos * then get handled just like regular files. 611 1.2 christos */ 612 1.2 christos case FTS_SL: 613 1.2 christos if (realpath(path, buf) == NULL) { 614 1.2 christos if (warnings) 615 1.2 christos say(path, "&realpath"); 616 1.2 christos continue; 617 1.2 christos } 618 1.7 wiz if (strncmp(buf, basedir, basedir_len) != 0 619 1.7 wiz #ifdef READ_ALLOWED_PATH 620 1.7 wiz && !read_allowed(buf) 621 1.2 christos #endif 622 1.2 christos ) { 623 1.2 christos if (warnings) say("", 624 1.2 christos "%s: outside base directory", buf); 625 1.2 christos continue; 626 1.2 christos } 627 1.2 christos /* Use logical inode to avoid mpages dupe. 
*/ 628 1.2 christos if (stat(path, ff->fts_statp) == -1) { 629 1.2 christos if (warnings) 630 1.2 christos say(path, "&stat"); 631 1.2 christos continue; 632 1.2 christos } 633 1.7 wiz if ((ff->fts_statp->st_mode & S_IFMT) != S_IFREG) 634 1.7 wiz continue; 635 1.2 christos /* FALLTHROUGH */ 636 1.2 christos 637 1.2 christos /* 638 1.2 christos * If we're a regular file, add an mlink by using the 639 1.2 christos * stored directory data and handling the filename. 640 1.2 christos */ 641 1.2 christos case FTS_F: 642 1.2 christos if ( ! strcmp(path, MANDOC_DB)) 643 1.2 christos continue; 644 1.2 christos if ( ! use_all && ff->fts_level < 2) { 645 1.2 christos if (warnings) 646 1.2 christos say(path, "Extraneous file"); 647 1.2 christos continue; 648 1.2 christos } 649 1.2 christos gzip = 0; 650 1.2 christos fsec = NULL; 651 1.2 christos while (fsec == NULL) { 652 1.2 christos fsec = strrchr(ff->fts_name, '.'); 653 1.2 christos if (fsec == NULL || strcmp(fsec+1, "gz")) 654 1.2 christos break; 655 1.2 christos gzip = 1; 656 1.2 christos *fsec = '\0'; 657 1.2 christos fsec = NULL; 658 1.2 christos } 659 1.2 christos if (fsec == NULL) { 660 1.2 christos if ( ! use_all) { 661 1.2 christos if (warnings) 662 1.2 christos say(path, 663 1.2 christos "No filename suffix"); 664 1.2 christos continue; 665 1.2 christos } 666 1.2 christos } else if ( ! strcmp(++fsec, "html")) { 667 1.2 christos if (warnings) 668 1.2 christos say(path, "Skip html"); 669 1.2 christos continue; 670 1.2 christos } else if ( ! strcmp(fsec, "ps")) { 671 1.2 christos if (warnings) 672 1.2 christos say(path, "Skip ps"); 673 1.2 christos continue; 674 1.2 christos } else if ( ! strcmp(fsec, "pdf")) { 675 1.2 christos if (warnings) 676 1.2 christos say(path, "Skip pdf"); 677 1.2 christos continue; 678 1.2 christos } else if ( ! 
use_all && 679 1.2 christos ((dform == FORM_SRC && 680 1.2 christos strncmp(fsec, dsec, strlen(dsec))) || 681 1.2 christos (dform == FORM_CAT && strcmp(fsec, "0")))) { 682 1.2 christos if (warnings) 683 1.2 christos say(path, "Wrong filename suffix"); 684 1.2 christos continue; 685 1.2 christos } else 686 1.2 christos fsec[-1] = '\0'; 687 1.2 christos 688 1.2 christos mlink = mandoc_calloc(1, sizeof(struct mlink)); 689 1.2 christos if (strlcpy(mlink->file, path, 690 1.2 christos sizeof(mlink->file)) >= 691 1.2 christos sizeof(mlink->file)) { 692 1.2 christos say(path, "Filename too long"); 693 1.2 christos free(mlink); 694 1.2 christos continue; 695 1.2 christos } 696 1.2 christos mlink->dform = dform; 697 1.2 christos mlink->dsec = dsec; 698 1.2 christos mlink->arch = arch; 699 1.2 christos mlink->name = ff->fts_name; 700 1.2 christos mlink->fsec = fsec; 701 1.2 christos mlink->gzip = gzip; 702 1.2 christos mlink_add(mlink, ff->fts_statp); 703 1.2 christos continue; 704 1.2 christos 705 1.2 christos case FTS_D: 706 1.2 christos case FTS_DP: 707 1.2 christos break; 708 1.2 christos 709 1.2 christos default: 710 1.2 christos if (warnings) 711 1.2 christos say(path, "Not a regular file"); 712 1.2 christos continue; 713 1.1 joerg } 714 1.1 joerg 715 1.2 christos switch (ff->fts_level) { 716 1.2 christos case 0: 717 1.2 christos /* Ignore the root directory. */ 718 1.2 christos break; 719 1.2 christos case 1: 720 1.2 christos /* 721 1.2 christos * This might contain manX/ or catX/. 722 1.2 christos * Try to infer this from the name. 723 1.2 christos * If we're not in use_all, enforce it. 724 1.2 christos */ 725 1.2 christos cp = ff->fts_name; 726 1.2 christos if (ff->fts_info == FTS_DP) { 727 1.2 christos dform = FORM_NONE; 728 1.2 christos dsec = NULL; 729 1.2 christos break; 730 1.2 christos } 731 1.2 christos 732 1.2 christos if ( ! strncmp(cp, "man", 3)) { 733 1.2 christos dform = FORM_SRC; 734 1.2 christos dsec = cp + 3; 735 1.2 christos } else if ( ! 
strncmp(cp, "cat", 3)) { 736 1.2 christos dform = FORM_CAT; 737 1.2 christos dsec = cp + 3; 738 1.2 christos } else { 739 1.2 christos dform = FORM_NONE; 740 1.2 christos dsec = NULL; 741 1.2 christos } 742 1.2 christos 743 1.2 christos if (dsec != NULL || use_all) 744 1.2 christos break; 745 1.2 christos 746 1.2 christos if (warnings) 747 1.2 christos say(path, "Unknown directory part"); 748 1.2 christos fts_set(f, ff, FTS_SKIP); 749 1.2 christos break; 750 1.2 christos case 2: 751 1.2 christos /* 752 1.2 christos * Possibly our architecture. 753 1.2 christos * If we're descending, keep tabs on it. 754 1.2 christos */ 755 1.2 christos if (ff->fts_info != FTS_DP && dsec != NULL) 756 1.2 christos arch = ff->fts_name; 757 1.2 christos else 758 1.2 christos arch = NULL; 759 1.2 christos break; 760 1.2 christos default: 761 1.2 christos if (ff->fts_info == FTS_DP || use_all) 762 1.2 christos break; 763 1.2 christos if (warnings) 764 1.2 christos say(path, "Extraneous directory part"); 765 1.2 christos fts_set(f, ff, FTS_SKIP); 766 1.2 christos break; 767 1.1 joerg } 768 1.2 christos } 769 1.1 joerg 770 1.2 christos fts_close(f); 771 1.2 christos return 1; 772 1.2 christos } 773 1.1 joerg 774 1.2 christos /* 775 1.2 christos * Add a file to the mlinks table. 776 1.2 christos * Do not verify that it's a "valid" looking manpage (we'll do that 777 1.2 christos * later). 778 1.2 christos * 779 1.2 christos * Try to infer the manual section, architecture, and page name from the 780 1.2 christos * path, assuming it looks like 781 1.2 christos * 782 1.2 christos * [./]man*[/<arch>]/<name>.<section> 783 1.2 christos * or 784 1.2 christos * [./]cat<section>[/<arch>]/<name>.0 785 1.2 christos * 786 1.2 christos * See treescan() for the fts(3) version of this. 
787 1.2 christos */ 788 1.2 christos static void 789 1.7 wiz filescan(const char *infile) 790 1.2 christos { 791 1.2 christos struct stat st; 792 1.2 christos struct mlink *mlink; 793 1.7 wiz char *linkfile, *p, *realdir, *start, *usefile; 794 1.7 wiz size_t realdir_len; 795 1.1 joerg 796 1.2 christos assert(use_all); 797 1.1 joerg 798 1.7 wiz if (strncmp(infile, "./", 2) == 0) 799 1.7 wiz infile += 2; 800 1.1 joerg 801 1.2 christos /* 802 1.2 christos * We have to do lstat(2) before realpath(3) loses 803 1.2 christos * the information whether this is a symbolic link. 804 1.2 christos * We need to know that because for symbolic links, 805 1.2 christos * we want to use the orginal file name, while for 806 1.2 christos * regular files, we want to use the real path. 807 1.2 christos */ 808 1.7 wiz if (lstat(infile, &st) == -1) { 809 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 810 1.7 wiz say(infile, "&lstat"); 811 1.2 christos return; 812 1.7 wiz } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) { 813 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 814 1.7 wiz say(infile, "Not a regular file"); 815 1.2 christos return; 816 1.1 joerg } 817 1.1 joerg 818 1.2 christos /* 819 1.2 christos * We have to resolve the file name to the real path 820 1.2 christos * in any case for the base directory check. 
821 1.2 christos */ 822 1.7 wiz if ((usefile = realpath(infile, NULL)) == NULL) { 823 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 824 1.7 wiz say(infile, "&realpath"); 825 1.2 christos return; 826 1.2 christos } 827 1.1 joerg 828 1.7 wiz if (op == OP_TEST) 829 1.7 wiz start = usefile; 830 1.7 wiz else if (strncmp(usefile, basedir, basedir_len) == 0) 831 1.7 wiz start = usefile + basedir_len; 832 1.7 wiz #ifdef READ_ALLOWED_PATH 833 1.7 wiz else if (read_allowed(usefile)) 834 1.7 wiz start = usefile; 835 1.2 christos #endif 836 1.2 christos else { 837 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 838 1.7 wiz say("", "%s: outside base directory", infile); 839 1.7 wiz free(usefile); 840 1.2 christos return; 841 1.2 christos } 842 1.1 joerg 843 1.2 christos /* 844 1.2 christos * Now we are sure the file is inside our tree. 845 1.2 christos * If it is a symbolic link, ignore the real path 846 1.2 christos * and use the original name. 847 1.2 christos */ 848 1.7 wiz do { 849 1.7 wiz if (S_ISLNK(st.st_mode) == 0) 850 1.7 wiz break; 851 1.7 wiz 852 1.7 wiz /* 853 1.7 wiz * Some implementations of realpath(3) may succeed 854 1.7 wiz * even if the target of the link does not exist, 855 1.7 wiz * so check again for extra safety. 856 1.7 wiz */ 857 1.7 wiz if (stat(usefile, &st) == -1) { 858 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 859 1.7 wiz say(infile, "&stat"); 860 1.7 wiz free(usefile); 861 1.2 christos return; 862 1.2 christos } 863 1.7 wiz linkfile = mandoc_strdup(infile); 864 1.7 wiz if (op == OP_TEST) { 865 1.7 wiz free(usefile); 866 1.7 wiz start = usefile = linkfile; 867 1.7 wiz break; 868 1.7 wiz } 869 1.7 wiz if (strncmp(infile, basedir, basedir_len) == 0) { 870 1.7 wiz free(usefile); 871 1.7 wiz usefile = linkfile; 872 1.7 wiz start = usefile + basedir_len; 873 1.7 wiz break; 874 1.7 wiz } 875 1.7 wiz 876 1.7 wiz /* 877 1.7 wiz * This symbolic link points into the basedir 878 1.7 wiz * from the outside. 
Let's see whether any of 879 1.7 wiz * the parent directories resolve to the basedir. 880 1.7 wiz */ 881 1.7 wiz p = strchr(linkfile, '\0'); 882 1.7 wiz do { 883 1.7 wiz while (*--p != '/') 884 1.7 wiz continue; 885 1.7 wiz *p = '\0'; 886 1.7 wiz if ((realdir = realpath(linkfile, NULL)) == NULL) { 887 1.7 wiz exitcode = (int)MANDOCLEVEL_BADARG; 888 1.7 wiz say(infile, "&realpath"); 889 1.7 wiz free(linkfile); 890 1.7 wiz free(usefile); 891 1.7 wiz return; 892 1.7 wiz } 893 1.7 wiz realdir_len = strlen(realdir) + 1; 894 1.7 wiz free(realdir); 895 1.7 wiz *p = '/'; 896 1.7 wiz } while (realdir_len > basedir_len); 897 1.7 wiz 898 1.7 wiz /* 899 1.7 wiz * If one of the directories resolves to the basedir, 900 1.7 wiz * use the rest of the original name. 901 1.7 wiz * Otherwise, the best we can do 902 1.7 wiz * is to use the filename pointed to. 903 1.7 wiz */ 904 1.7 wiz if (realdir_len == basedir_len) { 905 1.7 wiz free(usefile); 906 1.7 wiz usefile = linkfile; 907 1.7 wiz start = p + 1; 908 1.7 wiz } else { 909 1.7 wiz free(linkfile); 910 1.7 wiz start = usefile + basedir_len; 911 1.1 joerg } 912 1.7 wiz } while (/* CONSTCOND */ 0); 913 1.1 joerg 914 1.2 christos mlink = mandoc_calloc(1, sizeof(struct mlink)); 915 1.2 christos mlink->dform = FORM_NONE; 916 1.2 christos if (strlcpy(mlink->file, start, sizeof(mlink->file)) >= 917 1.2 christos sizeof(mlink->file)) { 918 1.2 christos say(start, "Filename too long"); 919 1.2 christos free(mlink); 920 1.7 wiz free(usefile); 921 1.2 christos return; 922 1.2 christos } 923 1.1 joerg 924 1.2 christos /* 925 1.3 christos * In test mode or when the original name is absolute 926 1.3 christos * but outside our tree, guess the base directory. 
927 1.3 christos */ 928 1.3 christos 929 1.7 wiz if (op == OP_TEST || (start == usefile && *start == '/')) { 930 1.7 wiz if (strncmp(usefile, "man/", 4) == 0) 931 1.7 wiz start = usefile + 4; 932 1.7 wiz else if ((start = strstr(usefile, "/man/")) != NULL) 933 1.3 christos start += 5; 934 1.3 christos else 935 1.7 wiz start = usefile; 936 1.3 christos } 937 1.3 christos 938 1.3 christos /* 939 1.2 christos * First try to guess our directory structure. 940 1.2 christos * If we find a separator, try to look for man* or cat*. 941 1.2 christos * If we find one of these and what's underneath is a directory, 942 1.2 christos * assume it's an architecture. 943 1.2 christos */ 944 1.7 wiz if ((p = strchr(start, '/')) != NULL) { 945 1.2 christos *p++ = '\0'; 946 1.7 wiz if (strncmp(start, "man", 3) == 0) { 947 1.2 christos mlink->dform = FORM_SRC; 948 1.2 christos mlink->dsec = start + 3; 949 1.7 wiz } else if (strncmp(start, "cat", 3) == 0) { 950 1.2 christos mlink->dform = FORM_CAT; 951 1.2 christos mlink->dsec = start + 3; 952 1.1 joerg } 953 1.1 joerg 954 1.2 christos start = p; 955 1.7 wiz if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) { 956 1.2 christos *p++ = '\0'; 957 1.2 christos mlink->arch = start; 958 1.2 christos start = p; 959 1.1 joerg } 960 1.2 christos } 961 1.2 christos 962 1.2 christos /* 963 1.2 christos * Now check the file suffix. 964 1.2 christos * Suffix of `.0' indicates a catpage, `.1-9' is a manpage. 965 1.2 christos */ 966 1.2 christos p = strrchr(start, '\0'); 967 1.7 wiz while (p-- > start && *p != '/' && *p != '.') 968 1.7 wiz continue; 969 1.2 christos 970 1.7 wiz if (*p == '.') { 971 1.2 christos *p++ = '\0'; 972 1.2 christos mlink->fsec = p; 973 1.2 christos } 974 1.2 christos 975 1.2 christos /* 976 1.2 christos * Now try to parse the name. 977 1.2 christos * Use the filename portion of the path. 
978 1.2 christos */ 979 1.2 christos mlink->name = start; 980 1.7 wiz if ((p = strrchr(start, '/')) != NULL) { 981 1.2 christos mlink->name = p + 1; 982 1.2 christos *p = '\0'; 983 1.2 christos } 984 1.2 christos mlink_add(mlink, &st); 985 1.7 wiz free(usefile); 986 1.2 christos } 987 1.2 christos 988 1.2 christos static void 989 1.2 christos mlink_add(struct mlink *mlink, const struct stat *st) 990 1.2 christos { 991 1.2 christos struct inodev inodev; 992 1.2 christos struct mpage *mpage; 993 1.2 christos unsigned int slot; 994 1.2 christos 995 1.2 christos assert(NULL != mlink->file); 996 1.1 joerg 997 1.2 christos mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : ""); 998 1.2 christos mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : ""); 999 1.2 christos mlink->name = mandoc_strdup(mlink->name ? mlink->name : ""); 1000 1.2 christos mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : ""); 1001 1.1 joerg 1002 1.2 christos if ('0' == *mlink->fsec) { 1003 1.2 christos free(mlink->fsec); 1004 1.2 christos mlink->fsec = mandoc_strdup(mlink->dsec); 1005 1.2 christos mlink->fform = FORM_CAT; 1006 1.2 christos } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec) 1007 1.2 christos mlink->fform = FORM_SRC; 1008 1.2 christos else 1009 1.2 christos mlink->fform = FORM_NONE; 1010 1.1 joerg 1011 1.2 christos slot = ohash_qlookup(&mlinks, mlink->file); 1012 1.2 christos assert(NULL == ohash_find(&mlinks, slot)); 1013 1.2 christos ohash_insert(&mlinks, slot, mlink); 1014 1.2 christos 1015 1.2 christos memset(&inodev, 0, sizeof(inodev)); /* Clear padding. 
*/ 1016 1.2 christos inodev.st_ino = st->st_ino; 1017 1.2 christos inodev.st_dev = st->st_dev; 1018 1.2 christos slot = ohash_lookup_memory(&mpages, (char *)&inodev, 1019 1.2 christos sizeof(struct inodev), inodev.st_ino); 1020 1.2 christos mpage = ohash_find(&mpages, slot); 1021 1.2 christos if (NULL == mpage) { 1022 1.2 christos mpage = mandoc_calloc(1, sizeof(struct mpage)); 1023 1.2 christos mpage->inodev.st_ino = inodev.st_ino; 1024 1.2 christos mpage->inodev.st_dev = inodev.st_dev; 1025 1.3 christos mpage->form = FORM_NONE; 1026 1.3 christos mpage->next = mpage_head; 1027 1.3 christos mpage_head = mpage; 1028 1.2 christos ohash_insert(&mpages, slot, mpage); 1029 1.2 christos } else 1030 1.2 christos mlink->next = mpage->mlinks; 1031 1.2 christos mpage->mlinks = mlink; 1032 1.2 christos mlink->mpage = mpage; 1033 1.2 christos } 1034 1.1 joerg 1035 1.2 christos static void 1036 1.2 christos mlink_free(struct mlink *mlink) 1037 1.2 christos { 1038 1.1 joerg 1039 1.2 christos free(mlink->dsec); 1040 1.2 christos free(mlink->arch); 1041 1.2 christos free(mlink->name); 1042 1.2 christos free(mlink->fsec); 1043 1.2 christos free(mlink); 1044 1.2 christos } 1045 1.1 joerg 1046 1.2 christos static void 1047 1.2 christos mpages_free(void) 1048 1.2 christos { 1049 1.2 christos struct mpage *mpage; 1050 1.2 christos struct mlink *mlink; 1051 1.1 joerg 1052 1.3 christos while ((mpage = mpage_head) != NULL) { 1053 1.3 christos while ((mlink = mpage->mlinks) != NULL) { 1054 1.2 christos mpage->mlinks = mlink->next; 1055 1.2 christos mlink_free(mlink); 1056 1.2 christos } 1057 1.3 christos mpage_head = mpage->next; 1058 1.2 christos free(mpage->sec); 1059 1.2 christos free(mpage->arch); 1060 1.2 christos free(mpage->title); 1061 1.2 christos free(mpage->desc); 1062 1.2 christos free(mpage); 1063 1.2 christos } 1064 1.2 christos } 1065 1.1 joerg 1066 1.2 christos /* 1067 1.2 christos * For each mlink to the mpage, check whether the path looks like 1068 1.2 christos * it is 
 * formatted, and if it does, check whether a source manual
 * exists by the same name, ignoring the suffix.
 * If both conditions hold, drop the mlink.
 *
 * In use_all mode, such links are kept (only the warning is issued).
 * As a side effect, mpage->form is set to FORM_CAT if every link
 * lives in a cat directory and to FORM_NONE otherwise.
 */
static void
mlinks_undupe(struct mpage *mpage)
{
	char		  buf[PATH_MAX];
	struct mlink	**prev;
	struct mlink	 *mlink;
	char		 *bufp;

	mpage->form = FORM_CAT;
	prev = &mpage->mlinks;
	while (NULL != (mlink = *prev)) {
		if (FORM_CAT != mlink->dform) {
			mpage->form = FORM_NONE;
			goto nextlink;
		}
		/*
		 * Build the name of the candidate source file:
		 * replace the first "cat" in the path by "man" and
		 * everything after the last dot by the directory section.
		 */
		(void)strlcpy(buf, mlink->file, sizeof(buf));
		bufp = strstr(buf, "cat");
		assert(NULL != bufp);
		memcpy(bufp, "man", 3);
		if (NULL != (bufp = strrchr(buf, '.')))
			*++bufp = '\0';
		(void)strlcat(buf, mlink->dsec, sizeof(buf));
		if (NULL == ohash_find(&mlinks,
		    ohash_qlookup(&mlinks, buf)))
			goto nextlink;
		if (warnings)
			say(mlink->file, "Man source exists: %s", buf);
		if (use_all)
			goto nextlink;
		/* Unlink and free; *prev already names the successor. */
		*prev = mlink->next;
		mlink_free(mlink);
		continue;
nextlink:
		prev = &(*prev)->next;
	}
}

/*
 * Report inconsistencies between what a page says about itself
 * (section, architecture, names) and where the given link to it
 * is located in the tree.  Only prints messages via say().
 */
static void
mlink_check(struct mpage *mpage, struct mlink *mlink)
{
	struct str	*str;
	unsigned int	 slot;

	/*
	 * Check whether the manual section given in a file
	 * agrees with the directory where the file is located.
	 * Some manuals have suffixes like (3p) on their
	 * section number either inside the file or in the
	 * directory name, some are linked into more than one
	 * section, like encrypt(1) = makekey(8).
	 */

	if (FORM_SRC == mpage->form &&
	    strcasecmp(mpage->sec, mlink->dsec))
		say(mlink->file, "Section \"%s\" manual in %s directory",
		    mpage->sec, mlink->dsec);

	/*
	 * Manual page directories exist for each kernel
	 * architecture as returned by machine(1).
	 * However, many manuals only depend on the
	 * application architecture as returned by arch(1).
	 * For example, some (2/ARM) manuals are shared
	 * across the "armish" and "zaurus" kernel
	 * architectures.
	 * A few manuals are even shared across completely
	 * different architectures, for example fdformat(1)
	 * on amd64, i386, and sparc64.
	 */

	if (strcasecmp(mpage->arch, mlink->arch))
		say(mlink->file, "Architecture \"%s\" manual in "
		    "\"%s\" directory", mpage->arch, mlink->arch);

	/*
	 * XXX
	 * parse_cat() doesn't set NAME_TITLE yet.
	 */

	if (FORM_CAT == mpage->form)
		return;

	/*
	 * Check whether this mlink
	 * appears as a name in the NAME section.
	 * The lookup asserts that the name is present in the "names"
	 * table.
	 */

	slot = ohash_qlookup(&names, mlink->name);
	str = ohash_find(&names, slot);
	assert(NULL != str);
	if ( ! (NAME_TITLE & str->mask))
		say(mlink->file, "Name missing in NAME section");
}

/*
 * Run through the files in the global vector "mpages"
 * and add them to the database specified in "basedir".
 *
 * This handles the parsing scheme itself, using the cues of directory
 * and filename to determine whether the file is parsable or not.
 *
 * For each page, the first remaining link is opened and, unless both
 * the directory and the suffix say it is formatted, parsed with mp.
 * Pages that merely .so-include another known file donate their links
 * to that file's page instead of getting an entry of their own.
 */
static void
mpages_merge(struct dba *dba, struct mparse *mp)
{
	struct mpage		*mpage, *mpage_dest;
	struct mlink		*mlink, *mlink_dest;
	struct roff_meta	*meta;
	char			*cp;
	int			 fd;

	for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) {
		mlinks_undupe(mpage);
		if ((mlink = mpage->mlinks) == NULL)
			continue;

		/* Fresh per-page key tables; deleted at "nextpage". */
		name_mask = NAME_MASK;
		mandoc_ohash_init(&names, 4, offsetof(struct str, key));
		mandoc_ohash_init(&strings, 6, offsetof(struct str, key));
		mparse_reset(mp);
		meta = NULL;

		if ((fd = mparse_open(mp, mlink->file)) == -1) {
			say(mlink->file, "&open");
			goto nextpage;
		}

		/*
		 * Interpret the file as mdoc(7) or man(7) source
		 * code, unless it is known to be formatted.
		 * After parsing, fd is closed and set to -1; otherwise
		 * it stays open for parse_cat() below.
		 */
		if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) {
			mparse_readfd(mp, fd, mlink->file);
			close(fd);
			fd = -1;
			meta = mparse_result(mp);
		}

		if (meta != NULL && meta->sodest != NULL) {
			/* Look for the .so target, also in gzipped form. */
			mlink_dest = ohash_find(&mlinks,
			    ohash_qlookup(&mlinks, meta->sodest));
			if (mlink_dest == NULL) {
				mandoc_asprintf(&cp, "%s.gz", meta->sodest);
				mlink_dest = ohash_find(&mlinks,
				    ohash_qlookup(&mlinks, cp));
				free(cp);
			}
			if (mlink_dest != NULL) {

				/* The .so target exists. */

				mpage_dest = mlink_dest->mpage;
				while (1) {
					mlink->mpage = mpage_dest;

					/*
					 * If the target was already
					 * processed, add the links
					 * to the database now.
					 * Otherwise, this will
					 * happen when we come
					 * to the target.
					 */

					if (mpage_dest->dba != NULL)
						dbadd_mlink(mlink);

					if (mlink->next == NULL)
						break;
					mlink = mlink->next;
				}

				/*
				 * Move all links to the target.
				 * Here, mlink is the last link of this
				 * page; splice the whole list in right
				 * after mlink_dest.
				 */

				mlink->next = mlink_dest->next;
				mlink_dest->next = mpage->mlinks;
				mpage->mlinks = NULL;
				goto nextpage;
			}
			/*
			 * Dangling .so link: skip both macro set
			 * branches below and describe the page from
			 * its file name only.
			 */
			meta->macroset = MACROSET_NONE;
		}
		if (meta != NULL && meta->macroset == MACROSET_MDOC) {
			mpage->form = FORM_SRC;
			mpage->sec = meta->msec;
			mpage->sec = mandoc_strdup(
			    mpage->sec == NULL ? "" : mpage->sec);
			mpage->arch = meta->arch;
			mpage->arch = mandoc_strdup(
			    mpage->arch == NULL ? "" : mpage->arch);
			mpage->title = mandoc_strdup(meta->title);
		} else if (meta != NULL && meta->macroset == MACROSET_MAN) {
			if (*meta->msec != '\0' || *meta->title != '\0') {
				mpage->form = FORM_SRC;
				mpage->sec = mandoc_strdup(meta->msec);
				mpage->arch = mandoc_strdup(mlink->arch);
				mpage->title = mandoc_strdup(meta->title);
			} else
				/* No section and no title: treat as cat. */
				meta = NULL;
		}

		assert(mpage->desc == NULL);
		if (meta == NULL || meta->sodest != NULL) {
			/* Fall back to the data derived from the path. */
			mpage->sec = mandoc_strdup(mlink->dsec);
			mpage->arch = mandoc_strdup(mlink->arch);
			mpage->title = mandoc_strdup(mlink->name);
			if (meta == NULL) {
				mpage->form = FORM_CAT;
				parse_cat(mpage, fd);
			} else
				mpage->form = FORM_SRC;
		} else if (meta->macroset == MACROSET_MDOC)
			parse_mdoc(mpage, meta, meta->first);
		else
			parse_man(mpage, meta, meta->first);
		if (mpage->desc == NULL) {
			mpage->desc = mandoc_strdup(mlink->name);
			if (warnings)
				say(mlink->file, "No one-line description, "
				    "using filename \"%s\"", mlink->name);
		}

		for (mlink = mpage->mlinks;
		     mlink != NULL;
		     mlink = mlink->next) {
			putkey(mpage, mlink->name, NAME_FILE);
			if (warnings && !use_all)
				mlink_check(mpage, mlink);
		}

		dbadd(dba, mpage);

nextpage:
		ohash_delete(&strings);
		ohash_delete(&names);
	}
}

/*
 * Extract the section and the one-line description from a formatted
 * (preformatted "cat") manual page.
 *
 * If fd is -1, the file of the first mlink is opened by name;
 * otherwise the stream is built on top of fd.  Either way the file
 * is closed before returning.  On success, mpage->desc is set;
 * mpage->sec is replaced if the header line contains "(section)".
 */
static void
parse_cat(struct mpage *mpage, int fd)
{
	FILE		*stream;
	struct mlink	*mlink;
	char		*line, *p, *title, *sec;
	size_t		 linesz, plen, titlesz;
	ssize_t		 len;
	int		 offs;

	mlink = mpage->mlinks;
	stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r");
	if (stream == NULL) {
		if (fd != -1)
			close(fd);
		if (warnings)
			say(mlink->file, "&fopen");
		return;
	}

	line = NULL;
	linesz = 0;

	/* Parse the section number from the header line. */

	while (getline(&line, &linesz, stream) != -1) {
		if (*line == '\n')
			continue;
		if ((sec = strchr(line, '(')) == NULL)
			break;
		if ((p = strchr(++sec, ')')) == NULL)
			break;
		free(mpage->sec);
		mpage->sec = mandoc_strndup(sec, p - sec);
		if (warnings && *mlink->dsec != '\0' &&
		    strcasecmp(mpage->sec, mlink->dsec))
			say(mlink->file,
			    "Section \"%s\" manual in %s directory",
			    mpage->sec, mlink->dsec);
		break;
	}

	/* Skip to first blank line. */

	while (line == NULL || *line != '\n')
		if (getline(&line, &linesz, stream) == -1)
			break;

	/*
	 * Assume the first line that is not indented
	 * is the first section header.  Skip to it.
	 */

	while (getline(&line, &linesz, stream) != -1)
		if (*line != '\n' && *line != ' ')
			break;

	/*
	 * Read up until the next section into a buffer.
	 * Strip the leading whitespace and the trailing newline
	 * from each read line, appending a trailing space.
	 * Ignore empty (whitespace-only) lines.
	 */

	titlesz = 0;
	title = NULL;

	while ((len = getline(&line, &linesz, stream)) != -1) {
		if (*line != ' ')
			break;
		offs = 0;
		while (isspace((unsigned char)line[offs]))
			offs++;
		if (line[offs] == '\0')
			continue;
		title = mandoc_realloc(title, titlesz + len - offs);
		memcpy(title + titlesz, line + offs, len - offs);
		titlesz += len - offs;
		/* Overwrite the last byte copied with a separator. */
		title[titlesz - 1] = ' ';
	}
	free(line);

	/*
	 * If no page content can be found, or the input line
	 * is already the next section header, give up and leave
	 * mpage->desc unset.
	 */

	if (NULL == title || '\0' == *title) {
		if (warnings)
			say(mlink->file, "Cannot find NAME section");
		fclose(stream);
		free(title);
		return;
	}

	/* Turn the final separator into the string terminator. */
	title[titlesz - 1] = '\0';

	/*
	 * Skip to the first dash.
	 * Use the remaining line as the description (no more than 150
	 * bytes).
	 */

	if (NULL != (p = strstr(title, "- "))) {
		for (p += 2; ' ' == *p || '\b' == *p; p++)
			/* Skip to next word. */ ;
	} else {
		if (warnings)
			say(mlink->file, "No dash in title line, "
			    "reusing \"%s\" as one-line description", title);
		p = title;
	}

	plen = strlen(p);

	/*
	 * Strip backspace-encoding from line.
	 * A backspace is removed together with the character before
	 * it; a leading backspace is removed alone.  The "line"
	 * pointer, already freed above, is reused as a cursor here.
	 */

	while (NULL != (line = memchr(p, '\b', plen))) {
		len = line - p;
		if (0 == len) {
			memmove(line, line + 1, plen--);
			continue;
		}
		memmove(line - 1, line + 1, plen - len);
		plen -= 2;
	}

	/*
	 * Cut off excessive one-line descriptions.
	 * Bad pages are not worth better heuristics.
	 */

	mpage->desc = mandoc_strndup(p, 150);
	fclose(stream);
	free(title);
}

/*
 * Put a type/word pair into the word database for this particular file.
 * Convenience wrapper around putkeys() for NUL-terminated strings.
 */
static void
putkey(const struct mpage *mpage, char *value, uint64_t type)
{
	putkeys(mpage, value, strlen(value), type);
}

/*
 * Grok all nodes at or below a certain mdoc node into putkey().
1458 1.1 joerg */ 1459 1.1 joerg static void 1460 1.2 christos putmdockey(const struct mpage *mpage, 1461 1.3 christos const struct roff_node *n, uint64_t m, int taboo) 1462 1.1 joerg { 1463 1.1 joerg 1464 1.2 christos for ( ; NULL != n; n = n->next) { 1465 1.3 christos if (n->flags & taboo) 1466 1.3 christos continue; 1467 1.2 christos if (NULL != n->child) 1468 1.3 christos putmdockey(mpage, n->child, m, taboo); 1469 1.2 christos if (n->type == ROFFT_TEXT) 1470 1.2 christos putkey(mpage, n->string, m); 1471 1.1 joerg } 1472 1.1 joerg } 1473 1.1 joerg 1474 1.1 joerg static void 1475 1.2 christos parse_man(struct mpage *mpage, const struct roff_meta *meta, 1476 1.2 christos const struct roff_node *n) 1477 1.1 joerg { 1478 1.2 christos const struct roff_node *head, *body; 1479 1.2 christos char *start, *title; 1480 1.2 christos char byte; 1481 1.2 christos size_t sz; 1482 1.2 christos 1483 1.2 christos if (n == NULL) 1484 1.1 joerg return; 1485 1.1 joerg 1486 1.2 christos /* 1487 1.2 christos * We're only searching for one thing: the first text child in 1488 1.2 christos * the BODY of a NAME section. Since we don't keep track of 1489 1.2 christos * sections in -man, run some hoops to find out whether we're in 1490 1.2 christos * the correct section or not. 1491 1.2 christos */ 1492 1.2 christos 1493 1.2 christos if (n->type == ROFFT_BODY && n->tok == MAN_SH) { 1494 1.2 christos body = n; 1495 1.2 christos if ((head = body->parent->head) != NULL && 1496 1.2 christos (head = head->child) != NULL && 1497 1.2 christos head->next == NULL && 1498 1.2 christos head->type == ROFFT_TEXT && 1499 1.2 christos strcmp(head->string, "NAME") == 0 && 1500 1.2 christos body->child != NULL) { 1501 1.2 christos 1502 1.2 christos /* 1503 1.2 christos * Suck the entire NAME section into memory. 1504 1.2 christos * Yes, we might run away. 1505 1.2 christos * But too many manuals have big, spread-out 1506 1.2 christos * NAME sections over many lines. 
1507 1.2 christos */ 1508 1.2 christos 1509 1.2 christos title = NULL; 1510 1.2 christos deroff(&title, body); 1511 1.2 christos if (NULL == title) 1512 1.2 christos return; 1513 1.2 christos 1514 1.2 christos /* 1515 1.2 christos * Go through a special heuristic dance here. 1516 1.2 christos * Conventionally, one or more manual names are 1517 1.2 christos * comma-specified prior to a whitespace, then a 1518 1.2 christos * dash, then a description. Try to puzzle out 1519 1.2 christos * the name parts here. 1520 1.2 christos */ 1521 1.2 christos 1522 1.2 christos start = title; 1523 1.2 christos for ( ;; ) { 1524 1.2 christos sz = strcspn(start, " ,"); 1525 1.2 christos if ('\0' == start[sz]) 1526 1.2 christos break; 1527 1.2 christos 1528 1.2 christos byte = start[sz]; 1529 1.2 christos start[sz] = '\0'; 1530 1.2 christos 1531 1.2 christos /* 1532 1.2 christos * Assume a stray trailing comma in the 1533 1.2 christos * name list if a name begins with a dash. 1534 1.2 christos */ 1535 1.2 christos 1536 1.2 christos if ('-' == start[0] || 1537 1.2 christos ('\\' == start[0] && '-' == start[1])) 1538 1.2 christos break; 1539 1.2 christos 1540 1.2 christos putkey(mpage, start, NAME_TITLE); 1541 1.2 christos if ( ! (mpage->name_head_done || 1542 1.2 christos strcasecmp(start, meta->title))) { 1543 1.2 christos putkey(mpage, start, NAME_HEAD); 1544 1.2 christos mpage->name_head_done = 1; 1545 1.2 christos } 1546 1.2 christos 1547 1.2 christos if (' ' == byte) { 1548 1.2 christos start += sz + 1; 1549 1.2 christos break; 1550 1.2 christos } 1551 1.2 christos 1552 1.2 christos assert(',' == byte); 1553 1.2 christos start += sz + 1; 1554 1.2 christos while (' ' == *start) 1555 1.2 christos start++; 1556 1.2 christos } 1557 1.2 christos 1558 1.2 christos if (start == title) { 1559 1.2 christos putkey(mpage, start, NAME_TITLE); 1560 1.2 christos if ( ! 
(mpage->name_head_done || 1561 1.2 christos strcasecmp(start, meta->title))) { 1562 1.2 christos putkey(mpage, start, NAME_HEAD); 1563 1.2 christos mpage->name_head_done = 1; 1564 1.2 christos } 1565 1.2 christos free(title); 1566 1.2 christos return; 1567 1.2 christos } 1568 1.2 christos 1569 1.2 christos while (isspace((unsigned char)*start)) 1570 1.2 christos start++; 1571 1.2 christos 1572 1.2 christos if (0 == strncmp(start, "-", 1)) 1573 1.2 christos start += 1; 1574 1.2 christos else if (0 == strncmp(start, "\\-\\-", 4)) 1575 1.2 christos start += 4; 1576 1.2 christos else if (0 == strncmp(start, "\\-", 2)) 1577 1.2 christos start += 2; 1578 1.2 christos else if (0 == strncmp(start, "\\(en", 4)) 1579 1.2 christos start += 4; 1580 1.2 christos else if (0 == strncmp(start, "\\(em", 4)) 1581 1.2 christos start += 4; 1582 1.2 christos 1583 1.2 christos while (' ' == *start) 1584 1.2 christos start++; 1585 1.2 christos 1586 1.4 christos /* 1587 1.4 christos * Cut off excessive one-line descriptions. 1588 1.4 christos * Bad pages are not worth better heuristics. 
1589 1.4 christos */ 1590 1.4 christos 1591 1.4 christos mpage->desc = mandoc_strndup(start, 150); 1592 1.2 christos free(title); 1593 1.2 christos return; 1594 1.2 christos } 1595 1.2 christos } 1596 1.2 christos 1597 1.2 christos for (n = n->child; n; n = n->next) { 1598 1.2 christos if (NULL != mpage->desc) 1599 1.2 christos break; 1600 1.2 christos parse_man(mpage, meta, n); 1601 1.2 christos } 1602 1.1 joerg } 1603 1.1 joerg 1604 1.1 joerg static void 1605 1.2 christos parse_mdoc(struct mpage *mpage, const struct roff_meta *meta, 1606 1.2 christos const struct roff_node *n) 1607 1.1 joerg { 1608 1.5 christos const struct mdoc_handler *handler; 1609 1.1 joerg 1610 1.4 christos for (n = n->child; n != NULL; n = n->next) { 1611 1.5 christos if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX) 1612 1.3 christos continue; 1613 1.4 christos assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX); 1614 1.5 christos handler = mdoc_handlers + (n->tok - MDOC_Dd); 1615 1.5 christos if (n->flags & handler->taboo) 1616 1.5 christos continue; 1617 1.5 christos 1618 1.2 christos switch (n->type) { 1619 1.2 christos case ROFFT_ELEM: 1620 1.2 christos case ROFFT_BLOCK: 1621 1.2 christos case ROFFT_HEAD: 1622 1.2 christos case ROFFT_BODY: 1623 1.2 christos case ROFFT_TAIL: 1624 1.5 christos if (handler->fp != NULL && 1625 1.5 christos (*handler->fp)(mpage, meta, n) == 0) 1626 1.4 christos break; 1627 1.5 christos if (handler->mask) 1628 1.2 christos putmdockey(mpage, n->child, 1629 1.5 christos handler->mask, handler->taboo); 1630 1.2 christos break; 1631 1.2 christos default: 1632 1.2 christos continue; 1633 1.2 christos } 1634 1.2 christos if (NULL != n->child) 1635 1.2 christos parse_mdoc(mpage, meta, n); 1636 1.1 joerg } 1637 1.1 joerg } 1638 1.1 joerg 1639 1.2 christos static int 1640 1.4 christos parse_mdoc_Fa(struct mpage *mpage, const struct roff_meta *meta, 1641 1.4 christos const struct roff_node *n) 1642 1.4 christos { 1643 1.4 christos uint64_t mask; 1644 1.4 christos 1645 1.4 
christos mask = TYPE_Fa; 1646 1.4 christos if (n->sec == SEC_SYNOPSIS) 1647 1.4 christos mask |= TYPE_Vt; 1648 1.4 christos 1649 1.4 christos putmdockey(mpage, n->child, mask, 0); 1650 1.4 christos return 0; 1651 1.4 christos } 1652 1.4 christos 1653 1.4 christos static int 1654 1.2 christos parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta, 1655 1.2 christos const struct roff_node *n) 1656 1.1 joerg { 1657 1.2 christos char *start, *end; 1658 1.1 joerg size_t sz; 1659 1.2 christos 1660 1.2 christos if (SEC_SYNOPSIS != n->sec || 1661 1.2 christos NULL == (n = n->child) || 1662 1.2 christos n->type != ROFFT_TEXT) 1663 1.2 christos return 0; 1664 1.1 joerg 1665 1.1 joerg /* 1666 1.1 joerg * Only consider those `Fd' macro fields that begin with an 1667 1.1 joerg * "inclusion" token (versus, e.g., #define). 1668 1.1 joerg */ 1669 1.2 christos 1670 1.1 joerg if (strcmp("#include", n->string)) 1671 1.2 christos return 0; 1672 1.1 joerg 1673 1.2 christos if ((n = n->next) == NULL || n->type != ROFFT_TEXT) 1674 1.2 christos return 0; 1675 1.1 joerg 1676 1.1 joerg /* 1677 1.1 joerg * Strip away the enclosing angle brackets and make sure we're 1678 1.1 joerg * not zero-length. 
1679 1.1 joerg */ 1680 1.1 joerg 1681 1.1 joerg start = n->string; 1682 1.1 joerg if ('<' == *start || '"' == *start) 1683 1.1 joerg start++; 1684 1.1 joerg 1685 1.1 joerg if (0 == (sz = strlen(start))) 1686 1.2 christos return 0; 1687 1.1 joerg 1688 1.1 joerg end = &start[(int)sz - 1]; 1689 1.1 joerg if ('>' == *end || '"' == *end) 1690 1.1 joerg end--; 1691 1.1 joerg 1692 1.2 christos if (end > start) 1693 1.2 christos putkeys(mpage, start, end - start + 1, TYPE_In); 1694 1.2 christos return 0; 1695 1.1 joerg } 1696 1.1 joerg 1697 1.1 joerg static void 1698 1.2 christos parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n) 1699 1.1 joerg { 1700 1.2 christos char *cp; 1701 1.2 christos size_t sz; 1702 1.2 christos 1703 1.2 christos if (n->type != ROFFT_TEXT) 1704 1.1 joerg return; 1705 1.1 joerg 1706 1.2 christos /* Skip function pointer punctuation. */ 1707 1.1 joerg 1708 1.2 christos cp = n->string; 1709 1.2 christos while (*cp == '(' || *cp == '*') 1710 1.2 christos cp++; 1711 1.2 christos sz = strcspn(cp, "()"); 1712 1.1 joerg 1713 1.2 christos putkeys(mpage, cp, sz, TYPE_Fn); 1714 1.2 christos if (n->sec == SEC_SYNOPSIS) 1715 1.2 christos putkeys(mpage, cp, sz, NAME_SYN); 1716 1.1 joerg } 1717 1.1 joerg 1718 1.2 christos static int 1719 1.2 christos parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta, 1720 1.2 christos const struct roff_node *n) 1721 1.1 joerg { 1722 1.4 christos uint64_t mask; 1723 1.1 joerg 1724 1.2 christos if (n->child == NULL) 1725 1.2 christos return 0; 1726 1.1 joerg 1727 1.2 christos parse_mdoc_fname(mpage, n->child); 1728 1.1 joerg 1729 1.4 christos n = n->child->next; 1730 1.4 christos if (n != NULL && n->type == ROFFT_TEXT) { 1731 1.4 christos mask = TYPE_Fa; 1732 1.4 christos if (n->sec == SEC_SYNOPSIS) 1733 1.4 christos mask |= TYPE_Vt; 1734 1.4 christos putmdockey(mpage, n, mask, 0); 1735 1.4 christos } 1736 1.1 joerg 1737 1.2 christos return 0; 1738 1.1 joerg } 1739 1.1 joerg 1740 1.2 christos static int 
1741 1.2 christos parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta, 1742 1.2 christos const struct roff_node *n) 1743 1.1 joerg { 1744 1.1 joerg 1745 1.2 christos if (n->type != ROFFT_HEAD) 1746 1.2 christos return 1; 1747 1.2 christos 1748 1.2 christos if (n->child != NULL) 1749 1.2 christos parse_mdoc_fname(mpage, n->child); 1750 1.2 christos 1751 1.2 christos return 0; 1752 1.1 joerg } 1753 1.1 joerg 1754 1.2 christos static int 1755 1.2 christos parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta, 1756 1.2 christos const struct roff_node *n) 1757 1.1 joerg { 1758 1.2 christos char *cp; 1759 1.1 joerg 1760 1.2 christos if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY) 1761 1.2 christos return 0; 1762 1.1 joerg 1763 1.2 christos if (n->child != NULL && 1764 1.2 christos n->child->next == NULL && 1765 1.2 christos n->child->type == ROFFT_TEXT) 1766 1.2 christos return 1; 1767 1.2 christos 1768 1.2 christos cp = NULL; 1769 1.2 christos deroff(&cp, n); 1770 1.2 christos if (cp != NULL) { 1771 1.2 christos putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va || 1772 1.2 christos n->type == ROFFT_BODY ? 
TYPE_Va : 0)); 1773 1.2 christos free(cp); 1774 1.2 christos } 1775 1.1 joerg 1776 1.2 christos return 0; 1777 1.1 joerg } 1778 1.1 joerg 1779 1.2 christos static int 1780 1.2 christos parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta, 1781 1.2 christos const struct roff_node *n) 1782 1.1 joerg { 1783 1.2 christos char *cp; 1784 1.1 joerg 1785 1.2 christos if (NULL == (n = n->child)) 1786 1.2 christos return 0; 1787 1.1 joerg 1788 1.2 christos if (NULL == n->next) { 1789 1.2 christos putkey(mpage, n->string, TYPE_Xr); 1790 1.2 christos return 0; 1791 1.2 christos } 1792 1.1 joerg 1793 1.2 christos mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string); 1794 1.2 christos putkey(mpage, cp, TYPE_Xr); 1795 1.2 christos free(cp); 1796 1.2 christos return 0; 1797 1.2 christos } 1798 1.1 joerg 1799 1.2 christos static int 1800 1.2 christos parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta, 1801 1.2 christos const struct roff_node *n) 1802 1.2 christos { 1803 1.1 joerg 1804 1.2 christos if (n->type == ROFFT_BODY) 1805 1.2 christos deroff(&mpage->desc, n); 1806 1.2 christos return 0; 1807 1.2 christos } 1808 1.1 joerg 1809 1.2 christos static int 1810 1.2 christos parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta, 1811 1.2 christos const struct roff_node *n) 1812 1.2 christos { 1813 1.1 joerg 1814 1.2 christos if (SEC_NAME == n->sec) 1815 1.3 christos putmdockey(mpage, n->child, NAME_TITLE, 0); 1816 1.2 christos else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) { 1817 1.2 christos if (n->child == NULL) 1818 1.2 christos putkey(mpage, meta->name, NAME_SYN); 1819 1.2 christos else 1820 1.3 christos putmdockey(mpage, n->child, NAME_SYN, 0); 1821 1.2 christos } 1822 1.2 christos if ( ! 
(mpage->name_head_done || 1823 1.2 christos n->child == NULL || n->child->string == NULL || 1824 1.2 christos strcasecmp(n->child->string, meta->title))) { 1825 1.3 christos putkey(mpage, n->child->string, NAME_HEAD); 1826 1.2 christos mpage->name_head_done = 1; 1827 1.2 christos } 1828 1.2 christos return 0; 1829 1.1 joerg } 1830 1.1 joerg 1831 1.2 christos static int 1832 1.2 christos parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta, 1833 1.2 christos const struct roff_node *n) 1834 1.1 joerg { 1835 1.1 joerg 1836 1.2 christos return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD; 1837 1.1 joerg } 1838 1.1 joerg 1839 1.2 christos static int 1840 1.2 christos parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta, 1841 1.2 christos const struct roff_node *n) 1842 1.2 christos { 1843 1.2 christos 1844 1.2 christos return n->type == ROFFT_HEAD; 1845 1.2 christos } 1846 1.1 joerg 1847 1.2 christos /* 1848 1.2 christos * Add a string to the hash table for the current manual. 1849 1.2 christos * Each string has a bitmask telling which macros it belongs to. 1850 1.2 christos * When we finish the manual, we'll dump the table. 
1851 1.2 christos */ 1852 1.1 joerg static void 1853 1.2 christos putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v) 1854 1.1 joerg { 1855 1.2 christos struct ohash *htab; 1856 1.2 christos struct str *s; 1857 1.2 christos const char *end; 1858 1.2 christos unsigned int slot; 1859 1.2 christos int i, mustfree; 1860 1.1 joerg 1861 1.2 christos if (0 == sz) 1862 1.1 joerg return; 1863 1.1 joerg 1864 1.2 christos mustfree = render_string(&cp, &sz); 1865 1.2 christos 1866 1.2 christos if (TYPE_Nm & v) { 1867 1.2 christos htab = &names; 1868 1.2 christos v &= name_mask; 1869 1.2 christos if (v & NAME_FIRST) 1870 1.2 christos name_mask &= ~NAME_FIRST; 1871 1.2 christos if (debug > 1) 1872 1.2 christos say(mpage->mlinks->file, 1873 1.3 christos "Adding name %*s, bits=0x%llx", (int)sz, cp, 1874 1.3 christos (unsigned long long)v); 1875 1.2 christos } else { 1876 1.2 christos htab = &strings; 1877 1.2 christos if (debug > 1) 1878 1.3 christos for (i = 0; i < KEY_MAX; i++) 1879 1.2 christos if ((uint64_t)1 << i & v) 1880 1.2 christos say(mpage->mlinks->file, 1881 1.2 christos "Adding key %s=%*s", 1882 1.3 christos mansearch_keynames[i], (int)sz, cp); 1883 1.2 christos } 1884 1.2 christos 1885 1.2 christos end = cp + sz; 1886 1.2 christos slot = ohash_qlookupi(htab, cp, &end); 1887 1.2 christos s = ohash_find(htab, slot); 1888 1.2 christos 1889 1.2 christos if (NULL != s && mpage == s->mpage) { 1890 1.2 christos s->mask |= v; 1891 1.2 christos return; 1892 1.2 christos } else if (NULL == s) { 1893 1.2 christos s = mandoc_calloc(1, sizeof(struct str) + sz + 1); 1894 1.2 christos memcpy(s->key, cp, sz); 1895 1.2 christos ohash_insert(htab, slot, s); 1896 1.2 christos } 1897 1.2 christos s->mpage = mpage; 1898 1.2 christos s->mask = v; 1899 1.1 joerg 1900 1.2 christos if (mustfree) 1901 1.2 christos free(cp); 1902 1.1 joerg } 1903 1.1 joerg 1904 1.2 christos /* 1905 1.2 christos * Take a Unicode codepoint and produce its UTF-8 encoding. 
1906 1.2 christos * This isn't the best way to do this, but it works. 1907 1.2 christos * The magic numbers are from the UTF-8 packaging. 1908 1.2 christos * They're not as scary as they seem: read the UTF-8 spec for details. 1909 1.2 christos */ 1910 1.2 christos static size_t 1911 1.2 christos utf8(unsigned int cp, char out[7]) 1912 1.1 joerg { 1913 1.2 christos size_t rc; 1914 1.2 christos 1915 1.2 christos rc = 0; 1916 1.2 christos if (cp <= 0x0000007F) { 1917 1.2 christos rc = 1; 1918 1.2 christos out[0] = (char)cp; 1919 1.2 christos } else if (cp <= 0x000007FF) { 1920 1.2 christos rc = 2; 1921 1.2 christos out[0] = (cp >> 6 & 31) | 192; 1922 1.2 christos out[1] = (cp & 63) | 128; 1923 1.2 christos } else if (cp <= 0x0000FFFF) { 1924 1.2 christos rc = 3; 1925 1.2 christos out[0] = (cp >> 12 & 15) | 224; 1926 1.2 christos out[1] = (cp >> 6 & 63) | 128; 1927 1.2 christos out[2] = (cp & 63) | 128; 1928 1.2 christos } else if (cp <= 0x001FFFFF) { 1929 1.2 christos rc = 4; 1930 1.2 christos out[0] = (cp >> 18 & 7) | 240; 1931 1.2 christos out[1] = (cp >> 12 & 63) | 128; 1932 1.2 christos out[2] = (cp >> 6 & 63) | 128; 1933 1.2 christos out[3] = (cp & 63) | 128; 1934 1.2 christos } else if (cp <= 0x03FFFFFF) { 1935 1.2 christos rc = 5; 1936 1.2 christos out[0] = (cp >> 24 & 3) | 248; 1937 1.2 christos out[1] = (cp >> 18 & 63) | 128; 1938 1.2 christos out[2] = (cp >> 12 & 63) | 128; 1939 1.2 christos out[3] = (cp >> 6 & 63) | 128; 1940 1.2 christos out[4] = (cp & 63) | 128; 1941 1.2 christos } else if (cp <= 0x7FFFFFFF) { 1942 1.2 christos rc = 6; 1943 1.2 christos out[0] = (cp >> 30 & 1) | 252; 1944 1.2 christos out[1] = (cp >> 24 & 63) | 128; 1945 1.2 christos out[2] = (cp >> 18 & 63) | 128; 1946 1.2 christos out[3] = (cp >> 12 & 63) | 128; 1947 1.2 christos out[4] = (cp >> 6 & 63) | 128; 1948 1.2 christos out[5] = (cp & 63) | 128; 1949 1.2 christos } else 1950 1.2 christos return 0; 1951 1.1 joerg 1952 1.2 christos out[rc] = '\0'; 1953 1.2 christos return rc; 1954 
1.1 joerg } 1955 1.1 joerg 1956 1.2 christos /* 1957 1.2 christos * If the string contains escape sequences, 1958 1.2 christos * replace it with an allocated rendering and return 1, 1959 1.2 christos * such that the caller can free it after use. 1960 1.2 christos * Otherwise, do nothing and return 0. 1961 1.2 christos */ 1962 1.2 christos static int 1963 1.2 christos render_string(char **public, size_t *psz) 1964 1.1 joerg { 1965 1.2 christos const char *src, *scp, *addcp, *seq; 1966 1.2 christos char *dst; 1967 1.2 christos size_t ssz, dsz, addsz; 1968 1.2 christos char utfbuf[7], res[6]; 1969 1.2 christos int seqlen, unicode; 1970 1.2 christos 1971 1.2 christos res[0] = '\\'; 1972 1.2 christos res[1] = '\t'; 1973 1.2 christos res[2] = ASCII_NBRSP; 1974 1.2 christos res[3] = ASCII_HYPH; 1975 1.2 christos res[4] = ASCII_BREAK; 1976 1.2 christos res[5] = '\0'; 1977 1.2 christos 1978 1.2 christos src = scp = *public; 1979 1.2 christos ssz = *psz; 1980 1.2 christos dst = NULL; 1981 1.2 christos dsz = 0; 1982 1.2 christos 1983 1.2 christos while (scp < src + *psz) { 1984 1.2 christos 1985 1.2 christos /* Leave normal characters unchanged. */ 1986 1.2 christos 1987 1.2 christos if (strchr(res, *scp) == NULL) { 1988 1.2 christos if (dst != NULL) 1989 1.2 christos dst[dsz++] = *scp; 1990 1.2 christos scp++; 1991 1.2 christos continue; 1992 1.2 christos } 1993 1.2 christos 1994 1.2 christos /* 1995 1.2 christos * Found something that requires replacing, 1996 1.2 christos * make sure we have a destination buffer. 1997 1.2 christos */ 1998 1.2 christos 1999 1.2 christos if (dst == NULL) { 2000 1.2 christos dst = mandoc_malloc(ssz + 1); 2001 1.2 christos dsz = scp - src; 2002 1.2 christos memcpy(dst, src, dsz); 2003 1.2 christos } 2004 1.2 christos 2005 1.2 christos /* Handle single-char special characters. 
*/ 2006 1.2 christos 2007 1.2 christos switch (*scp) { 2008 1.2 christos case '\\': 2009 1.2 christos break; 2010 1.2 christos case '\t': 2011 1.2 christos case ASCII_NBRSP: 2012 1.2 christos dst[dsz++] = ' '; 2013 1.2 christos scp++; 2014 1.2 christos continue; 2015 1.2 christos case ASCII_HYPH: 2016 1.2 christos dst[dsz++] = '-'; 2017 1.2 christos /* FALLTHROUGH */ 2018 1.2 christos case ASCII_BREAK: 2019 1.2 christos scp++; 2020 1.2 christos continue; 2021 1.2 christos default: 2022 1.2 christos abort(); 2023 1.2 christos } 2024 1.2 christos 2025 1.2 christos /* 2026 1.2 christos * Found an escape sequence. 2027 1.2 christos * Read past the slash, then parse it. 2028 1.2 christos * Ignore everything except characters. 2029 1.2 christos */ 2030 1.2 christos 2031 1.2 christos scp++; 2032 1.2 christos if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL) 2033 1.2 christos continue; 2034 1.1 joerg 2035 1.2 christos /* 2036 1.2 christos * Render the special character 2037 1.2 christos * as either UTF-8 or ASCII. 2038 1.2 christos */ 2039 1.2 christos 2040 1.2 christos if (write_utf8) { 2041 1.2 christos unicode = mchars_spec2cp(seq, seqlen); 2042 1.2 christos if (unicode <= 0) 2043 1.2 christos continue; 2044 1.2 christos addsz = utf8(unicode, utfbuf); 2045 1.2 christos if (addsz == 0) 2046 1.2 christos continue; 2047 1.2 christos addcp = utfbuf; 2048 1.2 christos } else { 2049 1.2 christos addcp = mchars_spec2str(seq, seqlen, &addsz); 2050 1.2 christos if (addcp == NULL) 2051 1.2 christos continue; 2052 1.2 christos if (*addcp == ASCII_NBRSP) { 2053 1.2 christos addcp = " "; 2054 1.2 christos addsz = 1; 2055 1.2 christos } 2056 1.2 christos } 2057 1.2 christos 2058 1.2 christos /* Copy the rendered glyph into the stream. 
*/ 2059 1.2 christos 2060 1.2 christos ssz += addsz; 2061 1.2 christos dst = mandoc_realloc(dst, ssz + 1); 2062 1.2 christos memcpy(dst + dsz, addcp, addsz); 2063 1.2 christos dsz += addsz; 2064 1.2 christos } 2065 1.2 christos if (dst != NULL) { 2066 1.2 christos *public = dst; 2067 1.2 christos *psz = dsz; 2068 1.2 christos } 2069 1.2 christos 2070 1.2 christos /* Trim trailing whitespace and NUL-terminate. */ 2071 1.2 christos 2072 1.2 christos while (*psz > 0 && (*public)[*psz - 1] == ' ') 2073 1.2 christos --*psz; 2074 1.2 christos if (dst != NULL) { 2075 1.2 christos (*public)[*psz] = '\0'; 2076 1.2 christos return 1; 2077 1.2 christos } else 2078 1.2 christos return 0; 2079 1.1 joerg } 2080 1.1 joerg 2081 1.1 joerg static void 2082 1.2 christos dbadd_mlink(const struct mlink *mlink) 2083 1.1 joerg { 2084 1.3 christos dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE); 2085 1.3 christos dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec); 2086 1.3 christos dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec); 2087 1.3 christos dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch); 2088 1.3 christos dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file); 2089 1.1 joerg } 2090 1.1 joerg 2091 1.2 christos /* 2092 1.2 christos * Flush the current page's terms (and their bits) into the database. 2093 1.2 christos * Also, handle escape sequences at the last possible moment. 
2094 1.2 christos */ 2095 1.1 joerg static void 2096 1.3 christos dbadd(struct dba *dba, struct mpage *mpage) 2097 1.1 joerg { 2098 1.2 christos struct mlink *mlink; 2099 1.2 christos struct str *key; 2100 1.2 christos char *cp; 2101 1.3 christos uint64_t mask; 2102 1.2 christos size_t i; 2103 1.2 christos unsigned int slot; 2104 1.2 christos int mustfree; 2105 1.2 christos 2106 1.2 christos mlink = mpage->mlinks; 2107 1.2 christos 2108 1.2 christos if (nodb) { 2109 1.2 christos for (key = ohash_first(&names, &slot); NULL != key; 2110 1.2 christos key = ohash_next(&names, &slot)) 2111 1.2 christos free(key); 2112 1.2 christos for (key = ohash_first(&strings, &slot); NULL != key; 2113 1.2 christos key = ohash_next(&strings, &slot)) 2114 1.2 christos free(key); 2115 1.2 christos if (0 == debug) 2116 1.2 christos return; 2117 1.2 christos while (NULL != mlink) { 2118 1.2 christos fputs(mlink->name, stdout); 2119 1.2 christos if (NULL == mlink->next || 2120 1.2 christos strcmp(mlink->dsec, mlink->next->dsec) || 2121 1.2 christos strcmp(mlink->fsec, mlink->next->fsec) || 2122 1.2 christos strcmp(mlink->arch, mlink->next->arch)) { 2123 1.2 christos putchar('('); 2124 1.2 christos if ('\0' == *mlink->dsec) 2125 1.2 christos fputs(mlink->fsec, stdout); 2126 1.2 christos else 2127 1.2 christos fputs(mlink->dsec, stdout); 2128 1.2 christos if ('\0' != *mlink->arch) 2129 1.2 christos printf("/%s", mlink->arch); 2130 1.2 christos putchar(')'); 2131 1.2 christos } 2132 1.2 christos mlink = mlink->next; 2133 1.2 christos if (NULL != mlink) 2134 1.2 christos fputs(", ", stdout); 2135 1.2 christos } 2136 1.2 christos printf(" - %s\n", mpage->desc); 2137 1.1 joerg return; 2138 1.2 christos } 2139 1.1 joerg 2140 1.2 christos if (debug) 2141 1.2 christos say(mlink->file, "Adding to database"); 2142 1.1 joerg 2143 1.2 christos cp = mpage->desc; 2144 1.2 christos i = strlen(cp); 2145 1.2 christos mustfree = render_string(&cp, &i); 2146 1.3 christos mpage->dba = dba_page_new(dba->pages, 
2147 1.3 christos *mpage->arch == '\0' ? mlink->arch : mpage->arch, 2148 1.3 christos cp, mlink->file, mpage->form); 2149 1.2 christos if (mustfree) 2150 1.2 christos free(cp); 2151 1.3 christos dba_page_add(mpage->dba, DBP_SECT, mpage->sec); 2152 1.2 christos 2153 1.3 christos while (mlink != NULL) { 2154 1.2 christos dbadd_mlink(mlink); 2155 1.2 christos mlink = mlink->next; 2156 1.2 christos } 2157 1.1 joerg 2158 1.2 christos for (key = ohash_first(&names, &slot); NULL != key; 2159 1.2 christos key = ohash_next(&names, &slot)) { 2160 1.2 christos assert(key->mpage == mpage); 2161 1.3 christos dba_page_alias(mpage->dba, key->key, key->mask); 2162 1.2 christos free(key); 2163 1.2 christos } 2164 1.2 christos for (key = ohash_first(&strings, &slot); NULL != key; 2165 1.2 christos key = ohash_next(&strings, &slot)) { 2166 1.2 christos assert(key->mpage == mpage); 2167 1.3 christos i = 0; 2168 1.3 christos for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) { 2169 1.3 christos if (key->mask & mask) 2170 1.3 christos dba_macro_add(dba->macros, i, 2171 1.3 christos key->key, mpage->dba); 2172 1.3 christos i++; 2173 1.3 christos } 2174 1.2 christos free(key); 2175 1.2 christos } 2176 1.1 joerg } 2177 1.1 joerg 2178 1.1 joerg static void 2179 1.3 christos dbprune(struct dba *dba) 2180 1.1 joerg { 2181 1.3 christos struct dba_array *page, *files; 2182 1.3 christos char *file; 2183 1.2 christos 2184 1.3 christos dba_array_FOREACH(dba->pages, page) { 2185 1.3 christos files = dba_array_get(page, DBP_FILE); 2186 1.3 christos dba_array_FOREACH(files, file) { 2187 1.3 christos if (*file < ' ') 2188 1.3 christos file++; 2189 1.3 christos if (ohash_find(&mlinks, ohash_qlookup(&mlinks, 2190 1.3 christos file)) != NULL) { 2191 1.3 christos if (debug) 2192 1.3 christos say(file, "Deleting from database"); 2193 1.3 christos dba_array_del(dba->pages); 2194 1.3 christos break; 2195 1.3 christos } 2196 1.2 christos } 2197 1.2 christos } 2198 1.1 joerg } 2199 1.1 joerg 2200 1.1 joerg /* 
2201 1.3 christos * Write the database from memory to disk. 2202 1.1 joerg */ 2203 1.1 joerg static void 2204 1.3 christos dbwrite(struct dba *dba) 2205 1.1 joerg { 2206 1.4 christos struct stat sb1, sb2; 2207 1.4 christos char tfn[33], *cp1, *cp2; 2208 1.4 christos off_t i; 2209 1.4 christos int fd1, fd2; 2210 1.4 christos 2211 1.4 christos /* 2212 1.4 christos * Do not write empty databases, and delete existing ones 2213 1.4 christos * when makewhatis -u causes them to become empty. 2214 1.4 christos */ 2215 1.4 christos 2216 1.4 christos dba_array_start(dba->pages); 2217 1.4 christos if (dba_array_next(dba->pages) == NULL) { 2218 1.4 christos if (unlink(MANDOC_DB) == -1 && errno != ENOENT) 2219 1.4 christos say(MANDOC_DB, "&unlink"); 2220 1.4 christos return; 2221 1.4 christos } 2222 1.4 christos 2223 1.4 christos /* 2224 1.4 christos * Build the database in a temporary file, 2225 1.4 christos * then atomically move it into place. 2226 1.4 christos */ 2227 1.2 christos 2228 1.3 christos if (dba_write(MANDOC_DB "~", dba) != -1) { 2229 1.3 christos if (rename(MANDOC_DB "~", MANDOC_DB) == -1) { 2230 1.3 christos exitcode = (int)MANDOCLEVEL_SYSERR; 2231 1.3 christos say(MANDOC_DB, "&rename"); 2232 1.3 christos unlink(MANDOC_DB "~"); 2233 1.3 christos } 2234 1.2 christos return; 2235 1.2 christos } 2236 1.2 christos 2237 1.4 christos /* 2238 1.4 christos * We lack write permission and cannot replace the database 2239 1.4 christos * file, but let's at least check whether the data changed. 
2240 1.4 christos */ 2241 1.4 christos 2242 1.3 christos (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn)); 2243 1.3 christos if (mkdtemp(tfn) == NULL) { 2244 1.3 christos exitcode = (int)MANDOCLEVEL_SYSERR; 2245 1.3 christos say("", "&%s", tfn); 2246 1.1 joerg return; 2247 1.3 christos } 2248 1.4 christos cp1 = cp2 = MAP_FAILED; 2249 1.4 christos fd1 = fd2 = -1; 2250 1.3 christos (void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn)); 2251 1.3 christos if (dba_write(tfn, dba) == -1) { 2252 1.3 christos say(tfn, "&dba_write"); 2253 1.4 christos goto err; 2254 1.2 christos } 2255 1.4 christos if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) { 2256 1.4 christos say(MANDOC_DB, "&open"); 2257 1.4 christos goto err; 2258 1.4 christos } 2259 1.4 christos if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) { 2260 1.4 christos say(tfn, "&open"); 2261 1.4 christos goto err; 2262 1.4 christos } 2263 1.4 christos if (fstat(fd1, &sb1) == -1) { 2264 1.4 christos say(MANDOC_DB, "&fstat"); 2265 1.4 christos goto err; 2266 1.4 christos } 2267 1.4 christos if (fstat(fd2, &sb2) == -1) { 2268 1.4 christos say(tfn, "&fstat"); 2269 1.4 christos goto err; 2270 1.4 christos } 2271 1.4 christos if (sb1.st_size != sb2.st_size) 2272 1.4 christos goto err; 2273 1.4 christos if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE, 2274 1.4 christos fd1, 0)) == MAP_FAILED) { 2275 1.4 christos say(MANDOC_DB, "&mmap"); 2276 1.4 christos goto err; 2277 1.4 christos } 2278 1.4 christos if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE, 2279 1.4 christos fd2, 0)) == MAP_FAILED) { 2280 1.4 christos say(tfn, "&mmap"); 2281 1.4 christos goto err; 2282 1.4 christos } 2283 1.4 christos for (i = 0; i < sb1.st_size; i++) 2284 1.4 christos if (cp1[i] != cp2[i]) 2285 1.4 christos goto err; 2286 1.4 christos goto out; 2287 1.4 christos 2288 1.4 christos err: 2289 1.4 christos exitcode = (int)MANDOCLEVEL_SYSERR; 2290 1.4 christos say(MANDOC_DB, "Data changed, but cannot replace database"); 2291 1.2 christos 
2292 1.3 christos out: 2293 1.4 christos if (cp1 != MAP_FAILED) 2294 1.4 christos munmap(cp1, sb1.st_size); 2295 1.4 christos if (cp2 != MAP_FAILED) 2296 1.4 christos munmap(cp2, sb2.st_size); 2297 1.4 christos if (fd1 != -1) 2298 1.4 christos close(fd1); 2299 1.4 christos if (fd2 != -1) 2300 1.4 christos close(fd2); 2301 1.4 christos unlink(tfn); 2302 1.3 christos *strrchr(tfn, '/') = '\0'; 2303 1.4 christos rmdir(tfn); 2304 1.1 joerg } 2305 1.1 joerg 2306 1.2 christos static int 2307 1.2 christos set_basedir(const char *targetdir, int report_baddir) 2308 1.1 joerg { 2309 1.2 christos static char startdir[PATH_MAX]; 2310 1.2 christos static int getcwd_status; /* 1 = ok, 2 = failure */ 2311 1.2 christos static int chdir_status; /* 1 = changed directory */ 2312 1.1 joerg 2313 1.2 christos /* 2314 1.2 christos * Remember the original working directory, if possible. 2315 1.2 christos * This will be needed if the second or a later directory 2316 1.2 christos * on the command line is given as a relative path. 2317 1.2 christos * Do not error out if the current directory is not 2318 1.2 christos * searchable: Maybe it won't be needed after all. 2319 1.2 christos */ 2320 1.7 wiz if (getcwd_status == 0) { 2321 1.7 wiz if (getcwd(startdir, sizeof(startdir)) == NULL) { 2322 1.2 christos getcwd_status = 2; 2323 1.2 christos (void)strlcpy(startdir, strerror(errno), 2324 1.2 christos sizeof(startdir)); 2325 1.2 christos } else 2326 1.2 christos getcwd_status = 1; 2327 1.1 joerg } 2328 1.1 joerg 2329 1.2 christos /* 2330 1.2 christos * We are leaving the old base directory. 2331 1.2 christos * Do not use it any longer, not even for messages. 
2332 1.2 christos */ 2333 1.2 christos *basedir = '\0'; 2334 1.7 wiz basedir_len = 0; 2335 1.2 christos 2336 1.2 christos /* 2337 1.2 christos * If and only if the directory was changed earlier and 2338 1.2 christos * the next directory to process is given as a relative path, 2339 1.2 christos * first go back, or bail out if that is impossible. 2340 1.2 christos */ 2341 1.7 wiz if (chdir_status && *targetdir != '/') { 2342 1.7 wiz if (getcwd_status == 2) { 2343 1.2 christos exitcode = (int)MANDOCLEVEL_SYSERR; 2344 1.2 christos say("", "getcwd: %s", startdir); 2345 1.2 christos return 0; 2346 1.2 christos } 2347 1.7 wiz if (chdir(startdir) == -1) { 2348 1.2 christos exitcode = (int)MANDOCLEVEL_SYSERR; 2349 1.2 christos say("", "&chdir %s", startdir); 2350 1.2 christos return 0; 2351 1.2 christos } 2352 1.1 joerg } 2353 1.1 joerg 2354 1.2 christos /* 2355 1.2 christos * Always resolve basedir to the canonicalized absolute 2356 1.2 christos * pathname and append a trailing slash, such that 2357 1.2 christos * we can reliably check whether files are inside. 
2358 1.2 christos */ 2359 1.7 wiz if (realpath(targetdir, basedir) == NULL) { 2360 1.2 christos if (report_baddir || errno != ENOENT) { 2361 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 2362 1.2 christos say("", "&%s: realpath", targetdir); 2363 1.2 christos } 2364 1.7 wiz *basedir = '\0'; 2365 1.2 christos return 0; 2366 1.7 wiz } else if (chdir(basedir) == -1) { 2367 1.2 christos if (report_baddir || errno != ENOENT) { 2368 1.2 christos exitcode = (int)MANDOCLEVEL_BADARG; 2369 1.2 christos say("", "&chdir"); 2370 1.1 joerg } 2371 1.7 wiz *basedir = '\0'; 2372 1.2 christos return 0; 2373 1.2 christos } 2374 1.2 christos chdir_status = 1; 2375 1.7 wiz basedir_len = strlen(basedir); 2376 1.7 wiz if (basedir[basedir_len - 1] != '/') { 2377 1.7 wiz if (basedir_len >= PATH_MAX - 1) { 2378 1.2 christos exitcode = (int)MANDOCLEVEL_SYSERR; 2379 1.2 christos say("", "Filename too long"); 2380 1.7 wiz *basedir = '\0'; 2381 1.7 wiz basedir_len = 0; 2382 1.2 christos return 0; 2383 1.1 joerg } 2384 1.7 wiz basedir[basedir_len++] = '/'; 2385 1.7 wiz basedir[basedir_len] = '\0'; 2386 1.1 joerg } 2387 1.2 christos return 1; 2388 1.1 joerg } 2389 1.1 joerg 2390 1.7 wiz #ifdef READ_ALLOWED_PATH 2391 1.7 wiz static int 2392 1.7 wiz read_allowed(const char *candidate) 2393 1.7 wiz { 2394 1.7 wiz const char *cp; 2395 1.7 wiz size_t len; 2396 1.7 wiz 2397 1.7 wiz for (cp = READ_ALLOWED_PATH;; cp += len) { 2398 1.7 wiz while (*cp == ':') 2399 1.7 wiz cp++; 2400 1.7 wiz if (*cp == '\0') 2401 1.7 wiz return 0; 2402 1.7 wiz len = strcspn(cp, ":"); 2403 1.7 wiz if (strncmp(candidate, cp, len) == 0) 2404 1.7 wiz return 1; 2405 1.7 wiz } 2406 1.7 wiz } 2407 1.7 wiz #endif 2408 1.7 wiz 2409 1.1 joerg static void 2410 1.2 christos say(const char *file, const char *format, ...) 
2411 1.1 joerg { 2412 1.2 christos va_list ap; 2413 1.2 christos int use_errno; 2414 1.2 christos 2415 1.7 wiz if (*basedir != '\0') 2416 1.2 christos fprintf(stderr, "%s", basedir); 2417 1.7 wiz if (*basedir != '\0' && *file != '\0') 2418 1.2 christos fputc('/', stderr); 2419 1.7 wiz if (*file != '\0') 2420 1.2 christos fprintf(stderr, "%s", file); 2421 1.1 joerg 2422 1.2 christos use_errno = 1; 2423 1.7 wiz if (format != NULL) { 2424 1.2 christos switch (*format) { 2425 1.2 christos case '&': 2426 1.2 christos format++; 2427 1.2 christos break; 2428 1.2 christos case '\0': 2429 1.2 christos format = NULL; 2430 1.2 christos break; 2431 1.2 christos default: 2432 1.2 christos use_errno = 0; 2433 1.2 christos break; 2434 1.2 christos } 2435 1.2 christos } 2436 1.7 wiz if (format != NULL) { 2437 1.7 wiz if (*basedir != '\0' || *file != '\0') 2438 1.2 christos fputs(": ", stderr); 2439 1.2 christos va_start(ap, format); 2440 1.2 christos vfprintf(stderr, format, ap); 2441 1.2 christos va_end(ap); 2442 1.1 joerg } 2443 1.2 christos if (use_errno) { 2444 1.7 wiz if (*basedir != '\0' || *file != '\0' || format != NULL) 2445 1.2 christos fputs(": ", stderr); 2446 1.2 christos perror(NULL); 2447 1.2 christos } else 2448 1.2 christos fputc('\n', stderr); 2449 1.1 joerg } 2450