makemandb.c revision 1.4 1 /* $NetBSD: makemandb.c,v 1.4 2012/02/15 23:36:10 joerg Exp $ */
2 /*
3 * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay (at) gmail.com>
4 * Copyright (c) 2011 Kristaps Dzonsons <kristaps (at) bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/cdefs.h>
20 __RCSID("$NetBSD: makemandb.c,v 1.4 2012/02/15 23:36:10 joerg Exp $");
21
22 #include <sys/stat.h>
23 #include <sys/types.h>
24
25 #include <assert.h>
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <err.h>
29 #include <libgen.h>
30 #include <md5.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <util.h>
36
37 #include "apropos-utils.h"
38 #include "man.h"
39 #include "mandoc.h"
40 #include "mdoc.h"
41 #include "sqlite3.h"
42
/* Size constant for buffers used by this program. */
#define BUFLEN 1024

/* Values stored in mandb_rec.page_type. */
#define MDOC 0	/* the page is written in mdoc(7) */
#define MAN 1	/* the page is written in man(7) */
46
47 /*
48 * A data structure for holding section specific data.
49 */
typedef struct secbuff {
	char *data;	/* text collected for the section */
	size_t buflen;	/* total length of the buffer allocated initially */
	size_t offset;	/* current offset in the buffer */
} secbuff;
55
/*
 * Command line settings; filled in by the option parsing in main().
 */
typedef struct makemandb_flags {
	int optimize;	/* -o: run optimize() on the database at the end */
	int limit;	/* -l: limit the indexing to only the NAME section */
	int recreate;	/* -f: database was created from scratch */
	int verbosity;	/* 0: quiet, 1: default, 2: verbose */
} makemandb_flags;
62
63 typedef struct mandb_rec {
64 /* Fields for mandb table */
65 char *name; // for storing the name of the man page
66 char *name_desc; // for storing the one line description (.Nd)
67 secbuff desc; // for storing the DESCRIPTION section
68 secbuff lib; // for the LIBRARY section
69 secbuff return_vals; // RETURN VALUES
70 secbuff env; // ENVIRONMENT
71 secbuff files; // FILES
72 secbuff exit_status; // EXIT STATUS
73 secbuff diagnostics; // DIAGNOSTICS
74 secbuff errors; // ERRORS
75 char section[2];
76
77 int xr_found;
78
79 /* Fields for mandb_meta table */
80 char *md5_hash;
81 dev_t device;
82 ino_t inode;
83 time_t mtime;
84
85 /* Fields for mandb_links table */
86 char *machine;
87 char *links; //all the links to a page in a space separated form
88 char *file_path;
89
90 /* Non-db fields */
91 int page_type; //Indicates the type of page: mdoc or man
92 } mandb_rec;
93
94 static void append(secbuff *sbuff, const char *src);
95 static void init_secbuffs(mandb_rec *);
96 static void free_secbuffs(mandb_rec *);
97 static int check_md5(const char *, sqlite3 *, const char *, char **);
98 static void cleanup(mandb_rec *);
99 static void set_section(const struct mdoc *, const struct man *, mandb_rec *);
100 static void set_machine(const struct mdoc *, mandb_rec *);
101 static int insert_into_db(sqlite3 *, mandb_rec *);
102 static void begin_parse(const char *, struct mparse *, mandb_rec *);
103 static void pmdoc_node(const struct mdoc_node *, mandb_rec *);
104 static void pmdoc_Nm(const struct mdoc_node *, mandb_rec *);
105 static void pmdoc_Nd(const struct mdoc_node *, mandb_rec *);
106 static void pmdoc_Sh(const struct mdoc_node *, mandb_rec *);
107 static void pmdoc_Xr(const struct mdoc_node *, mandb_rec *);
108 static void pmdoc_Pp(const struct mdoc_node *, mandb_rec *);
109 static void pmdoc_macro_handler(const struct mdoc_node *, mandb_rec *,
110 enum mdoct);
111 static void pman_node(const struct man_node *n, mandb_rec *);
112 static void pman_parse_node(const struct man_node *, secbuff *);
113 static void pman_parse_name(const struct man_node *, mandb_rec *);
114 static void pman_sh(const struct man_node *, mandb_rec *);
115 static void pman_block(const struct man_node *, mandb_rec *);
116 static void traversedir(const char *, sqlite3 *, struct mparse *);
117 static void mdoc_parse_section(enum mdoc_sec, const char *, mandb_rec *);
118 static void man_parse_section(enum man_sec, const struct man_node *, mandb_rec *);
119 static void build_file_cache(sqlite3 *, const char *, struct stat *);
120 static void update_db(sqlite3 *, struct mparse *, mandb_rec *);
121 __dead static void usage(void);
122 static void optimize(sqlite3 *);
123 static char *parse_escape(const char *);
124 static makemandb_flags mflags = { .verbosity = 1 };
125
126 typedef void (*pman_nf)(const struct man_node *n, mandb_rec *);
127 typedef void (*pmdoc_nf)(const struct mdoc_node *n, mandb_rec *);
128 static const pmdoc_nf mdocs[MDOC_MAX] = {
129 NULL, /* Ap */
130 NULL, /* Dd */
131 NULL, /* Dt */
132 NULL, /* Os */
133 pmdoc_Sh, /* Sh */
134 NULL, /* Ss */
135 pmdoc_Pp, /* Pp */
136 NULL, /* D1 */
137 NULL, /* Dl */
138 NULL, /* Bd */
139 NULL, /* Ed */
140 NULL, /* Bl */
141 NULL, /* El */
142 NULL, /* It */
143 NULL, /* Ad */
144 NULL, /* An */
145 NULL, /* Ar */
146 NULL, /* Cd */
147 NULL, /* Cm */
148 NULL, /* Dv */
149 NULL, /* Er */
150 NULL, /* Ev */
151 NULL, /* Ex */
152 NULL, /* Fa */
153 NULL, /* Fd */
154 NULL, /* Fl */
155 NULL, /* Fn */
156 NULL, /* Ft */
157 NULL, /* Ic */
158 NULL, /* In */
159 NULL, /* Li */
160 pmdoc_Nd, /* Nd */
161 pmdoc_Nm, /* Nm */
162 NULL, /* Op */
163 NULL, /* Ot */
164 NULL, /* Pa */
165 NULL, /* Rv */
166 NULL, /* St */
167 NULL, /* Va */
168 NULL, /* Vt */
169 pmdoc_Xr, /* Xr */
170 NULL, /* %A */
171 NULL, /* %B */
172 NULL, /* %D */
173 NULL, /* %I */
174 NULL, /* %J */
175 NULL, /* %N */
176 NULL, /* %O */
177 NULL, /* %P */
178 NULL, /* %R */
179 NULL, /* %T */
180 NULL, /* %V */
181 NULL, /* Ac */
182 NULL, /* Ao */
183 NULL, /* Aq */
184 NULL, /* At */
185 NULL, /* Bc */
186 NULL, /* Bf */
187 NULL, /* Bo */
188 NULL, /* Bq */
189 NULL, /* Bsx */
190 NULL, /* Bx */
191 NULL, /* Db */
192 NULL, /* Dc */
193 NULL, /* Do */
194 NULL, /* Dq */
195 NULL, /* Ec */
196 NULL, /* Ef */
197 NULL, /* Em */
198 NULL, /* Eo */
199 NULL, /* Fx */
200 NULL, /* Ms */
201 NULL, /* No */
202 NULL, /* Ns */
203 NULL, /* Nx */
204 NULL, /* Ox */
205 NULL, /* Pc */
206 NULL, /* Pf */
207 NULL, /* Po */
208 NULL, /* Pq */
209 NULL, /* Qc */
210 NULL, /* Ql */
211 NULL, /* Qo */
212 NULL, /* Qq */
213 NULL, /* Re */
214 NULL, /* Rs */
215 NULL, /* Sc */
216 NULL, /* So */
217 NULL, /* Sq */
218 NULL, /* Sm */
219 NULL, /* Sx */
220 NULL, /* Sy */
221 NULL, /* Tn */
222 NULL, /* Ux */
223 NULL, /* Xc */
224 NULL, /* Xo */
225 NULL, /* Fo */
226 NULL, /* Fc */
227 NULL, /* Oo */
228 NULL, /* Oc */
229 NULL, /* Bk */
230 NULL, /* Ek */
231 NULL, /* Bt */
232 NULL, /* Hf */
233 NULL, /* Fr */
234 NULL, /* Ud */
235 NULL, /* Lb */
236 NULL, /* Lp */
237 NULL, /* Lk */
238 NULL, /* Mt */
239 NULL, /* Brq */
240 NULL, /* Bro */
241 NULL, /* Brc */
242 NULL, /* %C */
243 NULL, /* Es */
244 NULL, /* En */
245 NULL, /* Dx */
246 NULL, /* %Q */
247 NULL, /* br */
248 NULL, /* sp */
249 NULL, /* %U */
250 NULL, /* Ta */
251 };
252
253 static const pman_nf mans[MAN_MAX] = {
254 NULL, //br
255 NULL, //TH
256 pman_sh, //SH
257 NULL, //SS
258 NULL, //TP
259 NULL, //LP
260 NULL, //PP
261 NULL, //P
262 NULL, //IP
263 NULL, //HP
264 NULL, //SM
265 NULL, //SB
266 NULL, //BI
267 NULL, //IB
268 NULL, //BR
269 NULL, //RB
270 NULL, //R
271 pman_block, //B
272 NULL, //I
273 NULL, //IR
274 NULL, //RI
275 NULL, //na
276 NULL, //sp
277 NULL, //nf
278 NULL, //fi
279 NULL, //RE
280 NULL, //RS
281 NULL, //DT
282 NULL, //UC
283 NULL, //PD
284 NULL, //AT
285 NULL, //in
286 NULL, //ft
287 };
288
289
290 int
291 main(int argc, char *argv[])
292 {
293 FILE *file;
294 const char *sqlstr, *manconf = NULL;
295 char *line, *command, *parent;
296 char *errmsg;
297 int ch;
298 struct mparse *mp;
299 sqlite3 *db;
300 ssize_t len;
301 size_t linesize;
302 struct mandb_rec rec;
303
304 while ((ch = getopt(argc, argv, "C:floqv")) != -1) {
305 switch (ch) {
306 case 'C':
307 manconf = optarg;
308 break;
309 case 'f':
310 remove(DBPATH);
311 mflags.recreate = 1;
312 break;
313 case 'l':
314 mflags.limit = 1;
315 break;
316 case 'o':
317 mflags.optimize = 1;
318 break;
319 case 'q':
320 mflags.verbosity = 0;
321 break;
322 case 'v':
323 mflags.verbosity = 2;
324 break;
325 default:
326 usage();
327 }
328 }
329
330 memset(&rec, 0, sizeof(rec));
331
332 init_secbuffs(&rec);
333 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
334
335 if ((db = init_db(MANDB_CREATE)) == NULL)
336 exit(EXIT_FAILURE);
337
338 sqlite3_exec(db, "PRAGMA synchronous = 0", NULL, NULL, &errmsg);
339 if (errmsg != NULL) {
340 warnx("%s", errmsg);
341 free(errmsg);
342 close_db(db);
343 exit(EXIT_FAILURE);
344 }
345
346 sqlite3_exec(db, "ATTACH DATABASE \':memory:\' AS metadb", NULL, NULL,
347 &errmsg);
348 if (errmsg != NULL) {
349 warnx("%s", errmsg);
350 free(errmsg);
351 close_db(db);
352 exit(EXIT_FAILURE);
353 }
354
355 if (manconf) {
356 char *arg;
357 size_t command_len = shquote(manconf, NULL, 0) + 1;
358 arg = malloc(command_len );
359 shquote(manconf, arg, command_len);
360 easprintf(&command, "man -p -C %s", arg);
361 free(arg);
362 } else {
363 command = estrdup("man -p");
364 }
365
366 /* Call man -p to get the list of man page dirs */
367 if ((file = popen(command, "r")) == NULL) {
368 close_db(db);
369 err(EXIT_FAILURE, "fopen failed");
370 }
371 free(command);
372
373 /* Begin the transaction for indexing the pages */
374 sqlite3_exec(db, "BEGIN", NULL, NULL, &errmsg);
375 if (errmsg != NULL) {
376 warnx("%s", errmsg);
377 free(errmsg);
378 exit(EXIT_FAILURE);
379 }
380
381 sqlstr = "CREATE TABLE IF NOT EXISTS metadb.file_cache(device, inode,"
382 " mtime, file PRIMARY KEY);"
383 "CREATE UNIQUE INDEX IF NOT EXISTS metadb.index_file_cache_dev"
384 " ON file_cache (device, inode)";
385
386 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
387 if (errmsg != NULL) {
388 warnx("%s", errmsg);
389 free(errmsg);
390 close_db(db);
391 exit(EXIT_FAILURE);
392 }
393
394 if (mflags.verbosity)
395 printf("Building temporary file cache\n");
396 line = NULL;
397 linesize = 0;
398 while ((len = getline(&line, &linesize, file)) != -1) {
399 /* Replace the new line character at the end of string with '\0' */
400 line[len - 1] = '\0';
401 parent = estrdup(line);
402 chdir(dirname(parent));
403 free(parent);
404 /* Traverse the man page directories and parse the pages */
405 traversedir(line, db, mp);
406 }
407 free(line);
408
409 if (pclose(file) == -1) {
410 close_db(db);
411 cleanup(&rec);
412 free_secbuffs(&rec);
413 err(EXIT_FAILURE, "pclose error");
414 }
415
416 update_db(db, mp, &rec);
417 mparse_free(mp);
418 free_secbuffs(&rec);
419
420 /* Commit the transaction */
421 sqlite3_exec(db, "COMMIT", NULL, NULL, &errmsg);
422 if (errmsg != NULL) {
423 warnx("%s", errmsg);
424 free(errmsg);
425 exit(EXIT_FAILURE);
426 }
427
428 if (mflags.optimize)
429 optimize(db);
430
431 close_db(db);
432 return 0;
433 }
434
435 /*
436 * traversedir --
437 * Traverses the given directory recursively and passes all the man page files
438 * in the way to build_file_cache()
439 */
440 static void
441 traversedir(const char *file, sqlite3 *db, struct mparse *mp)
442 {
443 struct stat sb;
444 struct dirent *dirp;
445 DIR *dp;
446 char *buf;
447
448 if (stat(file, &sb) < 0) {
449 warn("stat failed: %s", file);
450 return;
451 }
452
453 /* If it is a regular file or a symlink, pass it to build_cache() */
454 if (S_ISREG(sb.st_mode) || S_ISLNK(sb.st_mode)) {
455 build_file_cache(db, file, &sb);
456 return;
457 }
458
459 /* If it is a directory, traverse it recursively */
460 if (S_ISDIR(sb.st_mode)) {
461 if ((dp = opendir(file)) == NULL) {
462 warn("opendir error: %s", file);
463 return;
464 }
465
466 while ((dirp = readdir(dp)) != NULL) {
467 /* Avoid . and .. entries in a directory */
468 if (strncmp(dirp->d_name, ".", 1)) {
469 easprintf(&buf, "%s/%s", file, dirp->d_name);
470 traversedir(buf, db, mp);
471 free(buf);
472 }
473 }
474 closedir(dp);
475 }
476 }
477
/* build_file_cache --
 *  This function generates an md5 hash of the file passed as its 2nd parameter
 *  and stores it in a temporary table file_cache along with the full file path.
 *  This is done to support incremental updates of the database.
 *  The temporary table file_cache is dropped thereafter in the function
 *  update_db(), once the database has been updated.
 */
485 static void
486 build_file_cache(sqlite3 *db, const char *file, struct stat *sb)
487 {
488 const char *sqlstr;
489 sqlite3_stmt *stmt = NULL;
490 int rc, idx;
491 assert(file != NULL);
492 dev_t device_cache = sb->st_dev;
493 ino_t inode_cache = sb->st_ino;
494 time_t mtime_cache = sb->st_mtime;
495
496 sqlstr = "INSERT INTO metadb.file_cache VALUES (:device, :inode,"
497 " :mtime, :file)";
498 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
499 if (rc != SQLITE_OK) {
500 warnx("%s", sqlite3_errmsg(db));
501 return;
502 }
503
504 idx = sqlite3_bind_parameter_index(stmt, ":device");
505 rc = sqlite3_bind_int64(stmt, idx, device_cache);
506 if (rc != SQLITE_OK) {
507 warnx("%s", sqlite3_errmsg(db));
508 sqlite3_finalize(stmt);
509 return;
510 }
511
512 idx = sqlite3_bind_parameter_index(stmt, ":inode");
513 rc = sqlite3_bind_int64(stmt, idx, inode_cache);
514 if (rc != SQLITE_OK) {
515 warnx("%s", sqlite3_errmsg(db));
516 sqlite3_finalize(stmt);
517 return;
518 }
519
520 idx = sqlite3_bind_parameter_index(stmt, ":mtime");
521 rc = sqlite3_bind_int64(stmt, idx, mtime_cache);
522 if (rc != SQLITE_OK) {
523 warnx("%s", sqlite3_errmsg(db));
524 sqlite3_finalize(stmt);
525 return;
526 }
527
528 idx = sqlite3_bind_parameter_index(stmt, ":file");
529 rc = sqlite3_bind_text(stmt, idx, file, -1, NULL);
530 if (rc != SQLITE_OK) {
531 warnx("%s", sqlite3_errmsg(db));
532 sqlite3_finalize(stmt);
533 return;
534 }
535
536 sqlite3_step(stmt);
537 sqlite3_finalize(stmt);
538 }
539
540 static void
541 update_existing_entry(sqlite3 *db, const char *file, const char *hash,
542 mandb_rec *rec, int *new_count, int *link_count, int *err_count)
543 {
544 int update_count, rc, idx;
545 const char *inner_sqlstr;
546 sqlite3_stmt *inner_stmt;
547
548 update_count = sqlite3_total_changes(db);
549 inner_sqlstr = "UPDATE mandb_meta SET device = :device,"
550 " inode = :inode, mtime = :mtime WHERE"
551 " md5_hash = :md5 AND file = :file AND"
552 " (device <> :device2 OR inode <> "
553 " :inode2 OR mtime <> :mtime2)";
554 rc = sqlite3_prepare_v2(db, inner_sqlstr, -1, &inner_stmt, NULL);
555 if (rc != SQLITE_OK) {
556 warnx("%s", sqlite3_errmsg(db));
557 return;
558 }
559 idx = sqlite3_bind_parameter_index(inner_stmt, ":device");
560 sqlite3_bind_int64(inner_stmt, idx, rec->device);
561 idx = sqlite3_bind_parameter_index(inner_stmt, ":inode");
562 sqlite3_bind_int64(inner_stmt, idx, rec->inode);
563 idx = sqlite3_bind_parameter_index(inner_stmt, ":mtime");
564 sqlite3_bind_int64(inner_stmt, idx, rec->mtime);
565 idx = sqlite3_bind_parameter_index(inner_stmt, ":md5");
566 sqlite3_bind_text(inner_stmt, idx, hash, -1, NULL);
567 idx = sqlite3_bind_parameter_index(inner_stmt, ":file");
568 sqlite3_bind_text(inner_stmt, idx, file, -1, NULL);
569 idx = sqlite3_bind_parameter_index(inner_stmt, ":device2");
570 sqlite3_bind_int64(inner_stmt, idx, rec->device);
571 idx = sqlite3_bind_parameter_index(inner_stmt, ":inode2");
572 sqlite3_bind_int64(inner_stmt, idx, rec->inode);
573 idx = sqlite3_bind_parameter_index(inner_stmt, ":mtime2");
574 sqlite3_bind_int64(inner_stmt, idx, rec->mtime);
575
576 rc = sqlite3_step(inner_stmt);
577 if (rc == SQLITE_DONE) {
578 /* Check if an update has been performed. */
579 if (update_count != sqlite3_total_changes(db)) {
580 if (mflags.verbosity)
581 printf("Updated %s\n", file);
582 (*new_count)++;
583 } else {
584 /* Otherwise it was a hardlink. */
585 (*link_count)++;
586 }
587 } else {
588 warnx("Could not update the meta data for %s", file);
589 (*err_count)++;
590 }
591 sqlite3_finalize(inner_stmt);
592 }
593
/* update_db --
 *  Does an incremental update of the database by checking the file_cache.
 *  It parses and adds the pages which are present in file_cache,
 *  but not in the database.
 *  It also removes the pages which are present in the database,
 *  but not in the file_cache.
 */
601 static void
602 update_db(sqlite3 *db, struct mparse *mp, mandb_rec *rec)
603 {
604 const char *sqlstr;
605 sqlite3_stmt *stmt = NULL;
606 const char *file;
607 char *errmsg = NULL;
608 char *buf = NULL;
609 int new_count = 0; /* Counter for newly indexed/updated pages */
610 int total_count = 0; /* Counter for total number of pages */
611 int err_count = 0; /* Counter for number of failed pages */
612 int link_count = 0; /* Counter for number of hard/sym links */
613 int md5_status;
614 int rc;
615
616 sqlstr = "SELECT device, inode, mtime, file FROM metadb.file_cache"
617 " EXCEPT SELECT device, inode, mtime, file from mandb_meta";
618
619 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
620 if (rc != SQLITE_OK) {
621 warnx("%s", sqlite3_errmsg(db));
622 close_db(db);
623 errx(EXIT_FAILURE, "Could not query file cache");
624 }
625
626 while (sqlite3_step(stmt) == SQLITE_ROW) {
627 total_count++;
628 rec->device = sqlite3_column_int64(stmt, 0);
629 rec->inode = sqlite3_column_int64(stmt, 1);
630 rec->mtime = sqlite3_column_int64(stmt, 2);
631 file = (const char *) sqlite3_column_text(stmt, 3);
632 md5_status = check_md5(file, db, "mandb_meta", &buf);
633 assert(buf != NULL);
634 if (md5_status == -1) {
635 warnx("An error occurred in checking md5 value"
636 " for file %s", file);
637 err_count++;
638 continue;
639 }
640
641 if (md5_status == 0) {
642 /*
643 * The MD5 hash is already present in the database,
644 * so simply update the metadata, ignoring symlinks.
645 */
646 struct stat sb;
647 stat(file, &sb);
648 if (S_ISLNK(sb.st_mode)) {
649 free(buf);
650 link_count++;
651 continue;
652 }
653 update_existing_entry(db, file, buf, rec,
654 &new_count, &link_count, &err_count);
655 free(buf);
656 continue;
657 }
658
659 if (md5_status == 1) {
660 /*
661 * The MD5 hash was not present in the database.
662 * This means is either a new file or an updated file.
663 * We should go ahead with parsing.
664 */
665 if (mflags.verbosity > 1)
666 printf("Parsing: %s\n", file);
667 rec->md5_hash = buf;
668 rec->file_path = estrdup(file);
669 // file_path is freed by insert_into_db itself.
670 begin_parse(file, mp, rec);
671 if (insert_into_db(db, rec) < 0) {
672 warnx("Error in indexing %s", file);
673 err_count++;
674 } else {
675 new_count++;
676 }
677 }
678 }
679
680 sqlite3_finalize(stmt);
681
682 if (mflags.verbosity) {
683 printf("Total Number of new or updated pages enountered = %d\n"
684 "Total number of pages that were successfully"
685 " indexed/updated = %d\n"
686 "Total number of (hard or symbolic) links found = %d\n"
687 "Total number of pages that could not be indexed"
688 " due to errors = %d\n",
689 total_count, new_count, link_count, err_count);
690 }
691
692 if (mflags.recreate == 0)
693 return;
694
695 if (mflags.verbosity)
696 printf("Deleting stale index entries\n");
697
698 sqlstr = "DELETE FROM mandb_meta WHERE file NOT IN"
699 " (SELECT file FROM metadb.file_cache);"
700 "DROP TABLE metadb.file_cache;"
701 "DELETE FROM mandb WHERE rowid NOT IN"
702 " (SELECT id FROM mandb_meta);";
703
704 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
705 if (errmsg != NULL) {
706 warnx("Removing old entries failed: %s", errmsg);
707 warnx("Please rebuild database from scratch with -f.");
708 free(errmsg);
709 return;
710 }
711 }
712
713 /*
714 * begin_parse --
715 * parses the man page using libmandoc
716 */
717 static void
718 begin_parse(const char *file, struct mparse *mp, mandb_rec *rec)
719 {
720 struct mdoc *mdoc;
721 struct man *man;
722 mparse_reset(mp);
723
724 rec->xr_found = 0;
725
726 if (mparse_readfd(mp, -1, file) >= MANDOCLEVEL_FATAL) {
727 warnx("%s: Parse failure", file);
728 return;
729 }
730
731 mparse_result(mp, &mdoc, &man);
732 if (mdoc == NULL && man == NULL) {
733 warnx("Not a man(7) or mdoc(7) page");
734 return;
735 }
736
737 set_machine(mdoc, rec);
738 set_section(mdoc, man, rec);
739 if (mdoc) {
740 rec->page_type = MDOC;
741 pmdoc_node(mdoc_node(mdoc), rec);
742 } else {
743 rec->page_type = MAN;
744 pman_node(man_node(man), rec);
745 }
746 }
747
748 /*
749 * set_section --
750 * Extracts the section number and normalizes it to only the numeric part
751 * (Which should be the first character of the string).
752 */
753 static void
754 set_section(const struct mdoc *md, const struct man *m, mandb_rec *rec)
755 {
756 if (md) {
757 const struct mdoc_meta *md_meta = mdoc_meta(md);
758 rec->section[0] = md_meta->msec[0];
759 } else if (m) {
760 const struct man_meta *m_meta = man_meta(m);
761 rec->section[0] = m_meta->msec[0];
762 }
763 }
764
/*
 * set_machine --
 *  Extracts the machine architecture information if available.
 */
769 static void
770 set_machine(const struct mdoc *md, mandb_rec *rec)
771 {
772 if (md == NULL)
773 return;
774 const struct mdoc_meta *md_meta = mdoc_meta(md);
775 if (md_meta->arch)
776 rec->machine = estrdup(md_meta->arch);
777 }
778
779 static void
780 pmdoc_node(const struct mdoc_node *n, mandb_rec *rec)
781 {
782
783 if (n == NULL)
784 return;
785
786 switch (n->type) {
787 case (MDOC_BODY):
788 /* FALLTHROUGH */
789 case (MDOC_TAIL):
790 /* FALLTHROUGH */
791 case (MDOC_ELEM):
792 if (mdocs[n->tok] == NULL)
793 break;
794 (*mdocs[n->tok])(n, rec);
795 break;
796 default:
797 break;
798 }
799
800 pmdoc_node(n->child, rec);
801 pmdoc_node(n->next, rec);
802 }
803
804 /*
805 * pmdoc_Nm --
806 * Extracts the Name of the manual page from the .Nm macro
807 */
808 static void
809 pmdoc_Nm(const struct mdoc_node *n, mandb_rec *rec)
810 {
811 if (n->sec != SEC_NAME)
812 return;
813
814 for (n = n->child; n; n = n->next) {
815 if (n->type == MDOC_TEXT) {
816 concat(&rec->name, n->string);
817 }
818 }
819 }
820
821 /*
822 * pmdoc_Nd --
823 * Extracts the one line description of the man page from the .Nd macro
824 */
825 static void
826 pmdoc_Nd(const struct mdoc_node *n, mandb_rec *rec)
827 {
828 /*
829 * A static variable for keeping track of whether a Xr macro was seen
830 * previously.
831 */
832 char *buf = NULL;
833 char *temp;
834
835 if (n == NULL)
836 return;
837
838 if (n->type == MDOC_TEXT) {
839 if (rec->xr_found && n->next) {
840 /*
841 * An Xr macro was seen previously, so parse this
842 * and the next node.
843 */
844 temp = estrdup(n->string);
845 n = n->next;
846 easprintf(&buf, "%s(%s)", temp, n->string);
847 concat(&rec->name_desc, buf);
848 free(buf);
849 free(temp);
850 } else {
851 concat(&rec->name_desc, n->string);
852 }
853 rec->xr_found = 0;
854 } else if (mdocs[n->tok] == pmdoc_Xr) {
855 /* Remember that we have encountered an Xr macro */
856 rec->xr_found = 1;
857 }
858
859 if (n->child)
860 pmdoc_Nd(n->child, rec);
861
862 if(n->next)
863 pmdoc_Nd(n->next, rec);
864 }
865
866 /*
867 * pmdoc_macro_handler--
868 * This function is a single point of handling all the special macros that we
869 * want to handle especially. For example the .Xr macro for properly parsing
870 * the referenced page name along with the section number, or the .Pp macro
871 * for adding a new line whenever we encounter it.
872 */
873 static void
874 pmdoc_macro_handler(const struct mdoc_node *n, mandb_rec *rec, enum mdoct doct)
875 {
876 const struct mdoc_node *sn;
877 assert(n);
878
879 switch (doct) {
880 /* Parse the man page references.
881 * Basically the .Xr macros are used like:
882 * .Xr ls 1
883 * and formatted like this:
884 * ls(1)
885 * Prepare a buffer to format the data like the above example and call
886 * pmdoc_parse_section to append it.
887 */
888 case MDOC_Xr:
889 n = n->child;
890 while (n->type != MDOC_TEXT && n->next)
891 n = n->next;
892
893 if (n && n->type != MDOC_TEXT)
894 return;
895 sn = n;
896 if (n->next)
897 n = n->next;
898
899 while (n->type != MDOC_TEXT && n->next)
900 n = n->next;
901
902 if (n && n->type == MDOC_TEXT) {
903 size_t len = strlen(sn->string);
904 char *buf = emalloc(len + 4);
905 memcpy(buf, sn->string, len);
906 buf[len] = '(';
907 buf[len + 1] = n->string[0];
908 buf[len + 2] = ')';
909 buf[len + 3] = 0;
910 mdoc_parse_section(n->sec, buf, rec);
911 free(buf);
912 }
913
914 break;
915
916 /* Parse the .Pp macro to add a new line */
917 case MDOC_Pp:
918 if (n->type == MDOC_TEXT)
919 mdoc_parse_section(n->sec, "\n", rec);
920 break;
921 default:
922 break;
923 }
924
925 }
926
927 /*
928 * pmdoc_Xr, pmdoc_Pp--
929 * Empty stubs.
930 * The parser calls these functions each time it encounters
931 * a .Xr or .Pp macro. We are parsing all the data from
932 * the pmdoc_Sh function, so don't do anything here.
933 * (See if else blocks in pmdoc_Sh.)
934 */
935 static void
936 pmdoc_Xr(const struct mdoc_node *n, mandb_rec *rec)
937 {
938 }
939
940 static void
941 pmdoc_Pp(const struct mdoc_node *n, mandb_rec *rec)
942 {
943 }
944
945 /*
946 * pmdoc_Sh --
947 * Called when a .Sh macro is encountered and loops through its body, calling
948 * mdoc_parse_section to append the data to the section specific buffer.
949 * Two special macros which may occur inside the body of Sh are .Nm and .Xr and
950 * they need special handling, thus the separate if branches for them.
951 */
952 static void
953 pmdoc_Sh(const struct mdoc_node *n, mandb_rec *rec)
954 {
955 if (n == NULL)
956 return;
957 int xr_found = 0;
958
959 if (n->type == MDOC_TEXT) {
960 mdoc_parse_section(n->sec, n->string, rec);
961 } else if (mdocs[n->tok] == pmdoc_Nm && rec->name != NULL) {
962 /*
963 * When encountering a .Nm macro, substitute it
964 * with its previously cached value of the argument.
965 */
966 mdoc_parse_section(n->sec, rec->name, rec);
967 } else if (mdocs[n->tok] == pmdoc_Xr) {
968 /*
969 * When encountering other inline macros,
970 * call pmdoc_macro_handler.
971 */
972 pmdoc_macro_handler(n, rec, MDOC_Xr);
973 xr_found = 1;
974 } else if (mdocs[n->tok] == pmdoc_Pp) {
975 pmdoc_macro_handler(n, rec, MDOC_Pp);
976 }
977
978 /*
979 * If an Xr macro was encountered then the child node has
980 * already been explored by pmdoc_macro_handler.
981 */
982 if (xr_found == 0)
983 pmdoc_Sh(n->child, rec);
984 pmdoc_Sh(n->next, rec);
985 }
986
/*
 * mdoc_parse_section--
 *  Utility function for parsing sections of the mdoc type pages.
 *  Takes three params:
 *   1. sec is an enum which indicates the section in which we are present
 *   2. string is the string which we need to append to the secbuff for this
 *      particular section.
 *   3. rec is the record which holds the section buffers.
 *  The function appends string to the matching section buffer of rec and
 *  returns.
 */
996 static void
997 mdoc_parse_section(enum mdoc_sec sec, const char *string, mandb_rec *rec)
998 {
999 /*
1000 * If the user specified the 'l' flag, then parse and store only the
1001 * NAME section. Ignore the rest.
1002 */
1003 if (mflags.limit)
1004 return;
1005
1006 switch (sec) {
1007 case SEC_LIBRARY:
1008 append(&rec->lib, string);
1009 break;
1010 case SEC_RETURN_VALUES:
1011 append(&rec->return_vals, string);
1012 break;
1013 case SEC_ENVIRONMENT:
1014 append(&rec->env, string);
1015 break;
1016 case SEC_FILES:
1017 append(&rec->files, string);
1018 break;
1019 case SEC_EXIT_STATUS:
1020 append(&rec->exit_status, string);
1021 break;
1022 case SEC_DIAGNOSTICS:
1023 append(&rec->diagnostics, string);
1024 break;
1025 case SEC_ERRORS:
1026 append(&rec->errors, string);
1027 break;
1028 case SEC_NAME:
1029 case SEC_SYNOPSIS:
1030 case SEC_EXAMPLES:
1031 case SEC_STANDARDS:
1032 case SEC_HISTORY:
1033 case SEC_AUTHORS:
1034 case SEC_BUGS:
1035 break;
1036 default:
1037 append(&rec->desc, string);
1038 break;
1039 }
1040 }
1041
1042 static void
1043 pman_node(const struct man_node *n, mandb_rec *rec)
1044 {
1045 if (n == NULL)
1046 return;
1047
1048 switch (n->type) {
1049 case (MAN_BODY):
1050 /* FALLTHROUGH */
1051 case (MAN_TAIL):
1052 /* FALLTHROUGH */
1053 case (MAN_BLOCK):
1054 /* FALLTHROUGH */
1055 case (MAN_ELEM):
1056 if (mans[n->tok] != NULL)
1057 (*mans[n->tok])(n, rec);
1058 break;
1059 default:
1060 break;
1061 }
1062
1063 pman_node(n->child, rec);
1064 pman_node(n->next, rec);
1065 }
1066
1067 /*
1068 * pman_parse_name --
1069 * Parses the NAME section and puts the complete content in the name_desc
1070 * variable.
1071 */
1072 static void
1073 pman_parse_name(const struct man_node *n, mandb_rec *rec)
1074 {
1075 if (n == NULL)
1076 return;
1077
1078 if (n->type == MAN_TEXT) {
1079 char *tmp = parse_escape(n->string);
1080 concat(&rec->name_desc, tmp);
1081 free(tmp);
1082 }
1083
1084 if (n->child)
1085 pman_parse_name(n->child, rec);
1086
1087 if(n->next)
1088 pman_parse_name(n->next, rec);
1089 }
1090
1091 /*
1092 * A stub function to be able to parse the macros like .B embedded inside
1093 * a section.
1094 */
1095 static void
1096 pman_block(const struct man_node *n, mandb_rec *rec)
1097 {
1098 }
1099
/*
 * pman_sh --
 *  This function does one of the two things:
 *  1. If the present section is NAME, then it will:
 *    (a) Extract the name of the page (in case of multiple comma separated
 *        names, it will pick up the first one).
 *    (b) Build a space separated list of all the symlinks/hardlinks to
 *        this page and store in the buffer 'links'. These are extracted from
 *        the comma separated list of names in the NAME section as well.
 *    (c) Move on to the one line description section, which is after the list
 *        of names in the NAME section.
 *  2. Otherwise, it will check the section name and call the man_parse_section
 *     function, passing the enum corresponding to that section.
 */
1114 static void
1115 pman_sh(const struct man_node *n, mandb_rec *rec)
1116 {
1117 static const struct {
1118 enum man_sec section;
1119 const char *header;
1120 } mapping[] = {
1121 { MANSEC_DESCRIPTION, "DESCRIPTION" },
1122 { MANSEC_SYNOPSIS, "SYNOPSIS" },
1123 { MANSEC_LIBRARY, "LIBRARY" },
1124 { MANSEC_ERRORS, "ERRORS" },
1125 { MANSEC_FILES, "FILES" },
1126 { MANSEC_RETURN_VALUES, "RETURN VALUE" },
1127 { MANSEC_RETURN_VALUES, "RETURN VALUES" },
1128 { MANSEC_EXIT_STATUS, "EXIT STATUS" },
1129 { MANSEC_EXAMPLES, "EXAMPLES" },
1130 { MANSEC_EXAMPLES, "EXAMPLE" },
1131 { MANSEC_STANDARDS, "STANDARDS" },
1132 { MANSEC_HISTORY, "HISTORY" },
1133 { MANSEC_BUGS, "BUGS" },
1134 { MANSEC_AUTHORS, "AUTHORS" },
1135 { MANSEC_COPYRIGHT, "COPYRIGHT" },
1136 };
1137 const struct man_node *head;
1138 char *name_desc;
1139 int sz;
1140 size_t i;
1141
1142 if ((head = n->parent->head) == NULL || (head = head->child) == NULL ||
1143 head->type != MAN_TEXT)
1144 return;
1145
1146 /*
1147 * Check if this section should be extracted and
1148 * where it should be stored. Handled the trival cases first.
1149 */
1150 for (i = 0; i < sizeof(mapping) / sizeof(mapping[0]); ++i) {
1151 if (strcmp(head->string, mapping[i].header) == 0) {
1152 man_parse_section(mapping[i].section, n, rec);
1153 return;
1154 }
1155 }
1156
1157 if (strcmp(head->string, "NAME") == 0) {
1158 /*
1159 * We are in the NAME section.
1160 * pman_parse_name will put the complete content in name_desc.
1161 */
1162 pman_parse_name(n, rec);
1163
1164 name_desc = rec->name_desc;
1165
1166 /* Remove any leading spaces. */
1167 while (name_desc[0] == ' ')
1168 name_desc++;
1169
1170 /* If the line begins with a "\&", avoid those */
1171 if (name_desc[0] == '\\' && name_desc[1] == '&')
1172 name_desc += 2;
1173
1174 /* Now name_desc should be left with a comma-space
1175 * separated list of names and the one line description
1176 * of the page:
1177 * "a, b, c \- sample description"
1178 * Take out the first name, before the first comma
1179 * (or space) and store it in rec->name.
1180 * If the page has aliases then they should be
1181 * in the form of a comma separated list.
1182 * Keep looping while there is a comma in name_desc,
1183 * extract the alias name and store in rec->links.
1184 * When there are no more commas left, break out.
1185 */
1186 int has_alias = 0; // Any more aliases left?
1187 while (*name_desc) {
1188 /* Remove any leading spaces. */
1189 if (name_desc[0] == ' ') {
1190 name_desc++;
1191 continue;
1192 }
1193 sz = strcspn(name_desc, ", ");
1194
1195 /* Extract the first term and store it in rec->name. */
1196 if (rec->name == NULL) {
1197 if (name_desc[sz] == ',')
1198 has_alias = 1;
1199 name_desc[sz] = 0;
1200 rec->name = emalloc(sz + 1);
1201 memcpy(rec->name, name_desc, sz + 1);
1202 name_desc += sz + 1;
1203 continue;
1204 }
1205
1206 /*
1207 * Once rec->name is set, rest of the names
1208 * are to be treated as links or aliases.
1209 */
1210 if (rec->name && has_alias) {
1211 if (name_desc[sz] != ',') {
1212 /* No more commas left -->
1213 * no more aliases to take out
1214 */
1215 has_alias = 0;
1216 }
1217 name_desc[sz] = 0;
1218 concat2(&rec->links, name_desc, sz);
1219 name_desc += sz + 1;
1220 continue;
1221 }
1222 break;
1223 }
1224
1225 /* Parse any escape sequences that might be there */
1226 char *temp = parse_escape(name_desc);
1227 free(rec->name_desc);
1228 rec->name_desc = temp;
1229 temp = parse_escape(rec->name);
1230 free(rec->name);
1231 rec->name = temp;
1232 return;
1233 }
1234
1235 /* The RETURN VALUE section might be specified in multiple ways */
1236 if (strcmp(head->string, "RETURN") == 0 &&
1237 head->next != NULL && head->next->type == MAN_TEXT &&
1238 (strcmp(head->next->string, "VALUE") == 0 ||
1239 strcmp(head->next->string, "VALUES") == 0)) {
1240 man_parse_section(MANSEC_RETURN_VALUES, n, rec);
1241 return;
1242 }
1243
1244 /*
1245 * EXIT STATUS section can also be specified all on one line or on two
1246 * separate lines.
1247 */
1248 if (strcmp(head->string, "EXIT") == 0 &&
1249 head->next != NULL && head->next->type == MAN_TEXT &&
1250 strcmp(head->next->string, "STATUS") == 0) {
1251 man_parse_section(MANSEC_EXIT_STATUS, n, rec);
1252 return;
1253 }
1254
1255 /* Store the rest of the content in desc. */
1256 man_parse_section(MANSEC_NONE, n, rec);
1257 }
1258
1259 /*
1260 * pman_parse_node --
1261 * Generic function to iterate through a node. Usually called from
1262 * man_parse_section to parse a particular section of the man page.
1263 */
1264 static void
1265 pman_parse_node(const struct man_node *n, secbuff *s)
1266 {
1267 if (n == NULL)
1268 return;
1269
1270 if (n->type == MAN_TEXT)
1271 append(s, n->string);
1272
1273 pman_parse_node(n->child, s);
1274 pman_parse_node(n->next, s);
1275 }
1276
1277 /*
1278 * man_parse_section --
1279 * Takes two parameters:
1280 * sec: Tells which section we are present in
1281 * n: Is the present node of the AST.
1282 * Depending on the section, we call pman_parse_node to parse that section and
1283 * concatenate the content from that section into the buffer for that section.
1284 */
1285 static void
1286 man_parse_section(enum man_sec sec, const struct man_node *n, mandb_rec *rec)
1287 {
1288 /*
1289 * If the user sepecified the 'l' flag then just parse
1290 * the NAME section, ignore the rest.
1291 */
1292 if (mflags.limit)
1293 return;
1294
1295 switch (sec) {
1296 case MANSEC_LIBRARY:
1297 pman_parse_node(n, &rec->lib);
1298 break;
1299 case MANSEC_RETURN_VALUES:
1300 pman_parse_node(n, &rec->return_vals);
1301 break;
1302 case MANSEC_ENVIRONMENT:
1303 pman_parse_node(n, &rec->env);
1304 break;
1305 case MANSEC_FILES:
1306 pman_parse_node(n, &rec->files);
1307 break;
1308 case MANSEC_EXIT_STATUS:
1309 pman_parse_node(n, &rec->exit_status);
1310 break;
1311 case MANSEC_DIAGNOSTICS:
1312 pman_parse_node(n, &rec->diagnostics);
1313 break;
1314 case MANSEC_ERRORS:
1315 pman_parse_node(n, &rec->errors);
1316 break;
1317 case MANSEC_NAME:
1318 case MANSEC_SYNOPSIS:
1319 case MANSEC_EXAMPLES:
1320 case MANSEC_STANDARDS:
1321 case MANSEC_HISTORY:
1322 case MANSEC_BUGS:
1323 case MANSEC_AUTHORS:
1324 case MANSEC_COPYRIGHT:
1325 break;
1326 default:
1327 pman_parse_node(n, &rec->desc);
1328 break;
1329 }
1330
1331 }
1332
1333 /*
1334 * insert_into_db --
1335 * Inserts the parsed data of the man page in the Sqlite databse.
1336 * If any of the values is NULL, then we cleanup and return -1 indicating
1337 * an error.
1338 * Otherwise, store the data in the database and return 0.
1339 */
1340 static int
1341 insert_into_db(sqlite3 *db, mandb_rec *rec)
1342 {
1343 int rc = 0;
1344 int idx = -1;
1345 const char *sqlstr = NULL;
1346 sqlite3_stmt *stmt = NULL;
1347 char *ln = NULL;
1348 char *errmsg = NULL;
1349 long int mandb_rowid;
1350
1351 /*
1352 * At the very minimum we want to make sure that we store
1353 * the following data:
1354 * Name, one line description, and the MD5 hash
1355 */
1356 if (rec->name == NULL || rec->name_desc == NULL ||
1357 rec->md5_hash == NULL) {
1358 cleanup(rec);
1359 return -1;
1360 }
1361
1362 /* Write null byte at the end of all the sec_buffs */
1363 rec->desc.data[rec->desc.offset] = 0;
1364 rec->lib.data[rec->lib.offset] = 0;
1365 rec->env.data[rec->env.offset] = 0;
1366 rec->return_vals.data[rec->return_vals.offset] = 0;
1367 rec->exit_status.data[rec->exit_status.offset] = 0;
1368 rec->files.data[rec->files.offset] = 0;
1369 rec->diagnostics.data[rec->diagnostics.offset] = 0;
1370 rec->errors.data[rec->errors.offset] = 0;
1371
1372 /*
1373 * In case of a mdoc page: (sorry, no better place to put this code)
1374 * parse the comma separated list of names of man pages,
1375 * the first name will be stored in the mandb table, rest will be
1376 * treated as links and put in the mandb_links table.
1377 */
1378 if (rec->page_type == MDOC) {
1379 char *tmp;
1380 rec->links = estrdup(rec->name);
1381 free(rec->name);
1382 int sz = strcspn(rec->links, " \0");
1383 rec->name = emalloc(sz + 1);
1384 memcpy(rec->name, rec->links, sz);
1385 if(rec->name[sz - 1] == ',')
1386 rec->name[sz - 1] = 0;
1387 else
1388 rec->name[sz] = 0;
1389 while (rec->links[sz] == ' ')
1390 ++sz;
1391 tmp = estrdup(rec->links + sz);
1392 free(rec->links);
1393 rec->links = tmp;
1394 }
1395
1396 /*------------------------ Populate the mandb table---------------------------*/
1397 sqlstr = "INSERT INTO mandb VALUES (:section, :name, :name_desc, :desc,"
1398 " :lib, :return_vals, :env, :files, :exit_status,"
1399 " :diagnostics, :errors, :md5_hash, :machine)";
1400
1401 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
1402 if (rc != SQLITE_OK)
1403 goto Out;
1404
1405 idx = sqlite3_bind_parameter_index(stmt, ":name");
1406 rc = sqlite3_bind_text(stmt, idx, rec->name, -1, NULL);
1407 if (rc != SQLITE_OK) {
1408 sqlite3_finalize(stmt);
1409 goto Out;
1410 }
1411
1412 idx = sqlite3_bind_parameter_index(stmt, ":section");
1413 rc = sqlite3_bind_text(stmt, idx, rec->section, -1, NULL);
1414 if (rc != SQLITE_OK) {
1415 sqlite3_finalize(stmt);
1416 goto Out;
1417 }
1418
1419 idx = sqlite3_bind_parameter_index(stmt, ":name_desc");
1420 rc = sqlite3_bind_text(stmt, idx, rec->name_desc, -1, NULL);
1421 if (rc != SQLITE_OK) {
1422 sqlite3_finalize(stmt);
1423 goto Out;
1424 }
1425
1426 idx = sqlite3_bind_parameter_index(stmt, ":desc");
1427 rc = sqlite3_bind_text(stmt, idx, rec->desc.data,
1428 rec->desc.offset + 1, NULL);
1429 if (rc != SQLITE_OK) {
1430 sqlite3_finalize(stmt);
1431 goto Out;
1432 }
1433
1434 idx = sqlite3_bind_parameter_index(stmt, ":lib");
1435 rc = sqlite3_bind_text(stmt, idx, rec->lib.data, rec->lib.offset + 1, NULL);
1436 if (rc != SQLITE_OK) {
1437 sqlite3_finalize(stmt);
1438 goto Out;
1439 }
1440
1441 idx = sqlite3_bind_parameter_index(stmt, ":return_vals");
1442 rc = sqlite3_bind_text(stmt, idx, rec->return_vals.data,
1443 rec->return_vals.offset + 1, NULL);
1444 if (rc != SQLITE_OK) {
1445 sqlite3_finalize(stmt);
1446 goto Out;
1447 }
1448
1449 idx = sqlite3_bind_parameter_index(stmt, ":env");
1450 rc = sqlite3_bind_text(stmt, idx, rec->env.data, rec->env.offset + 1, NULL);
1451 if (rc != SQLITE_OK) {
1452 sqlite3_finalize(stmt);
1453 goto Out;
1454 }
1455
1456 idx = sqlite3_bind_parameter_index(stmt, ":files");
1457 rc = sqlite3_bind_text(stmt, idx, rec->files.data,
1458 rec->files.offset + 1, NULL);
1459 if (rc != SQLITE_OK) {
1460 sqlite3_finalize(stmt);
1461 goto Out;
1462 }
1463
1464 idx = sqlite3_bind_parameter_index(stmt, ":exit_status");
1465 rc = sqlite3_bind_text(stmt, idx, rec->exit_status.data,
1466 rec->exit_status.offset + 1, NULL);
1467 if (rc != SQLITE_OK) {
1468 sqlite3_finalize(stmt);
1469 goto Out;
1470 }
1471
1472 idx = sqlite3_bind_parameter_index(stmt, ":diagnostics");
1473 rc = sqlite3_bind_text(stmt, idx, rec->diagnostics.data,
1474 rec->diagnostics.offset + 1, NULL);
1475 if (rc != SQLITE_OK) {
1476 sqlite3_finalize(stmt);
1477 goto Out;
1478 }
1479
1480 idx = sqlite3_bind_parameter_index(stmt, ":errors");
1481 rc = sqlite3_bind_text(stmt, idx, rec->errors.data,
1482 rec->errors.offset + 1, NULL);
1483 if (rc != SQLITE_OK) {
1484 sqlite3_finalize(stmt);
1485 goto Out;
1486 }
1487
1488 idx = sqlite3_bind_parameter_index(stmt, ":md5_hash");
1489 rc = sqlite3_bind_text(stmt, idx, rec->md5_hash, -1, NULL);
1490 if (rc != SQLITE_OK) {
1491 sqlite3_finalize(stmt);
1492 goto Out;
1493 }
1494
1495 idx = sqlite3_bind_parameter_index(stmt, ":machine");
1496 if (rec->machine)
1497 rc = sqlite3_bind_text(stmt, idx, rec->machine, -1, NULL);
1498 else
1499 rc = sqlite3_bind_null(stmt, idx);
1500 if (rc != SQLITE_OK) {
1501 sqlite3_finalize(stmt);
1502 goto Out;
1503 }
1504
1505 rc = sqlite3_step(stmt);
1506 if (rc != SQLITE_DONE) {
1507 sqlite3_finalize(stmt);
1508 goto Out;
1509 }
1510
1511 sqlite3_finalize(stmt);
1512
1513 /* Get the row id of the last inserted row */
1514 mandb_rowid = sqlite3_last_insert_rowid(db);
1515
1516 /*------------------------Populate the mandb_meta table-----------------------*/
1517 sqlstr = "INSERT INTO mandb_meta VALUES (:device, :inode, :mtime,"
1518 " :file, :md5_hash, :id)";
1519 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
1520 if (rc != SQLITE_OK)
1521 goto Out;
1522
1523 idx = sqlite3_bind_parameter_index(stmt, ":device");
1524 rc = sqlite3_bind_int64(stmt, idx, rec->device);
1525 if (rc != SQLITE_OK) {
1526 sqlite3_finalize(stmt);
1527 goto Out;
1528 }
1529
1530 idx = sqlite3_bind_parameter_index(stmt, ":inode");
1531 rc = sqlite3_bind_int64(stmt, idx, rec->inode);
1532 if (rc != SQLITE_OK) {
1533 sqlite3_finalize(stmt);
1534 goto Out;
1535 }
1536
1537 idx = sqlite3_bind_parameter_index(stmt, ":mtime");
1538 rc = sqlite3_bind_int64(stmt, idx, rec->mtime);
1539 if (rc != SQLITE_OK) {
1540 sqlite3_finalize(stmt);
1541 goto Out;
1542 }
1543
1544 idx = sqlite3_bind_parameter_index(stmt, ":file");
1545 rc = sqlite3_bind_text(stmt, idx, rec->file_path, -1, NULL);
1546 if (rc != SQLITE_OK) {
1547 sqlite3_finalize(stmt);
1548 goto Out;
1549 }
1550
1551 idx = sqlite3_bind_parameter_index(stmt, ":md5_hash");
1552 rc = sqlite3_bind_text(stmt, idx, rec->md5_hash, -1, NULL);
1553 if (rc != SQLITE_OK) {
1554 sqlite3_finalize(stmt);
1555 goto Out;
1556 }
1557
1558 idx = sqlite3_bind_parameter_index(stmt, ":id");
1559 rc = sqlite3_bind_int64(stmt, idx, mandb_rowid);
1560 if (rc != SQLITE_OK) {
1561 sqlite3_finalize(stmt);
1562 goto Out;
1563 }
1564
1565 rc = sqlite3_step(stmt);
1566 sqlite3_finalize(stmt);
1567 if (rc == SQLITE_CONSTRAINT) {
1568 /* The *most* probable reason for reaching here is that
1569 * the UNIQUE contraint on the file column of the mandb_meta
1570 * table was violated.
1571 * This can happen when a file was updated/modified.
1572 * To fix this we need to do two things:
1573 * 1. Delete the row for the older version of this file
1574 * from mandb table.
1575 * 2. Run an UPDATE query to update the row for this file
1576 * in the mandb_meta table.
1577 */
1578 warnx("Trying to update index for %s", rec->file_path);
1579 char *sql = sqlite3_mprintf("DELETE FROM mandb "
1580 "WHERE rowid = (SELECT id"
1581 " FROM mandb_meta"
1582 " WHERE file = %Q)",
1583 rec->file_path);
1584 sqlite3_exec(db, sql, NULL, NULL, &errmsg);
1585 sqlite3_free(sql);
1586 if (errmsg != NULL) {
1587 warnx("%s", errmsg);
1588 free(errmsg);
1589 }
1590 sqlstr = "UPDATE mandb_meta SET device = :device,"
1591 " inode = :inode, mtime = :mtime, id = :id,"
1592 " md5_hash = :md5 WHERE file = :file";
1593 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
1594 if (rc != SQLITE_OK) {
1595 warnx("Update failed with error: %s",
1596 sqlite3_errmsg(db));
1597 close_db(db);
1598 cleanup(rec);
1599 errx(EXIT_FAILURE,
1600 "Consider running makemandb with -f option");
1601 }
1602
1603 idx = sqlite3_bind_parameter_index(stmt, ":device");
1604 sqlite3_bind_int64(stmt, idx, rec->device);
1605 idx = sqlite3_bind_parameter_index(stmt, ":inode");
1606 sqlite3_bind_int64(stmt, idx, rec->inode);
1607 idx = sqlite3_bind_parameter_index(stmt, ":mtime");
1608 sqlite3_bind_int64(stmt, idx, rec->mtime);
1609 idx = sqlite3_bind_parameter_index(stmt, ":id");
1610 sqlite3_bind_int64(stmt, idx, mandb_rowid);
1611 idx = sqlite3_bind_parameter_index(stmt, ":md5");
1612 sqlite3_bind_text(stmt, idx, rec->md5_hash, -1, NULL);
1613 idx = sqlite3_bind_parameter_index(stmt, ":file");
1614 sqlite3_bind_text(stmt, idx, rec->file_path, -1, NULL);
1615 rc = sqlite3_step(stmt);
1616 sqlite3_finalize(stmt);
1617
1618 if (rc != SQLITE_DONE) {
1619 warnx("%s", sqlite3_errmsg(db));
1620 close_db(db);
1621 cleanup(rec);
1622 errx(EXIT_FAILURE,
1623 "Consider running makemandb with -f option");
1624 }
1625 } else if (rc != SQLITE_DONE) {
1626 /* Otherwise make this error fatal */
1627 warnx("Failed at %s\n%s", rec->file_path, sqlite3_errmsg(db));
1628 cleanup(rec);
1629 close_db(db);
1630 exit(EXIT_FAILURE);
1631 }
1632
1633 /*------------------------ Populate the mandb_links table---------------------*/
1634 char *str = NULL;
1635 char *links;
1636 if (rec->links && strlen(rec->links)) {
1637 links = rec->links;
1638 for(ln = strtok(links, " "); ln; ln = strtok(NULL, " ")) {
1639 if (ln[0] == ',')
1640 ln++;
1641 if(ln[strlen(ln) - 1] == ',')
1642 ln[strlen(ln) - 1] = 0;
1643
1644 str = sqlite3_mprintf("INSERT INTO mandb_links"
1645 " VALUES (%Q, %Q, %Q, %Q)",
1646 ln, rec->name, rec->section,
1647 rec->machine);
1648 sqlite3_exec(db, str, NULL, NULL, &errmsg);
1649 sqlite3_free(str);
1650 if (errmsg != NULL) {
1651 warnx("%s", errmsg);
1652 cleanup(rec);
1653 free(errmsg);
1654 return -1;
1655 }
1656 }
1657 }
1658
1659 cleanup(rec);
1660 return 0;
1661
1662 Out:
1663 warnx("%s", sqlite3_errmsg(db));
1664 cleanup(rec);
1665 return -1;
1666 }
1667
1668 /*
1669 * check_md5--
1670 * Generates the md5 hash of the file and checks if it already doesn't exist
1671 * in the table (passed as the 3rd parameter).
1672 * This function is being used to avoid hardlinks.
1673 * On successful completion it will also set the value of the fourth parameter
1674 * to the md5 hash of the file (computed previously). It is the responsibility
1675 * of the caller to free this buffer.
1676 * Return values:
1677 * -1: If an error occurs somewhere and sets the md5 return buffer to NULL.
1678 * 0: If the md5 hash does not exist in the table.
1679 * 1: If the hash exists in the database.
1680 */
1681 static int
1682 check_md5(const char *file, sqlite3 *db, const char *table, char **buf)
1683 {
1684 int rc = 0;
1685 int idx = -1;
1686 char *sqlstr = NULL;
1687 sqlite3_stmt *stmt = NULL;
1688
1689 assert(file != NULL);
1690 *buf = MD5File(file, NULL);
1691 if (*buf == NULL) {
1692 warn("md5 failed: %s", file);
1693 return -1;
1694 }
1695
1696 easprintf(&sqlstr, "SELECT * FROM %s WHERE md5_hash = :md5_hash",
1697 table);
1698 rc = sqlite3_prepare_v2(db, sqlstr, -1, &stmt, NULL);
1699 if (rc != SQLITE_OK) {
1700 free(sqlstr);
1701 free(*buf);
1702 *buf = NULL;
1703 return -1;
1704 }
1705
1706 idx = sqlite3_bind_parameter_index(stmt, ":md5_hash");
1707 rc = sqlite3_bind_text(stmt, idx, *buf, -1, NULL);
1708 if (rc != SQLITE_OK) {
1709 warnx("%s", sqlite3_errmsg(db));
1710 sqlite3_finalize(stmt);
1711 free(sqlstr);
1712 free(*buf);
1713 *buf = NULL;
1714 return -1;
1715 }
1716
1717 if (sqlite3_step(stmt) == SQLITE_ROW) {
1718 sqlite3_finalize(stmt);
1719 free(sqlstr);
1720 return 0;
1721 }
1722
1723 sqlite3_finalize(stmt);
1724 free(sqlstr);
1725 return 1;
1726 }
1727
1728 /* Optimize the index for faster search */
1729 static void
1730 optimize(sqlite3 *db)
1731 {
1732 const char *sqlstr;
1733 char *errmsg = NULL;
1734
1735 if (mflags.verbosity)
1736 printf("Optimizing the database index\n");
1737 sqlstr = "INSERT INTO mandb(mandb) VALUES (\'optimize\');"
1738 "VACUUM";
1739 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
1740 if (errmsg != NULL) {
1741 warnx("%s", errmsg);
1742 free(errmsg);
1743 return;
1744 }
1745 }
1746
1747 /*
1748 * cleanup --
1749 * cleans up the global buffers
1750 */
1751 static void
1752 cleanup(mandb_rec *rec)
1753 {
1754 rec->desc.offset = 0;
1755 rec->lib.offset = 0;
1756 rec->return_vals.offset = 0;
1757 rec->env.offset = 0;
1758 rec->exit_status.offset = 0;
1759 rec->diagnostics.offset = 0;
1760 rec->errors.offset = 0;
1761 rec->files.offset = 0;
1762
1763 free(rec->machine);
1764 rec->machine = NULL;
1765
1766 free(rec->links);
1767 rec->links = NULL;
1768
1769 free(rec->file_path);
1770 rec->file_path = NULL;
1771
1772 free(rec->name);
1773 rec->name = NULL;
1774
1775 free(rec->name_desc);
1776 rec->name_desc = NULL;
1777
1778 free(rec->md5_hash);
1779 rec->md5_hash = NULL;
1780 }
1781
1782 /*
1783 * init_secbuffs--
1784 * Sets the value of buflen for all the sec_buff field of rec. And then
1785 * allocate memory to each sec_buff member of rec.
1786 */
1787 static void
1788 init_secbuffs(mandb_rec *rec)
1789 {
1790 /*
1791 * Some sec_buff might need more memory, for example desc,
1792 * which stores the data of the DESCRIPTION section,
1793 * while some might need very small amount of memory.
1794 * Therefore explicitly setting the value of buflen field for
1795 * each sec_buff.
1796 */
1797 rec->desc.buflen = 10 * BUFLEN;
1798 rec->desc.data = emalloc(rec->desc.buflen);
1799 rec->desc.offset = 0;
1800
1801 rec->lib.buflen = BUFLEN / 2;
1802 rec->lib.data = emalloc(rec->lib.buflen);
1803 rec->lib.offset = 0;
1804
1805 rec->return_vals.buflen = BUFLEN;
1806 rec->return_vals.data = emalloc(rec->return_vals.buflen);
1807 rec->return_vals.offset = 0;
1808
1809 rec->exit_status.buflen = BUFLEN;
1810 rec->exit_status.data = emalloc(rec->exit_status.buflen);
1811 rec->exit_status.offset = 0;
1812
1813 rec->env.buflen = BUFLEN;
1814 rec->env.data = emalloc(rec->env.buflen);
1815 rec->env.offset = 0;
1816
1817 rec->files.buflen = BUFLEN;
1818 rec->files.data = emalloc(rec->files.buflen);
1819 rec->files.offset = 0;
1820
1821 rec->diagnostics.buflen = BUFLEN;
1822 rec->diagnostics.data = emalloc(rec->diagnostics.buflen);
1823 rec->diagnostics.offset = 0;
1824
1825 rec->errors.buflen = BUFLEN;
1826 rec->errors.data = emalloc(rec->errors.buflen);
1827 rec->errors.offset = 0;
1828 }
1829
1830 /*
1831 * free_secbuffs--
1832 * This function should be called at the end, when all the pages have been
1833 * parsed.
1834 * It frees the memory allocated to sec_buffs by init_secbuffs in the starting.
1835 */
1836 static void
1837 free_secbuffs(mandb_rec *rec)
1838 {
1839 free(rec->desc.data);
1840 free(rec->lib.data);
1841 free(rec->return_vals.data);
1842 free(rec->exit_status.data);
1843 free(rec->env.data);
1844 free(rec->files.data);
1845 free(rec->diagnostics.data);
1846 free(rec->errors.data);
1847 }
1848
1849 static void
1850 replace_hyph(char *str)
1851 {
1852 char *iter = str;
1853 while ((iter = strchr(iter, ASCII_HYPH)) != NULL)
1854 *iter = '-';
1855 }
1856
/*
 * parse_escape --
 *  Returns a newly allocated copy of str with the roff escape sequences
 *  removed: "\-" and "\ " are replaced by the character after the
 *  backslash, every other escape is skipped with the help of
 *  mandoc_escape(). ASCII_HYPH characters in the result are turned into
 *  '-' by replace_hyph().
 *  str must not be NULL. The caller has to free the returned string.
 */
static char *
parse_escape(const char *str)
{
	const char *backslash, *last_backslash;
	char *result, *iter;
	size_t len;

	assert(str);

	last_backslash = str;
	backslash = strchr(str, '\\');
	if (backslash == NULL) {
		result = estrdup(str);
		replace_hyph(result);
		return result;
	}

	/* Removing escapes never makes the string longer. */
	result = emalloc(strlen(str) + 1);
	iter = result;

	do {
		/* Copy the plain text in front of the backslash. */
		len = backslash - last_backslash;
		memcpy(iter, last_backslash, len);
		iter += len;
		if (backslash[1] == '\0') {
			/*
			 * A lone backslash ends the string: drop it rather
			 * than handing the terminator to mandoc_escape().
			 */
			last_backslash = backslash + 1;
			break;
		}
		if (backslash[1] == '-' || backslash[1] == ' ') {
			*iter++ = backslash[1];
			last_backslash = backslash + 2;
			backslash = strchr(last_backslash, '\\');
		} else {
			/* Let mandoc find the end of the escape sequence. */
			++backslash;
			mandoc_escape(&backslash, NULL, NULL);
			last_backslash = backslash;
			if (backslash == NULL)
				break;
			backslash = strchr(last_backslash, '\\');
		}
	} while (backslash != NULL);

	/* Copy the remaining text; always leave result terminated. */
	if (last_backslash != NULL)
		strcpy(iter, last_backslash);
	else
		*iter = '\0';

	replace_hyph(result);
	return result;
}
1900
1901 /*
1902 * append--
1903 * Concatenates a space and src at the end of sbuff->data (much like concat in
1904 * apropos-utils.c).
1905 * Rather than reallocating space for writing data, it uses the value of the
1906 * offset field of sec_buff to write new data at the free space left in the
1907 * buffer.
1908 * In case the size of the data to be appended exceeds the number of bytes left
1909 * in the buffer, it reallocates buflen number of bytes and then continues.
1910 * Value of offset field should be adjusted as new data is written.
1911 *
1912 * NOTE: This function does not write the null byte at the end of the buffers,
1913 * write a null byte at the position pointed to by offset before inserting data
1914 * in the db.
1915 */
1916 static void
1917 append(secbuff *sbuff, const char *src)
1918 {
1919 short flag = 0;
1920 size_t srclen, newlen;
1921 char *temp;
1922
1923 assert(src != NULL);
1924 temp = parse_escape(src);
1925 srclen = strlen(temp);
1926
1927 if (sbuff->data == NULL) {
1928 sbuff->data = emalloc(sbuff->buflen);
1929 sbuff->offset = 0;
1930 }
1931
1932 newlen = sbuff->offset + srclen + 2;
1933 if (newlen >= sbuff->buflen) {
1934 while (sbuff->buflen < newlen)
1935 sbuff->buflen += sbuff->buflen;
1936 sbuff->data = erealloc(sbuff->data, sbuff->buflen);
1937 flag = 1;
1938 }
1939
1940 /* Append a space at the end of the buffer. */
1941 if (sbuff->offset || flag)
1942 sbuff->data[sbuff->offset++] = ' ';
1943 /* Now, copy src at the end of the buffer. */
1944 memcpy(sbuff->data + sbuff->offset, temp, srclen);
1945 sbuff->offset += srclen;
1946 free(temp);
1947 }
1948
/*
 * usage --
 *  Prints the synopsis of the program on stderr and exits with status 1.
 */
static void
usage(void)
{
	const char *progname = getprogname();

	(void)fprintf(stderr, "Usage: %s [-flo]\n", progname);
	exit(1);
}
1955