Home | History | Annotate | Line # | Download | only in makemandb
      1  1.27       rin /*	$NetBSD: apropos.c,v 1.27 2023/08/03 07:49:23 rin Exp $	*/
      2   1.1     joerg /*-
      3   1.1     joerg  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay (at) gmail.com>
      4   1.1     joerg  * All rights reserved.
      5   1.1     joerg  *
      6   1.1     joerg  * This code was developed as part of Google's Summer of Code 2011 program.
      7   1.1     joerg  *
      8   1.1     joerg  * Redistribution and use in source and binary forms, with or without
      9   1.1     joerg  * modification, are permitted provided that the following conditions
     10   1.1     joerg  * are met:
     11   1.1     joerg  *
     12   1.1     joerg  * 1. Redistributions of source code must retain the above copyright
     13   1.1     joerg  *    notice, this list of conditions and the following disclaimer.
     14   1.1     joerg  * 2. Redistributions in binary form must reproduce the above copyright
     15   1.1     joerg  *    notice, this list of conditions and the following disclaimer in
     16   1.1     joerg  *    the documentation and/or other materials provided with the
     17   1.1     joerg  *    distribution.
     18   1.1     joerg  *
     19   1.1     joerg  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20   1.1     joerg  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21   1.1     joerg  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     22   1.1     joerg  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
     23   1.1     joerg  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     24   1.1     joerg  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
     25   1.1     joerg  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     26   1.1     joerg  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     27   1.1     joerg  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     28   1.1     joerg  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     29   1.1     joerg  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30   1.1     joerg  * SUCH DAMAGE.
     31   1.1     joerg  */
     32   1.1     joerg 
     33   1.1     joerg #include <sys/cdefs.h>
     34  1.27       rin __RCSID("$NetBSD: apropos.c,v 1.27 2023/08/03 07:49:23 rin Exp $");
     35   1.1     joerg 
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <util.h>
     43   1.1     joerg 
     44   1.1     joerg #include "apropos-utils.h"
     45   1.1     joerg 
     46   1.1     joerg typedef struct apropos_flags {
     47  1.22   abhinav 	char **sections;
     48   1.1     joerg 	int nresults;
     49   1.1     joerg 	int pager;
     50   1.1     joerg 	int no_context;
     51  1.16  christos 	query_format format;
     52  1.13  christos 	int legacy;
     53   1.1     joerg 	const char *machine;
     54  1.21   abhinav 	const char *manconf;
     55   1.1     joerg } apropos_flags;
     56   1.1     joerg 
     57   1.1     joerg typedef struct callback_data {
     58   1.1     joerg 	int count;
     59   1.1     joerg 	FILE *out;
     60   1.1     joerg 	apropos_flags *aflags;
     61   1.1     joerg } callback_data;
     62   1.1     joerg 
     63   1.1     joerg static char *remove_stopwords(const char *);
     64  1.24   abhinav static int query_callback(query_callback_args *);
     65   1.1     joerg __dead static void usage(void);
     66   1.1     joerg 
     67   1.1     joerg #define _PATH_PAGER	"/usr/bin/more -s"
     68  1.22   abhinav #define SECTIONS_ARGS_LENGTH 4;
     69   1.1     joerg 
     70  1.13  christos static void
     71  1.13  christos parseargs(int argc, char **argv, struct apropos_flags *aflags)
     72   1.1     joerg {
     73  1.13  christos 	int ch;
     74  1.22   abhinav 	size_t sections_offset = 0;
     75  1.22   abhinav 	size_t sections_size = 0;
     76  1.22   abhinav 	char **sections = NULL;
     77  1.22   abhinav 	char *section;
     78  1.21   abhinav 	aflags->manconf = MANCONF;
     79  1.20  christos 
     80  1.22   abhinav #define RESIZE_SECTIONS(newsize) \
     81  1.22   abhinav 	if (sections == NULL || sections_offset > sections_size - 1) { \
     82  1.22   abhinav 		sections_size += newsize; \
     83  1.22   abhinav 		sections = erealloc(sections, sections_size * sizeof(*sections)); \
     84  1.22   abhinav 	}
     85  1.22   abhinav 
     86  1.21   abhinav 	while ((ch = getopt(argc, argv, "123456789C:hilMmn:PprS:s:")) != -1) {
     87   1.1     joerg 		switch (ch) {
     88   1.1     joerg 		case '1':
     89   1.1     joerg 		case '2':
     90   1.1     joerg 		case '3':
     91   1.1     joerg 		case '4':
     92   1.1     joerg 		case '5':
     93   1.1     joerg 		case '6':
     94   1.1     joerg 		case '7':
     95   1.1     joerg 		case '8':
     96   1.1     joerg 		case '9':
     97  1.22   abhinav 			section = emalloc(2);
     98  1.22   abhinav 			section[0] = ch;
     99  1.22   abhinav 			section[1] = 0;
    100  1.22   abhinav 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
    101  1.22   abhinav 			sections[sections_offset++] = section;
    102   1.1     joerg 			break;
    103   1.1     joerg 		case 'C':
    104  1.21   abhinav 			aflags->manconf = optarg;
    105  1.13  christos 			break;
    106  1.16  christos 		case 'h':
    107  1.16  christos 			aflags->format = APROPOS_HTML;
    108  1.16  christos 			break;
    109  1.13  christos 		case 'i':
    110  1.16  christos 			aflags->format = APROPOS_TERM;
    111  1.13  christos 			break;
    112  1.13  christos 		case 'l':
    113  1.13  christos 			aflags->legacy = 1;
    114  1.13  christos 			aflags->no_context = 1;
    115  1.16  christos 			aflags->format = APROPOS_NONE;
    116   1.1     joerg 			break;
    117  1.21   abhinav 		case 'M':
    118  1.21   abhinav 			aflags->no_context = 1;
    119  1.21   abhinav 			break;
    120  1.21   abhinav 		case 'm':
    121  1.21   abhinav 			aflags->no_context = 0;
    122  1.21   abhinav 			break;
    123   1.1     joerg 		case 'n':
    124  1.13  christos 			aflags->nresults = atoi(optarg);
    125   1.1     joerg 			break;
    126  1.13  christos 		case 'p':	// user wants a pager
    127  1.13  christos 			aflags->pager = 1;
    128  1.16  christos 			/*FALLTHROUGH*/
    129  1.16  christos 		case 'P':
    130  1.16  christos 			aflags->format = APROPOS_PAGER;
    131   1.1     joerg 			break;
    132  1.12  christos 		case 'r':
    133  1.16  christos 			aflags->format = APROPOS_NONE;
    134  1.12  christos 			break;
    135   1.1     joerg 		case 'S':
    136  1.13  christos 			aflags->machine = optarg;
    137   1.1     joerg 			break;
    138   1.5     joerg 		case 's':
    139  1.22   abhinav 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
    140  1.22   abhinav 			sections[sections_offset++] = estrdup(optarg);
    141   1.5     joerg 			break;
    142   1.1     joerg 		case '?':
    143   1.1     joerg 		default:
    144   1.1     joerg 			usage();
    145   1.1     joerg 		}
    146   1.1     joerg 	}
    147  1.22   abhinav 	if (sections) {
    148  1.22   abhinav 		RESIZE_SECTIONS(1)
    149  1.22   abhinav 		sections[sections_offset] = NULL;
    150  1.22   abhinav 	}
    151  1.22   abhinav 	aflags->sections = sections;
    152  1.13  christos }
    153  1.13  christos 
    154  1.13  christos int
    155  1.13  christos main(int argc, char *argv[])
    156  1.13  christos {
    157  1.13  christos 	query_args args;
    158  1.13  christos 	char *query = NULL;	// the user query
    159  1.13  christos 	char *errmsg = NULL;
    160  1.13  christos 	char *str;
    161  1.25  gutterid 	int pc = 0;
    162  1.13  christos 	int rc = 0;
    163  1.22   abhinav 	size_t i;
    164  1.13  christos 	int s;
    165  1.13  christos 	callback_data cbdata;
    166  1.13  christos 	cbdata.out = stdout;		// the default output stream
    167  1.13  christos 	cbdata.count = 0;
    168  1.13  christos 	apropos_flags aflags;
    169  1.22   abhinav 	aflags.sections = NULL;
    170  1.13  christos 	cbdata.aflags = &aflags;
    171  1.13  christos 	sqlite3 *db;
    172  1.13  christos 	setprogname(argv[0]);
    173  1.13  christos 	if (argc < 2)
    174  1.13  christos 		usage();
    175  1.13  christos 
    176  1.13  christos 	memset(&aflags, 0, sizeof(aflags));
    177  1.13  christos 
    178  1.13  christos 	if (!isatty(STDOUT_FILENO))
    179  1.16  christos 		aflags.format = APROPOS_NONE;
    180  1.16  christos 	else
    181  1.16  christos 		aflags.format = APROPOS_TERM;
    182  1.13  christos 
    183  1.13  christos 	if ((str = getenv("APROPOS")) != NULL) {
    184  1.13  christos 		char **ptr = emalloc((strlen(str) + 2) * sizeof(*ptr));
    185  1.13  christos #define WS "\t\n\r "
    186  1.13  christos 		ptr[0] = __UNCONST(getprogname());
    187  1.13  christos 		for (s = 1, str = strtok(str, WS); str;
    188  1.13  christos 		    str = strtok(NULL, WS), s++)
    189  1.13  christos 			ptr[s] = str;
    190  1.13  christos 		ptr[s] = NULL;
    191  1.13  christos 		parseargs(s, ptr, &aflags);
    192  1.13  christos 		free(ptr);
    193  1.13  christos 		optreset = 1;
    194  1.13  christos 		optind = 1;
    195  1.13  christos 	}
    196  1.13  christos 
    197  1.13  christos 	parseargs(argc, argv, &aflags);
    198  1.13  christos 
    199   1.1     joerg 	argc -= optind;
    200   1.1     joerg 	argv += optind;
    201  1.11  christos 
    202   1.1     joerg 	if (!argc)
    203   1.1     joerg 		usage();
    204   1.1     joerg 
    205   1.1     joerg 	str = NULL;
    206   1.1     joerg 	while (argc--)
    207   1.1     joerg 		concat(&str, *argv++);
    208   1.1     joerg 	query = remove_stopwords(lower(str));
    209   1.1     joerg 
    210  1.17  christos 	/*
    211  1.17  christos 	 * If the query consisted only of stopwords and we removed all of
    212  1.17  christos 	 * them, use the original query.
    213  1.17  christos 	 */
    214   1.1     joerg 	if (query == NULL)
    215  1.17  christos 		query = str;
    216  1.17  christos 	else
    217  1.17  christos 		free(str);
    218   1.1     joerg 
    219  1.21   abhinav 	if ((db = init_db(MANDB_READONLY, aflags.manconf)) == NULL)
    220   1.1     joerg 		exit(EXIT_FAILURE);
    221   1.1     joerg 
    222   1.1     joerg 	/* If user wants to page the output, then set some settings */
    223   1.1     joerg 	if (aflags.pager) {
    224   1.1     joerg 		const char *pager = getenv("PAGER");
    225   1.1     joerg 		if (pager == NULL)
    226   1.1     joerg 			pager = _PATH_PAGER;
    227  1.26  gutterid 
    228  1.26  gutterid 		/* Don't get killed by a broken pipe */
    229  1.26  gutterid 		signal(SIGPIPE, SIG_IGN);
    230  1.26  gutterid 
    231   1.1     joerg 		/* Open a pipe to the pager */
    232   1.1     joerg 		if ((cbdata.out = popen(pager, "w")) == NULL) {
    233   1.1     joerg 			close_db(db);
    234   1.1     joerg 			err(EXIT_FAILURE, "pipe failed");
    235   1.1     joerg 		}
    236   1.1     joerg 	}
    237   1.1     joerg 
    238   1.1     joerg 	args.search_str = query;
    239  1.22   abhinav 	args.sections = aflags.sections;
    240  1.13  christos 	args.legacy = aflags.legacy;
    241  1.14  christos 	args.nrec = aflags.nresults ? aflags.nresults : -1;
    242   1.1     joerg 	args.offset = 0;
    243   1.1     joerg 	args.machine = aflags.machine;
    244   1.1     joerg 	args.callback = &query_callback;
    245   1.1     joerg 	args.callback_data = &cbdata;
    246   1.1     joerg 	args.errmsg = &errmsg;
    247   1.1     joerg 
    248  1.16  christos 	if (aflags.format == APROPOS_HTML) {
    249  1.16  christos 		fprintf(cbdata.out, "<html>\n<header>\n<title>apropos results "
    250  1.16  christos 		    "for %s</title></header>\n<body>\n<table cellpadding=\"4\""
    251  1.16  christos 		    "style=\"border: 1px solid #000000; border-collapse:"
    252  1.16  christos 		    "collapse;\" border=\"1\">\n", query);
    253  1.16  christos 	}
    254  1.16  christos 	rc = run_query(db, aflags.format, &args);
    255  1.16  christos 	if (aflags.format == APROPOS_HTML)
    256  1.16  christos 		fprintf(cbdata.out, "</table>\n</body>\n</html>\n");
    257  1.11  christos 
    258  1.25  gutterid 	if (aflags.pager)
    259  1.25  gutterid 		pc = pclose(cbdata.out);
    260   1.1     joerg 	free(query);
    261  1.22   abhinav 
    262  1.22   abhinav 	if (aflags.sections) {
    263  1.22   abhinav 		for(i = 0; aflags.sections[i]; i++)
    264  1.22   abhinav 			free(aflags.sections[i]);
    265  1.22   abhinav 		free(aflags.sections);
    266  1.22   abhinav 	}
    267  1.22   abhinav 
    268   1.1     joerg 	close_db(db);
    269   1.1     joerg 	if (errmsg) {
    270   1.1     joerg 		warnx("%s", errmsg);
    271   1.1     joerg 		free(errmsg);
    272   1.1     joerg 		exit(EXIT_FAILURE);
    273   1.1     joerg 	}
    274   1.1     joerg 
    275  1.25  gutterid 	if (pc == -1)
    276  1.25  gutterid 		err(EXIT_FAILURE, "pclose error");
    277  1.25  gutterid 
    278  1.27       rin 	/*
    279  1.26  gutterid 	 * Something wrong with the database, writing output, or a non-existent
    280  1.26  gutterid 	 * pager.
    281  1.26  gutterid 	 */
    282  1.26  gutterid 	if (rc < 0)
    283   1.1     joerg 		exit(EXIT_FAILURE);
    284  1.11  christos 
    285   1.1     joerg 	if (cbdata.count == 0) {
    286   1.1     joerg 		warnx("No relevant results obtained.\n"
    287  1.13  christos 		    "Please make sure that you spelled all the terms correctly "
    288  1.23  jmcneill 		    "or try using different keywords.");
    289   1.1     joerg 	}
    290   1.1     joerg 	return 0;
    291   1.1     joerg }
    292   1.1     joerg 
    293   1.1     joerg /*
    294   1.1     joerg  * query_callback --
    295   1.1     joerg  *  Callback function for run_query.
    296  1.26  gutterid  *  It simply outputs the results from run_query. If the user specified the -p
    297   1.1     joerg  *  option, then the output is sent to a pager, otherwise stdout is the default
    298   1.1     joerg  *  output stream.
    299   1.1     joerg  */
    300   1.1     joerg static int
    301  1.24   abhinav query_callback(query_callback_args *qargs)
    302   1.1     joerg {
    303  1.24   abhinav 	callback_data *cbdata = (callback_data *) qargs->other_data;
    304   1.1     joerg 	FILE *out = cbdata->out;
    305   1.1     joerg 	cbdata->count++;
    306  1.16  christos 	if (cbdata->aflags->format != APROPOS_HTML) {
    307  1.16  christos 	    fprintf(out, cbdata->aflags->legacy ? "%s(%s) - %s\n" :
    308  1.24   abhinav 		"%s (%s)\t%s\n", qargs->name, qargs->section, qargs->name_desc);
    309  1.16  christos 	    if (cbdata->aflags->no_context == 0)
    310  1.24   abhinav 		    fprintf(out, "%s\n\n", qargs->snippet);
    311  1.16  christos 	} else {
    312  1.24   abhinav 	    fprintf(out, "<tr><td>%s(%s)</td><td>%s</td></tr>\n", qargs->name,
    313  1.24   abhinav 		qargs->section, qargs->name_desc);
    314  1.16  christos 	    if (cbdata->aflags->no_context == 0)
    315  1.24   abhinav 		    fprintf(out, "<tr><td colspan=2>%s</td></tr>\n", qargs->snippet);
    316  1.16  christos 	}
    317   1.1     joerg 
    318  1.26  gutterid 	return fflush(out);
    319   1.1     joerg }
    320   1.1     joerg 
    321   1.1     joerg #include "stopwords.c"
    322   1.1     joerg 
    323   1.1     joerg /*
    324   1.1     joerg  * remove_stopwords--
    325   1.1     joerg  *  Scans the query and removes any stop words from it.
    326   1.1     joerg  *  Returns the modified query or NULL, if it contained only stop words.
    327   1.1     joerg  */
    328   1.1     joerg 
    329   1.1     joerg static char *
    330   1.1     joerg remove_stopwords(const char *query)
    331   1.1     joerg {
    332   1.1     joerg 	size_t len, idx;
    333   1.1     joerg 	char *output, *buf;
    334   1.1     joerg 	const char *sep, *next;
    335   1.1     joerg 
    336   1.1     joerg 	output = buf = emalloc(strlen(query) + 1);
    337   1.1     joerg 
    338   1.1     joerg 	for (; query[0] != '\0'; query = next) {
    339   1.1     joerg 		sep = strchr(query, ' ');
    340   1.1     joerg 		if (sep == NULL) {
    341   1.1     joerg 			len = strlen(query);
    342   1.1     joerg 			next = query + len;
    343   1.1     joerg 		} else {
    344   1.1     joerg 			len = sep - query;
    345   1.1     joerg 			next = sep + 1;
    346   1.1     joerg 		}
    347   1.1     joerg 		if (len == 0)
    348   1.1     joerg 			continue;
    349   1.1     joerg 		idx = stopwords_hash(query, len);
    350   1.1     joerg 		if (memcmp(stopwords[idx], query, len) == 0 &&
    351   1.1     joerg 		    stopwords[idx][len] == '\0')
    352   1.1     joerg 			continue;
    353   1.1     joerg 		memcpy(buf, query, len);
    354   1.1     joerg 		buf += len;
    355   1.1     joerg 		*buf++ = ' ';
    356   1.1     joerg 	}
    357   1.1     joerg 
    358   1.1     joerg 	if (output == buf) {
    359   1.1     joerg 		free(output);
    360   1.1     joerg 		return NULL;
    361   1.1     joerg 	}
    362   1.1     joerg 	buf[-1] = '\0';
    363   1.1     joerg 	return output;
    364   1.1     joerg }
    365   1.1     joerg 
/*
 * usage --
 *	Print the synopsis to stderr and exit with a failure status.
 *	The option letters listed match the set that parseargs() hands
 *	to getopt(), including -h and -P.
 */
static void
usage(void)
{
	fprintf(stderr, "Usage: %s [-123456789hilMmPpr] [-C path] [-n results] "
	    "[-S machine] [-s section] query\n",
	    getprogname());
	exit(EXIT_FAILURE);
}
    378