Home | History | Annotate | Line # | Download | only in makemandb
      1 /*	$NetBSD: apropos.c,v 1.27 2023/08/03 07:49:23 rin Exp $	*/
      2 /*-
      3  * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay (at) gmail.com>
      4  * All rights reserved.
      5  *
      6  * This code was developed as part of Google's Summer of Code 2011 program.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  *
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in
     16  *    the documentation and/or other materials provided with the
     17  *    distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
     23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
     25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
     29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  * SUCH DAMAGE.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __RCSID("$NetBSD: apropos.c,v 1.27 2023/08/03 07:49:23 rin Exp $");
     35 
     36 #include <err.h>
     37 #include <signal.h>
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include <string.h>
     41 #include <unistd.h>
     42 #include <util.h>
     43 
     44 #include "apropos-utils.h"
     45 
/*
 * Options collected by parseargs() from the APROPOS environment variable
 * and the command line, and consumed by main() and query_callback().
 */
typedef struct apropos_flags {
	/*
	 * NULL-terminated array of heap-allocated section names gathered
	 * from -1 .. -9 and -s; NULL when no section was requested.
	 */
	char **sections;
	/* Value of -n; when 0, main() passes -1 as the record limit. */
	int nresults;
	/* Non-zero when -p was given: output is piped through a pager. */
	int pager;
	/*
	 * Set by -M and -l, cleared by -m; when non-zero query_callback()
	 * does not print the snippet for a result.
	 */
	int no_context;
	/* Output format handed to run_query(). */
	query_format format;
	/* Set by -l; selects the "name(section) - description" line layout. */
	int legacy;
	/* Argument of -S, forwarded to the query as the machine filter. */
	const char *machine;
	/* Argument of -C, or MANCONF by default; passed to init_db(). */
	const char *manconf;
} apropos_flags;
     56 
/*
 * State shared between main() and query_callback().
 */
typedef struct callback_data {
	/* Number of results seen; incremented on every callback invocation. */
	int count;
	/* Stream the results are written to: stdout or the pager pipe. */
	FILE *out;
	/* The parsed command line flags. */
	apropos_flags *aflags;
} callback_data;
     62 
/* Defined after main(); declared here so that main() can use them. */
static char *remove_stopwords(const char *);
static int query_callback(query_callback_args *);
__dead static void usage(void);

/* Pager command main() falls back to when PAGER is not set. */
#define _PATH_PAGER	"/usr/bin/more -s"
     68 #define SECTIONS_ARGS_LENGTH 4;
     69 
     70 static void
     71 parseargs(int argc, char **argv, struct apropos_flags *aflags)
     72 {
     73 	int ch;
     74 	size_t sections_offset = 0;
     75 	size_t sections_size = 0;
     76 	char **sections = NULL;
     77 	char *section;
     78 	aflags->manconf = MANCONF;
     79 
     80 #define RESIZE_SECTIONS(newsize) \
     81 	if (sections == NULL || sections_offset > sections_size - 1) { \
     82 		sections_size += newsize; \
     83 		sections = erealloc(sections, sections_size * sizeof(*sections)); \
     84 	}
     85 
     86 	while ((ch = getopt(argc, argv, "123456789C:hilMmn:PprS:s:")) != -1) {
     87 		switch (ch) {
     88 		case '1':
     89 		case '2':
     90 		case '3':
     91 		case '4':
     92 		case '5':
     93 		case '6':
     94 		case '7':
     95 		case '8':
     96 		case '9':
     97 			section = emalloc(2);
     98 			section[0] = ch;
     99 			section[1] = 0;
    100 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
    101 			sections[sections_offset++] = section;
    102 			break;
    103 		case 'C':
    104 			aflags->manconf = optarg;
    105 			break;
    106 		case 'h':
    107 			aflags->format = APROPOS_HTML;
    108 			break;
    109 		case 'i':
    110 			aflags->format = APROPOS_TERM;
    111 			break;
    112 		case 'l':
    113 			aflags->legacy = 1;
    114 			aflags->no_context = 1;
    115 			aflags->format = APROPOS_NONE;
    116 			break;
    117 		case 'M':
    118 			aflags->no_context = 1;
    119 			break;
    120 		case 'm':
    121 			aflags->no_context = 0;
    122 			break;
    123 		case 'n':
    124 			aflags->nresults = atoi(optarg);
    125 			break;
    126 		case 'p':	// user wants a pager
    127 			aflags->pager = 1;
    128 			/*FALLTHROUGH*/
    129 		case 'P':
    130 			aflags->format = APROPOS_PAGER;
    131 			break;
    132 		case 'r':
    133 			aflags->format = APROPOS_NONE;
    134 			break;
    135 		case 'S':
    136 			aflags->machine = optarg;
    137 			break;
    138 		case 's':
    139 			RESIZE_SECTIONS(SECTIONS_ARGS_LENGTH)
    140 			sections[sections_offset++] = estrdup(optarg);
    141 			break;
    142 		case '?':
    143 		default:
    144 			usage();
    145 		}
    146 	}
    147 	if (sections) {
    148 		RESIZE_SECTIONS(1)
    149 		sections[sections_offset] = NULL;
    150 	}
    151 	aflags->sections = sections;
    152 }
    153 
    154 int
    155 main(int argc, char *argv[])
    156 {
    157 	query_args args;
    158 	char *query = NULL;	// the user query
    159 	char *errmsg = NULL;
    160 	char *str;
    161 	int pc = 0;
    162 	int rc = 0;
    163 	size_t i;
    164 	int s;
    165 	callback_data cbdata;
    166 	cbdata.out = stdout;		// the default output stream
    167 	cbdata.count = 0;
    168 	apropos_flags aflags;
    169 	aflags.sections = NULL;
    170 	cbdata.aflags = &aflags;
    171 	sqlite3 *db;
    172 	setprogname(argv[0]);
    173 	if (argc < 2)
    174 		usage();
    175 
    176 	memset(&aflags, 0, sizeof(aflags));
    177 
    178 	if (!isatty(STDOUT_FILENO))
    179 		aflags.format = APROPOS_NONE;
    180 	else
    181 		aflags.format = APROPOS_TERM;
    182 
    183 	if ((str = getenv("APROPOS")) != NULL) {
    184 		char **ptr = emalloc((strlen(str) + 2) * sizeof(*ptr));
    185 #define WS "\t\n\r "
    186 		ptr[0] = __UNCONST(getprogname());
    187 		for (s = 1, str = strtok(str, WS); str;
    188 		    str = strtok(NULL, WS), s++)
    189 			ptr[s] = str;
    190 		ptr[s] = NULL;
    191 		parseargs(s, ptr, &aflags);
    192 		free(ptr);
    193 		optreset = 1;
    194 		optind = 1;
    195 	}
    196 
    197 	parseargs(argc, argv, &aflags);
    198 
    199 	argc -= optind;
    200 	argv += optind;
    201 
    202 	if (!argc)
    203 		usage();
    204 
    205 	str = NULL;
    206 	while (argc--)
    207 		concat(&str, *argv++);
    208 	query = remove_stopwords(lower(str));
    209 
    210 	/*
    211 	 * If the query consisted only of stopwords and we removed all of
    212 	 * them, use the original query.
    213 	 */
    214 	if (query == NULL)
    215 		query = str;
    216 	else
    217 		free(str);
    218 
    219 	if ((db = init_db(MANDB_READONLY, aflags.manconf)) == NULL)
    220 		exit(EXIT_FAILURE);
    221 
    222 	/* If user wants to page the output, then set some settings */
    223 	if (aflags.pager) {
    224 		const char *pager = getenv("PAGER");
    225 		if (pager == NULL)
    226 			pager = _PATH_PAGER;
    227 
    228 		/* Don't get killed by a broken pipe */
    229 		signal(SIGPIPE, SIG_IGN);
    230 
    231 		/* Open a pipe to the pager */
    232 		if ((cbdata.out = popen(pager, "w")) == NULL) {
    233 			close_db(db);
    234 			err(EXIT_FAILURE, "pipe failed");
    235 		}
    236 	}
    237 
    238 	args.search_str = query;
    239 	args.sections = aflags.sections;
    240 	args.legacy = aflags.legacy;
    241 	args.nrec = aflags.nresults ? aflags.nresults : -1;
    242 	args.offset = 0;
    243 	args.machine = aflags.machine;
    244 	args.callback = &query_callback;
    245 	args.callback_data = &cbdata;
    246 	args.errmsg = &errmsg;
    247 
    248 	if (aflags.format == APROPOS_HTML) {
    249 		fprintf(cbdata.out, "<html>\n<header>\n<title>apropos results "
    250 		    "for %s</title></header>\n<body>\n<table cellpadding=\"4\""
    251 		    "style=\"border: 1px solid #000000; border-collapse:"
    252 		    "collapse;\" border=\"1\">\n", query);
    253 	}
    254 	rc = run_query(db, aflags.format, &args);
    255 	if (aflags.format == APROPOS_HTML)
    256 		fprintf(cbdata.out, "</table>\n</body>\n</html>\n");
    257 
    258 	if (aflags.pager)
    259 		pc = pclose(cbdata.out);
    260 	free(query);
    261 
    262 	if (aflags.sections) {
    263 		for(i = 0; aflags.sections[i]; i++)
    264 			free(aflags.sections[i]);
    265 		free(aflags.sections);
    266 	}
    267 
    268 	close_db(db);
    269 	if (errmsg) {
    270 		warnx("%s", errmsg);
    271 		free(errmsg);
    272 		exit(EXIT_FAILURE);
    273 	}
    274 
    275 	if (pc == -1)
    276 		err(EXIT_FAILURE, "pclose error");
    277 
    278 	/*
    279 	 * Something wrong with the database, writing output, or a non-existent
    280 	 * pager.
    281 	 */
    282 	if (rc < 0)
    283 		exit(EXIT_FAILURE);
    284 
    285 	if (cbdata.count == 0) {
    286 		warnx("No relevant results obtained.\n"
    287 		    "Please make sure that you spelled all the terms correctly "
    288 		    "or try using different keywords.");
    289 	}
    290 	return 0;
    291 }
    292 
    293 /*
    294  * query_callback --
    295  *  Callback function for run_query.
    296  *  It simply outputs the results from run_query. If the user specified the -p
    297  *  option, then the output is sent to a pager, otherwise stdout is the default
    298  *  output stream.
    299  */
    300 static int
    301 query_callback(query_callback_args *qargs)
    302 {
    303 	callback_data *cbdata = (callback_data *) qargs->other_data;
    304 	FILE *out = cbdata->out;
    305 	cbdata->count++;
    306 	if (cbdata->aflags->format != APROPOS_HTML) {
    307 	    fprintf(out, cbdata->aflags->legacy ? "%s(%s) - %s\n" :
    308 		"%s (%s)\t%s\n", qargs->name, qargs->section, qargs->name_desc);
    309 	    if (cbdata->aflags->no_context == 0)
    310 		    fprintf(out, "%s\n\n", qargs->snippet);
    311 	} else {
    312 	    fprintf(out, "<tr><td>%s(%s)</td><td>%s</td></tr>\n", qargs->name,
    313 		qargs->section, qargs->name_desc);
    314 	    if (cbdata->aflags->no_context == 0)
    315 		    fprintf(out, "<tr><td colspan=2>%s</td></tr>\n", qargs->snippet);
    316 	}
    317 
    318 	return fflush(out);
    319 }
    320 
    321 #include "stopwords.c"
    322 
    323 /*
    324  * remove_stopwords--
    325  *  Scans the query and removes any stop words from it.
    326  *  Returns the modified query or NULL, if it contained only stop words.
    327  */
    328 
    329 static char *
    330 remove_stopwords(const char *query)
    331 {
    332 	size_t len, idx;
    333 	char *output, *buf;
    334 	const char *sep, *next;
    335 
    336 	output = buf = emalloc(strlen(query) + 1);
    337 
    338 	for (; query[0] != '\0'; query = next) {
    339 		sep = strchr(query, ' ');
    340 		if (sep == NULL) {
    341 			len = strlen(query);
    342 			next = query + len;
    343 		} else {
    344 			len = sep - query;
    345 			next = sep + 1;
    346 		}
    347 		if (len == 0)
    348 			continue;
    349 		idx = stopwords_hash(query, len);
    350 		if (memcmp(stopwords[idx], query, len) == 0 &&
    351 		    stopwords[idx][len] == '\0')
    352 			continue;
    353 		memcpy(buf, query, len);
    354 		buf += len;
    355 		*buf++ = ' ';
    356 	}
    357 
    358 	if (output == buf) {
    359 		free(output);
    360 		return NULL;
    361 	}
    362 	buf[-1] = '\0';
    363 	return output;
    364 }
    365 
/*
 * usage --
 *	Print the usage message to stderr and exit with status 1.
 *	The option summary lists every flag accepted by the getopt(3)
 *	string in parseargs(), including -h and -P.
 */
static void
usage(void)
{
	fprintf(stderr, "Usage: %s [-123456789hilMmPpr] [-C path] [-n results] "
	    "[-S machine] [-s section] query\n",
	    getprogname());
	exit(1);
}
    378