apropos-utils.c revision 1.35 1 /* $NetBSD: apropos-utils.c,v 1.35 2017/04/30 15:27:24 abhinav Exp $ */
2 /*-
3 * Copyright (c) 2011 Abhinav Upadhyay <er.abhinav.upadhyay (at) gmail.com>
4 * All rights reserved.
5 *
6 * This code was developed as part of Google's Summer of Code 2011 program.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #include <sys/cdefs.h>
34 __RCSID("$NetBSD: apropos-utils.c,v 1.35 2017/04/30 15:27:24 abhinav Exp $");
35
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38
39 #include <assert.h>
40 #include <ctype.h>
41 #include <err.h>
42 #include <math.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <util.h>
47 #include <zlib.h>
48 #include <term.h>
49 #include <unistd.h>
50 #undef tab // XXX: manconf.h
51
52 #include "apropos-utils.h"
53 #include "manconf.h"
54
/*
 * orig_callback_data --
 *  Remembers the callback (and its opaque argument) that was installed in
 *  the query arguments before one of the format-specific wrappers in this
 *  file replaced it, so the wrapper can forward results to it.
 */
typedef struct orig_callback_data {
	/* Opaque pointer handed to `callback' as its first argument. */
	void *data;
	/*
	 * Callback to forward to.  The wrappers in this file call it as
	 * (data, section, name, name_desc, snippet, snippet_length).
	 */
	int (*callback)(void *, const char *, const char *, const char *,
	    const char *, size_t);
} orig_callback_data;
60
/*
 * inverse_document_frequency --
 *  State shared with rank_func() through the SQLite user-data pointer.
 */
typedef struct inverse_document_frequency {
	/* Weighted idf sum accumulated by rank_func(). */
	double value;
	/* 0 until rank_func() has accumulated `value', 1 afterwards. */
	int status;
} inverse_document_frequency;
65
/*
 * Per-column ranking weights.  rank_func() reads entry (icol - 1) for FTS
 * column icol, i.e. the first table column has no entry here.
 */
static const double col_weights[] = {
	2.0,	/* NAME */
	2.00,	/* Name-description */
	0.55,	/* DESCRIPTION */
	0.10,	/* LIBRARY */
	0.001,	/* RETURN VALUES */
	0.20,	/* ENVIRONMENT */
	0.01,	/* FILES */
	0.001,	/* EXIT STATUS */
	2.00,	/* DIAGNOSTICS */
	0.05,	/* ERRORS */
	0.00,	/* md5_hash */
	1.00	/* machine */
};
81
/*
 * lower --
 *  Lower-cases the NUL-terminated string str in place (via tolower(3)) and
 *  returns str.  str must not be NULL.
 */
char *
lower(char *str)
{
	char *p;

	assert(str);
	for (p = str; *p != '\0'; p++)
		*p = tolower((unsigned char)*p);
	return str;
}
98
/*
 * concat --
 *  Appends a space followed by the NUL-terminated string src to *dst.
 *  Thin wrapper that measures src and hands off to concat2().
 */
void
concat(char **dst, const char *src)
{
	size_t srclen = strlen(src);

	concat2(dst, src, srclen);
}
109
/*
 * concat2 --
 *  Appends a single space and the first srclen bytes of src to the string
 *  *dst, growing it with erealloc().  When *dst is NULL the result is just
 *  a copy of src (no leading space).  *dst is updated to the new buffer.
 */
void
concat2(char **dst, const char *src, size_t srclen)
{
	char *buf = *dst;
	size_t oldlen;

	assert(src != NULL);

	/* Nothing to append to yet: the result is simply a copy of src. */
	if (buf == NULL) {
		*dst = estrndup(src, srclen);
		return;
	}

	oldlen = strlen(buf);

	/* Old text + separator space + new text + terminating NUL. */
	buf = erealloc(buf, oldlen + srclen + 2);
	buf[oldlen] = ' ';
	memcpy(buf + oldlen + 1, src, srclen);
	buf[oldlen + 1 + srclen] = '\0';

	*dst = buf;
}
143
144 void
145 close_db(sqlite3 *db)
146 {
147 sqlite3_close(db);
148 sqlite3_shutdown();
149 }
150
151 /*
152 * create_db --
153 * Creates the database schema.
154 */
155 static int
156 create_db(sqlite3 *db)
157 {
158 const char *sqlstr = NULL;
159 char *schemasql;
160 char *errmsg = NULL;
161
162 /*------------------------ Create the tables------------------------------*/
163
164 #if NOTYET
165 sqlite3_exec(db, "PRAGMA journal_mode = WAL", NULL, NULL, NULL);
166 #else
167 sqlite3_exec(db, "PRAGMA journal_mode = DELETE", NULL, NULL, NULL);
168 #endif
169
170 schemasql = sqlite3_mprintf("PRAGMA user_version = %d",
171 APROPOS_SCHEMA_VERSION);
172 sqlite3_exec(db, schemasql, NULL, NULL, &errmsg);
173 if (errmsg != NULL)
174 goto out;
175 sqlite3_free(schemasql);
176
177 sqlstr =
178 //mandb
179 "CREATE VIRTUAL TABLE mandb USING fts4(section, name, "
180 "name_desc, desc, lib, return_vals, env, files, "
181 "exit_status, diagnostics, errors, md5_hash UNIQUE, machine, "
182 #ifndef DEBUG
183 "compress=zip, uncompress=unzip, "
184 #endif
185 "tokenize=porter, notindexed=section, notindexed=md5_hash); "
186 //mandb_meta
187 "CREATE TABLE IF NOT EXISTS mandb_meta(device, inode, mtime, "
188 "file UNIQUE, md5_hash UNIQUE, id INTEGER PRIMARY KEY); "
189 //mandb_links
190 "CREATE TABLE IF NOT EXISTS mandb_links(link COLLATE NOCASE, target, section, "
191 "machine, md5_hash); ";
192
193 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
194 if (errmsg != NULL)
195 goto out;
196
197 sqlstr =
198 "CREATE INDEX IF NOT EXISTS index_mandb_links ON mandb_links "
199 "(link); "
200 "CREATE INDEX IF NOT EXISTS index_mandb_meta_dev ON mandb_meta "
201 "(device, inode); "
202 "CREATE INDEX IF NOT EXISTS index_mandb_links_md5 ON mandb_links "
203 "(md5_hash);";
204 sqlite3_exec(db, sqlstr, NULL, NULL, &errmsg);
205 if (errmsg != NULL)
206 goto out;
207 return 0;
208
209 out:
210 warnx("%s", errmsg);
211 free(errmsg);
212 sqlite3_close(db);
213 sqlite3_shutdown();
214 return -1;
215 }
216
217 /*
218 * zip --
219 * User defined Sqlite function to compress the FTS table
220 */
221 static void
222 zip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
223 {
224 int nin;
225 long int nout;
226 const unsigned char * inbuf;
227 unsigned char *outbuf;
228
229 assert(nval == 1);
230 nin = sqlite3_value_bytes(apval[0]);
231 inbuf = (const unsigned char *) sqlite3_value_blob(apval[0]);
232 nout = nin + 13 + (nin + 999) / 1000;
233 outbuf = emalloc(nout);
234 compress(outbuf, (unsigned long *) &nout, inbuf, nin);
235 sqlite3_result_blob(pctx, outbuf, nout, free);
236 }
237
238 /*
239 * unzip --
240 * User defined Sqlite function to uncompress the FTS table.
241 */
242 static void
243 unzip(sqlite3_context *pctx, int nval, sqlite3_value **apval)
244 {
245 unsigned int rc;
246 unsigned char *outbuf;
247 z_stream stream;
248
249 assert(nval == 1);
250 stream.next_in = __UNCONST(sqlite3_value_blob(apval[0]));
251 stream.avail_in = sqlite3_value_bytes(apval[0]);
252 stream.avail_out = stream.avail_in * 2 + 100;
253 stream.next_out = outbuf = emalloc(stream.avail_out);
254 stream.zalloc = NULL;
255 stream.zfree = NULL;
256
257 if (inflateInit(&stream) != Z_OK) {
258 free(outbuf);
259 return;
260 }
261
262 while ((rc = inflate(&stream, Z_SYNC_FLUSH)) != Z_STREAM_END) {
263 if (rc != Z_OK ||
264 (stream.avail_out != 0 && stream.avail_in == 0)) {
265 free(outbuf);
266 return;
267 }
268 outbuf = erealloc(outbuf, stream.total_out * 2);
269 stream.next_out = outbuf + stream.total_out;
270 stream.avail_out = stream.total_out;
271 }
272 if (inflateEnd(&stream) != Z_OK) {
273 free(outbuf);
274 return;
275 }
276 outbuf = erealloc(outbuf, stream.total_out);
277 sqlite3_result_text(pctx, (const char *)outbuf, stream.total_out, free);
278 }
279
280 /*
281 * get_dbpath --
282 * Read the path of the database from man.conf and return.
283 */
284 char *
285 get_dbpath(const char *manconf)
286 {
287 TAG *tp;
288 char *dbpath;
289
290 config(manconf);
291 tp = gettag("_mandb", 1);
292 if (!tp)
293 return NULL;
294
295 if (TAILQ_EMPTY(&tp->entrylist))
296 return NULL;
297
298 dbpath = TAILQ_LAST(&tp->entrylist, tqh)->s;
299 return dbpath;
300 }
301
302 /* init_db --
303 * Prepare the database. Register the compress/uncompress functions and the
304 * stopword tokenizer.
305 * db_flag specifies the mode in which to open the database. 3 options are
306 * available:
307 * 1. DB_READONLY: Open in READONLY mode. An error if db does not exist.
308 * 2. DB_READWRITE: Open in read-write mode. An error if db does not exist.
309 * 3. DB_CREATE: Open in read-write mode. It will try to create the db if
310 * it does not exist already.
311 * RETURN VALUES:
312 * The function will return NULL in case the db does not exist
313 * and DB_CREATE
314 * was not specified. And in case DB_CREATE was specified and yet NULL is
315 * returned, then there was some other error.
316 * In normal cases the function should return a handle to the db.
317 */
318 sqlite3 *
319 init_db(mandb_access_mode db_flag, const char *manconf)
320 {
321 sqlite3 *db = NULL;
322 sqlite3_stmt *stmt;
323 struct stat sb;
324 int rc;
325 int create_db_flag = 0;
326
327 char *dbpath = get_dbpath(manconf);
328 if (dbpath == NULL)
329 errx(EXIT_FAILURE, "_mandb entry not found in man.conf");
330
331 if (!(stat(dbpath, &sb) == 0 && S_ISREG(sb.st_mode))) {
332 /* Database does not exist, check if DB_CREATE was specified,
333 * and set flag to create the database schema
334 */
335 if (db_flag != (MANDB_CREATE)) {
336 warnx("Missing apropos database. "
337 "Please run makemandb to create it.");
338 return NULL;
339 }
340 create_db_flag = 1;
341 } else {
342 /*
343 * Database exists. Check if we have the permissions
344 * to read/write the files
345 */
346 int access_mode = R_OK;
347 switch (db_flag) {
348 case MANDB_CREATE:
349 case MANDB_WRITE:
350 access_mode |= W_OK;
351 break;
352 default:
353 break;
354 }
355 if ((access(dbpath, access_mode)) != 0) {
356 warnx("Unable to access the database, please check"
357 " permissions for `%s'", dbpath);
358 return NULL;
359 }
360 }
361
362 sqlite3_initialize();
363 rc = sqlite3_open_v2(dbpath, &db, db_flag, NULL);
364
365 if (rc != SQLITE_OK) {
366 warnx("%s", sqlite3_errmsg(db));
367 goto error;
368 }
369
370 if (create_db_flag && create_db(db) < 0) {
371 warnx("%s", "Unable to create database schema");
372 goto error;
373 }
374
375 rc = sqlite3_prepare_v2(db, "PRAGMA user_version", -1, &stmt, NULL);
376 if (rc != SQLITE_OK) {
377 warnx("Unable to query schema version: %s",
378 sqlite3_errmsg(db));
379 goto error;
380 }
381 if (sqlite3_step(stmt) != SQLITE_ROW) {
382 sqlite3_finalize(stmt);
383 warnx("Unable to query schema version: %s",
384 sqlite3_errmsg(db));
385 goto error;
386 }
387 if (sqlite3_column_int(stmt, 0) != APROPOS_SCHEMA_VERSION) {
388 sqlite3_finalize(stmt);
389 warnx("Incorrect schema version found. "
390 "Please run makemandb -f.");
391 goto error;
392 }
393 sqlite3_finalize(stmt);
394
395 sqlite3_extended_result_codes(db, 1);
396
397 /* Register the zip and unzip functions for FTS compression */
398 rc = sqlite3_create_function(db, "zip", 1, SQLITE_ANY, NULL, zip,
399 NULL, NULL);
400 if (rc != SQLITE_OK) {
401 warnx("Unable to register function: compress: %s",
402 sqlite3_errmsg(db));
403 goto error;
404 }
405
406 rc = sqlite3_create_function(db, "unzip", 1, SQLITE_ANY, NULL,
407 unzip, NULL, NULL);
408 if (rc != SQLITE_OK) {
409 warnx("Unable to register function: uncompress: %s",
410 sqlite3_errmsg(db));
411 goto error;
412 }
413 return db;
414
415 error:
416 close_db(db);
417 return NULL;
418 }
419
420 /*
421 * rank_func --
422 * Sqlite user defined function for ranking the documents.
423 * For each phrase of the query, it computes the tf and idf and adds them over.
424 * It computes the final rank, by multiplying tf and idf together.
425 * Weight of term t for document d = (term frequency of t in d *
426 * inverse document frequency of t)
427 *
428 * Term Frequency of term t in document d = Number of times t occurs in d /
429 * Number of times t appears in all documents
430 *
431 * Inverse document frequency of t = log(Total number of documents /
432 * Number of documents in which t occurs)
433 */
434 static void
435 rank_func(sqlite3_context *pctx, int nval, sqlite3_value **apval)
436 {
437 inverse_document_frequency *idf = sqlite3_user_data(pctx);
438 double tf = 0.0;
439 const unsigned int *matchinfo;
440 int ncol;
441 int nphrase;
442 int iphrase;
443 int ndoc;
444 int doclen = 0;
445 const double k = 3.75;
446 /*
447 * Check that the number of arguments passed to this
448 * function is correct.
449 */
450 assert(nval == 1);
451
452 matchinfo = (const unsigned int *) sqlite3_value_blob(apval[0]);
453 nphrase = matchinfo[0];
454 ncol = matchinfo[1];
455 ndoc = matchinfo[2 + 3 * ncol * nphrase + ncol];
456 for (iphrase = 0; iphrase < nphrase; iphrase++) {
457 int icol;
458 const unsigned int *phraseinfo =
459 &matchinfo[2 + ncol + iphrase * ncol * 3];
460 for(icol = 1; icol < ncol; icol++) {
461
462 /* nhitcount: number of times the current phrase occurs
463 * in the current column in the current document.
464 * nglobalhitcount: number of times current phrase
465 * occurs in the current column in all documents.
466 * ndocshitcount: number of documents in which the
467 * current phrase occurs in the current column at
468 * least once.
469 */
470 int nhitcount = phraseinfo[3 * icol];
471 int nglobalhitcount = phraseinfo[3 * icol + 1];
472 int ndocshitcount = phraseinfo[3 * icol + 2];
473 doclen = matchinfo[2 + icol ];
474 double weight = col_weights[icol - 1];
475 if (idf->status == 0 && ndocshitcount)
476 idf->value +=
477 log(((double)ndoc / ndocshitcount))* weight;
478
479 /*
480 * Dividing the tf by document length to normalize
481 * the effect of longer documents.
482 */
483 if (nglobalhitcount > 0 && nhitcount)
484 tf += (((double)nhitcount * weight)
485 / (nglobalhitcount * doclen));
486 }
487 }
488 idf->status = 1;
489
490 /*
491 * Final score: Dividing by k + tf further normalizes the weight
492 * leading to better results. The value of k is experimental
493 */
494 double score = (tf * idf->value) / (k + tf);
495 sqlite3_result_double(pctx, score);
496 return;
497 }
498
499 /*
500 * generates sql query for matching the user entered query
501 */
502 static char *
503 generate_search_query(query_args *args, const char *snippet_args[3])
504 {
505 const char *default_snippet_args[3];
506 char *section_clause = NULL;
507 char *limit_clause = NULL;
508 char *machine_clause = NULL;
509 char *query = NULL;
510
511 if (args->machine) {
512 machine_clause = sqlite3_mprintf("AND mandb.machine=%Q", args->machine);
513 if (machine_clause == NULL)
514 goto RETURN;
515 }
516
517 if (args->nrec >= 0) {
518 /* Use the provided number of records and offset */
519 limit_clause = sqlite3_mprintf(" LIMIT %d OFFSET %d",
520 args->nrec, args->offset);
521 if (limit_clause == NULL)
522 goto RETURN;
523 }
524
525 /* We want to build a query of the form: "select x,y,z from mandb where
526 * mandb match :query [AND (section IN ('1', '2')]
527 * ORDER BY rank DESC [LIMIT 10 OFFSET 0]"
528 * NOTES:
529 * 1. The portion in first pair of square brackets is optional.
530 * It will be there only if the user has specified an option
531 * to search in one or more specific sections.
532 * 2. The LIMIT portion will be there if the user has specified
533 * a limit using the -n option.
534 */
535 char *sections_str = args->sec_nums;
536 char *temp;
537 if (sections_str) {
538 while (*sections_str) {
539 size_t len = strcspn(sections_str, " ");
540 char *sec = sections_str;
541 if (sections_str[len] == 0) {
542 sections_str += len;
543 } else {
544 sections_str[len] = 0;
545 sections_str += len + 1;
546 }
547 easprintf(&temp, "\'%s\',", sec);
548
549 if (section_clause) {
550 concat(§ion_clause, temp);
551 free(temp);
552 } else {
553 section_clause = temp;
554 }
555 }
556 if (section_clause) {
557 /*
558 * At least one section requested, add glue for query.
559 * Before doing that, remove the comma at the end of
560 * section_clause
561 */
562 size_t section_clause_len = strlen(section_clause);
563 if (section_clause[section_clause_len - 1] == ',')
564 section_clause[section_clause_len - 1] = 0;
565 temp = section_clause;
566 easprintf(§ion_clause, " AND mandb.section IN (%s)", temp);
567 free(temp);
568 }
569 }
570
571 if (snippet_args == NULL) {
572 default_snippet_args[0] = "";
573 default_snippet_args[1] = "";
574 default_snippet_args[2] = "...";
575 snippet_args = default_snippet_args;
576 }
577
578 if (args->legacy) {
579 char *wild;
580 easprintf(&wild, "%%%s%%", args->search_str);
581 query = sqlite3_mprintf("SELECT section, name, name_desc, machine"
582 " FROM mandb"
583 " WHERE name LIKE %Q OR name_desc LIKE %Q "
584 "%s"
585 "%s",
586 wild, wild,
587 section_clause ? section_clause : "",
588 limit_clause ? limit_clause : "");
589 free(wild);
590 } else if (strchr(args->search_str, ' ') == NULL) {
591 /*
592 * If it's a single word query, we want to search in the
593 * links table as well. If the link table contains an entry
594 * for the queried keyword, we want to use that as the name of
595 * the man page.
596 * For example, for `apropos realloc` the output should be
597 * realloc(3) and not malloc(3).
598 */
599 query = sqlite3_mprintf(
600 "SELECT section, name, name_desc, machine,"
601 " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
602 " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
603 " FROM mandb WHERE name NOT IN ("
604 " SELECT target FROM mandb_links WHERE link=%Q AND"
605 " mandb_links.section=mandb.section) AND mandb MATCH %Q %s %s"
606 " UNION"
607 " SELECT mandb.section, mandb_links.link AS name, mandb.name_desc,"
608 " mandb.machine, '' AS snippet, 100.00 AS rank"
609 " FROM mandb JOIN mandb_links ON mandb.name=mandb_links.target and"
610 " mandb.section=mandb_links.section WHERE mandb_links.link=%Q"
611 " %s %s"
612 " ORDER BY rank DESC %s",
613 snippet_args[0], snippet_args[1], snippet_args[2],
614 args->search_str, args->search_str, section_clause ? section_clause : "",
615 machine_clause ? machine_clause : "", args->search_str,
616 machine_clause ? machine_clause : "",
617 section_clause ? section_clause : "",
618 limit_clause ? limit_clause : "");
619 } else {
620 query = sqlite3_mprintf("SELECT section, name, name_desc, machine,"
621 " snippet(mandb, %Q, %Q, %Q, -1, 40 ),"
622 " rank_func(matchinfo(mandb, \"pclxn\")) AS rank"
623 " FROM mandb"
624 " WHERE mandb MATCH %Q %s "
625 "%s"
626 " ORDER BY rank DESC"
627 "%s",
628 snippet_args[0], snippet_args[1], snippet_args[2],
629 args->search_str, machine_clause ? machine_clause : "",
630 section_clause ? section_clause : "",
631 limit_clause ? limit_clause : "");
632 }
633
634 RETURN:
635 free(machine_clause);
636 free(section_clause);
637 free(limit_clause);
638 return query;
639 }
640
641 /*
642 * Execute the full text search query and return the number of results
643 * obtained.
644 */
645 static unsigned int
646 execute_search_query(sqlite3 *db, char *query, query_args *args)
647 {
648 sqlite3_stmt *stmt;
649 const char *section;
650 char *name;
651 char *slash_ptr;
652 const char *name_desc;
653 const char *machine;
654 const char *snippet = "";
655 const char *name_temp;
656 char *m = NULL;
657 int rc;
658 inverse_document_frequency idf = {0, 0};
659
660 if (!args->legacy) {
661 /* Register the rank function */
662 rc = sqlite3_create_function(db, "rank_func", 1, SQLITE_ANY,
663 (void *) &idf, rank_func, NULL, NULL);
664 if (rc != SQLITE_OK) {
665 warnx("Unable to register the ranking function: %s",
666 sqlite3_errmsg(db));
667 sqlite3_close(db);
668 sqlite3_shutdown();
669 exit(EXIT_FAILURE);
670 }
671 }
672
673 rc = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
674 if (rc == SQLITE_IOERR) {
675 warnx("Corrupt database. Please rerun makemandb");
676 return -1;
677 } else if (rc != SQLITE_OK) {
678 warnx("%s", sqlite3_errmsg(db));
679 return -1;
680 }
681
682 unsigned int nresults = 0;
683 while (sqlite3_step(stmt) == SQLITE_ROW) {
684 nresults++;
685 section = (const char *) sqlite3_column_text(stmt, 0);
686 name_temp = (const char *) sqlite3_column_text(stmt, 1);
687 name_desc = (const char *) sqlite3_column_text(stmt, 2);
688 machine = (const char *) sqlite3_column_text(stmt, 3);
689 if (!args->legacy)
690 snippet = (const char *) sqlite3_column_text(stmt, 4);
691 if ((slash_ptr = strrchr(name_temp, '/')) != NULL)
692 name_temp = slash_ptr + 1;
693 if (machine && machine[0]) {
694 m = estrdup(machine);
695 easprintf(&name, "%s/%s", lower(m), name_temp);
696 free(m);
697 } else {
698 name = estrdup((const char *)
699 sqlite3_column_text(stmt, 1));
700 }
701
702 (args->callback)(args->callback_data, section, name,
703 name_desc, snippet, args->legacy? 0: strlen(snippet));
704 free(name);
705 }
706 sqlite3_finalize(stmt);
707 return nresults;
708 }
709
710
711 /*
712 * run_query_internal --
713 * Performs the searches for the keywords entered by the user.
714 * The 2nd param: snippet_args is an array of strings providing values for the
715 * last three parameters to the snippet function of sqlite. (Look at the docs).
716 * The 3rd param: args contains rest of the search parameters. Look at
717 * arpopos-utils.h for the description of individual fields.
718 *
719 */
720 static int
721 run_query_internal(sqlite3 *db, const char *snippet_args[3], query_args *args)
722 {
723 char *query;
724 query = generate_search_query(args, snippet_args);
725 if (query == NULL) {
726 *args->errmsg = estrdup("malloc failed");
727 return -1;
728 }
729
730 execute_search_query(db, query, args);
731 sqlite3_free(query);
732 return *(args->errmsg) == NULL ? 0 : -1;
733 }
734
/*
 * get_escaped_html_string --
 *  Returns a newly allocated copy of src made safe for inclusion in HTML:
 *    <     becomes  &lt;
 *    >     becomes  &gt;
 *    "     becomes  &quot;
 *    &     becomes  &amp;  unless it directly follows a backslash (the
 *          mdoc escape sequence \&), in which case it is kept as is
 *    \002  becomes  <b>
 *    \003  becomes  </b>
 *  On return *slen holds the length of the result (excluding the NUL).
 *  The value of *slen on entry is not used; src is measured directly.
 *  The caller releases the result with free().
 */
static char *
get_escaped_html_string(const char *src, size_t *slen)
{
	static const char trouble[] = "<>\"&\002\003";
	const char *p;
	char *dst, *ddst;
	size_t count = 0;

	/*
	 * First pass: count the characters that need replacing.  The scan
	 * advances one byte at a time so it never steps over the NUL.
	 */
	for (p = src; *p != '\0'; p++) {
		if (strchr(trouble, *p) != NULL)
			count++;
	}

#define append(a)				\
    do {					\
	memcpy(dst, (a), sizeof(a) - 1);	\
	dst += sizeof(a) - 1;			\
    } while (/*CONSTCOND*/0)

	/*
	 * The longest replacement ("&quot;") is 6 bytes for 1 input byte,
	 * so 5 extra bytes per special character is always enough.
	 */
	ddst = dst = emalloc((size_t)(p - src) + count * 5 + 1);
	for (p = src; *p != '\0'; p++) {
		switch (*p) {
		case '<':
			append("&lt;");
			break;
		case '>':
			append("&gt;");
			break;
		case '\"':
			append("&quot;");
			break;
		case '&':
			/*
			 * Don't perform the quoting if this & is part of
			 * an mdoc escape sequence, e.g. \&.  An & at the
			 * very start has no predecessor and is quoted.
			 */
			if (p != src && p[-1] == '\\')
				append("&");
			else
				append("&amp;");
			break;
		case '\002':
			append("<b>");
			break;
		case '\003':
			append("</b>");
			break;
		default:
			*dst++ = *p;
			break;
		}
	}
#undef append

	*dst = '\0';
	*slen = dst - ddst;
	return ddst;
}
802
803
804 /*
805 * callback_html --
806 * Callback function for run_query_html. It builds the html output and then
807 * calls the actual user supplied callback function.
808 */
809 static int
810 callback_html(void *data, const char *section, const char *name,
811 const char *name_desc, const char *snippet, size_t snippet_length)
812 {
813 struct orig_callback_data *orig_data = data;
814 int (*callback)(void *, const char *, const char *, const char *,
815 const char *, size_t) = orig_data->callback;
816 size_t length = snippet_length;
817 size_t name_description_length = strlen(name_desc);
818 char *qsnippet = get_escaped_html_string(snippet, &length);
819 char *qname_description = get_escaped_html_string(name_desc,
820 &name_description_length);
821
822 (*callback)(orig_data->data, section, name, qname_description,
823 qsnippet, length);
824 free(qsnippet);
825 free(qname_description);
826 return 0;
827 }
828
829 /*
830 * run_query_html --
831 * Utility function to output query result in HTML format.
832 * It internally calls run_query only, but it first passes the output to its
833 * own custom callback function, which preprocess the snippet for quoting
834 * inline HTML fragments.
835 * After that it delegates the call the actual user supplied callback function.
836 */
837 static int
838 run_query_html(sqlite3 *db, query_args *args)
839 {
840 struct orig_callback_data orig_data;
841 orig_data.callback = args->callback;
842 orig_data.data = args->callback_data;
843 const char *snippet_args[] = {"\002", "\003", "..."};
844 args->callback = &callback_html;
845 args->callback_data = (void *) &orig_data;
846 return run_query_internal(db, snippet_args, args);
847 }
848
/*
 * ul_pager --
 *  Returns a newly allocated copy of s.  When ul is non-zero every byte c
 *  of s is emitted as the three bytes '_', '\b', c (pager-style
 *  underlining); otherwise the copy is verbatim.
 */
static char *
ul_pager(int ul, const char *s)
{
	size_t n, i;
	char *out;

	if (ul == 0)
		return estrdup(s);

	n = strlen(s);
	/* Three output bytes per input byte, plus the terminating NUL. */
	out = emalloc(n * 3 + 1);
	for (i = 0; i < n; i++) {
		out[3 * i] = '_';
		out[3 * i + 1] = '\b';
		out[3 * i + 2] = s[i];
	}
	out[3 * n] = '\0';
	return out;
}
873
874 /*
875 * callback_pager --
876 * A callback similar to callback_html. It overstrikes the matching text in
877 * the snippet so that it appears emboldened when viewed using a pager like
878 * more or less.
879 */
880 static int
881 callback_pager(void *data, const char *section, const char *name,
882 const char *name_desc, const char *snippet, size_t snippet_length)
883 {
884 struct orig_callback_data *orig_data = data;
885 char *psnippet;
886 const char *temp = snippet;
887 int count = 0;
888 int i = 0, did;
889 size_t sz = 0;
890 size_t psnippet_length;
891
892 /* Count the number of bytes of matching text. For each of these
893 * bytes we will use 2 extra bytes to overstrike it so that it
894 * appears bold when viewed using a pager.
895 */
896 while (*temp) {
897 sz = strcspn(temp, "\002\003");
898 temp += sz;
899 if (*temp == '\003') {
900 count += 2 * (sz);
901 }
902 temp++;
903 }
904
905 psnippet_length = snippet_length + count;
906 psnippet = emalloc(psnippet_length + 1);
907
908 /* Copy the bytes from snippet to psnippet:
909 * 1. Copy the bytes before \002 as it is.
910 * 2. The bytes after \002 need to be overstriked till we
911 * encounter \003.
912 * 3. To overstrike a byte 'A' we need to write 'A\bA'
913 */
914 did = 0;
915 while (*snippet) {
916 sz = strcspn(snippet, "\002");
917 memcpy(&psnippet[i], snippet, sz);
918 snippet += sz;
919 i += sz;
920
921 /* Don't change this. Advancing the pointer without reading the byte
922 * is causing strange behavior.
923 */
924 if (*snippet == '\002')
925 snippet++;
926 while (*snippet && *snippet != '\003') {
927 did = 1;
928 psnippet[i++] = *snippet;
929 psnippet[i++] = '\b';
930 psnippet[i++] = *snippet++;
931 }
932 if (*snippet)
933 snippet++;
934 }
935
936 psnippet[i] = 0;
937 char *ul_section = ul_pager(did, section);
938 char *ul_name = ul_pager(did, name);
939 char *ul_name_desc = ul_pager(did, name_desc);
940 (orig_data->callback)(orig_data->data, ul_section, ul_name,
941 ul_name_desc, psnippet, psnippet_length);
942 free(ul_section);
943 free(ul_name);
944 free(ul_name_desc);
945 free(psnippet);
946 return 0;
947 }
948
/*
 * term_args --
 *  Callback data for callback_term: the wrapped user callback plus the
 *  strings that are placed before (smul) and after (rmul) each field.
 */
struct term_args {
	struct orig_callback_data *orig_data;	/* callback to forward to */
	const char *smul;			/* emitted before a field */
	const char *rmul;			/* emitted after a field */
};
954
955 /*
956 * underline a string, pager style.
957 */
958 static char *
959 ul_term(const char *s, const struct term_args *ta)
960 {
961 char *dst;
962
963 easprintf(&dst, "%s%s%s", ta->smul, s, ta->rmul);
964 return dst;
965 }
966
967 /*
968 * callback_term --
969 * A callback similar to callback_html. It overstrikes the matching text in
970 * the snippet so that it appears emboldened when viewed using a pager like
971 * more or less.
972 */
973 static int
974 callback_term(void *data, const char *section, const char *name,
975 const char *name_desc, const char *snippet, size_t snippet_length)
976 {
977 struct term_args *ta = data;
978 struct orig_callback_data *orig_data = ta->orig_data;
979
980 char *ul_section = ul_term(section, ta);
981 char *ul_name = ul_term(name, ta);
982 char *ul_name_desc = ul_term(name_desc, ta);
983 (orig_data->callback)(orig_data->data, ul_section, ul_name,
984 ul_name_desc, snippet, snippet_length);
985 free(ul_section);
986 free(ul_name);
987 free(ul_name_desc);
988 return 0;
989 }
990
991 /*
992 * run_query_pager --
993 * Utility function similar to run_query_html. This function tries to
994 * pre-process the result assuming it will be piped to a pager.
995 * For this purpose it first calls its own callback function callback_pager
996 * which then delegates the call to the user supplied callback.
997 */
998 static int
999 run_query_pager(sqlite3 *db, query_args *args)
1000 {
1001 struct orig_callback_data orig_data;
1002 orig_data.callback = args->callback;
1003 orig_data.data = args->callback_data;
1004 const char *snippet_args[3] = { "\002", "\003", "..." };
1005 args->callback = &callback_pager;
1006 args->callback_data = (void *) &orig_data;
1007 return run_query_internal(db, snippet_args, args);
1008 }
1009
/*
 * nv --
 *  Output cursor used by term_putc: a buffer and the number of bytes
 *  written to it so far.
 */
struct nv {
	char *s;	/* destination buffer */
	size_t l;	/* index of the next byte to write */
};
1014
1015 static int
1016 term_putc(int c, void *p)
1017 {
1018 struct nv *nv = p;
1019 nv->s[nv->l++] = c;
1020 return 0;
1021 }
1022
1023 static char *
1024 term_fix_seq(TERMINAL *ti, const char *seq)
1025 {
1026 char *res = estrdup(seq);
1027 struct nv nv;
1028
1029 if (ti == NULL)
1030 return res;
1031
1032 nv.s = res;
1033 nv.l = 0;
1034 ti_puts(ti, seq, 1, term_putc, &nv);
1035 nv.s[nv.l] = '\0';
1036
1037 return res;
1038 }
1039
1040 static void
1041 term_init(int fd, const char *sa[5])
1042 {
1043 TERMINAL *ti;
1044 int error;
1045 const char *bold, *sgr0, *smso, *rmso, *smul, *rmul;
1046
1047 if (ti_setupterm(&ti, NULL, fd, &error) == -1) {
1048 bold = sgr0 = NULL;
1049 smso = rmso = smul = rmul = "";
1050 ti = NULL;
1051 } else {
1052 bold = ti_getstr(ti, "bold");
1053 sgr0 = ti_getstr(ti, "sgr0");
1054 if (bold == NULL || sgr0 == NULL) {
1055 smso = ti_getstr(ti, "smso");
1056
1057 if (smso == NULL ||
1058 (rmso = ti_getstr(ti, "rmso")) == NULL)
1059 smso = rmso = "";
1060 bold = sgr0 = NULL;
1061 } else
1062 smso = rmso = "";
1063
1064 smul = ti_getstr(ti, "smul");
1065 if (smul == NULL || (rmul = ti_getstr(ti, "rmul")) == NULL)
1066 smul = rmul = "";
1067 }
1068
1069 sa[0] = term_fix_seq(ti, bold ? bold : smso);
1070 sa[1] = term_fix_seq(ti, sgr0 ? sgr0 : rmso);
1071 sa[2] = estrdup("...");
1072 sa[3] = term_fix_seq(ti, smul);
1073 sa[4] = term_fix_seq(ti, rmul);
1074
1075 if (ti)
1076 del_curterm(ti);
1077 }
1078
1079 /*
1080 * run_query_term --
1081 * Utility function similar to run_query_html. This function tries to
1082 * pre-process the result assuming it will be displayed on a terminal
1083 * For this purpose it first calls its own callback function callback_pager
1084 * which then delegates the call to the user supplied callback.
1085 */
1086 static int
1087 run_query_term(sqlite3 *db, query_args *args)
1088 {
1089 struct orig_callback_data orig_data;
1090 struct term_args ta;
1091 orig_data.callback = args->callback;
1092 orig_data.data = args->callback_data;
1093 const char *snippet_args[5];
1094
1095 term_init(STDOUT_FILENO, snippet_args);
1096 ta.smul = snippet_args[3];
1097 ta.rmul = snippet_args[4];
1098 ta.orig_data = (void *) &orig_data;
1099
1100 args->callback = &callback_term;
1101 args->callback_data = &ta;
1102 return run_query_internal(db, snippet_args, args);
1103 }
1104
1105 static int
1106 run_query_none(sqlite3 *db, query_args *args)
1107 {
1108 struct orig_callback_data orig_data;
1109 orig_data.callback = args->callback;
1110 orig_data.data = args->callback_data;
1111 const char *snippet_args[3] = { "", "", "..." };
1112 args->callback = &callback_pager;
1113 args->callback_data = (void *) &orig_data;
1114 return run_query_internal(db, snippet_args, args);
1115 }
1116
1117 int
1118 run_query(sqlite3 *db, query_format fmt, query_args *args)
1119 {
1120 switch (fmt) {
1121 case APROPOS_NONE:
1122 return run_query_none(db, args);
1123 case APROPOS_HTML:
1124 return run_query_html(db, args);
1125 case APROPOS_TERM:
1126 return run_query_term(db, args);
1127 case APROPOS_PAGER:
1128 return run_query_pager(db, args);
1129 default:
1130 warnx("Unknown query format %d", (int)fmt);
1131 return -1;
1132 }
1133 }
1134