Home | History | Annotate | Line # | Download | only in info
      1  1.1  christos /*	$NetBSD: search.c,v 1.1.1.1 2016/01/14 00:11:29 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* search.c -- searching large bodies of text.
      4  1.1  christos    Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp
      5  1.1  christos 
      6  1.1  christos    Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc.
      7  1.1  christos 
      8  1.1  christos    This program is free software; you can redistribute it and/or modify
      9  1.1  christos    it under the terms of the GNU General Public License as published by
     10  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
     11  1.1  christos    any later version.
     12  1.1  christos 
     13  1.1  christos    This program is distributed in the hope that it will be useful,
     14  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16  1.1  christos    GNU General Public License for more details.
     17  1.1  christos 
     18  1.1  christos    You should have received a copy of the GNU General Public License
     19  1.1  christos    along with this program; if not, write to the Free Software
     20  1.1  christos    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
     21  1.1  christos 
     22  1.1  christos    Written by Brian Fox (bfox (at) ai.mit.edu). */
     23  1.1  christos 
     24  1.1  christos #include "info.h"
     25  1.1  christos 
     26  1.1  christos #include "search.h"
     27  1.1  christos #include "nodes.h"
     28  1.1  christos 
     29  1.1  christos /* The search functions take two arguments:
     30  1.1  christos 
     31  1.1  christos      1) a string to search for, and
     32  1.1  christos 
     33  1.1  christos      2) a pointer to a SEARCH_BINDING which contains the buffer, start,
     34  1.1  christos         and end of the search.
     35  1.1  christos 
     36  1.1  christos    They return a long, which is the offset from the start of the buffer
     37  1.1  christos    at which the match was found.  An offset of -1 indicates failure. */
     38  1.1  christos 
     39  1.1  christos /* A function which makes a binding with buffer and bounds. */
     40  1.1  christos SEARCH_BINDING *
     41  1.1  christos make_binding (char *buffer, long int start, long int end)
     42  1.1  christos {
     43  1.1  christos   SEARCH_BINDING *binding;
     44  1.1  christos 
     45  1.1  christos   binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING));
     46  1.1  christos   binding->buffer = buffer;
     47  1.1  christos   binding->start = start;
     48  1.1  christos   binding->end = end;
     49  1.1  christos   binding->flags = 0;
     50  1.1  christos 
     51  1.1  christos   return (binding);
     52  1.1  christos }
     53  1.1  christos 
     54  1.1  christos /* Make a copy of BINDING without duplicating the data. */
     55  1.1  christos SEARCH_BINDING *
     56  1.1  christos copy_binding (SEARCH_BINDING *binding)
     57  1.1  christos {
     58  1.1  christos   SEARCH_BINDING *copy;
     59  1.1  christos 
     60  1.1  christos   copy = make_binding (binding->buffer, binding->start, binding->end);
     61  1.1  christos   copy->flags = binding->flags;
     62  1.1  christos   return (copy);
     63  1.1  christos }
     64  1.1  christos 
     65  1.1  christos 
     66  1.1  christos /* **************************************************************** */
     68  1.1  christos /*                                                                  */
     69  1.1  christos /*                 The Actual Searching Functions                   */
     70  1.1  christos /*                                                                  */
     71  1.1  christos /* **************************************************************** */
     72  1.1  christos 
     73  1.1  christos /* Search forwards or backwards for the text delimited by BINDING.
     74  1.1  christos    The search is forwards if BINDING->start is greater than BINDING->end. */
     75  1.1  christos long
     76  1.1  christos search (char *string, SEARCH_BINDING *binding)
     77  1.1  christos {
     78  1.1  christos   long result;
     79  1.1  christos 
     80  1.1  christos   /* If the search is backwards, then search backwards, otherwise forwards. */
     81  1.1  christos   if (binding->start > binding->end)
     82  1.1  christos     result = search_backward (string, binding);
     83  1.1  christos   else
     84  1.1  christos     result = search_forward (string, binding);
     85  1.1  christos 
     86  1.1  christos   return (result);
     87  1.1  christos }
     88  1.1  christos 
     89  1.1  christos /* Search forwards for STRING through the text delimited in BINDING. */
     90  1.1  christos long
     91  1.1  christos search_forward (char *string, SEARCH_BINDING *binding)
     92  1.1  christos {
     93  1.1  christos   register int c, i, len;
     94  1.1  christos   register char *buff, *end;
     95  1.1  christos   char *alternate = (char *)NULL;
     96  1.1  christos 
     97  1.1  christos   len = strlen (string);
     98  1.1  christos 
     99  1.1  christos   /* We match characters in the search buffer against STRING and ALTERNATE.
    100  1.1  christos      ALTERNATE is a case reversed version of STRING; this is cheaper than
    101  1.1  christos      case folding each character before comparison.   Alternate is only
    102  1.1  christos      used if the case folding bit is turned on in the passed BINDING. */
    103  1.1  christos 
    104  1.1  christos   if (binding->flags & S_FoldCase)
    105  1.1  christos     {
    106  1.1  christos       alternate = xstrdup (string);
    107  1.1  christos 
    108  1.1  christos       for (i = 0; i < len; i++)
    109  1.1  christos         {
    110  1.1  christos           if (islower (alternate[i]))
    111  1.1  christos             alternate[i] = toupper (alternate[i]);
    112  1.1  christos           else if (isupper (alternate[i]))
    113  1.1  christos             alternate[i] = tolower (alternate[i]);
    114  1.1  christos         }
    115  1.1  christos     }
    116  1.1  christos 
    117  1.1  christos   buff = binding->buffer + binding->start;
    118  1.1  christos   end = binding->buffer + binding->end + 1;
    119  1.1  christos 
    120  1.1  christos   while (buff < (end - len))
    121  1.1  christos     {
    122  1.1  christos       for (i = 0; i < len; i++)
    123  1.1  christos         {
    124  1.1  christos           c = buff[i];
    125  1.1  christos 
    126  1.1  christos           if ((c != string[i]) && (!alternate || c != alternate[i]))
    127  1.1  christos             break;
    128  1.1  christos         }
    129  1.1  christos 
    130  1.1  christos       if (!string[i])
    131  1.1  christos         {
    132  1.1  christos           if (alternate)
    133  1.1  christos             free (alternate);
    134  1.1  christos           if (binding->flags & S_SkipDest)
    135  1.1  christos             buff += len;
    136  1.1  christos           return ((long) (buff - binding->buffer));
    137  1.1  christos         }
    138  1.1  christos 
    139  1.1  christos       buff++;
    140  1.1  christos     }
    141  1.1  christos 
    142  1.1  christos   if (alternate)
    143  1.1  christos     free (alternate);
    144  1.1  christos 
    145  1.1  christos   return ((long) -1);
    146  1.1  christos }
    147  1.1  christos 
    148  1.1  christos /* Search for STRING backwards through the text delimited in BINDING. */
    149  1.1  christos long
    150  1.1  christos search_backward (char *input_string, SEARCH_BINDING *binding)
    151  1.1  christos {
    152  1.1  christos   register int c, i, len;
    153  1.1  christos   register char *buff, *end;
    154  1.1  christos   char *string;
    155  1.1  christos   char *alternate = (char *)NULL;
    156  1.1  christos 
    157  1.1  christos   len = strlen (input_string);
    158  1.1  christos 
    159  1.1  christos   /* Reverse the characters in the search string. */
    160  1.1  christos   string = (char *)xmalloc (1 + len);
    161  1.1  christos   for (c = 0, i = len - 1; input_string[c]; c++, i--)
    162  1.1  christos     string[i] = input_string[c];
    163  1.1  christos 
    164  1.1  christos   string[c] = '\0';
    165  1.1  christos 
    166  1.1  christos   /* We match characters in the search buffer against STRING and ALTERNATE.
    167  1.1  christos      ALTERNATE is a case reversed version of STRING; this is cheaper than
    168  1.1  christos      case folding each character before comparison.   ALTERNATE is only
    169  1.1  christos      used if the case folding bit is turned on in the passed BINDING. */
    170  1.1  christos 
    171  1.1  christos   if (binding->flags & S_FoldCase)
    172  1.1  christos     {
    173  1.1  christos       alternate = xstrdup (string);
    174  1.1  christos 
    175  1.1  christos       for (i = 0; i < len; i++)
    176  1.1  christos         {
    177  1.1  christos           if (islower (alternate[i]))
    178  1.1  christos             alternate[i] = toupper (alternate[i]);
    179  1.1  christos           else if (isupper (alternate[i]))
    180  1.1  christos             alternate[i] = tolower (alternate[i]);
    181  1.1  christos         }
    182  1.1  christos     }
    183  1.1  christos 
    184  1.1  christos   buff = binding->buffer + binding->start - 1;
    185  1.1  christos   end = binding->buffer + binding->end;
    186  1.1  christos 
    187  1.1  christos   while (buff > (end + len))
    188  1.1  christos     {
    189  1.1  christos       for (i = 0; i < len; i++)
    190  1.1  christos         {
    191  1.1  christos           c = *(buff - i);
    192  1.1  christos 
    193  1.1  christos           if (c != string[i] && (!alternate || c != alternate[i]))
    194  1.1  christos             break;
    195  1.1  christos         }
    196  1.1  christos 
    197  1.1  christos       if (!string[i])
    198  1.1  christos         {
    199  1.1  christos           free (string);
    200  1.1  christos           if (alternate)
    201  1.1  christos             free (alternate);
    202  1.1  christos 
    203  1.1  christos           if (binding->flags & S_SkipDest)
    204  1.1  christos             buff -= len;
    205  1.1  christos           return ((long) (1 + (buff - binding->buffer)));
    206  1.1  christos         }
    207  1.1  christos 
    208  1.1  christos       buff--;
    209  1.1  christos     }
    210  1.1  christos 
    211  1.1  christos   free (string);
    212  1.1  christos   if (alternate)
    213  1.1  christos     free (alternate);
    214  1.1  christos 
    215  1.1  christos   return ((long) -1);
    216  1.1  christos }
    217  1.1  christos 
    218  1.1  christos /* Find STRING in LINE, returning the offset of the end of the string.
    219  1.1  christos    Return an offset of -1 if STRING does not appear in LINE.  The search
    220  1.1  christos    is bound by the end of the line (i.e., either NEWLINE or 0). */
    221  1.1  christos int
    222  1.1  christos string_in_line (char *string, char *line)
    223  1.1  christos {
    224  1.1  christos   register int end;
    225  1.1  christos   SEARCH_BINDING binding;
    226  1.1  christos 
    227  1.1  christos   /* Find the end of the line. */
    228  1.1  christos   for (end = 0; line[end] && line[end] != '\n'; end++);
    229  1.1  christos 
    230  1.1  christos   /* Search for STRING within these confines. */
    231  1.1  christos   binding.buffer = line;
    232  1.1  christos   binding.start = 0;
    233  1.1  christos   binding.end = end;
    234  1.1  christos   binding.flags = S_FoldCase | S_SkipDest;
    235  1.1  christos 
    236  1.1  christos   return (search_forward (string, &binding));
    237  1.1  christos }
    238  1.1  christos 
    239  1.1  christos /* Return non-zero if STRING is the first text to appear at BINDING. */
    240  1.1  christos int
    241  1.1  christos looking_at (char *string, SEARCH_BINDING *binding)
    242  1.1  christos {
    243  1.1  christos   long search_end;
    244  1.1  christos 
    245  1.1  christos   search_end = search (string, binding);
    246  1.1  christos 
    247  1.1  christos   /* If the string was not found, SEARCH_END is -1.  If the string was found,
    248  1.1  christos      but not right away, SEARCH_END is != binding->start.  Otherwise, the
    249  1.1  christos      string was found at binding->start. */
    250  1.1  christos   return (search_end == binding->start);
    251  1.1  christos }
    252  1.1  christos 
    253  1.1  christos /* **************************************************************** */
    255  1.1  christos /*                                                                  */
    256  1.1  christos /*                    Small String Searches                         */
    257  1.1  christos /*                                                                  */
    258  1.1  christos /* **************************************************************** */
    259  1.1  christos 
    260  1.1  christos /* Function names that start with "skip" are passed a string, and return
    261  1.1  christos    an offset from the start of that string.  Function names that start
    262  1.1  christos    with "find" are passed a SEARCH_BINDING, and return an absolute position
    263  1.1  christos    marker of the item being searched for.  "Find" functions return a value
    264  1.1  christos    of -1 if the item being looked for couldn't be found. */
    265  1.1  christos 
    266  1.1  christos /* Return the index of the first non-whitespace character in STRING. */
    267  1.1  christos int
    268  1.1  christos skip_whitespace (char *string)
    269  1.1  christos {
    270  1.1  christos   register int i;
    271  1.1  christos 
    272  1.1  christos   for (i = 0; string && whitespace (string[i]); i++);
    273  1.1  christos   return (i);
    274  1.1  christos }
    275  1.1  christos 
    276  1.1  christos /* Return the index of the first non-whitespace or newline character in
    277  1.1  christos    STRING. */
    278  1.1  christos int
    279  1.1  christos skip_whitespace_and_newlines (char *string)
    280  1.1  christos {
    281  1.1  christos   register int i;
    282  1.1  christos 
    283  1.1  christos   for (i = 0; string && whitespace_or_newline (string[i]); i++);
    284  1.1  christos   return (i);
    285  1.1  christos }
    286  1.1  christos 
    287  1.1  christos /* Return the index of the first whitespace character in STRING. */
    288  1.1  christos int
    289  1.1  christos skip_non_whitespace (char *string)
    290  1.1  christos {
    291  1.1  christos   register int i;
    292  1.1  christos 
    293  1.1  christos   for (i = 0; string && string[i] && !whitespace (string[i]); i++);
    294  1.1  christos   return (i);
    295  1.1  christos }
    296  1.1  christos 
    297  1.1  christos /* Return the index of the first non-node character in STRING.  Note that
    298  1.1  christos    this function contains quite a bit of hair to ignore periods in some
    299  1.1  christos    special cases.  This is because we here at GNU ship some info files which
    300  1.1  christos    contain nodenames that contain periods.  No such nodename can start with
    301  1.1  christos    a period, or continue with whitespace, newline, or ')' immediately following
    302  1.1  christos    the period.  If second argument NEWLINES_OKAY is non-zero, newlines should
    303  1.1  christos    be skipped while parsing out the nodename specification. */
    304  1.1  christos int
    305  1.1  christos skip_node_characters (char *string, int newlines_okay)
    306  1.1  christos {
    307  1.1  christos   register int c, i = 0;
    308  1.1  christos   int paren_seen = 0;
    309  1.1  christos   int paren = 0;
    310  1.1  christos 
    311  1.1  christos   /* Handle special case.  This is when another function has parsed out the
    312  1.1  christos      filename component of the node name, and we just want to parse out the
    313  1.1  christos      nodename proper.  In that case, a period at the start of the nodename
    314  1.1  christos      indicates an empty nodename. */
    315  1.1  christos   if (string && *string == '.')
    316  1.1  christos     return (0);
    317  1.1  christos 
    318  1.1  christos   if (string && *string == '(')
    319  1.1  christos     {
    320  1.1  christos       paren++;
    321  1.1  christos       paren_seen++;
    322  1.1  christos       i++;
    323  1.1  christos     }
    324  1.1  christos 
    325  1.1  christos   for (; string && (c = string[i]); i++)
    326  1.1  christos     {
    327  1.1  christos       if (paren)
    328  1.1  christos         {
    329  1.1  christos           if (c == '(')
    330  1.1  christos             paren++;
    331  1.1  christos           else if (c == ')')
    332  1.1  christos             paren--;
    333  1.1  christos 
    334  1.1  christos           continue;
    335  1.1  christos         }
    336  1.1  christos 
    337  1.1  christos       /* If the character following the close paren is a space or period,
    338  1.1  christos          then this node name has no more characters associated with it. */
    339  1.1  christos       if (c == '\t' ||
    340  1.1  christos           c == ','  ||
    341  1.1  christos           c == INFO_TAGSEP ||
    342  1.1  christos           ((!newlines_okay) && (c == '\n')) ||
    343  1.1  christos           ((paren_seen && string[i - 1] == ')') &&
    344  1.1  christos            (c == ' ' || c == '.')) ||
    345  1.1  christos           (c == '.' &&
    346  1.1  christos            (
    347  1.1  christos #if 0
    348  1.1  christos /* This test causes a node name ending in a period, like `This.', not to
    349  1.1  christos    be found.  The trailing . is stripped.  This occurs in the jargon
    350  1.1  christos    file (`I see no X here.' is a node name).  */
    351  1.1  christos            (!string[i + 1]) ||
    352  1.1  christos #endif
    353  1.1  christos             (whitespace_or_newline (string[i + 1])) ||
    354  1.1  christos             (string[i + 1] == ')'))))
    355  1.1  christos         break;
    356  1.1  christos     }
    357  1.1  christos   return (i);
    358  1.1  christos }
    359  1.1  christos 
    360  1.1  christos 
    361  1.1  christos /* **************************************************************** */
    363  1.1  christos /*                                                                  */
    364  1.1  christos /*                   Searching FILE_BUFFER's                        */
    365  1.1  christos /*                                                                  */
    366  1.1  christos /* **************************************************************** */
    367  1.1  christos 
    368  1.1  christos /* Return the absolute position of the first occurence of a node separator in
    369  1.1  christos    BINDING-buffer.  The search starts at BINDING->start.  Return -1 if no node
    370  1.1  christos    separator was found. */
    371  1.1  christos long
    372  1.1  christos find_node_separator (SEARCH_BINDING *binding)
    373  1.1  christos {
    374  1.1  christos   register long i;
    375  1.1  christos   char *body;
    376  1.1  christos 
    377  1.1  christos   body = binding->buffer;
    378  1.1  christos 
    379  1.1  christos   /* A node is started by [^L]^_[^L]\n.  That is to say, the C-l's are
    380  1.1  christos      optional, but the DELETE and NEWLINE are not.  This separator holds
    381  1.1  christos      true for all separated elements in an Info file, including the tags
    382  1.1  christos      table (if present) and the indirect tags table (if present). */
    383  1.1  christos   for (i = binding->start; i < binding->end - 1; i++)
    384  1.1  christos     if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
    385  1.1  christos          (body[i + 2] == '\n' ||
    386  1.1  christos           (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
    387  1.1  christos         ((body[i] == INFO_COOKIE) &&
    388  1.1  christos          (body[i + 1] == '\n' ||
    389  1.1  christos           (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
    390  1.1  christos       return (i);
    391  1.1  christos   return (-1);
    392  1.1  christos }
    393  1.1  christos 
    394  1.1  christos /* Return the length of the node separator characters that BODY is
    395  1.1  christos    currently pointing at. */
    396  1.1  christos int
    397  1.1  christos skip_node_separator (char *body)
    398  1.1  christos {
    399  1.1  christos   register int i;
    400  1.1  christos 
    401  1.1  christos   i = 0;
    402  1.1  christos 
    403  1.1  christos   if (body[i] == INFO_FF)
    404  1.1  christos     i++;
    405  1.1  christos 
    406  1.1  christos   if (body[i++] != INFO_COOKIE)
    407  1.1  christos     return (0);
    408  1.1  christos 
    409  1.1  christos   if (body[i] == INFO_FF)
    410  1.1  christos     i++;
    411  1.1  christos 
    412  1.1  christos   if (body[i++] != '\n')
    413  1.1  christos     return (0);
    414  1.1  christos 
    415  1.1  christos   return (i);
    416  1.1  christos }
    417  1.1  christos 
    418  1.1  christos /* Return the number of characters from STRING to the start of
    419  1.1  christos    the next line. */
    420  1.1  christos int
    421  1.1  christos skip_line (char *string)
    422  1.1  christos {
    423  1.1  christos   register int i;
    424  1.1  christos 
    425  1.1  christos   for (i = 0; string && string[i] && string[i] != '\n'; i++);
    426  1.1  christos 
    427  1.1  christos   if (string[i] == '\n')
    428  1.1  christos     i++;
    429  1.1  christos 
    430  1.1  christos   return (i);
    431  1.1  christos }
    432  1.1  christos 
    433  1.1  christos /* Return the absolute position of the beginning of a tags table in this
    434  1.1  christos    binding starting the search at binding->start. */
    435  1.1  christos long
    436  1.1  christos find_tags_table (SEARCH_BINDING *binding)
    437  1.1  christos {
    438  1.1  christos   SEARCH_BINDING tmp_search;
    439  1.1  christos   long position;
    440  1.1  christos 
    441  1.1  christos   tmp_search.buffer = binding->buffer;
    442  1.1  christos   tmp_search.start = binding->start;
    443  1.1  christos   tmp_search.end = binding->end;
    444  1.1  christos   tmp_search.flags = S_FoldCase;
    445  1.1  christos 
    446  1.1  christos   while ((position = find_node_separator (&tmp_search)) != -1 )
    447  1.1  christos     {
    448  1.1  christos       tmp_search.start = position;
    449  1.1  christos       tmp_search.start += skip_node_separator (tmp_search.buffer
    450  1.1  christos           + tmp_search.start);
    451  1.1  christos 
    452  1.1  christos       if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search))
    453  1.1  christos         return (position);
    454  1.1  christos     }
    455  1.1  christos   return (-1);
    456  1.1  christos }
    457  1.1  christos 
    458  1.1  christos /* Return the absolute position of the node named NODENAME in BINDING.
    459  1.1  christos    This is a brute force search, and we wish to avoid it when possible.
    460  1.1  christos    This function is called when a tag (indirect or otherwise) doesn't
    461  1.1  christos    really point to the right node.  It returns the absolute position of
    462  1.1  christos    the separator preceding the node. */
    463  1.1  christos long
    464  1.1  christos find_node_in_binding (char *nodename, SEARCH_BINDING *binding)
    465  1.1  christos {
    466  1.1  christos   long position;
    467  1.1  christos   int offset, namelen;
    468  1.1  christos   SEARCH_BINDING tmp_search;
    469  1.1  christos 
    470  1.1  christos   namelen = strlen (nodename);
    471  1.1  christos 
    472  1.1  christos   tmp_search.buffer = binding->buffer;
    473  1.1  christos   tmp_search.start = binding->start;
    474  1.1  christos   tmp_search.end = binding->end;
    475  1.1  christos   tmp_search.flags = 0;
    476  1.1  christos 
    477  1.1  christos   while ((position = find_node_separator (&tmp_search)) != -1)
    478  1.1  christos     {
    479  1.1  christos       tmp_search.start = position;
    480  1.1  christos       tmp_search.start += skip_node_separator
    481  1.1  christos         (tmp_search.buffer + tmp_search.start);
    482  1.1  christos 
    483  1.1  christos       offset = string_in_line
    484  1.1  christos         (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start);
    485  1.1  christos 
    486  1.1  christos       if (offset == -1)
    487  1.1  christos         continue;
    488  1.1  christos 
    489  1.1  christos       tmp_search.start += offset;
    490  1.1  christos       tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start);
    491  1.1  christos       offset = skip_node_characters
    492  1.1  christos         (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES);
    493  1.1  christos 
    494  1.1  christos       /* Notice that this is an exact match.  You cannot grovel through
    495  1.1  christos          the buffer with this function looking for random nodes. */
    496  1.1  christos        if ((offset == namelen) &&
    497  1.1  christos            (tmp_search.buffer[tmp_search.start] == nodename[0]) &&
    498  1.1  christos            (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0))
    499  1.1  christos          return (position);
    500                    }
    501                  return (-1);
    502                }
    503