1 1.1 christos /* $NetBSD: search.c,v 1.1.1.1 2016/01/14 00:11:29 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* search.c -- searching large bodies of text. 4 1.1 christos Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp 5 1.1 christos 6 1.1 christos Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc. 7 1.1 christos 8 1.1 christos This program is free software; you can redistribute it and/or modify 9 1.1 christos it under the terms of the GNU General Public License as published by 10 1.1 christos the Free Software Foundation; either version 2, or (at your option) 11 1.1 christos any later version. 12 1.1 christos 13 1.1 christos This program is distributed in the hope that it will be useful, 14 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 15 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 1.1 christos GNU General Public License for more details. 17 1.1 christos 18 1.1 christos You should have received a copy of the GNU General Public License 19 1.1 christos along with this program; if not, write to the Free Software 20 1.1 christos Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 21 1.1 christos 22 1.1 christos Written by Brian Fox (bfox (at) ai.mit.edu). */ 23 1.1 christos 24 1.1 christos #include "info.h" 25 1.1 christos 26 1.1 christos #include "search.h" 27 1.1 christos #include "nodes.h" 28 1.1 christos 29 1.1 christos /* The search functions take two arguments: 30 1.1 christos 31 1.1 christos 1) a string to search for, and 32 1.1 christos 33 1.1 christos 2) a pointer to a SEARCH_BINDING which contains the buffer, start, 34 1.1 christos and end of the search. 35 1.1 christos 36 1.1 christos They return a long, which is the offset from the start of the buffer 37 1.1 christos at which the match was found. An offset of -1 indicates failure. */ 38 1.1 christos 39 1.1 christos /* A function which makes a binding with buffer and bounds. */ 40 1.1 christos SEARCH_BINDING * 41 1.1 christos make_binding (char *buffer, long int start, long int end) 42 1.1 christos { 43 1.1 christos SEARCH_BINDING *binding; 44 1.1 christos 45 1.1 christos binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING)); 46 1.1 christos binding->buffer = buffer; 47 1.1 christos binding->start = start; 48 1.1 christos binding->end = end; 49 1.1 christos binding->flags = 0; 50 1.1 christos 51 1.1 christos return (binding); 52 1.1 christos } 53 1.1 christos 54 1.1 christos /* Make a copy of BINDING without duplicating the data. */ 55 1.1 christos SEARCH_BINDING * 56 1.1 christos copy_binding (SEARCH_BINDING *binding) 57 1.1 christos { 58 1.1 christos SEARCH_BINDING *copy; 59 1.1 christos 60 1.1 christos copy = make_binding (binding->buffer, binding->start, binding->end); 61 1.1 christos copy->flags = binding->flags; 62 1.1 christos return (copy); 63 1.1 christos } 64 1.1 christos 65 1.1 christos 66 1.1 christos /* **************************************************************** */ 68 1.1 christos /* */ 69 1.1 christos /* The Actual Searching Functions */ 70 1.1 christos /* */ 71 1.1 christos /* **************************************************************** */ 72 1.1 christos 73 1.1 christos /* Search forwards or backwards for the text delimited by BINDING. 74 1.1 christos The search is forwards if BINDING->start is greater than BINDING->end. */ 75 1.1 christos long 76 1.1 christos search (char *string, SEARCH_BINDING *binding) 77 1.1 christos { 78 1.1 christos long result; 79 1.1 christos 80 1.1 christos /* If the search is backwards, then search backwards, otherwise forwards. */ 81 1.1 christos if (binding->start > binding->end) 82 1.1 christos result = search_backward (string, binding); 83 1.1 christos else 84 1.1 christos result = search_forward (string, binding); 85 1.1 christos 86 1.1 christos return (result); 87 1.1 christos } 88 1.1 christos 89 1.1 christos /* Search forwards for STRING through the text delimited in BINDING. */ 90 1.1 christos long 91 1.1 christos search_forward (char *string, SEARCH_BINDING *binding) 92 1.1 christos { 93 1.1 christos register int c, i, len; 94 1.1 christos register char *buff, *end; 95 1.1 christos char *alternate = (char *)NULL; 96 1.1 christos 97 1.1 christos len = strlen (string); 98 1.1 christos 99 1.1 christos /* We match characters in the search buffer against STRING and ALTERNATE. 100 1.1 christos ALTERNATE is a case reversed version of STRING; this is cheaper than 101 1.1 christos case folding each character before comparison. Alternate is only 102 1.1 christos used if the case folding bit is turned on in the passed BINDING. */ 103 1.1 christos 104 1.1 christos if (binding->flags & S_FoldCase) 105 1.1 christos { 106 1.1 christos alternate = xstrdup (string); 107 1.1 christos 108 1.1 christos for (i = 0; i < len; i++) 109 1.1 christos { 110 1.1 christos if (islower (alternate[i])) 111 1.1 christos alternate[i] = toupper (alternate[i]); 112 1.1 christos else if (isupper (alternate[i])) 113 1.1 christos alternate[i] = tolower (alternate[i]); 114 1.1 christos } 115 1.1 christos } 116 1.1 christos 117 1.1 christos buff = binding->buffer + binding->start; 118 1.1 christos end = binding->buffer + binding->end + 1; 119 1.1 christos 120 1.1 christos while (buff < (end - len)) 121 1.1 christos { 122 1.1 christos for (i = 0; i < len; i++) 123 1.1 christos { 124 1.1 christos c = buff[i]; 125 1.1 christos 126 1.1 christos if ((c != string[i]) && (!alternate || c != alternate[i])) 127 1.1 christos break; 128 1.1 christos } 129 1.1 christos 130 1.1 christos if (!string[i]) 131 1.1 christos { 132 1.1 christos if (alternate) 133 1.1 christos free (alternate); 134 1.1 christos if (binding->flags & S_SkipDest) 135 1.1 christos buff += len; 136 1.1 christos return ((long) (buff - binding->buffer)); 137 1.1 christos } 138 1.1 christos 139 1.1 christos buff++; 140 1.1 christos } 141 1.1 christos 142 1.1 christos if (alternate) 143 1.1 christos free (alternate); 144 1.1 christos 145 1.1 christos return ((long) -1); 146 1.1 christos } 147 1.1 christos 148 1.1 christos /* Search for STRING backwards through the text delimited in BINDING. */ 149 1.1 christos long 150 1.1 christos search_backward (char *input_string, SEARCH_BINDING *binding) 151 1.1 christos { 152 1.1 christos register int c, i, len; 153 1.1 christos register char *buff, *end; 154 1.1 christos char *string; 155 1.1 christos char *alternate = (char *)NULL; 156 1.1 christos 157 1.1 christos len = strlen (input_string); 158 1.1 christos 159 1.1 christos /* Reverse the characters in the search string. */ 160 1.1 christos string = (char *)xmalloc (1 + len); 161 1.1 christos for (c = 0, i = len - 1; input_string[c]; c++, i--) 162 1.1 christos string[i] = input_string[c]; 163 1.1 christos 164 1.1 christos string[c] = '\0'; 165 1.1 christos 166 1.1 christos /* We match characters in the search buffer against STRING and ALTERNATE. 167 1.1 christos ALTERNATE is a case reversed version of STRING; this is cheaper than 168 1.1 christos case folding each character before comparison. ALTERNATE is only 169 1.1 christos used if the case folding bit is turned on in the passed BINDING. */ 170 1.1 christos 171 1.1 christos if (binding->flags & S_FoldCase) 172 1.1 christos { 173 1.1 christos alternate = xstrdup (string); 174 1.1 christos 175 1.1 christos for (i = 0; i < len; i++) 176 1.1 christos { 177 1.1 christos if (islower (alternate[i])) 178 1.1 christos alternate[i] = toupper (alternate[i]); 179 1.1 christos else if (isupper (alternate[i])) 180 1.1 christos alternate[i] = tolower (alternate[i]); 181 1.1 christos } 182 1.1 christos } 183 1.1 christos 184 1.1 christos buff = binding->buffer + binding->start - 1; 185 1.1 christos end = binding->buffer + binding->end; 186 1.1 christos 187 1.1 christos while (buff > (end + len)) 188 1.1 christos { 189 1.1 christos for (i = 0; i < len; i++) 190 1.1 christos { 191 1.1 christos c = *(buff - i); 192 1.1 christos 193 1.1 christos if (c != string[i] && (!alternate || c != alternate[i])) 194 1.1 christos break; 195 1.1 christos } 196 1.1 christos 197 1.1 christos if (!string[i]) 198 1.1 christos { 199 1.1 christos free (string); 200 1.1 christos if (alternate) 201 1.1 christos free (alternate); 202 1.1 christos 203 1.1 christos if (binding->flags & S_SkipDest) 204 1.1 christos buff -= len; 205 1.1 christos return ((long) (1 + (buff - binding->buffer))); 206 1.1 christos } 207 1.1 christos 208 1.1 christos buff--; 209 1.1 christos } 210 1.1 christos 211 1.1 christos free (string); 212 1.1 christos if (alternate) 213 1.1 christos free (alternate); 214 1.1 christos 215 1.1 christos return ((long) -1); 216 1.1 christos } 217 1.1 christos 218 1.1 christos /* Find STRING in LINE, returning the offset of the end of the string. 219 1.1 christos Return an offset of -1 if STRING does not appear in LINE. The search 220 1.1 christos is bound by the end of the line (i.e., either NEWLINE or 0). */ 221 1.1 christos int 222 1.1 christos string_in_line (char *string, char *line) 223 1.1 christos { 224 1.1 christos register int end; 225 1.1 christos SEARCH_BINDING binding; 226 1.1 christos 227 1.1 christos /* Find the end of the line. */ 228 1.1 christos for (end = 0; line[end] && line[end] != '\n'; end++); 229 1.1 christos 230 1.1 christos /* Search for STRING within these confines. */ 231 1.1 christos binding.buffer = line; 232 1.1 christos binding.start = 0; 233 1.1 christos binding.end = end; 234 1.1 christos binding.flags = S_FoldCase | S_SkipDest; 235 1.1 christos 236 1.1 christos return (search_forward (string, &binding)); 237 1.1 christos } 238 1.1 christos 239 1.1 christos /* Return non-zero if STRING is the first text to appear at BINDING. */ 240 1.1 christos int 241 1.1 christos looking_at (char *string, SEARCH_BINDING *binding) 242 1.1 christos { 243 1.1 christos long search_end; 244 1.1 christos 245 1.1 christos search_end = search (string, binding); 246 1.1 christos 247 1.1 christos /* If the string was not found, SEARCH_END is -1. If the string was found, 248 1.1 christos but not right away, SEARCH_END is != binding->start. Otherwise, the 249 1.1 christos string was found at binding->start. */ 250 1.1 christos return (search_end == binding->start); 251 1.1 christos } 252 1.1 christos 253 1.1 christos /* **************************************************************** */ 255 1.1 christos /* */ 256 1.1 christos /* Small String Searches */ 257 1.1 christos /* */ 258 1.1 christos /* **************************************************************** */ 259 1.1 christos 260 1.1 christos /* Function names that start with "skip" are passed a string, and return 261 1.1 christos an offset from the start of that string. Function names that start 262 1.1 christos with "find" are passed a SEARCH_BINDING, and return an absolute position 263 1.1 christos marker of the item being searched for. "Find" functions return a value 264 1.1 christos of -1 if the item being looked for couldn't be found. */ 265 1.1 christos 266 1.1 christos /* Return the index of the first non-whitespace character in STRING. */ 267 1.1 christos int 268 1.1 christos skip_whitespace (char *string) 269 1.1 christos { 270 1.1 christos register int i; 271 1.1 christos 272 1.1 christos for (i = 0; string && whitespace (string[i]); i++); 273 1.1 christos return (i); 274 1.1 christos } 275 1.1 christos 276 1.1 christos /* Return the index of the first non-whitespace or newline character in 277 1.1 christos STRING. */ 278 1.1 christos int 279 1.1 christos skip_whitespace_and_newlines (char *string) 280 1.1 christos { 281 1.1 christos register int i; 282 1.1 christos 283 1.1 christos for (i = 0; string && whitespace_or_newline (string[i]); i++); 284 1.1 christos return (i); 285 1.1 christos } 286 1.1 christos 287 1.1 christos /* Return the index of the first whitespace character in STRING. */ 288 1.1 christos int 289 1.1 christos skip_non_whitespace (char *string) 290 1.1 christos { 291 1.1 christos register int i; 292 1.1 christos 293 1.1 christos for (i = 0; string && string[i] && !whitespace (string[i]); i++); 294 1.1 christos return (i); 295 1.1 christos } 296 1.1 christos 297 1.1 christos /* Return the index of the first non-node character in STRING. Note that 298 1.1 christos this function contains quite a bit of hair to ignore periods in some 299 1.1 christos special cases. This is because we here at GNU ship some info files which 300 1.1 christos contain nodenames that contain periods. No such nodename can start with 301 1.1 christos a period, or continue with whitespace, newline, or ')' immediately following 302 1.1 christos the period. If second argument NEWLINES_OKAY is non-zero, newlines should 303 1.1 christos be skipped while parsing out the nodename specification. */ 304 1.1 christos int 305 1.1 christos skip_node_characters (char *string, int newlines_okay) 306 1.1 christos { 307 1.1 christos register int c, i = 0; 308 1.1 christos int paren_seen = 0; 309 1.1 christos int paren = 0; 310 1.1 christos 311 1.1 christos /* Handle special case. This is when another function has parsed out the 312 1.1 christos filename component of the node name, and we just want to parse out the 313 1.1 christos nodename proper. In that case, a period at the start of the nodename 314 1.1 christos indicates an empty nodename. */ 315 1.1 christos if (string && *string == '.') 316 1.1 christos return (0); 317 1.1 christos 318 1.1 christos if (string && *string == '(') 319 1.1 christos { 320 1.1 christos paren++; 321 1.1 christos paren_seen++; 322 1.1 christos i++; 323 1.1 christos } 324 1.1 christos 325 1.1 christos for (; string && (c = string[i]); i++) 326 1.1 christos { 327 1.1 christos if (paren) 328 1.1 christos { 329 1.1 christos if (c == '(') 330 1.1 christos paren++; 331 1.1 christos else if (c == ')') 332 1.1 christos paren--; 333 1.1 christos 334 1.1 christos continue; 335 1.1 christos } 336 1.1 christos 337 1.1 christos /* If the character following the close paren is a space or period, 338 1.1 christos then this node name has no more characters associated with it. */ 339 1.1 christos if (c == '\t' || 340 1.1 christos c == ',' || 341 1.1 christos c == INFO_TAGSEP || 342 1.1 christos ((!newlines_okay) && (c == '\n')) || 343 1.1 christos ((paren_seen && string[i - 1] == ')') && 344 1.1 christos (c == ' ' || c == '.')) || 345 1.1 christos (c == '.' && 346 1.1 christos ( 347 1.1 christos #if 0 348 1.1 christos /* This test causes a node name ending in a period, like `This.', not to 349 1.1 christos be found. The trailing . is stripped. This occurs in the jargon 350 1.1 christos file (`I see no X here.' is a node name). */ 351 1.1 christos (!string[i + 1]) || 352 1.1 christos #endif 353 1.1 christos (whitespace_or_newline (string[i + 1])) || 354 1.1 christos (string[i + 1] == ')')))) 355 1.1 christos break; 356 1.1 christos } 357 1.1 christos return (i); 358 1.1 christos } 359 1.1 christos 360 1.1 christos 361 1.1 christos /* **************************************************************** */ 363 1.1 christos /* */ 364 1.1 christos /* Searching FILE_BUFFER's */ 365 1.1 christos /* */ 366 1.1 christos /* **************************************************************** */ 367 1.1 christos 368 1.1 christos /* Return the absolute position of the first occurence of a node separator in 369 1.1 christos BINDING-buffer. The search starts at BINDING->start. Return -1 if no node 370 1.1 christos separator was found. */ 371 1.1 christos long 372 1.1 christos find_node_separator (SEARCH_BINDING *binding) 373 1.1 christos { 374 1.1 christos register long i; 375 1.1 christos char *body; 376 1.1 christos 377 1.1 christos body = binding->buffer; 378 1.1 christos 379 1.1 christos /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are 380 1.1 christos optional, but the DELETE and NEWLINE are not. This separator holds 381 1.1 christos true for all separated elements in an Info file, including the tags 382 1.1 christos table (if present) and the indirect tags table (if present). */ 383 1.1 christos for (i = binding->start; i < binding->end - 1; i++) 384 1.1 christos if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) && 385 1.1 christos (body[i + 2] == '\n' || 386 1.1 christos (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) || 387 1.1 christos ((body[i] == INFO_COOKIE) && 388 1.1 christos (body[i + 1] == '\n' || 389 1.1 christos (body[i + 1] == INFO_FF && body[i + 2] == '\n')))) 390 1.1 christos return (i); 391 1.1 christos return (-1); 392 1.1 christos } 393 1.1 christos 394 1.1 christos /* Return the length of the node separator characters that BODY is 395 1.1 christos currently pointing at. */ 396 1.1 christos int 397 1.1 christos skip_node_separator (char *body) 398 1.1 christos { 399 1.1 christos register int i; 400 1.1 christos 401 1.1 christos i = 0; 402 1.1 christos 403 1.1 christos if (body[i] == INFO_FF) 404 1.1 christos i++; 405 1.1 christos 406 1.1 christos if (body[i++] != INFO_COOKIE) 407 1.1 christos return (0); 408 1.1 christos 409 1.1 christos if (body[i] == INFO_FF) 410 1.1 christos i++; 411 1.1 christos 412 1.1 christos if (body[i++] != '\n') 413 1.1 christos return (0); 414 1.1 christos 415 1.1 christos return (i); 416 1.1 christos } 417 1.1 christos 418 1.1 christos /* Return the number of characters from STRING to the start of 419 1.1 christos the next line. */ 420 1.1 christos int 421 1.1 christos skip_line (char *string) 422 1.1 christos { 423 1.1 christos register int i; 424 1.1 christos 425 1.1 christos for (i = 0; string && string[i] && string[i] != '\n'; i++); 426 1.1 christos 427 1.1 christos if (string[i] == '\n') 428 1.1 christos i++; 429 1.1 christos 430 1.1 christos return (i); 431 1.1 christos } 432 1.1 christos 433 1.1 christos /* Return the absolute position of the beginning of a tags table in this 434 1.1 christos binding starting the search at binding->start. */ 435 1.1 christos long 436 1.1 christos find_tags_table (SEARCH_BINDING *binding) 437 1.1 christos { 438 1.1 christos SEARCH_BINDING tmp_search; 439 1.1 christos long position; 440 1.1 christos 441 1.1 christos tmp_search.buffer = binding->buffer; 442 1.1 christos tmp_search.start = binding->start; 443 1.1 christos tmp_search.end = binding->end; 444 1.1 christos tmp_search.flags = S_FoldCase; 445 1.1 christos 446 1.1 christos while ((position = find_node_separator (&tmp_search)) != -1 ) 447 1.1 christos { 448 1.1 christos tmp_search.start = position; 449 1.1 christos tmp_search.start += skip_node_separator (tmp_search.buffer 450 1.1 christos + tmp_search.start); 451 1.1 christos 452 1.1 christos if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search)) 453 1.1 christos return (position); 454 1.1 christos } 455 1.1 christos return (-1); 456 1.1 christos } 457 1.1 christos 458 1.1 christos /* Return the absolute position of the node named NODENAME in BINDING. 459 1.1 christos This is a brute force search, and we wish to avoid it when possible. 460 1.1 christos This function is called when a tag (indirect or otherwise) doesn't 461 1.1 christos really point to the right node. It returns the absolute position of 462 1.1 christos the separator preceding the node. */ 463 1.1 christos long 464 1.1 christos find_node_in_binding (char *nodename, SEARCH_BINDING *binding) 465 1.1 christos { 466 1.1 christos long position; 467 1.1 christos int offset, namelen; 468 1.1 christos SEARCH_BINDING tmp_search; 469 1.1 christos 470 1.1 christos namelen = strlen (nodename); 471 1.1 christos 472 1.1 christos tmp_search.buffer = binding->buffer; 473 1.1 christos tmp_search.start = binding->start; 474 1.1 christos tmp_search.end = binding->end; 475 1.1 christos tmp_search.flags = 0; 476 1.1 christos 477 1.1 christos while ((position = find_node_separator (&tmp_search)) != -1) 478 1.1 christos { 479 1.1 christos tmp_search.start = position; 480 1.1 christos tmp_search.start += skip_node_separator 481 1.1 christos (tmp_search.buffer + tmp_search.start); 482 1.1 christos 483 1.1 christos offset = string_in_line 484 1.1 christos (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start); 485 1.1 christos 486 1.1 christos if (offset == -1) 487 1.1 christos continue; 488 1.1 christos 489 1.1 christos tmp_search.start += offset; 490 1.1 christos tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start); 491 1.1 christos offset = skip_node_characters 492 1.1 christos (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES); 493 1.1 christos 494 1.1 christos /* Notice that this is an exact match. You cannot grovel through 495 1.1 christos the buffer with this function looking for random nodes. */ 496 1.1 christos if ((offset == namelen) && 497 1.1 christos (tmp_search.buffer[tmp_search.start] == nodename[0]) && 498 1.1 christos (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0)) 499 1.1 christos return (position); 500 } 501 return (-1); 502 } 503