1 /* $NetBSD: search.c,v 1.1.1.1 2016/01/14 00:11:29 christos Exp $ */ 2 3 /* search.c -- searching large bodies of text. 4 Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp 5 6 Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software 20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 21 22 Written by Brian Fox (bfox (at) ai.mit.edu). */ 23 24 #include "info.h" 25 26 #include "search.h" 27 #include "nodes.h" 28 29 /* The search functions take two arguments: 30 31 1) a string to search for, and 32 33 2) a pointer to a SEARCH_BINDING which contains the buffer, start, 34 and end of the search. 35 36 They return a long, which is the offset from the start of the buffer 37 at which the match was found. An offset of -1 indicates failure. */ 38 39 /* A function which makes a binding with buffer and bounds. */ 40 SEARCH_BINDING * 41 make_binding (char *buffer, long int start, long int end) 42 { 43 SEARCH_BINDING *binding; 44 45 binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING)); 46 binding->buffer = buffer; 47 binding->start = start; 48 binding->end = end; 49 binding->flags = 0; 50 51 return (binding); 52 } 53 54 /* Make a copy of BINDING without duplicating the data. */ 55 SEARCH_BINDING * 56 copy_binding (SEARCH_BINDING *binding) 57 { 58 SEARCH_BINDING *copy; 59 60 copy = make_binding (binding->buffer, binding->start, binding->end); 61 copy->flags = binding->flags; 62 return (copy); 63 } 64 65 66 /* **************************************************************** */ 68 /* */ 69 /* The Actual Searching Functions */ 70 /* */ 71 /* **************************************************************** */ 72 73 /* Search forwards or backwards for the text delimited by BINDING. 74 The search is forwards if BINDING->start is greater than BINDING->end. */ 75 long 76 search (char *string, SEARCH_BINDING *binding) 77 { 78 long result; 79 80 /* If the search is backwards, then search backwards, otherwise forwards. */ 81 if (binding->start > binding->end) 82 result = search_backward (string, binding); 83 else 84 result = search_forward (string, binding); 85 86 return (result); 87 } 88 89 /* Search forwards for STRING through the text delimited in BINDING. */ 90 long 91 search_forward (char *string, SEARCH_BINDING *binding) 92 { 93 register int c, i, len; 94 register char *buff, *end; 95 char *alternate = (char *)NULL; 96 97 len = strlen (string); 98 99 /* We match characters in the search buffer against STRING and ALTERNATE. 100 ALTERNATE is a case reversed version of STRING; this is cheaper than 101 case folding each character before comparison. Alternate is only 102 used if the case folding bit is turned on in the passed BINDING. */ 103 104 if (binding->flags & S_FoldCase) 105 { 106 alternate = xstrdup (string); 107 108 for (i = 0; i < len; i++) 109 { 110 if (islower (alternate[i])) 111 alternate[i] = toupper (alternate[i]); 112 else if (isupper (alternate[i])) 113 alternate[i] = tolower (alternate[i]); 114 } 115 } 116 117 buff = binding->buffer + binding->start; 118 end = binding->buffer + binding->end + 1; 119 120 while (buff < (end - len)) 121 { 122 for (i = 0; i < len; i++) 123 { 124 c = buff[i]; 125 126 if ((c != string[i]) && (!alternate || c != alternate[i])) 127 break; 128 } 129 130 if (!string[i]) 131 { 132 if (alternate) 133 free (alternate); 134 if (binding->flags & S_SkipDest) 135 buff += len; 136 return ((long) (buff - binding->buffer)); 137 } 138 139 buff++; 140 } 141 142 if (alternate) 143 free (alternate); 144 145 return ((long) -1); 146 } 147 148 /* Search for STRING backwards through the text delimited in BINDING. */ 149 long 150 search_backward (char *input_string, SEARCH_BINDING *binding) 151 { 152 register int c, i, len; 153 register char *buff, *end; 154 char *string; 155 char *alternate = (char *)NULL; 156 157 len = strlen (input_string); 158 159 /* Reverse the characters in the search string. */ 160 string = (char *)xmalloc (1 + len); 161 for (c = 0, i = len - 1; input_string[c]; c++, i--) 162 string[i] = input_string[c]; 163 164 string[c] = '\0'; 165 166 /* We match characters in the search buffer against STRING and ALTERNATE. 167 ALTERNATE is a case reversed version of STRING; this is cheaper than 168 case folding each character before comparison. ALTERNATE is only 169 used if the case folding bit is turned on in the passed BINDING. */ 170 171 if (binding->flags & S_FoldCase) 172 { 173 alternate = xstrdup (string); 174 175 for (i = 0; i < len; i++) 176 { 177 if (islower (alternate[i])) 178 alternate[i] = toupper (alternate[i]); 179 else if (isupper (alternate[i])) 180 alternate[i] = tolower (alternate[i]); 181 } 182 } 183 184 buff = binding->buffer + binding->start - 1; 185 end = binding->buffer + binding->end; 186 187 while (buff > (end + len)) 188 { 189 for (i = 0; i < len; i++) 190 { 191 c = *(buff - i); 192 193 if (c != string[i] && (!alternate || c != alternate[i])) 194 break; 195 } 196 197 if (!string[i]) 198 { 199 free (string); 200 if (alternate) 201 free (alternate); 202 203 if (binding->flags & S_SkipDest) 204 buff -= len; 205 return ((long) (1 + (buff - binding->buffer))); 206 } 207 208 buff--; 209 } 210 211 free (string); 212 if (alternate) 213 free (alternate); 214 215 return ((long) -1); 216 } 217 218 /* Find STRING in LINE, returning the offset of the end of the string. 219 Return an offset of -1 if STRING does not appear in LINE. The search 220 is bound by the end of the line (i.e., either NEWLINE or 0). */ 221 int 222 string_in_line (char *string, char *line) 223 { 224 register int end; 225 SEARCH_BINDING binding; 226 227 /* Find the end of the line. */ 228 for (end = 0; line[end] && line[end] != '\n'; end++); 229 230 /* Search for STRING within these confines. */ 231 binding.buffer = line; 232 binding.start = 0; 233 binding.end = end; 234 binding.flags = S_FoldCase | S_SkipDest; 235 236 return (search_forward (string, &binding)); 237 } 238 239 /* Return non-zero if STRING is the first text to appear at BINDING. */ 240 int 241 looking_at (char *string, SEARCH_BINDING *binding) 242 { 243 long search_end; 244 245 search_end = search (string, binding); 246 247 /* If the string was not found, SEARCH_END is -1. If the string was found, 248 but not right away, SEARCH_END is != binding->start. Otherwise, the 249 string was found at binding->start. */ 250 return (search_end == binding->start); 251 } 252 253 /* **************************************************************** */ 255 /* */ 256 /* Small String Searches */ 257 /* */ 258 /* **************************************************************** */ 259 260 /* Function names that start with "skip" are passed a string, and return 261 an offset from the start of that string. Function names that start 262 with "find" are passed a SEARCH_BINDING, and return an absolute position 263 marker of the item being searched for. "Find" functions return a value 264 of -1 if the item being looked for couldn't be found. */ 265 266 /* Return the index of the first non-whitespace character in STRING. */ 267 int 268 skip_whitespace (char *string) 269 { 270 register int i; 271 272 for (i = 0; string && whitespace (string[i]); i++); 273 return (i); 274 } 275 276 /* Return the index of the first non-whitespace or newline character in 277 STRING. */ 278 int 279 skip_whitespace_and_newlines (char *string) 280 { 281 register int i; 282 283 for (i = 0; string && whitespace_or_newline (string[i]); i++); 284 return (i); 285 } 286 287 /* Return the index of the first whitespace character in STRING. */ 288 int 289 skip_non_whitespace (char *string) 290 { 291 register int i; 292 293 for (i = 0; string && string[i] && !whitespace (string[i]); i++); 294 return (i); 295 } 296 297 /* Return the index of the first non-node character in STRING. Note that 298 this function contains quite a bit of hair to ignore periods in some 299 special cases. This is because we here at GNU ship some info files which 300 contain nodenames that contain periods. No such nodename can start with 301 a period, or continue with whitespace, newline, or ')' immediately following 302 the period. If second argument NEWLINES_OKAY is non-zero, newlines should 303 be skipped while parsing out the nodename specification. */ 304 int 305 skip_node_characters (char *string, int newlines_okay) 306 { 307 register int c, i = 0; 308 int paren_seen = 0; 309 int paren = 0; 310 311 /* Handle special case. This is when another function has parsed out the 312 filename component of the node name, and we just want to parse out the 313 nodename proper. In that case, a period at the start of the nodename 314 indicates an empty nodename. */ 315 if (string && *string == '.') 316 return (0); 317 318 if (string && *string == '(') 319 { 320 paren++; 321 paren_seen++; 322 i++; 323 } 324 325 for (; string && (c = string[i]); i++) 326 { 327 if (paren) 328 { 329 if (c == '(') 330 paren++; 331 else if (c == ')') 332 paren--; 333 334 continue; 335 } 336 337 /* If the character following the close paren is a space or period, 338 then this node name has no more characters associated with it. */ 339 if (c == '\t' || 340 c == ',' || 341 c == INFO_TAGSEP || 342 ((!newlines_okay) && (c == '\n')) || 343 ((paren_seen && string[i - 1] == ')') && 344 (c == ' ' || c == '.')) || 345 (c == '.' && 346 ( 347 #if 0 348 /* This test causes a node name ending in a period, like `This.', not to 349 be found. The trailing . is stripped. This occurs in the jargon 350 file (`I see no X here.' is a node name). */ 351 (!string[i + 1]) || 352 #endif 353 (whitespace_or_newline (string[i + 1])) || 354 (string[i + 1] == ')')))) 355 break; 356 } 357 return (i); 358 } 359 360 361 /* **************************************************************** */ 363 /* */ 364 /* Searching FILE_BUFFER's */ 365 /* */ 366 /* **************************************************************** */ 367 368 /* Return the absolute position of the first occurence of a node separator in 369 BINDING-buffer. The search starts at BINDING->start. Return -1 if no node 370 separator was found. */ 371 long 372 find_node_separator (SEARCH_BINDING *binding) 373 { 374 register long i; 375 char *body; 376 377 body = binding->buffer; 378 379 /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are 380 optional, but the DELETE and NEWLINE are not. This separator holds 381 true for all separated elements in an Info file, including the tags 382 table (if present) and the indirect tags table (if present). */ 383 for (i = binding->start; i < binding->end - 1; i++) 384 if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) && 385 (body[i + 2] == '\n' || 386 (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) || 387 ((body[i] == INFO_COOKIE) && 388 (body[i + 1] == '\n' || 389 (body[i + 1] == INFO_FF && body[i + 2] == '\n')))) 390 return (i); 391 return (-1); 392 } 393 394 /* Return the length of the node separator characters that BODY is 395 currently pointing at. */ 396 int 397 skip_node_separator (char *body) 398 { 399 register int i; 400 401 i = 0; 402 403 if (body[i] == INFO_FF) 404 i++; 405 406 if (body[i++] != INFO_COOKIE) 407 return (0); 408 409 if (body[i] == INFO_FF) 410 i++; 411 412 if (body[i++] != '\n') 413 return (0); 414 415 return (i); 416 } 417 418 /* Return the number of characters from STRING to the start of 419 the next line. */ 420 int 421 skip_line (char *string) 422 { 423 register int i; 424 425 for (i = 0; string && string[i] && string[i] != '\n'; i++); 426 427 if (string[i] == '\n') 428 i++; 429 430 return (i); 431 } 432 433 /* Return the absolute position of the beginning of a tags table in this 434 binding starting the search at binding->start. */ 435 long 436 find_tags_table (SEARCH_BINDING *binding) 437 { 438 SEARCH_BINDING tmp_search; 439 long position; 440 441 tmp_search.buffer = binding->buffer; 442 tmp_search.start = binding->start; 443 tmp_search.end = binding->end; 444 tmp_search.flags = S_FoldCase; 445 446 while ((position = find_node_separator (&tmp_search)) != -1 ) 447 { 448 tmp_search.start = position; 449 tmp_search.start += skip_node_separator (tmp_search.buffer 450 + tmp_search.start); 451 452 if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search)) 453 return (position); 454 } 455 return (-1); 456 } 457 458 /* Return the absolute position of the node named NODENAME in BINDING. 459 This is a brute force search, and we wish to avoid it when possible. 460 This function is called when a tag (indirect or otherwise) doesn't 461 really point to the right node. It returns the absolute position of 462 the separator preceding the node. */ 463 long 464 find_node_in_binding (char *nodename, SEARCH_BINDING *binding) 465 { 466 long position; 467 int offset, namelen; 468 SEARCH_BINDING tmp_search; 469 470 namelen = strlen (nodename); 471 472 tmp_search.buffer = binding->buffer; 473 tmp_search.start = binding->start; 474 tmp_search.end = binding->end; 475 tmp_search.flags = 0; 476 477 while ((position = find_node_separator (&tmp_search)) != -1) 478 { 479 tmp_search.start = position; 480 tmp_search.start += skip_node_separator 481 (tmp_search.buffer + tmp_search.start); 482 483 offset = string_in_line 484 (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start); 485 486 if (offset == -1) 487 continue; 488 489 tmp_search.start += offset; 490 tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start); 491 offset = skip_node_characters 492 (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES); 493 494 /* Notice that this is an exact match. You cannot grovel through 495 the buffer with this function looking for random nodes. */ 496 if ((offset == namelen) && 497 (tmp_search.buffer[tmp_search.start] == nodename[0]) && 498 (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0)) 499 return (position); 500 } 501 return (-1); 502 } 503