look.c revision 1.3 1 /*-
2 * Copyright (c) 1991 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * David Hitz of Auspex Systems, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #ifndef lint
38 char copyright[] =
39 "@(#) Copyright (c) 1991 The Regents of the University of California.\n\
40 All rights reserved.\n";
41 #endif /* not lint */
42
43 #ifndef lint
44 /*static char sccsid[] = "from: @(#)look.c 5.1 (Berkeley) 7/21/91";*/
45 static char rcsid[] = "$Id: look.c,v 1.3 1993/08/01 18:13:24 mycroft Exp $";
46 #endif /* not lint */
47
48 /*
49 * look -- find lines in a sorted list.
50 *
51 * The man page said that TABs and SPACEs participate in -d comparisons.
52 * In fact, they were ignored. This implements historic practice, not
53 * the manual page.
54 */
55
56 #include <sys/types.h>
57 #include <sys/mman.h>
58 #include <sys/stat.h>
59 #include <errno.h>
60 #include <fcntl.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <ctype.h>
65 #include "pathnames.h"
66
67 /*
68 * FOLD and DICT convert characters to a normal form for comparison,
69 * according to the user specified flags.
70 *
71 * DICT expects integers because it uses a non-character value to
72 * indicate a character which should not participate in comparisons.
73 */
74 #define EQUAL 0
75 #define GREATER 1
76 #define LESS (-1)
77 #define NO_COMPARE (-2)
78
79 #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
80 #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
81
82 int dflag, fflag;
83
84 char *binary_search __P((char *, char *, char *));
85 int compare __P((char *, char *, char *));
86 void err __P((const char *fmt, ...));
87 char *linear_search __P((char *, char *, char *));
88 int look __P((char *, char *, char *));
89 void print_from __P((char *, char *, char *));
90 static void usage __P((void));
91
92 main(argc, argv)
93 int argc;
94 char *argv[];
95 {
96 struct stat sb;
97 int ch, fd;
98 char *back, *file, *front, *string;
99
100 file = _PATH_WORDS;
101 while ((ch = getopt(argc, argv, "df")) != EOF)
102 switch(ch) {
103 case 'd':
104 dflag = 1;
105 break;
106 case 'f':
107 fflag = 1;
108 break;
109 case '?':
110 default:
111 usage();
112 }
113 argc -= optind;
114 argv += optind;
115
116 switch (argc) {
117 case 2: /* Don't set -df for user. */
118 string = *argv++;
119 file = *argv;
120 break;
121 case 1: /* But set -df by default. */
122 dflag = fflag = 1;
123 string = *argv;
124 break;
125 default:
126 usage();
127 }
128
129 if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb) ||
130 (front = mmap(NULL, sb.st_size, PROT_READ, MAP_FILE, fd,
131 (off_t)0)) == NULL)
132 err("%s: %s", file, strerror(errno));
133 back = front + sb.st_size;
134 exit(look(string, front, back));
135 }
136
137 look(string, front, back)
138 char *string, *front, *back;
139 {
140 register int ch;
141 register char *readp, *writep;
142
143 /* Reformat string string to avoid doing it multiple times later. */
144 for (readp = writep = string; ch = *readp++;) {
145 if (fflag)
146 ch = FOLD(ch);
147 if (dflag)
148 ch = DICT(ch);
149 if (ch != NO_COMPARE)
150 *(writep++) = ch;
151 }
152 *writep = '\0';
153
154 front = binary_search(string, front, back);
155 front = linear_search(string, front, back);
156
157 if (front)
158 print_from(string, front, back);
159 return (front ? 0 : 1);
160 }
161
162
163 /*
164 * Binary search for "string" in memory between "front" and "back".
165 *
166 * This routine is expected to return a pointer to the start of a line at
167 * *or before* the first word matching "string". Relaxing the constraint
168 * this way simplifies the algorithm.
169 *
170 * Invariants:
171 * front points to the beginning of a line at or before the first
172 * matching string.
173 *
174 * back points to the beginning of a line at or after the first
175 * matching line.
176 *
177 * Base of the Invariants.
178 * front = NULL;
179 * back = EOF;
180 *
181 * Advancing the Invariants:
182 *
183 * p = first newline after halfway point from front to back.
184 *
185 * If the string at "p" is not greater than the string to match,
186 * p is the new front. Otherwise it is the new back.
187 *
188 * Termination:
189 *
190 * The definition of the routine allows it return at any point,
191 * since front is always at or before the line to print.
192 *
193 * In fact, it returns when the chosen "p" equals "back". This
194 * implies that there exists a string is least half as long as
195 * (back - front), which in turn implies that a linear search will
196 * be no more expensive than the cost of simply printing a string or two.
197 *
198 * Trying to continue with binary search at this point would be
199 * more trouble than it's worth.
200 */
201 #define SKIP_PAST_NEWLINE(p, back) \
202 while (p < back && *p++ != '\n');
203
204 char *
205 binary_search(string, front, back)
206 register char *string, *front, *back;
207 {
208 register char *p;
209
210 p = front + (back - front) / 2;
211 SKIP_PAST_NEWLINE(p, back);
212
213 while (p != back) {
214 if (compare(string, p, back) == GREATER)
215 front = p;
216 else
217 back = p;
218 p = front + (back - front) / 2;
219 SKIP_PAST_NEWLINE(p, back);
220 }
221 return (front);
222 }
223
224 /*
225 * Find the first line that starts with string, linearly searching from front
226 * to back.
227 *
228 * Return NULL for no such line.
229 *
230 * This routine assumes:
231 *
232 * o front points at the first character in a line.
233 * o front is before or at the first line to be printed.
234 */
235 char *
236 linear_search(string, front, back)
237 char *string, *front, *back;
238 {
239 while (front < back) {
240 switch (compare(string, front, back)) {
241 case EQUAL: /* Found it. */
242 return (front);
243 break;
244 case LESS: /* No such string. */
245 return (NULL);
246 break;
247 case GREATER: /* Keep going. */
248 break;
249 }
250 SKIP_PAST_NEWLINE(front, back);
251 }
252 return (NULL);
253 }
254
255 /*
256 * Print as many lines as match string, starting at front.
257 */
258 void
259 print_from(string, front, back)
260 register char *string, *front, *back;
261 {
262 for (; front < back && compare(string, front, back) == EQUAL; ++front) {
263 for (; front < back && *front != '\n'; ++front)
264 if (putchar(*front) == EOF)
265 err("stdout: %s", strerror(errno));
266 if (putchar('\n') == EOF)
267 err("stdout: %s", strerror(errno));
268 }
269 }
270
271 /*
272 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
273 * string2 (s1 ??? s2).
274 *
275 * o Matches up to len(s1) are EQUAL.
276 * o Matches up to len(s2) are GREATER.
277 *
278 * Compare understands about the -f and -d flags, and treats comparisons
279 * appropriately.
280 *
281 * The string "s1" is null terminated. The string s2 is '\n' terminated (or
282 * "back" terminated).
283 */
284 int
285 compare(s1, s2, back)
286 register char *s1, *s2, *back;
287 {
288 register int ch;
289
290 for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
291 ch = *s2;
292 if (fflag)
293 ch = FOLD(ch);
294 if (dflag)
295 ch = DICT(ch);
296
297 if (ch == NO_COMPARE) {
298 ++s2; /* Ignore character in comparison. */
299 continue;
300 }
301 if (*s1 != ch)
302 return (*s1 < ch ? LESS : GREATER);
303 }
304 return (*s1 ? GREATER : EQUAL);
305 }
306
307 static void
308 usage()
309 {
310 (void)fprintf(stderr, "usage: look [-df] string [file]\n");
311 exit(2);
312 }
313
314 #if __STDC__
315 #include <stdarg.h>
316 #else
317 #include <varargs.h>
318 #endif
319
320 void
321 #if __STDC__
322 err(const char *fmt, ...)
323 #else
324 err(fmt, va_alist)
325 char *fmt;
326 va_dcl
327 #endif
328 {
329 va_list ap;
330 #if __STDC__
331 va_start(ap, fmt);
332 #else
333 va_start(ap);
334 #endif
335 (void)fprintf(stderr, "look: ");
336 (void)vfprintf(stderr, fmt, ap);
337 va_end(ap);
338 (void)fprintf(stderr, "\n");
339 exit(2);
340 /* NOTREACHED */
341 }
342