look.c revision 1.4 1 /*-
2 * Copyright (c) 1991 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * David Hitz of Auspex Systems, Inc.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #ifndef lint
38 char copyright[] =
39 "@(#) Copyright (c) 1991 The Regents of the University of California.\n\
40 All rights reserved.\n";
41 #endif /* not lint */
42
43 #ifndef lint
44 /*static char sccsid[] = "from: @(#)look.c 5.1 (Berkeley) 7/21/91";*/
45 static char rcsid[] = "$Id: look.c,v 1.4 1993/10/13 18:34:30 jtc Exp $";
46 #endif /* not lint */
47
48 /*
49 * look -- find lines in a sorted list.
50 *
51 * The man page said that TABs and SPACEs participate in -d comparisons.
52 * In fact, they were ignored. This implements historic practice, not
53 * the manual page.
54 */
55
56 #include <sys/types.h>
57 #include <sys/mman.h>
58 #include <sys/stat.h>
59 #include <errno.h>
60 #include <fcntl.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <ctype.h>
65 #include <unistd.h>
66 #include "pathnames.h"
67
68 /*
69 * FOLD and DICT convert characters to a normal form for comparison,
70 * according to the user specified flags.
71 *
72 * DICT expects integers because it uses a non-character value to
73 * indicate a character which should not participate in comparisons.
74 */
75 #define EQUAL 0
76 #define GREATER 1
77 #define LESS (-1)
78 #define NO_COMPARE (-2)
79
80 #define FOLD(c) (isascii(c) && isupper(c) ? tolower(c) : (c))
81 #define DICT(c) (isascii(c) && isalnum(c) ? (c) : NO_COMPARE)
82
83 int dflag, fflag;
84
85 char *binary_search __P((char *, char *, char *));
86 int compare __P((char *, char *, char *));
87 void err __P((const char *fmt, ...));
88 char *linear_search __P((char *, char *, char *));
89 int look __P((char *, char *, char *));
90 void print_from __P((char *, char *, char *));
91 static void usage __P((void));
92
93 main(argc, argv)
94 int argc;
95 char *argv[];
96 {
97 struct stat sb;
98 int ch, fd;
99 char *back, *file, *front, *string;
100
101 file = _PATH_WORDS;
102 while ((ch = getopt(argc, argv, "df")) != EOF)
103 switch(ch) {
104 case 'd':
105 dflag = 1;
106 break;
107 case 'f':
108 fflag = 1;
109 break;
110 case '?':
111 default:
112 usage();
113 }
114 argc -= optind;
115 argv += optind;
116
117 switch (argc) {
118 case 2: /* Don't set -df for user. */
119 string = *argv++;
120 file = *argv;
121 break;
122 case 1: /* But set -df by default. */
123 dflag = fflag = 1;
124 string = *argv;
125 break;
126 default:
127 usage();
128 }
129
130 if ((fd = open(file, O_RDONLY, 0)) < 0 || fstat(fd, &sb) ||
131 (front = mmap(NULL, sb.st_size, PROT_READ, MAP_FILE, fd,
132 (off_t)0)) == NULL)
133 err("%s: %s", file, strerror(errno));
134 back = front + sb.st_size;
135 exit(look(string, front, back));
136 }
137
138 look(string, front, back)
139 char *string, *front, *back;
140 {
141 register int ch;
142 register char *readp, *writep;
143
144 /* Reformat string string to avoid doing it multiple times later. */
145 for (readp = writep = string; ch = *readp++;) {
146 if (fflag)
147 ch = FOLD(ch);
148 if (dflag)
149 ch = DICT(ch);
150 if (ch != NO_COMPARE)
151 *(writep++) = ch;
152 }
153 *writep = '\0';
154
155 front = binary_search(string, front, back);
156 front = linear_search(string, front, back);
157
158 if (front)
159 print_from(string, front, back);
160 return (front ? 0 : 1);
161 }
162
163
164 /*
165 * Binary search for "string" in memory between "front" and "back".
166 *
167 * This routine is expected to return a pointer to the start of a line at
168 * *or before* the first word matching "string". Relaxing the constraint
169 * this way simplifies the algorithm.
170 *
171 * Invariants:
172 * front points to the beginning of a line at or before the first
173 * matching string.
174 *
175 * back points to the beginning of a line at or after the first
176 * matching line.
177 *
178 * Base of the Invariants.
179 * front = NULL;
180 * back = EOF;
181 *
182 * Advancing the Invariants:
183 *
184 * p = first newline after halfway point from front to back.
185 *
186 * If the string at "p" is not greater than the string to match,
187 * p is the new front. Otherwise it is the new back.
188 *
189 * Termination:
190 *
191 * The definition of the routine allows it return at any point,
192 * since front is always at or before the line to print.
193 *
194 * In fact, it returns when the chosen "p" equals "back". This
195 * implies that there exists a string is least half as long as
196 * (back - front), which in turn implies that a linear search will
197 * be no more expensive than the cost of simply printing a string or two.
198 *
199 * Trying to continue with binary search at this point would be
200 * more trouble than it's worth.
201 */
202 #define SKIP_PAST_NEWLINE(p, back) \
203 while (p < back && *p++ != '\n');
204
205 char *
206 binary_search(string, front, back)
207 register char *string, *front, *back;
208 {
209 register char *p;
210
211 p = front + (back - front) / 2;
212 SKIP_PAST_NEWLINE(p, back);
213
214 while (p != back) {
215 if (compare(string, p, back) == GREATER)
216 front = p;
217 else
218 back = p;
219 p = front + (back - front) / 2;
220 SKIP_PAST_NEWLINE(p, back);
221 }
222 return (front);
223 }
224
225 /*
226 * Find the first line that starts with string, linearly searching from front
227 * to back.
228 *
229 * Return NULL for no such line.
230 *
231 * This routine assumes:
232 *
233 * o front points at the first character in a line.
234 * o front is before or at the first line to be printed.
235 */
236 char *
237 linear_search(string, front, back)
238 char *string, *front, *back;
239 {
240 while (front < back) {
241 switch (compare(string, front, back)) {
242 case EQUAL: /* Found it. */
243 return (front);
244 break;
245 case LESS: /* No such string. */
246 return (NULL);
247 break;
248 case GREATER: /* Keep going. */
249 break;
250 }
251 SKIP_PAST_NEWLINE(front, back);
252 }
253 return (NULL);
254 }
255
256 /*
257 * Print as many lines as match string, starting at front.
258 */
259 void
260 print_from(string, front, back)
261 register char *string, *front, *back;
262 {
263 for (; front < back && compare(string, front, back) == EQUAL; ++front) {
264 for (; front < back && *front != '\n'; ++front)
265 if (putchar(*front) == EOF)
266 err("stdout: %s", strerror(errno));
267 if (putchar('\n') == EOF)
268 err("stdout: %s", strerror(errno));
269 }
270 }
271
272 /*
273 * Return LESS, GREATER, or EQUAL depending on how the string1 compares with
274 * string2 (s1 ??? s2).
275 *
276 * o Matches up to len(s1) are EQUAL.
277 * o Matches up to len(s2) are GREATER.
278 *
279 * Compare understands about the -f and -d flags, and treats comparisons
280 * appropriately.
281 *
282 * The string "s1" is null terminated. The string s2 is '\n' terminated (or
283 * "back" terminated).
284 */
285 int
286 compare(s1, s2, back)
287 register char *s1, *s2, *back;
288 {
289 register int ch;
290
291 for (; *s1 && s2 < back && *s2 != '\n'; ++s1, ++s2) {
292 ch = *s2;
293 if (fflag)
294 ch = FOLD(ch);
295 if (dflag)
296 ch = DICT(ch);
297
298 if (ch == NO_COMPARE) {
299 ++s2; /* Ignore character in comparison. */
300 continue;
301 }
302 if (*s1 != ch)
303 return (*s1 < ch ? LESS : GREATER);
304 }
305 return (*s1 ? GREATER : EQUAL);
306 }
307
308 static void
309 usage()
310 {
311 (void)fprintf(stderr, "usage: look [-df] string [file]\n");
312 exit(2);
313 }
314
315 #if __STDC__
316 #include <stdarg.h>
317 #else
318 #include <varargs.h>
319 #endif
320
321 void
322 #if __STDC__
323 err(const char *fmt, ...)
324 #else
325 err(fmt, va_alist)
326 char *fmt;
327 va_dcl
328 #endif
329 {
330 va_list ap;
331 #if __STDC__
332 va_start(ap, fmt);
333 #else
334 va_start(ap);
335 #endif
336 (void)fprintf(stderr, "look: ");
337 (void)vfprintf(stderr, fmt, ap);
338 va_end(ap);
339 (void)fprintf(stderr, "\n");
340 exit(2);
341 /* NOTREACHED */
342 }
343