uniq.c revision 1.4 1 /*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Case Larsen.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #ifndef lint
38 char copyright[] =
39 "@(#) Copyright (c) 1989 The Regents of the University of California.\n\
40 All rights reserved.\n";
41 #endif /* not lint */
42
43 #ifndef lint
44 static char sccsid[] = "@(#)uniq.c 5.4 (Berkeley) 1/9/92";
45 #endif /* not lint */
46
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
52
/* Size in bytes of each of the two line buffers that main() passes to fgets(). */
#define MAXLINELEN (8 * 1024)

/* Option flags: set to 1 by -c, -d and -u respectively. */
int cflag, dflag, uflag;
/*
 * numchars and numfields hold the -s and -f arguments; repeats counts how
 * many further lines matched the line currently held as the previous one.
 */
int numchars, numfields, repeats;

/*
 * Forward declarations of the functions defined below.
 * NOTE(review): __P is not defined in this file; presumably a system header
 * supplies it -- confirm on the target platform.
 */
void err __P((const char *, ...));
FILE *file __P((char *, char *));
void show __P((FILE *, char *));
char *skip __P((char *));
void obsolete __P((char *[]));
void usage __P((void));
64
65 int
66 main (argc, argv)
67 int argc;
68 char *argv[];
69 {
70 register char *t1, *t2;
71 FILE *ifp, *ofp;
72 int ch;
73 char *prevline, *thisline, *p;
74
75 obsolete(argv);
76 while ((ch = getopt(argc, argv, "-cdf:s:u")) != EOF)
77 switch (ch) {
78 case '-':
79 --optind;
80 goto done;
81 case 'c':
82 cflag = 1;
83 break;
84 case 'd':
85 dflag = 1;
86 break;
87 case 'f':
88 numfields = strtol(optarg, &p, 10);
89 if (numfields < 0 || *p)
90 err("illegal field skip value: %s", optarg);
91 break;
92 case 's':
93 numchars = strtol(optarg, &p, 10);
94 if (numchars < 0 || *p)
95 err("illegal character skip value: %s", optarg);
96 break;
97 case 'u':
98 uflag = 1;
99 break;
100 case '?':
101 default:
102 usage();
103 }
104
105 done: argc -= optind;
106 argv +=optind;
107
108 /* If no flags are set, default is -d -u. */
109 if (cflag) {
110 if (dflag || uflag)
111 usage();
112 } else if (!dflag && !uflag)
113 dflag = uflag = 1;
114
115 switch(argc) {
116 case 0:
117 ifp = stdin;
118 ofp = stdout;
119 break;
120 case 1:
121 ifp = file(argv[0], "r");
122 ofp = stdout;
123 break;
124 case 2:
125 ifp = file(argv[0], "r");
126 ofp = file(argv[1], "w");
127 break;
128 default:
129 usage();
130 }
131
132 prevline = malloc(MAXLINELEN);
133 thisline = malloc(MAXLINELEN);
134 if (!fgets(prevline, MAXLINELEN, ifp))
135 exit(0);
136
137 while (fgets(thisline, MAXLINELEN, ifp)) {
138 /* If requested get the chosen fields + character offsets. */
139 if (numfields || numchars) {
140 t1 = skip(thisline);
141 t2 = skip(prevline);
142 } else {
143 t1 = thisline;
144 t2 = prevline;
145 }
146
147 /* If different, print; set previous to new value. */
148 if (strcmp(t1, t2)) {
149 show(ofp, prevline);
150 t1 = prevline;
151 prevline = thisline;
152 thisline = t1;
153 repeats = 0;
154 } else
155 ++repeats;
156 }
157 show(ofp, prevline);
158 exit(0);
159 }
160
161 /*
162 * show --
163 * Output a line depending on the flags and number of repetitions
164 * of the line.
165 */
166 void
167 show(ofp, str)
168 FILE *ofp;
169 char *str;
170 {
171 if (cflag)
172 (void)fprintf(ofp, "%4d %s", repeats + 1, str);
173 if (dflag && repeats || uflag && !repeats)
174 (void)fprintf(ofp, "%s", str);
175 }
176
177 char *
178 skip(str)
179 register char *str;
180 {
181 register int infield, nchars, nfields;
182
183 for (nfields = numfields, infield = 0; nfields && *str; ++str)
184 if (isspace(*str)) {
185 if (infield) {
186 infield = 0;
187 --nfields;
188 }
189 } else if (!infield)
190 infield = 1;
191 for (nchars = numchars; nchars-- && *str; ++str);
192 return(str);
193 }
194
/*
 * file --
 *	Open name with the given fopen() mode and return the stream.  If
 *	the open fails, the name and the strerror() text for errno are
 *	reported through err().
 */
FILE *
file(name, mode)
	char *name;
	char *mode;
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream != NULL)
		return(stream);

	err("%s: %s", name, strerror(errno));
	return(stream);
}
205
/*
 * obsolete --
 *	Rewrite old-style options in argv in place: "-N" becomes "-fN" and
 *	"+N" becomes "-sN", where N starts with a digit.  Scanning stops at
 *	the first argument that begins with "--" or with neither '-' nor
 *	'+'.  Arguments beginning with '-' or '+' but not followed by a
 *	digit are left untouched.
 */
void
obsolete(argv)
	char *argv[];
{
	size_t len;
	char *ap, *p, *start;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		/*
		 * The <ctype.h> functions require a value representable as
		 * unsigned char (or EOF); a plain char may be negative.
		 */
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for dash,
		 * new option and argument.  The new string replaces the argv
		 * entry and is not freed here.
		 */
		len = strlen(ap);
		if ((start = p = malloc(len + 3)) == NULL)
			err("%s", strerror(errno));
		*p++ = '-';
		*p++ = ap[0] == '+' ? 's' : 'f';
		(void)strcpy(p, ap + 1);
		*argv = start;
	}
}
235
/*
 * usage --
 *	Write the command synopsis to stderr and exit with status 1.
 */
void
usage()
{
	static char synopsis[] =
	    "usage: uniq [-c | -du] [-f fields] [-s chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
243
244 #if __STDC__
245 #include <stdarg.h>
246 #else
247 #include <varargs.h>
248 #endif
249
/*
 * err --
 *	Write "uniq: ", the printf-style message built from fmt and its
 *	arguments, and a newline to stderr, then exit with status 1.
 */
void
#if __STDC__
err(const char *fmt, ...)
#else
err(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

	(void)fputs("uniq: ", stderr);
#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);
	(void)fputc('\n', stderr);
	exit(1);
	/* NOTREACHED */
}
272