/*	$NetBSD: uniq.c,v 1.10 2003/07/14 09:29:21 itojun Exp $	*/
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Case Larsen.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 __COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
42 The Regents of the University of California. All rights reserved.\n");
43 #endif /* not lint */
44
45 #ifndef lint
46 #if 0
47 static char sccsid[] = "@(#)uniq.c 8.3 (Berkeley) 5/4/95";
48 #endif
49 __RCSID("$NetBSD: uniq.c,v 1.10 2003/07/14 09:29:21 itojun Exp $");
50 #endif /* not lint */
51
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
59
/* Size, in bytes, of each of the two line buffers allocated by main(). */
#define	MAXLINELEN	(8 * 1024)

/*
 * Output-mode flags set from -c, -d and -u.  When none of them is given,
 * main() turns on both dflag and uflag.
 */
int cflag;
int dflag;
int uflag;

int numfields;		/* -f argument: leading fields skip() passes over */
int numchars;		/* -s argument: characters skip() passes over */
int repeats;		/* extra occurrences seen of the current line */

int	 main __P((int, char **));
FILE	*file __P((char *, char *));
void	 obsolete __P((char *[]));
void	 show __P((FILE *, char *));
char	*skip __P((char *));
void	 usage __P((void));
71
72 int
73 main (argc, argv)
74 int argc;
75 char *argv[];
76 {
77 char *t1, *t2;
78 FILE *ifp, *ofp;
79 int ch;
80 char *prevline, *thisline, *p;
81
82 ifp = ofp = NULL;
83 obsolete(argv);
84 while ((ch = getopt(argc, argv, "-cdf:s:u")) != -1)
85 switch (ch) {
86 case '-':
87 --optind;
88 goto done;
89 case 'c':
90 cflag = 1;
91 break;
92 case 'd':
93 dflag = 1;
94 break;
95 case 'f':
96 numfields = strtol(optarg, &p, 10);
97 if (numfields < 0 || *p)
98 errx(1, "illegal field skip value: %s", optarg);
99 break;
100 case 's':
101 numchars = strtol(optarg, &p, 10);
102 if (numchars < 0 || *p)
103 errx(1, "illegal character skip value: %s",
104 optarg);
105 break;
106 case 'u':
107 uflag = 1;
108 break;
109 case '?':
110 default:
111 usage();
112 }
113
114 done: argc -= optind;
115 argv +=optind;
116
117 /* If no flags are set, default is -d -u. */
118 if (cflag) {
119 if (dflag || uflag)
120 usage();
121 } else if (!dflag && !uflag)
122 dflag = uflag = 1;
123
124 switch(argc) {
125 case 0:
126 ifp = stdin;
127 ofp = stdout;
128 break;
129 case 1:
130 ifp = file(argv[0], "r");
131 ofp = stdout;
132 break;
133 case 2:
134 ifp = file(argv[0], "r");
135 ofp = file(argv[1], "w");
136 break;
137 default:
138 usage();
139 }
140
141 prevline = malloc(MAXLINELEN);
142 thisline = malloc(MAXLINELEN);
143 if (prevline == NULL || thisline == NULL)
144 err(1, "malloc");
145
146 if (fgets(prevline, MAXLINELEN, ifp) == NULL)
147 exit(0);
148
149 while (fgets(thisline, MAXLINELEN, ifp)) {
150 /* If requested get the chosen fields + character offsets. */
151 if (numfields || numchars) {
152 t1 = skip(thisline);
153 t2 = skip(prevline);
154 } else {
155 t1 = thisline;
156 t2 = prevline;
157 }
158
159 /* If different, print; set previous to new value. */
160 if (strcmp(t1, t2)) {
161 show(ofp, prevline);
162 t1 = prevline;
163 prevline = thisline;
164 thisline = t1;
165 repeats = 0;
166 } else
167 ++repeats;
168 }
169 show(ofp, prevline);
170 exit(0);
171 }
172
173 /*
174 * show --
175 * Output a line depending on the flags and number of repetitions
176 * of the line.
177 */
178 void
179 show(ofp, str)
180 FILE *ofp;
181 char *str;
182 {
183
184 if (cflag && *str)
185 (void)fprintf(ofp, "%4d %s", repeats + 1, str);
186 if ((dflag && repeats) || (uflag && !repeats))
187 (void)fprintf(ofp, "%s", str);
188 }
189
190 char *
191 skip(str)
192 char *str;
193 {
194 int infield, nchars, nfields;
195
196 for (nfields = numfields, infield = 0; nfields && *str; ++str)
197 if (isspace((unsigned char)*str)) {
198 if (infield) {
199 infield = 0;
200 --nfields;
201 }
202 } else if (!infield)
203 infield = 1;
204 for (nchars = numchars; nchars-- && *str; ++str);
205 return(str);
206 }
207
/*
 * file --
 *	Open name with the given fopen() mode and return the stream.
 *	Never returns NULL: on failure the program exits with status 1
 *	after err() reports the file name.
 */
FILE *
file(name, mode)
	char *name, *mode;
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream == NULL)
		err(1, "%s", name);
	return (stream);
}
218
/*
 * obsolete --
 *	Rewrite historic option syntax in place so getopt() can parse it:
 *	"-N" becomes "-fN" and "+N" becomes "-sN", where N starts with a
 *	digit.  argv must be NULL-terminated; argv[0] is not examined.
 *
 *	Scanning stops at the first argument that begins with neither '-'
 *	nor '+', or that begins with "--".  Replacement strings are
 *	allocated with malloc() and are never freed.  On allocation failure
 *	the program exits with status 1 via err().
 */
void
obsolete(argv)
	char *argv[];
{
	char *ap, *p;
	size_t len;

	while ((ap = *++argv) != NULL) {
		/* Return if "--" or not an option of any form. */
		if (ap[0] != '-') {
			if (ap[0] != '+')
				return;
		} else if (ap[1] == '-')
			return;
		if (!isdigit((unsigned char)ap[1]))
			continue;
		/*
		 * Digit signifies an old-style option.  Malloc space for
		 * dash, new option letter and argument: the leading sign
		 * is replaced by two characters, so the result needs
		 * strlen(ap) + 1 characters plus the terminating NUL.
		 */
		len = strlen(ap);
		p = malloc(len + 2);
		if (p == NULL)
			err(1, "malloc");
		p[0] = '-';
		p[1] = ap[0] == '+' ? 's' : 'f';
		/* Copies the len - 1 remaining characters and the NUL. */
		memcpy(p + 2, ap + 1, len);
		*argv = p;
	}
}
245
/*
 * usage --
 *	Print the command synopsis on standard error and exit with
 *	status 1.  Does not return.
 */
void
usage()
{
	static const char synopsis[] =
	    "usage: uniq [-c | -du] [-f fields] [-s chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
253