/* uniq.c revision 1.3 */
/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Case Larsen.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
36
/*
 * Copyright notice kept as a string in the compiled program.  It is left
 * out when `lint' is defined.
 */
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1989 The Regents of the University of California.\n\
All rights reserved.\n";
#endif /* not lint */

/*
 * File-local version identification string, also left out when `lint'
 * is defined.
 * NOTE(review): the "@(#)" prefix presumably marks these strings for
 * what(1)-style identification tools -- confirm.
 */
#ifndef lint
static char sccsid[] = "@(#)uniq.c 5.2 (Berkeley) 6/1/90";
#endif /* not lint */
46
47 #include <stdio.h>
48 #include <ctype.h>
49
/* Output-selection flags; main() sets them from the command line. */
int cflag;		/* -c: show() prefixes each line with its count */
int dflag;		/* -d: show() prints lines that were repeated */
int uflag;		/* -u: show() prints lines that were not repeated */

/* Comparison offsets; main() sets them, skip() applies them. */
int numchars;		/* +N: characters to skip before comparing */
int numfields;		/* -N: fields to skip before comparing */

/* Extra consecutive occurrences seen so far of the line being held. */
int repeats;

/* Size of each line buffer: 2048 bytes of text plus the terminating NUL. */
#define MAXLINELEN	(2048 + 1)
54
55 main (argc,argv)
56 int argc;
57 char **argv;
58 {
59 extern int optind;
60 FILE *ifp, *ofp, *file();
61 int ch;
62 register char *t1, *t2;
63 char *prevline, *thisline, *malloc(), *skip();
64
65 while ((ch = getopt(argc, argv, "-cdu123456789")) != EOF)
66 switch (ch) {
67 case '-':
68 --optind;
69 goto done;
70 case 'c':
71 cflag = 1;
72 break;
73 case 'd':
74 dflag = 1;
75 break;
76 case 'u':
77 uflag = 1;
78 break;
79 /*
80 * since -n is a valid option that could be picked up by
81 * getopt, but is better handled by the +n and -n code, we
82 * break out.
83 */
84 case '1': case '2': case '3': case '4':
85 case '5': case '6': case '7': case '8': case '9':
86 --optind;
87 goto done;
88 case '?':
89 default:
90 usage();
91 }
92
93 done: argc -= optind;
94 argv +=optind;
95
96 /* if no flags are set, default is -d -u */
97 if (cflag) {
98 if (dflag || uflag)
99 usage();
100 } else if (!dflag && !uflag)
101 dflag = uflag = 1;
102
103 /* because of the +, getopt is messed up */
104 for (; *argv && (**argv == '+' || **argv == '-'); ++argv, --argc)
105 switch (**argv) {
106 case '+':
107 if ((numchars = atoi(*argv + 1)) < 0)
108 goto negerr;
109 break;
110 case '-':
111 if ((numfields = atoi(*argv + 1)) < 0) {
112 negerr: (void)fprintf(stderr,
113 "uniq: negative field/char skip value.\n");
114 usage();
115 }
116 break;
117 }
118
119 switch(argc) {
120 case 0:
121 ifp = stdin;
122 ofp = stdout;
123 break;
124 case 1:
125 ifp = file(argv[0], "r");
126 ofp = stdout;
127 break;
128 case 2:
129 ifp = file(argv[0], "r");
130 ofp = file(argv[1], "w");
131 break;
132 default:
133 usage();
134 }
135
136 prevline = malloc(MAXLINELEN);
137 thisline = malloc(MAXLINELEN);
138 if (!fgets(prevline, MAXLINELEN, ifp))
139 exit(0);
140
141 while (fgets(thisline, MAXLINELEN, ifp)) {
142 /* if requested get the chosen fields + character offsets */
143 if (numfields || numchars) {
144 t1 = skip(thisline);
145 t2 = skip(prevline);
146 } else {
147 t1 = thisline;
148 t2 = prevline;
149 }
150
151 /* if different, print; set previous to new value */
152 if (strcmp(t1, t2)) {
153 show(ofp, prevline);
154 t1 = prevline;
155 prevline = thisline;
156 thisline = t1;
157 repeats = 0;
158 }
159 else
160 ++repeats;
161 }
162 show(ofp, prevline);
163 exit(0);
164 }
165
166 /*
167 * show --
168 * output a line depending on the flags and number of repetitions
169 * of the line.
170 */
171 show(ofp, str)
172 FILE *ofp;
173 char *str;
174 {
175 if (cflag)
176 (void)fprintf(ofp, "%4d %s", repeats + 1, str);
177 if (dflag && repeats || uflag && !repeats)
178 (void)fprintf(ofp, "%s", str);
179 }
180
181 char *
182 skip(str)
183 register char *str;
184 {
185 register int infield, nchars, nfields;
186
187 for (nfields = numfields, infield = 0; nfields && *str; ++str)
188 if (isspace(*str)) {
189 if (infield) {
190 infield = 0;
191 --nfields;
192 }
193 } else if (!infield)
194 infield = 1;
195 for (nchars = numchars; nchars-- && *str; ++str);
196 return(str);
197 }
198
/*
 * file --
 *	open name with the given stdio mode and return the stream.  If
 *	the open fails, print a diagnostic on stderr and exit with
 *	status 1.
 */
FILE *
file(name, mode)
	char *name, *mode;
{
	FILE *stream;

	stream = fopen(name, mode);
	if (stream)
		return(stream);

	(void)fprintf(stderr, "uniq: can't open %s.\n", name);
	exit(1);
	/* NOTREACHED */
}
211
/*
 * usage --
 *	print the command synopsis on stderr and exit with status 1.
 */
usage()
{
	static char synopsis[] =
	    "usage: uniq [-c | -du] [- #fields] [+ #chars] [input [output]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
218