/*	$NetBSD: uniq.c,v 1.3 2007/06/24 19:51:43 christos Exp $	*/
2
3 /*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christos Zoulas.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38 #include <sys/cdefs.h>
39 __RCSID("$NetBSD: uniq.c,v 1.3 2007/06/24 19:51:43 christos Exp $");
40
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <db.h>
45 #include <err.h>
46 #include <util.h>
47 #include <ctype.h>
48 #include <fcntl.h>
49
50 extern const HASHINFO hinfo;
51
52 void uniq(const char *);
53 static int comp(const char *, char **, size_t *);
54
55 /*
56 * Preserve only unique content lines in a file. Input lines that have
57 * content [alphanumeric characters before a comment] are white-space
58 * normalized and have their comments removed. Then they are placed
59 * in a hash table, and only the first instance of them is printed.
60 * Comment lines without any alphanumeric content are always printed
61 * since they are there to make the file "pretty". Comment lines with
62 * alphanumeric content are also placed into the hash table and only
63 * printed once.
64 */
65 void
66 uniq(const char *fname)
67 {
68 DB *db;
69 DBT key;
70 static const DBT data = { NULL, 0 };
71 FILE *fp;
72 char *line;
73 size_t len;
74
75 if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
76 err(1, "Cannot create in memory database");
77
78 fp = efopen(fname, "r");
79 while ((line = fgetln(fp, &len)) != NULL) {
80 size_t complen = len;
81 char *compline;
82 if (!comp(line, &compline, &complen)) {
83 (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
84 line);
85 continue;
86 }
87 key.data = compline;
88 key.size = complen;
89 switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
90 case 0:
91 (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
92 line);
93 break;
94 case 1:
95 break;
96 case -1:
97 err(1, "put");
98 default:
99 abort();
100 break;
101 }
102 }
103 (void)fflush(stdout);
104 exit(0);
105 }
106
/*
 * Build the normalized form of a line, used to detect duplicates.
 *
 * origline points to *len bytes that need not be NUL-terminated;
 * scanning also stops at the first NUL byte.  Leading and trailing
 * whitespace is dropped and every internal run of whitespace becomes a
 * single space.  If the line contains just a comment, the comment text
 * is preserved.  If it contains data followed by a comment, the comment
 * is killed.
 *
 * Returns 1 if the line had actual contents (at least one alphanumeric
 * character).  In that case *compline is a newly allocated,
 * NUL-terminated string that the caller must free(), and *len is its
 * length.
 *
 * Returns 0 if the line was blank or was just a comment without
 * alphanumeric characters.  In that case *compline is set to NULL,
 * *len to 0, and nothing is left allocated.
 *
 * Allocation failure is fatal and reported through err().
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	/* Nothing is handed back unless the line turns out to have content. */
	*compline = NULL;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;

	/* Blank line: decide before allocating so nothing can leak. */
	if (l == 0 || *p == '\0') {
		*len = 0;
		return 0;
	}

	/* The normalized text is never longer than what is left to scan. */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/* Comment after real data: drop it. */
				if (hasalnum)
					break;
				/* Comment-only line: keep its text. */
				iscomment = 1;
			} else if (isalnum(*p))
				hasalnum = 1;
			*q++ = *p;
			complen++;
		}
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';

	/* Decorative comment: the caller has no use for the buffer. */
	if (!hasalnum) {
		free(cline);
		*len = 0;
		return 0;
	}

	*compline = cline;
	*len = complen;
	return 1;
}
165