uniq.c revision 1.3 1 1.3 christos /* $NetBSD: uniq.c,v 1.3 2007/06/24 19:51:43 christos Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.1 christos * by Christos Zoulas.
9 1.1 christos *
10 1.1 christos * Redistribution and use in source and binary forms, with or without
11 1.1 christos * modification, are permitted provided that the following conditions
12 1.1 christos * are met:
13 1.1 christos * 1. Redistributions of source code must retain the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer.
15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 christos * notice, this list of conditions and the following disclaimer in the
17 1.1 christos * documentation and/or other materials provided with the distribution.
18 1.1 christos * 3. All advertising materials mentioning features or use of this software
19 1.1 christos * must display the following acknowledgement:
20 1.1 christos * This product includes software developed by the NetBSD
21 1.1 christos * Foundation, Inc. and its contributors.
22 1.1 christos * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 christos * contributors may be used to endorse or promote products derived
24 1.1 christos * from this software without specific prior written permission.
25 1.1 christos *
26 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
37 1.1 christos */
38 1.1 christos #include <sys/cdefs.h>
39 1.3 christos __RCSID("$NetBSD: uniq.c,v 1.3 2007/06/24 19:51:43 christos Exp $");
40 1.1 christos
41 1.1 christos #include <stdio.h>
42 1.1 christos #include <string.h>
43 1.1 christos #include <stdlib.h>
44 1.1 christos #include <db.h>
45 1.1 christos #include <err.h>
46 1.1 christos #include <util.h>
47 1.1 christos #include <ctype.h>
48 1.1 christos #include <fcntl.h>
49 1.1 christos
50 1.1 christos extern const HASHINFO hinfo;
51 1.1 christos
52 1.1 christos void uniq(const char *);
53 1.1 christos static int comp(const char *, char **, size_t *);
54 1.1 christos
55 1.1 christos /*
56 1.2 christos * Preserve only unique content lines in a file. Input lines that have
57 1.1 christos * content [alphanumeric characters before a comment] are white-space
58 1.1 christos * normalized and have their comments removed. Then they are placed
59 1.1 christos * in a hash table, and only the first instance of them is printed.
60 1.1 christos * Comment lines without any alphanumeric content are always printed
61 1.1 christos * since they are there to make the file "pretty". Comment lines with
62 1.1 christos * alphanumeric content are also placed into the hash table and only
63 1.1 christos * printed once.
64 1.1 christos */
65 1.1 christos void
66 1.1 christos uniq(const char *fname)
67 1.1 christos {
68 1.1 christos DB *db;
69 1.1 christos DBT key;
70 1.1 christos static const DBT data = { NULL, 0 };
71 1.1 christos FILE *fp;
72 1.1 christos char *line;
73 1.1 christos size_t len;
74 1.1 christos
75 1.1 christos if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
76 1.1 christos err(1, "Cannot create in memory database");
77 1.1 christos
78 1.2 christos fp = efopen(fname, "r");
79 1.1 christos while ((line = fgetln(fp, &len)) != NULL) {
80 1.1 christos size_t complen = len;
81 1.1 christos char *compline;
82 1.1 christos if (!comp(line, &compline, &complen)) {
83 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
84 1.1 christos line);
85 1.1 christos continue;
86 1.1 christos }
87 1.1 christos key.data = compline;
88 1.1 christos key.size = complen;
89 1.1 christos switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
90 1.1 christos case 0:
91 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
92 1.1 christos line);
93 1.1 christos break;
94 1.1 christos case 1:
95 1.1 christos break;
96 1.1 christos case -1:
97 1.1 christos err(1, "put");
98 1.1 christos default:
99 1.1 christos abort();
100 1.1 christos break;
101 1.1 christos }
102 1.1 christos }
103 1.1 christos (void)fflush(stdout);
104 1.1 christos exit(0);
105 1.1 christos }
106 1.1 christos
/*
 * Normalize whitespace in the original line and place a new string
 * with each run of whitespace converted to a single space in compline.
 * Leading and trailing whitespace is dropped.  If the line contains
 * just comments, we preserve them.  If it contains data and comments,
 * we kill the comments.
 *
 * origline	input line; need not be NUL-terminated, at most *len
 *		bytes are examined and scanning stops at a NUL byte
 * compline	out: newly allocated, NUL-terminated normalized line that
 *		the caller must free(), or NULL if the line was blank
 * len		in: length of origline; out: length of *compline
 *		(0 if the line was blank)
 *
 * Return 1 if the line had actual contents, or 0 if it was blank or
 * just a comment without alphanumeric characters.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p = (const unsigned char *)origline;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen = 0;
	int hasalnum = 0, iscomment = 0;

	/* Eat leading space */
	while (l && *p && isspace(*p)) {
		p++;
		l--;
	}

	/*
	 * Nothing but whitespace: report "no content" before allocating,
	 * so there is no buffer to leak and the out parameters are defined.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* The normalized form can never be longer than what is left. */
	cline = emalloc(l + 1);
	q = (unsigned char *)cline;

	for (; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
			continue;
		}

		if (!iscomment && *p == '#') {
			/* A comment after real data ends the line. */
			if (hasalnum)
				break;
			/* Comment-only line so far: keep its text. */
			iscomment = 1;
		} else
			hasalnum |= isalnum(*p);
		*q++ = *p;
		complen++;
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';

	*compline = cline;
	*len = complen;
	/* isalnum() may return any non-zero value; hand back exactly 0 or 1. */
	return hasalnum != 0;
}
165