uniq.c revision 1.3.10.1 1 1.3.10.1 yamt /* $NetBSD: uniq.c,v 1.3.10.1 2008/05/18 12:36:23 yamt Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.1 christos * by Christos Zoulas.
9 1.1 christos *
10 1.1 christos * Redistribution and use in source and binary forms, with or without
11 1.1 christos * modification, are permitted provided that the following conditions
12 1.1 christos * are met:
13 1.1 christos * 1. Redistributions of source code must retain the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer.
15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 christos * notice, this list of conditions and the following disclaimer in the
17 1.1 christos * documentation and/or other materials provided with the distribution.
18 1.1 christos *
19 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
30 1.1 christos */
31 1.1 christos #include <sys/cdefs.h>
32 1.3.10.1 yamt __RCSID("$NetBSD: uniq.c,v 1.3.10.1 2008/05/18 12:36:23 yamt Exp $");
33 1.1 christos
34 1.1 christos #include <stdio.h>
35 1.1 christos #include <string.h>
36 1.1 christos #include <stdlib.h>
37 1.1 christos #include <db.h>
38 1.1 christos #include <err.h>
39 1.1 christos #include <util.h>
40 1.1 christos #include <ctype.h>
41 1.1 christos #include <fcntl.h>
42 1.1 christos
43 1.1 christos extern const HASHINFO hinfo;
44 1.1 christos
45 1.1 christos void uniq(const char *);
46 1.1 christos static int comp(const char *, char **, size_t *);
47 1.1 christos
48 1.1 christos /*
49 1.2 christos * Preserve only unique content lines in a file. Input lines that have
50 1.1 christos * content [alphanumeric characters before a comment] are white-space
51 1.1 christos * normalized and have their comments removed. Then they are placed
52 1.1 christos * in a hash table, and only the first instance of them is printed.
53 1.1 christos * Comment lines without any alphanumeric content are always printed
54 1.1 christos * since they are there to make the file "pretty". Comment lines with
55 1.1 christos * alphanumeric content are also placed into the hash table and only
56 1.1 christos * printed once.
57 1.1 christos */
58 1.1 christos void
59 1.1 christos uniq(const char *fname)
60 1.1 christos {
61 1.1 christos DB *db;
62 1.1 christos DBT key;
63 1.1 christos static const DBT data = { NULL, 0 };
64 1.1 christos FILE *fp;
65 1.1 christos char *line;
66 1.1 christos size_t len;
67 1.1 christos
68 1.1 christos if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
69 1.1 christos err(1, "Cannot create in memory database");
70 1.1 christos
71 1.2 christos fp = efopen(fname, "r");
72 1.1 christos while ((line = fgetln(fp, &len)) != NULL) {
73 1.1 christos size_t complen = len;
74 1.1 christos char *compline;
75 1.1 christos if (!comp(line, &compline, &complen)) {
76 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
77 1.1 christos line);
78 1.1 christos continue;
79 1.1 christos }
80 1.1 christos key.data = compline;
81 1.1 christos key.size = complen;
82 1.1 christos switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
83 1.1 christos case 0:
84 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
85 1.1 christos line);
86 1.1 christos break;
87 1.1 christos case 1:
88 1.1 christos break;
89 1.1 christos case -1:
90 1.1 christos err(1, "put");
91 1.1 christos default:
92 1.1 christos abort();
93 1.1 christos break;
94 1.1 christos }
95 1.1 christos }
96 1.1 christos (void)fflush(stdout);
97 1.1 christos exit(0);
98 1.1 christos }
99 1.1 christos
/*
 * normalize whitespace in the original line and place a new string
 * with whitespace converted to a single space in compline. If the line
 * contains just comments, we preserve them. If it contains data and
 * comments, we kill the comments. Return 1 if the line had actual
 * contents, or 0 if it was just a comment without alphanumeric characters.
 *
 * origline	input line; need not be NUL terminated, *len bounds it.
 * compline	on a return of 1, receives a buffer obtained from
 *		emalloc() holding the NUL-terminated normalized line;
 *		the caller must free() it.  On a return of 0 it is set
 *		to NULL and nothing is left allocated.
 * len		in: number of bytes available at origline.
 *		out: length of the normalized line, excluding the NUL
 *		(left untouched when the line is empty or all whitespace).
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;

	/* Blank line: decide before allocating so nothing can leak. */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		return 0;
	}

	/* The normalized text is never longer than the l remaining bytes. */
	cline = emalloc(l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one blank. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/* Data followed by a comment: drop the comment. */
				if (hasalnum)
					break;
				/* Comment-only line so far: keep its text. */
				iscomment = 1;
			} else
				hasalnum |= isalnum(*p);
			*q++ = *p;
			complen++;
		}
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';
	*len = complen;

	if (!hasalnum) {
		/* Decorative line: there is no key, so keep no buffer. */
		free(cline);
		*compline = NULL;
		return 0;
	}

	*compline = cline;
	return 1;
}
158