uniq.c revision 1.7 1 1.7 christos /* $NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.1 christos * by Christos Zoulas.
9 1.1 christos *
10 1.1 christos * Redistribution and use in source and binary forms, with or without
11 1.1 christos * modification, are permitted provided that the following conditions
12 1.1 christos * are met:
13 1.1 christos * 1. Redistributions of source code must retain the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer.
15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 christos * notice, this list of conditions and the following disclaimer in the
17 1.1 christos * documentation and/or other materials provided with the distribution.
18 1.1 christos *
19 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
30 1.1 christos */
31 1.1 christos #include <sys/cdefs.h>
32 1.7 christos __RCSID("$NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $");
33 1.1 christos
34 1.1 christos #include <stdio.h>
35 1.1 christos #include <string.h>
36 1.1 christos #include <stdlib.h>
37 1.1 christos #include <db.h>
38 1.1 christos #include <err.h>
39 1.1 christos #include <util.h>
40 1.1 christos #include <ctype.h>
41 1.1 christos #include <fcntl.h>
42 1.1 christos
43 1.5 joerg #include "extern.h"
44 1.5 joerg
45 1.5 joerg static const HASHINFO hinfo = {
46 1.5 joerg .bsize = 256,
47 1.5 joerg .ffactor = 4,
48 1.5 joerg .nelem = 32768,
49 1.5 joerg .cachesize = 1024,
50 1.5 joerg .hash = NULL,
51 1.5 joerg .lorder = 0
52 1.5 joerg };
53 1.1 christos
/* Forward declaration: line normaliser used by uniq(), defined below. */
static int comp(const char *origline, char **compline, size_t *len);
55 1.1 christos
56 1.1 christos /*
57 1.2 christos * Preserve only unique content lines in a file. Input lines that have
58 1.1 christos * content [alphanumeric characters before a comment] are white-space
59 1.1 christos * normalized and have their comments removed. Then they are placed
60 1.1 christos * in a hash table, and only the first instance of them is printed.
61 1.1 christos * Comment lines without any alphanumeric content are always printed
62 1.1 christos * since they are there to make the file "pretty". Comment lines with
63 1.1 christos * alphanumeric content are also placed into the hash table and only
64 1.1 christos * printed once.
65 1.1 christos */
66 1.1 christos void
67 1.1 christos uniq(const char *fname)
68 1.1 christos {
69 1.1 christos DB *db;
70 1.1 christos DBT key;
71 1.1 christos static const DBT data = { NULL, 0 };
72 1.1 christos FILE *fp;
73 1.1 christos char *line;
74 1.1 christos size_t len;
75 1.1 christos
76 1.1 christos if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
77 1.1 christos err(1, "Cannot create in memory database");
78 1.1 christos
79 1.2 christos fp = efopen(fname, "r");
80 1.1 christos while ((line = fgetln(fp, &len)) != NULL) {
81 1.1 christos size_t complen = len;
82 1.1 christos char *compline;
83 1.1 christos if (!comp(line, &compline, &complen)) {
84 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
85 1.1 christos line);
86 1.1 christos continue;
87 1.1 christos }
88 1.1 christos key.data = compline;
89 1.1 christos key.size = complen;
90 1.1 christos switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
91 1.1 christos case 0:
92 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
93 1.1 christos line);
94 1.1 christos break;
95 1.1 christos case 1:
96 1.1 christos break;
97 1.1 christos case -1:
98 1.1 christos err(1, "put");
99 1.6 christos /*NOTREACHED*/
100 1.1 christos default:
101 1.1 christos abort();
102 1.1 christos break;
103 1.1 christos }
104 1.1 christos }
105 1.1 christos (void)fflush(stdout);
106 1.1 christos exit(0);
107 1.1 christos }
108 1.1 christos
/*
 * Normalize whitespace in the original line and place a new string
 * with whitespace converted to a single space in compline.  If the line
 * contains just comments, we preserve them.  If it contains data and
 * comments, we kill the comments.
 *
 * origline	input bytes; need not be NUL-terminated, at most *len bytes
 *		are examined and scanning also stops at an embedded NUL.
 * compline	out: on a return of 1, a NUL-terminated buffer obtained from
 *		emalloc() that the caller must free(); otherwise NULL.
 * len		in: number of bytes available at origline;
 *		out: length of *compline without the terminating NUL
 *		(0 when 0 is returned).
 *
 * Return 1 if the line had actual contents, or 0 if it was empty or just
 * a comment without alphanumeric characters.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p = (const unsigned char *)origline;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen = 0;
	int hasalnum = 0, iscomment = 0;

	/* Give the outputs a defined value on every "no content" return. */
	*compline = NULL;
	*len = 0;

	/* Eat leading space */
	while (l != 0 && *p != '\0' && isspace(*p)) {
		p++;
		l--;
	}
	/*
	 * Check the remaining length first: when it is zero, p points one
	 * past the end of the caller's buffer and must not be dereferenced.
	 */
	if (l == 0 || *p == '\0')
		return 0;

	/* The result can never be longer than what is left of the input. */
	cline = emalloc(l + 1);
	q = (unsigned char *)cline;

	for (; l != 0 && *p != '\0'; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one blank. */
			if (complen != 0 && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
			continue;
		}

		if (!iscomment && *p == '#') {
			/* Data followed by a comment: drop the comment. */
			if (hasalnum)
				break;
			/* Comment-only line: keep scanning its text. */
			iscomment = 1;
		} else if (isalnum(*p)) {
			hasalnum = 1;
		}
		*q++ = *p;
		complen++;
	}

	/* Eat trailing space */
	while (complen != 0 && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';

	if (!hasalnum) {
		/* Decorative comment: nothing to hand back to the caller. */
		free(cline);
		return 0;
	}

	*compline = cline;
	*len = complen;
	return 1;
}
173