1 1.7 christos /* $NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $ */ 2 1.1 christos 3 1.1 christos /*- 4 1.1 christos * Copyright (c) 2007 The NetBSD Foundation, Inc. 5 1.1 christos * All rights reserved. 6 1.1 christos * 7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation 8 1.1 christos * by Christos Zoulas. 9 1.1 christos * 10 1.1 christos * Redistribution and use in source and binary forms, with or without 11 1.1 christos * modification, are permitted provided that the following conditions 12 1.1 christos * are met: 13 1.1 christos * 1. Redistributions of source code must retain the above copyright 14 1.1 christos * notice, this list of conditions and the following disclaimer. 15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 christos * notice, this list of conditions and the following disclaimer in the 17 1.1 christos * documentation and/or other materials provided with the distribution. 18 1.1 christos * 19 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 christos * POSSIBILITY OF SUCH DAMAGE. 30 1.1 christos */ 31 1.1 christos #include <sys/cdefs.h> 32 1.7 christos __RCSID("$NetBSD: uniq.c,v 1.7 2021/03/22 03:28:55 christos Exp $"); 33 1.1 christos 34 1.1 christos #include <stdio.h> 35 1.1 christos #include <string.h> 36 1.1 christos #include <stdlib.h> 37 1.1 christos #include <db.h> 38 1.1 christos #include <err.h> 39 1.1 christos #include <util.h> 40 1.1 christos #include <ctype.h> 41 1.1 christos #include <fcntl.h> 42 1.1 christos 43 1.5 joerg #include "extern.h" 44 1.5 joerg 45 1.5 joerg static const HASHINFO hinfo = { 46 1.5 joerg .bsize = 256, 47 1.5 joerg .ffactor = 4, 48 1.5 joerg .nelem = 32768, 49 1.5 joerg .cachesize = 1024, 50 1.5 joerg .hash = NULL, 51 1.5 joerg .lorder = 0 52 1.5 joerg }; 53 1.1 christos 54 1.1 christos static int comp(const char *, char **, size_t *); 55 1.1 christos 56 1.1 christos /* 57 1.2 christos * Preserve only unique content lines in a file. Input lines that have 58 1.1 christos * content [alphanumeric characters before a comment] are white-space 59 1.1 christos * normalized and have their comments removed. Then they are placed 60 1.1 christos * in a hash table, and only the first instance of them is printed. 61 1.1 christos * Comment lines without any alphanumeric content are always printed 62 1.1 christos * since they are there to make the file "pretty". Comment lines with 63 1.1 christos * alphanumeric content are also placed into the hash table and only 64 1.1 christos * printed once. 65 1.1 christos */ 66 1.1 christos void 67 1.1 christos uniq(const char *fname) 68 1.1 christos { 69 1.1 christos DB *db; 70 1.1 christos DBT key; 71 1.1 christos static const DBT data = { NULL, 0 }; 72 1.1 christos FILE *fp; 73 1.1 christos char *line; 74 1.1 christos size_t len; 75 1.1 christos 76 1.1 christos if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) 77 1.1 christos err(1, "Cannot create in memory database"); 78 1.1 christos 79 1.2 christos fp = efopen(fname, "r"); 80 1.1 christos while ((line = fgetln(fp, &len)) != NULL) { 81 1.1 christos size_t complen = len; 82 1.1 christos char *compline; 83 1.1 christos if (!comp(line, &compline, &complen)) { 84 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 85 1.1 christos line); 86 1.1 christos continue; 87 1.1 christos } 88 1.1 christos key.data = compline; 89 1.1 christos key.size = complen; 90 1.1 christos switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { 91 1.1 christos case 0: 92 1.1 christos (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 93 1.1 christos line); 94 1.1 christos break; 95 1.1 christos case 1: 96 1.1 christos break; 97 1.1 christos case -1: 98 1.1 christos err(1, "put"); 99 1.6 christos /*NOTREACHED*/ 100 1.1 christos default: 101 1.1 christos abort(); 102 1.1 christos break; 103 1.1 christos } 104 1.1 christos } 105 1.1 christos (void)fflush(stdout); 106 1.1 christos exit(0); 107 1.1 christos } 108 1.1 christos 109 1.1 christos /* 110 1.1 christos * normalize whitespace in the original line and place a new string 111 1.3 christos * with whitespace converted to a single space in compline. If the line 112 1.1 christos * contains just comments, we preserve them. If it contains data and 113 1.1 christos * comments, we kill the comments. Return 1 if the line had actual 114 1.3 christos * contents, or 0 if it was just a comment without alphanumeric characters. 115 1.1 christos */ 116 1.1 christos static int 117 1.1 christos comp(const char *origline, char **compline, size_t *len) 118 1.1 christos { 119 1.1 christos const unsigned char *p; 120 1.1 christos unsigned char *q; 121 1.1 christos char *cline; 122 1.1 christos size_t l = *len, complen; 123 1.3 christos int hasalnum, iscomment; 124 1.1 christos 125 1.3 christos /* Eat leading space */ 126 1.1 christos for (p = (const unsigned char *)origline; l && *p && isspace(*p); 127 1.1 christos p++, l--) 128 1.1 christos continue; 129 1.7 christos if (*p == '\0' || l == 0) 130 1.7 christos return 0; 131 1.7 christos 132 1.1 christos cline = emalloc(l + 1); 133 1.1 christos (void)memcpy(cline, p, l); 134 1.1 christos cline[l] = '\0'; 135 1.1 christos 136 1.1 christos complen = 0; 137 1.3 christos hasalnum = 0; 138 1.1 christos iscomment = 0; 139 1.3 christos 140 1.1 christos for (q = (unsigned char *)cline; l && *p; p++, l--) { 141 1.1 christos if (isspace(*p)) { 142 1.3 christos if (complen && isspace(q[-1])) 143 1.1 christos continue; 144 1.3 christos *q++ = ' '; 145 1.3 christos complen++; 146 1.1 christos } else { 147 1.3 christos if (!iscomment && *p == '#') { 148 1.3 christos if (hasalnum) 149 1.1 christos break; 150 1.1 christos iscomment = 1; 151 1.1 christos } else 152 1.1 christos hasalnum |= isalnum(*p); 153 1.3 christos *q++ = *p; 154 1.3 christos complen++; 155 1.1 christos } 156 1.3 christos } 157 1.3 christos 158 1.3 christos /* Eat trailing space */ 159 1.3 christos while (complen && isspace(q[-1])) { 160 1.3 christos --q; 161 1.3 christos --complen; 162 1.1 christos } 163 1.1 christos *q = '\0'; 164 1.7 christos if (!hasalnum) { 165 1.7 christos free(cline); 166 1.7 christos cline = NULL; 167 1.7 christos complen = 0; 168 1.7 christos } 169 1.1 christos *compline = cline; 170 1.1 christos *len = complen; 171 1.1 christos return hasalnum; 172 1.1 christos } 173