append.c revision 1.12 1 /* $NetBSD: append.c,v 1.12 2003/08/07 11:32:34 jdolecek Exp $ */
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*-
40 * Copyright (c) 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * Peter McIlroy.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 */
70
71 #include "sort.h"
72
73 #ifndef lint
74 __RCSID("$NetBSD: append.c,v 1.12 2003/08/07 11:32:34 jdolecek Exp $");
75 __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
76 #endif /* not lint */
77
78 #include <stdlib.h>
79 #include <string.h>
80
81 #define OUTPUT { \
82 if ((n = cpos - ppos) > 1) { \
83 for (; ppos < cpos; ++ppos) \
84 *ppos -= odepth; \
85 ppos -= n; \
86 if (stable_sort) \
87 sradixsort(ppos, n, wts1, REC_D); \
88 else \
89 radixsort(ppos, n, wts1, REC_D); \
90 for (; ppos < cpos; ppos++) { \
91 prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
92 put(prec, fp); \
93 } \
94 } else put(prec, fp); \
95 }
96
97 /*
98 * copy sorted lines to output; check for uniqueness
99 */
100 void
101 append(keylist, nelem, depth, fp, put, ftbl)
102 const u_char **keylist;
103 int nelem;
104 int depth;
105 FILE *fp;
106 put_func_t put;
107 struct field *ftbl;
108 {
109 u_char *wts, *wts1;
110 int n, odepth;
111 const u_char **cpos, **ppos, **lastkey;
112 const u_char *cend, *pend, *start;
113 const struct recheader *crec, *prec;
114
115 if (*keylist == '\0' && UNIQUE)
116 return;
117 wts1 = wts = ftbl[0].weights;
118 if ((!UNIQUE) && SINGL_FLD) {
119 if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
120 wts1 = Rascii;
121 else if (ftbl[0].flags & F)
122 wts1 = ascii;
123 odepth = depth;
124 }
125 lastkey = keylist + nelem;
126 depth += sizeof(TRECHEADER);
127 if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
128 ppos = keylist;
129 prec = (const RECHEADER *) (*ppos - depth);
130 if (UNIQUE)
131 put(prec, fp);
132 for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
133 crec = (const RECHEADER *) (*cpos - depth);
134 if (crec->length == prec->length) {
135 /*
136 * Set pend and cend so that trailing NUL and
137 * record separator is ignored.
138 */
139 pend = (const u_char *) &prec->data + prec->length - 2;
140 cend = (const u_char *) &crec->data + crec->length - 2;
141 for (start = *cpos; cend >= start; cend--) {
142 if (wts[*cend] != wts[*pend])
143 break;
144 pend--;
145 }
146 if (pend + 1 != *ppos) {
147 if (!UNIQUE) {
148 OUTPUT;
149 } else
150 put(crec, fp);
151 ppos = cpos;
152 prec = crec;
153 }
154 } else {
155 if (!UNIQUE) {
156 OUTPUT;
157 } else
158 put(crec, fp);
159 ppos = cpos;
160 prec = crec;
161 }
162 }
163 if (!UNIQUE) { OUTPUT; }
164 } else if (UNIQUE) {
165 ppos = keylist;
166 prec = (const RECHEADER *) (*ppos - depth);
167 put(prec, fp);
168 for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
169 crec = (const RECHEADER *) (*cpos - depth);
170 if (crec->offset == prec->offset) {
171 /*
172 * Set pend and cend so that trailing NUL and
173 * record separator is ignored.
174 */
175 pend = (const u_char *) &prec->data + prec->offset - 2;
176 cend = (const u_char *) &crec->data + crec->offset - 2;
177 for (start = *cpos; cend >= start; cend--) {
178 if (wts[*cend] != wts[*pend])
179 break;
180 pend--;
181 }
182 if (pend + 1 != *ppos) {
183 ppos = cpos;
184 prec = crec;
185 put(prec, fp);
186 }
187 } else {
188 ppos = cpos;
189 prec = crec;
190 put(prec, fp);
191 }
192 }
193 } else for (cpos = keylist; cpos < lastkey; cpos++) {
194 crec = (const RECHEADER *) (*cpos - depth);
195 put(crec, fp);
196 }
197 }
198
199 /*
200 * output the already sorted eol bin.
201 */
202 void
203 rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
204 u_char *buffer;
205 int infl0;
206 int binno, nfiles;
207 FILE *outfp;
208 u_char *bufend;
209 {
210 RECHEADER *rec;
211
212 rec = (RECHEADER *) buffer;
213 if (!getnext(binno, infl0, NULL, nfiles,
214 (RECHEADER *) buffer, bufend, 0)) {
215 putline(rec, outfp);
216 while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
217 bufend, 0) == 0) {
218 if (!UNIQUE)
219 putline(rec, outfp);
220 }
221 }
222 }
223
224 /*
225 * append plain text--used after sorting the biggest bin.
226 */
227 void
228 concat(a, b)
229 FILE *a, *b;
230 {
231 int nread;
232 char buffer[4096];
233
234 rewind(b);
235 while ((nread = fread(buffer, 1, 4096, b)) > 0)
236 EWRITE(buffer, 1, nread, a);
237 }
238