append.c revision 1.16 1 /* $NetBSD: append.c,v 1.16 2009/08/16 19:53:43 dsl Exp $ */
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64 #include "sort.h"
65
66 #ifndef lint
67 __RCSID("$NetBSD: append.c,v 1.16 2009/08/16 19:53:43 dsl Exp $");
68 __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93");
69 #endif /* not lint */
70
71 #include <stdlib.h>
72 #include <string.h>
73
74 #define OUTPUT { \
75 if ((n = cpos - ppos) > 1) { \
76 for (; ppos < cpos; ++ppos) \
77 *ppos -= odepth; \
78 ppos -= n; \
79 if (stable_sort) \
80 sradixsort(ppos, n, wts1, REC_D); \
81 else \
82 radixsort(ppos, n, wts1, REC_D); \
83 for (; ppos < cpos; ppos++) { \
84 prec = (const RECHEADER *) (*ppos - REC_DATA_OFFSET);\
85 put(prec, fp); \
86 } \
87 } else put(prec, fp); \
88 }
89
90 /*
91 * copy sorted lines to output; check for uniqueness
92 */
93 void
94 append(const u_char **keylist, int nelem, int depth, FILE *fp, put_func_t put,
95 struct field *ftbl)
96 {
97 u_char *wts, *wts1;
98 int n, odepth = depth;
99 const u_char **cpos, **ppos, **lastkey;
100 const u_char *cend, *pend, *start;
101 const struct recheader *crec, *prec;
102
103 if (*keylist == '\0' && UNIQUE)
104 return;
105 wts1 = wts = ftbl[0].weights;
106 if ((!UNIQUE) && SINGL_FLD) {
107 if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
108 wts1 = Rascii;
109 else if (ftbl[0].flags & F)
110 wts1 = ascii;
111 }
112 lastkey = keylist + nelem;
113 depth += REC_DATA_OFFSET;
114 if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
115 ppos = keylist;
116 prec = (const RECHEADER *) (*ppos - depth);
117 if (UNIQUE)
118 put(prec, fp);
119 for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
120 crec = (const RECHEADER *) (*cpos - depth);
121 if (crec->length == prec->length) {
122 /*
123 * Set pend and cend so that trailing NUL and
124 * record separator is ignored.
125 */
126 pend = (const u_char *) &prec->data + prec->length - 2;
127 cend = (const u_char *) &crec->data + crec->length - 2;
128 for (start = *cpos; cend >= start; cend--) {
129 if (wts[*cend] != wts[*pend])
130 break;
131 pend--;
132 }
133 if (pend + 1 != *ppos) {
134 if (!UNIQUE) {
135 OUTPUT;
136 } else
137 put(crec, fp);
138 ppos = cpos;
139 prec = crec;
140 }
141 } else {
142 if (!UNIQUE) {
143 OUTPUT;
144 } else
145 put(crec, fp);
146 ppos = cpos;
147 prec = crec;
148 }
149 }
150 if (!UNIQUE) { OUTPUT; }
151 } else if (UNIQUE) {
152 ppos = keylist;
153 prec = (const RECHEADER *) (*ppos - depth);
154 put(prec, fp);
155 for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
156 crec = (const RECHEADER *) (*cpos - depth);
157 if (crec->offset == prec->offset) {
158 /*
159 * Set pend and cend so that trailing NUL and
160 * record separator is ignored.
161 */
162 pend = (const u_char *) &prec->data + prec->offset - 2;
163 cend = (const u_char *) &crec->data + crec->offset - 2;
164 for (start = *cpos; cend >= start; cend--) {
165 if (wts[*cend] != wts[*pend])
166 break;
167 pend--;
168 }
169 if (pend + 1 != *ppos) {
170 ppos = cpos;
171 prec = crec;
172 put(prec, fp);
173 }
174 } else {
175 ppos = cpos;
176 prec = crec;
177 put(prec, fp);
178 }
179 }
180 } else for (cpos = keylist; cpos < lastkey; cpos++) {
181 crec = (const RECHEADER *) (*cpos - depth);
182 put(crec, fp);
183 }
184 }
185
186 /*
187 * output the already sorted eol bin.
188 */
189 void
190 rd_append(int binno, int infl0, int nfiles, FILE *outfp, u_char *buffer,
191 u_char *bufend)
192 {
193 RECHEADER *rec;
194
195 rec = (RECHEADER *) buffer;
196 if (!getnext(binno, infl0, NULL, nfiles,
197 (RECHEADER *) buffer, bufend, 0)) {
198 putline(rec, outfp);
199 while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
200 bufend, 0) == 0) {
201 if (!UNIQUE)
202 putline(rec, outfp);
203 }
204 }
205 }
206
207 /*
208 * append plain text--used after sorting the biggest bin.
209 */
210 void
211 concat(FILE *a, FILE *b)
212 {
213 int nread;
214 char buffer[4096];
215
216 rewind(b);
217 while ((nread = fread(buffer, 1, 4096, b)) > 0)
218 EWRITE(buffer, 1, nread, a);
219 }
220