msort.c revision 1.22 1 /* $NetBSD: msort.c,v 1.22 2009/08/20 06:36:25 dsl Exp $ */
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64 #include "sort.h"
65 #include "fsort.h"
66
67 #ifndef lint
68 __RCSID("$NetBSD: msort.c,v 1.22 2009/08/20 06:36:25 dsl Exp $");
69 __SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93");
70 #endif /* not lint */
71
72 #include <stdlib.h>
73 #include <string.h>
74 #include <unistd.h>
75
76 /* Subroutines using comparisons: merge sort and check order */
/*
 * Value for the 'delete' argument of insert(): DELETE asks insert() to
 * remove the head of the list while inserting, !DELETE only inserts.
 */
#define DELETE (1)
78
/*
 * Per-input merge buffer.  merge() overlays one of these on the start of
 * each of its work buffers.
 */
typedef struct mfile {
	u_char *end;			/* one past the last byte of the buffer holding this struct */
	short flno;			/* input number handed to get() to read the next record */
	struct recheader rec[1];	/* record read by get(); fills the rest of the buffer */
} MFILE;
84
/*
 * Weight tables consulted by cmp(): wts is always applied first, wts1 (when
 * not NULL) is applied in a second pass if the first one found no difference.
 * Both are set up by fmerge() and order().
 */
static u_char *wts, *wts1 = NULL;

/* Helpers private to this file. */
static int cmp(RECHEADER *, RECHEADER *);
static int insert(struct mfile **, struct mfile **, int, int);
static void merge(int, int, get_func_t, FILE *, put_func_t, struct field *);
90
91 void
92 fmerge(int binno, struct filelist *filelist, int nfiles,
93 get_func_t get, FILE *outfp, put_func_t fput, struct field *ftbl)
94 {
95 FILE *tout;
96 int i, j, last;
97 put_func_t put;
98
99 wts = ftbl->weights;
100 if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
101 wts1 = (ftbl->flags & R) ? Rascii : ascii;
102
103 if (!buffer) {
104 buffer = malloc(bufsize);
105 if (!buffer)
106 err(2, "fmerge(): malloc");
107 memset(buffer, 0, bufsize);
108 }
109
110 while (nfiles) {
111 put = putrec;
112 for (j = 0; j < nfiles; j += MERGE_FNUM) {
113 if (nfiles <= MERGE_FNUM) {
114 tout = outfp;
115 put = fput;
116 }
117 else
118 tout = ftmp();
119 last = min(MERGE_FNUM, nfiles - j);
120 if (binno < 0) {
121 for (i = 0; i < last; i++)
122 if (!(fstack[i+MAXFCT-1-MERGE_FNUM].fp =
123 fopen(filelist->names[j+i], "r")))
124 err(2, "%s",
125 filelist->names[j+i]);
126 merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl);
127 } else {
128 for (i = 0; i< last; i++)
129 rewind(fstack[i+j].fp);
130 merge(j, last, get, tout, put, ftbl);
131 }
132 if (nfiles > MERGE_FNUM)
133 fstack[j/MERGE_FNUM].fp = tout;
134 }
135 nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM;
136 if (nfiles == 1)
137 nfiles = 0;
138 if (binno < 0) {
139 binno = 0;
140 get = geteasy;
141 }
142 }
143 }
144
145 static void
146 merge(int infl0, int nfiles, get_func_t get, FILE *outfp, put_func_t put,
147 struct field *ftbl)
148 {
149 int c, i, j, nf = nfiles;
150 struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
151 size_t availsz = bufsize;
152 static void *bufs[MERGE_FNUM + 1];
153 static size_t bufs_sz[MERGE_FNUM + 1];
154
155 /*
156 * We need nfiles + 1 buffers. One is 'buffer', the
157 * rest needs to be allocated.
158 */
159 bufs[0] = buffer;
160 bufs_sz[0] = bufsize;
161 for (i = 1; i < nfiles + 1; i++) {
162 if (bufs[i])
163 continue;
164
165 bufs[i] = malloc(DEFLLEN);
166 if (!bufs[i])
167 err(2, "merge: malloc");
168 memset(bufs[i], 0, DEFLLEN);
169 bufs_sz[i] = DEFLLEN;
170 }
171
172 for (i = j = 0; i < nfiles; i++, j++) {
173 cfile = (struct mfile *) bufs[j];
174 cfile->flno = infl0 + j;
175 cfile->end = (u_char *) bufs[j] + bufs_sz[j];
176 for (c = 1; c == 1;) {
177 if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
178 cfile->rec, cfile->end, ftbl))) {
179 --i;
180 --nfiles;
181 break;
182 }
183
184 if (c == BUFFEND) {
185 cfile = realloc(bufs[j], bufs_sz[j]);
186 if (!cfile)
187 err(2, "merge: realloc");
188
189 bufs[j] = (void *) cfile;
190 bufs_sz[j] *= 2;
191 cfile->end = (u_char *)cfile + bufs_sz[j];
192
193 c = 1;
194 continue;
195 }
196
197 if (i)
198 c = insert(flist, &cfile, i, !DELETE);
199 else
200 flist[0] = cfile;
201 }
202 }
203
204 cfile = (struct mfile *) bufs[nf];
205 cfile->flno = flist[0]->flno;
206 cfile->end = (u_char *) cfile + bufs_sz[nf];
207 while (nfiles) {
208 for (c = 1; c == 1;) {
209 if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
210 cfile->rec, cfile->end, ftbl))) {
211 put(flist[0]->rec, outfp);
212 if (--nfiles > 0) {
213 flist++;
214 cfile->flno = flist[0]->flno;
215 }
216 break;
217 }
218 if (c == BUFFEND) {
219 char *oldbuf = (char *) cfile;
220 availsz = (char *) cfile->end - oldbuf;
221 availsz *= 2;
222 cfile = realloc(oldbuf, availsz);
223 if (!cfile)
224 err(2, "merge: realloc");
225
226 for (i = 0; i < nf + 1; i++) {
227 if (bufs[i] == oldbuf) {
228 bufs[i] = (char *)cfile;
229 bufs_sz[i] = availsz;
230 break;
231 }
232 }
233
234 cfile->end = (u_char *)cfile + availsz;
235 c = 1;
236 continue;
237 }
238
239 if (!(c = insert(flist, &cfile, nfiles, DELETE)))
240 put(cfile->rec, outfp);
241 }
242 }
243
244 if (bufs_sz[0] > bufsize) {
245 buffer = bufs[0];
246 bufsize = bufs_sz[0];
247 }
248 }
249
250 /*
251 * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
252 * otherwise just inserts *rec in flist.
253 */
254 static int
255 insert(struct mfile **flist, struct mfile **rec, int ttop, int delete)
256 /* delete, ttop: delete = 0 or 1 */
257 {
258 struct mfile *tmprec = *rec;
259 int mid, top = ttop, bot = 0, cmpv = 1;
260
261 for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
262 cmpv = cmp(tmprec->rec, flist[mid]->rec);
263 if (cmpv < 0)
264 top = mid;
265 else if (cmpv > 0)
266 bot = mid;
267 else {
268 if (UNIQUE)
269 break;
270
271 if (stable_sort) {
272 /*
273 * Apply sort by fileno, to give priority
274 * to earlier specified files, hence providing
275 * more stable sort.
276 * If fileno is same, the new record should
277 * be put _after_ the previous entry.
278 */
279 cmpv = tmprec->flno - flist[mid]->flno;
280 if (cmpv >= 0)
281 bot = mid;
282 else /* cmpv == 0 */
283 bot = mid - 1;
284 } else {
285 /* non-stable sort */
286 bot = mid - 1;
287 }
288
289 break;
290 }
291 }
292
293 if (delete) {
294 if (UNIQUE) {
295 if (!bot && cmpv)
296 cmpv = cmp(tmprec->rec, flist[0]->rec);
297 if (!cmpv)
298 return (1);
299 }
300 tmprec = flist[0];
301 if (bot)
302 memmove(flist, flist + 1, bot * sizeof(MFILE **));
303 flist[bot] = *rec;
304 *rec = tmprec;
305 (*rec)->flno = flist[0]->flno;
306 return (0);
307 } else {
308 if (!bot && !(UNIQUE && !cmpv)) {
309 cmpv = cmp(tmprec->rec, flist[0]->rec);
310 if (cmpv < 0)
311 bot = -1;
312 }
313 if (UNIQUE && !cmpv)
314 return (1);
315 bot++;
316 memmove(flist + bot + 1, flist + bot,
317 (ttop - bot) * sizeof(MFILE **));
318 flist[bot] = *rec;
319 return (0);
320 }
321 }
322
323 /*
324 * check order on one file
325 */
326 void
327 order(struct filelist *filelist, get_func_t get, struct field *ftbl)
328 {
329 u_char *crec_end, *prec_end, *trec_end;
330 int c;
331 RECHEADER *crec, *prec, *trec;
332
333 buffer = malloc(2 * (DEFLLEN + REC_DATA_OFFSET));
334 crec = (RECHEADER *) buffer;
335 crec_end = buffer + DEFLLEN + REC_DATA_OFFSET;
336 prec = (RECHEADER *) (buffer + DEFLLEN + REC_DATA_OFFSET);
337 prec_end = buffer + 2*(DEFLLEN + REC_DATA_OFFSET);
338 wts = ftbl->weights;
339 if (SINGL_FLD && (ftbl->flags & F))
340 wts1 = (ftbl->flags & R) ? Rascii : ascii;
341 else
342 wts1 = NULL;
343 if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl))
344 while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) {
345 if (0 < (c = cmp(prec, crec))) {
346 crec->data[crec->length-1] = 0;
347 errx(1, "found disorder: %s", crec->data+crec->offset);
348 }
349 if (UNIQUE && !c) {
350 crec->data[crec->length-1] = 0;
351 errx(1, "found non-uniqueness: %s",
352 crec->data+crec->offset);
353 }
354 /*
355 * Swap pointers so that this record is on place pointed
356 * to by prec and new record is read to place pointed to by
357 * crec.
358 */
359 trec = prec;
360 prec = crec;
361 crec = trec;
362 trec_end = prec_end;
363 prec_end = crec_end;
364 crec_end = trec_end;
365 }
366 exit(0);
367 }
368
369 static int
370 cmp(RECHEADER *rec1, RECHEADER *rec2)
371 {
372 int r;
373 u_char *pos1, *pos2, *end;
374 u_char *cwts;
375 for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) {
376 pos1 = rec1->data;
377 pos2 = rec2->data;
378 if (!SINGL_FLD && (UNIQUE || stable_sort))
379 end = pos1 + min(rec1->offset, rec2->offset);
380 else
381 end = pos1 + min(rec1->length, rec2->length);
382
383 for (; pos1 < end; ) {
384 if ((r = cwts[*pos1++] - cwts[*pos2++]))
385 return (r);
386 }
387 }
388 return (0);
389 }
390