fsort.c revision 1.37 1 1.37 dsl /* $NetBSD: fsort.c,v 1.37 2009/08/18 18:00:28 dsl Exp $ */
2 1.26 jdolecek
3 1.26 jdolecek /*-
4 1.26 jdolecek * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 1.26 jdolecek * All rights reserved.
6 1.26 jdolecek *
7 1.26 jdolecek * This code is derived from software contributed to The NetBSD Foundation
8 1.26 jdolecek * by Ben Harris and Jaromir Dolecek.
9 1.26 jdolecek *
10 1.26 jdolecek * Redistribution and use in source and binary forms, with or without
11 1.26 jdolecek * modification, are permitted provided that the following conditions
12 1.26 jdolecek * are met:
13 1.26 jdolecek * 1. Redistributions of source code must retain the above copyright
14 1.26 jdolecek * notice, this list of conditions and the following disclaimer.
15 1.26 jdolecek * 2. Redistributions in binary form must reproduce the above copyright
16 1.26 jdolecek * notice, this list of conditions and the following disclaimer in the
17 1.26 jdolecek * documentation and/or other materials provided with the distribution.
18 1.26 jdolecek *
19 1.26 jdolecek * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.26 jdolecek * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.26 jdolecek * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.26 jdolecek * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.26 jdolecek * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.26 jdolecek * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.26 jdolecek * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.26 jdolecek * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.26 jdolecek * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.26 jdolecek * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.26 jdolecek * POSSIBILITY OF SUCH DAMAGE.
30 1.26 jdolecek */
31 1.2 bjh21
32 1.1 bjh21 /*-
33 1.1 bjh21 * Copyright (c) 1993
34 1.1 bjh21 * The Regents of the University of California. All rights reserved.
35 1.1 bjh21 *
36 1.1 bjh21 * This code is derived from software contributed to Berkeley by
37 1.1 bjh21 * Peter McIlroy.
38 1.1 bjh21 *
39 1.1 bjh21 * Redistribution and use in source and binary forms, with or without
40 1.1 bjh21 * modification, are permitted provided that the following conditions
41 1.1 bjh21 * are met:
42 1.1 bjh21 * 1. Redistributions of source code must retain the above copyright
43 1.1 bjh21 * notice, this list of conditions and the following disclaimer.
44 1.1 bjh21 * 2. Redistributions in binary form must reproduce the above copyright
45 1.1 bjh21 * notice, this list of conditions and the following disclaimer in the
46 1.1 bjh21 * documentation and/or other materials provided with the distribution.
47 1.25 agc * 3. Neither the name of the University nor the names of its contributors
48 1.1 bjh21 * may be used to endorse or promote products derived from this software
49 1.1 bjh21 * without specific prior written permission.
50 1.1 bjh21 *
51 1.1 bjh21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 1.1 bjh21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 1.1 bjh21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 1.1 bjh21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 1.1 bjh21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 1.1 bjh21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 1.1 bjh21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 1.1 bjh21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 1.1 bjh21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 1.1 bjh21 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 1.1 bjh21 * SUCH DAMAGE.
62 1.1 bjh21 */
63 1.1 bjh21
64 1.1 bjh21 /*
65 1.37 dsl * Read in a block of records (until 'enough').
66 1.37 dsl * sort, write to temp file.
67 1.37 dsl * Merge sort temp files into output file
68 1.37 dsl * Small files miss out the temp file stage.
69 1.37 dsl * Large files might get multiple merges.
70 1.16 jdolecek */
71 1.1 bjh21 #include "sort.h"
72 1.1 bjh21 #include "fsort.h"
73 1.1 bjh21
74 1.2 bjh21 #ifndef lint
75 1.37 dsl __RCSID("$NetBSD: fsort.c,v 1.37 2009/08/18 18:00:28 dsl Exp $");
76 1.2 bjh21 __SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93");
77 1.2 bjh21 #endif /* not lint */
78 1.2 bjh21
79 1.1 bjh21 #include <stdlib.h>
80 1.1 bjh21 #include <string.h>
81 1.1 bjh21
82 1.13 jdolecek static const u_char **keylist = 0;
83 1.34 dsl u_char *buffer = 0;
84 1.18 jdolecek size_t bufsize = DEFBUFSIZE;
85 1.1 bjh21 #define FSORTMAX 4
86 1.1 bjh21 int PANIC = FSORTMAX;
87 1.1 bjh21
88 1.23 jdolecek struct tempfile fstack[MAXFCT];
89 1.11 jdolecek #define MSTART (MAXFCT - MERGE_FNUM)
90 1.23 jdolecek #define CHECKFSTACK(n) \
91 1.24 jdolecek if (n >= MAXFCT) \
92 1.23 jdolecek errx(2, "fstack: too many temporary files; use -H or sort in pieces")
93 1.23 jdolecek
94 1.13 jdolecek #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
95 1.8 jdolecek
96 1.1 bjh21 void
97 1.33 dsl fsort(int binno, int depth, int top, struct filelist *filelist, int nfiles,
98 1.33 dsl FILE *outfp, struct field *ftbl)
99 1.1 bjh21 {
100 1.4 jdolecek const u_char **keypos;
101 1.30 jdolecek u_char *bufend;
102 1.1 bjh21 u_char *weights;
103 1.37 dsl int ntfiles, mfct = 0;
104 1.37 dsl int c, nelem;
105 1.8 jdolecek get_func_t get;
106 1.4 jdolecek struct recheader *crec;
107 1.1 bjh21 struct field tfield[2];
108 1.27 itojun u_char *nbuffer;
109 1.1 bjh21
110 1.1 bjh21 memset(tfield, 0, sizeof(tfield));
111 1.1 bjh21 if (ftbl[0].flags & R)
112 1.1 bjh21 tfield[0].weights = Rascii;
113 1.1 bjh21 else
114 1.1 bjh21 tfield[0].weights = ascii;
115 1.1 bjh21 tfield[0].icol.num = 1;
116 1.1 bjh21 weights = ftbl[0].weights;
117 1.1 bjh21 if (!buffer) {
118 1.5 jdolecek buffer = malloc(bufsize);
119 1.1 bjh21 keylist = malloc(MAXNUM * sizeof(u_char *));
120 1.12 itojun memset(keylist, 0, MAXNUM * sizeof(u_char *));
121 1.1 bjh21 }
122 1.5 jdolecek bufend = buffer + bufsize;
123 1.37 dsl if (SINGL_FLD)
124 1.37 dsl get = makeline;
125 1.37 dsl else
126 1.37 dsl get = makekey;
127 1.37 dsl
128 1.37 dsl c = nelem = ntfiles = 0; /* XXXGCC -Wuninitialized m68k */
129 1.37 dsl keypos = keylist; /* XXXGCC -Wuninitialized m68k */
130 1.37 dsl crec = (RECHEADER *) buffer; /* XXXGCC -Wuninitialized m68k */
131 1.37 dsl while (c != EOF) {
132 1.37 dsl keypos = keylist;
133 1.37 dsl nelem = 0;
134 1.37 dsl crec = (RECHEADER *) buffer;
135 1.37 dsl
136 1.37 dsl do_read:
137 1.37 dsl while ((c = get(-1, top, filelist, nfiles, crec,
138 1.37 dsl bufend, ftbl)) == 0) {
139 1.37 dsl *keypos++ = crec->data + depth;
140 1.37 dsl if (++nelem == MAXNUM) {
141 1.37 dsl c = BUFFEND;
142 1.37 dsl break;
143 1.37 dsl }
144 1.37 dsl crec =(RECHEADER *)((char *) crec +
145 1.37 dsl SALIGN(crec->length) + REC_DATA_OFFSET);
146 1.1 bjh21 }
147 1.13 jdolecek
148 1.37 dsl if (c == BUFFEND && nelem < MAXNUM
149 1.37 dsl && bufsize < MAXBUFSIZE) {
150 1.37 dsl const u_char **keyp;
151 1.37 dsl u_char *oldb = buffer;
152 1.37 dsl
153 1.37 dsl /* buffer was too small for data, allocate
154 1.37 dsl * bigger buffer */
155 1.37 dsl nbuffer = realloc(buffer, bufsize * 2);
156 1.37 dsl if (!nbuffer) {
157 1.37 dsl err(2, "failed to realloc buffer to %lu bytes",
158 1.37 dsl (unsigned long) bufsize * 2);
159 1.5 jdolecek }
160 1.37 dsl buffer = nbuffer;
161 1.37 dsl bufsize *= 2;
162 1.37 dsl bufend = buffer + bufsize;
163 1.19 jdolecek
164 1.37 dsl /* patch up keylist[] */
165 1.37 dsl for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
166 1.37 dsl *keyp = buffer + (*keyp - oldb);
167 1.19 jdolecek
168 1.37 dsl crec = (RECHEADER *) (buffer + ((u_char *)crec - oldb));
169 1.37 dsl goto do_read;
170 1.1 bjh21 }
171 1.37 dsl
172 1.37 dsl if (c != BUFFEND && !ntfiles && !mfct) {
173 1.37 dsl /* do not push */
174 1.37 dsl continue;
175 1.1 bjh21 }
176 1.37 dsl
177 1.37 dsl /* push */
178 1.37 dsl fstack[MSTART + mfct].fp = ftmp();
179 1.37 dsl if (radix_sort(keylist, nelem, weights, REC_D))
180 1.37 dsl err(2, NULL);
181 1.37 dsl append(keylist, nelem, depth, fstack[MSTART + mfct].fp, putrec,
182 1.37 dsl ftbl);
183 1.37 dsl mfct++;
184 1.37 dsl /* reduce number of open files */
185 1.37 dsl if (mfct == MERGE_FNUM ||(c == EOF && ntfiles)) {
186 1.37 dsl /*
187 1.37 dsl * Only copy extra incomplete crec
188 1.37 dsl * data if there are any.
189 1.37 dsl */
190 1.37 dsl int nodata = (bufend >= (u_char *)crec
191 1.37 dsl && bufend <= crec->data);
192 1.37 dsl size_t sz=0;
193 1.37 dsl u_char *tmpbuf=NULL;
194 1.37 dsl
195 1.37 dsl if (!nodata) {
196 1.37 dsl sz = bufend - crec->data;
197 1.37 dsl tmpbuf = malloc(sz);
198 1.37 dsl memmove(tmpbuf, crec->data, sz);
199 1.9 itojun }
200 1.8 jdolecek
201 1.37 dsl CHECKFSTACK(ntfiles);
202 1.37 dsl fstack[ntfiles].fp = ftmp();
203 1.37 dsl fmerge(0, MSTART, filelist, mfct, geteasy,
204 1.37 dsl fstack[ntfiles].fp, putrec, ftbl);
205 1.37 dsl ntfiles++;
206 1.37 dsl mfct = 0;
207 1.37 dsl
208 1.37 dsl if (!nodata) {
209 1.37 dsl memmove(crec->data, tmpbuf, sz);
210 1.37 dsl free(tmpbuf);
211 1.37 dsl }
212 1.1 bjh21 }
213 1.1 bjh21 }
214 1.19 jdolecek
215 1.37 dsl if (!ntfiles && !mfct) { /* everything in memory--pop */
216 1.37 dsl if (nelem > 1 && radix_sort(keylist, nelem, weights, REC_D))
217 1.37 dsl err(2, NULL);
218 1.37 dsl if (nelem > 0)
219 1.37 dsl append(keylist, nelem, depth, outfp, putline, ftbl);
220 1.37 dsl }
221 1.37 dsl if (!ntfiles)
222 1.37 dsl fmerge(0, MSTART, filelist, mfct, geteasy,
223 1.37 dsl outfp, putline, ftbl);
224 1.37 dsl else
225 1.37 dsl fmerge(0, 0, filelist, ntfiles, geteasy,
226 1.37 dsl outfp, putline, ftbl);
227 1.1 bjh21
228 1.37 dsl free(keylist);
229 1.37 dsl keylist = NULL;
230 1.37 dsl free(buffer);
231 1.37 dsl buffer = NULL;
232 1.1 bjh21 }
233