1 1.47 enami /* $NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $ */ 2 1.26 jdolecek 3 1.26 jdolecek /*- 4 1.26 jdolecek * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 1.26 jdolecek * All rights reserved. 6 1.26 jdolecek * 7 1.26 jdolecek * This code is derived from software contributed to The NetBSD Foundation 8 1.26 jdolecek * by Ben Harris and Jaromir Dolecek. 9 1.26 jdolecek * 10 1.26 jdolecek * Redistribution and use in source and binary forms, with or without 11 1.26 jdolecek * modification, are permitted provided that the following conditions 12 1.26 jdolecek * are met: 13 1.26 jdolecek * 1. Redistributions of source code must retain the above copyright 14 1.26 jdolecek * notice, this list of conditions and the following disclaimer. 15 1.26 jdolecek * 2. Redistributions in binary form must reproduce the above copyright 16 1.26 jdolecek * notice, this list of conditions and the following disclaimer in the 17 1.26 jdolecek * documentation and/or other materials provided with the distribution. 18 1.26 jdolecek * 19 1.26 jdolecek * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.26 jdolecek * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.26 jdolecek * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.26 jdolecek * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.26 jdolecek * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.26 jdolecek * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.26 jdolecek * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.26 jdolecek * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.26 jdolecek * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.26 jdolecek * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.26 jdolecek * POSSIBILITY OF SUCH DAMAGE. 30 1.26 jdolecek */ 31 1.2 bjh21 32 1.1 bjh21 /*- 33 1.1 bjh21 * Copyright (c) 1993 34 1.1 bjh21 * The Regents of the University of California. All rights reserved. 35 1.1 bjh21 * 36 1.1 bjh21 * This code is derived from software contributed to Berkeley by 37 1.1 bjh21 * Peter McIlroy. 38 1.1 bjh21 * 39 1.1 bjh21 * Redistribution and use in source and binary forms, with or without 40 1.1 bjh21 * modification, are permitted provided that the following conditions 41 1.1 bjh21 * are met: 42 1.1 bjh21 * 1. Redistributions of source code must retain the above copyright 43 1.1 bjh21 * notice, this list of conditions and the following disclaimer. 44 1.1 bjh21 * 2. Redistributions in binary form must reproduce the above copyright 45 1.1 bjh21 * notice, this list of conditions and the following disclaimer in the 46 1.1 bjh21 * documentation and/or other materials provided with the distribution. 47 1.25 agc * 3. Neither the name of the University nor the names of its contributors 48 1.1 bjh21 * may be used to endorse or promote products derived from this software 49 1.1 bjh21 * without specific prior written permission. 50 1.1 bjh21 * 51 1.1 bjh21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 1.1 bjh21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 1.1 bjh21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 1.1 bjh21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 1.1 bjh21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 1.1 bjh21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 1.1 bjh21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 1.1 bjh21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 1.1 bjh21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 1.1 bjh21 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 1.1 bjh21 * SUCH DAMAGE. 62 1.1 bjh21 */ 63 1.1 bjh21 64 1.1 bjh21 /* 65 1.37 dsl * Read in a block of records (until 'enough'). 66 1.37 dsl * sort, write to temp file. 67 1.37 dsl * Merge sort temp files into output file 68 1.37 dsl * Small files miss out the temp file stage. 69 1.37 dsl * Large files might get multiple merges. 70 1.16 jdolecek */ 71 1.1 bjh21 #include "sort.h" 72 1.1 bjh21 #include "fsort.h" 73 1.1 bjh21 74 1.47 enami __RCSID("$NetBSD: fsort.c,v 1.47 2010/02/05 21:58:41 enami Exp $"); 75 1.2 bjh21 76 1.1 bjh21 #include <stdlib.h> 77 1.1 bjh21 #include <string.h> 78 1.1 bjh21 79 1.13 jdolecek #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) 80 1.8 jdolecek 81 1.1 bjh21 void 82 1.38 dsl fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) 83 1.1 bjh21 { 84 1.41 dsl RECHEADER **keylist; 85 1.41 dsl RECHEADER **keypos, **keyp; 86 1.40 dsl RECHEADER *buffer; 87 1.40 dsl size_t bufsize = DEFBUFSIZE; 88 1.30 jdolecek u_char *bufend; 89 1.38 dsl int mfct = 0; 90 1.37 dsl int c, nelem; 91 1.8 jdolecek get_func_t get; 92 1.40 dsl RECHEADER *crec; 93 1.40 dsl RECHEADER *nbuffer; 94 1.42 dsl FILE *fp, *tmp_fp; 95 1.42 dsl int file_no; 96 1.42 dsl int max_recs = DEBUG('m') ? 16 : MAXNUM; 97 1.1 bjh21 98 1.47 enami buffer = allocrec(NULL, bufsize); 99 1.40 dsl bufend = (u_char *)buffer + bufsize; 100 1.40 dsl /* Allocate double length keymap for radix_sort */ 101 1.42 dsl keylist = malloc(2 * max_recs * sizeof(*keylist)); 102 1.40 dsl if (buffer == NULL || keylist == NULL) 103 1.40 dsl err(2, "failed to malloc initial buffer or keylist"); 104 1.38 dsl 105 1.37 dsl if (SINGL_FLD) 106 1.38 dsl /* Key and data are one! */ 107 1.37 dsl get = makeline; 108 1.37 dsl else 109 1.38 dsl /* Key (merged key fields) added before data */ 110 1.37 dsl get = makekey; 111 1.37 dsl 112 1.42 dsl file_no = 0; 113 1.42 dsl fp = fopen(filelist->names[0], "r"); 114 1.42 dsl if (fp == NULL) 115 1.42 dsl err(2, "%s", filelist->names[0]); 116 1.42 dsl 117 1.38 dsl /* Loop through reads of chunk of input files that get sorted 118 1.38 dsl * and then merged together. */ 119 1.38 dsl for (;;) { 120 1.37 dsl keypos = keylist; 121 1.37 dsl nelem = 0; 122 1.40 dsl crec = buffer; 123 1.43 dsl makeline_copydown(crec); 124 1.37 dsl 125 1.38 dsl /* Loop reading records */ 126 1.38 dsl for (;;) { 127 1.42 dsl c = get(fp, crec, bufend, ftbl); 128 1.38 dsl /* 'c' is 0, EOF or BUFFEND */ 129 1.38 dsl if (c == 0) { 130 1.38 dsl /* Save start of key in input buffer */ 131 1.40 dsl *keypos++ = crec; 132 1.42 dsl if (++nelem == max_recs) { 133 1.38 dsl c = BUFFEND; 134 1.38 dsl break; 135 1.38 dsl } 136 1.40 dsl crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); 137 1.38 dsl continue; 138 1.38 dsl } 139 1.42 dsl if (c == EOF) { 140 1.42 dsl /* try next file */ 141 1.42 dsl if (++file_no >= nfiles) 142 1.42 dsl /* no more files */ 143 1.42 dsl break; 144 1.42 dsl fp = fopen(filelist->names[file_no], "r"); 145 1.42 dsl if (fp == NULL) 146 1.42 dsl err(2, "%s", filelist->names[file_no]); 147 1.42 dsl continue; 148 1.42 dsl } 149 1.45 dsl if (nelem >= max_recs 150 1.45 dsl || (bufsize >= MAXBUFSIZE && nelem > 8)) 151 1.38 dsl /* Need to sort and save this lot of data */ 152 1.37 dsl break; 153 1.13 jdolecek 154 1.38 dsl /* c == BUFFEND, and we can process more data */ 155 1.38 dsl /* Allocate a larger buffer for this lot of data */ 156 1.38 dsl bufsize *= 2; 157 1.47 enami nbuffer = allocrec(buffer, bufsize); 158 1.37 dsl if (!nbuffer) { 159 1.38 dsl err(2, "failed to realloc buffer to %zu bytes", 160 1.38 dsl bufsize); 161 1.5 jdolecek } 162 1.19 jdolecek 163 1.37 dsl /* patch up keylist[] */ 164 1.37 dsl for (keyp = &keypos[-1]; keyp >= keylist; keyp--) 165 1.38 dsl *keyp = nbuffer + (*keyp - buffer); 166 1.19 jdolecek 167 1.40 dsl crec = nbuffer + (crec - buffer); 168 1.38 dsl buffer = nbuffer; 169 1.40 dsl bufend = (u_char *)buffer + bufsize; 170 1.1 bjh21 } 171 1.37 dsl 172 1.38 dsl /* Sort this set of records */ 173 1.42 dsl radix_sort(keylist, keylist + max_recs, nelem); 174 1.38 dsl 175 1.38 dsl if (c == EOF && mfct == 0) { 176 1.38 dsl /* all the data is (sorted) in the buffer */ 177 1.39 dsl append(keylist, nelem, outfp, 178 1.41 dsl DEBUG('k') ? putkeydump : putline); 179 1.38 dsl break; 180 1.1 bjh21 } 181 1.37 dsl 182 1.38 dsl /* Save current data to a temporary file for a later merge */ 183 1.44 dsl if (nelem != 0) { 184 1.44 dsl tmp_fp = ftmp(); 185 1.44 dsl append(keylist, nelem, tmp_fp, putrec); 186 1.44 dsl save_for_merge(tmp_fp, geteasy, ftbl); 187 1.44 dsl } 188 1.42 dsl mfct = 1; 189 1.8 jdolecek 190 1.38 dsl if (c == EOF) { 191 1.38 dsl /* merge to output file */ 192 1.42 dsl merge_sort(outfp, 193 1.39 dsl DEBUG('k') ? putkeydump : putline, ftbl); 194 1.38 dsl break; 195 1.1 bjh21 } 196 1.37 dsl } 197 1.1 bjh21 198 1.37 dsl free(keylist); 199 1.37 dsl keylist = NULL; 200 1.37 dsl free(buffer); 201 1.37 dsl buffer = NULL; 202 1.1 bjh21 } 203