Home | History | Annotate | Line # | Download | only in sort
fsort.c revision 1.37
      1  1.37       dsl /*	$NetBSD: fsort.c,v 1.37 2009/08/18 18:00:28 dsl Exp $	*/
      2  1.26  jdolecek 
      3  1.26  jdolecek /*-
      4  1.26  jdolecek  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5  1.26  jdolecek  * All rights reserved.
      6  1.26  jdolecek  *
      7  1.26  jdolecek  * This code is derived from software contributed to The NetBSD Foundation
      8  1.26  jdolecek  * by Ben Harris and Jaromir Dolecek.
      9  1.26  jdolecek  *
     10  1.26  jdolecek  * Redistribution and use in source and binary forms, with or without
     11  1.26  jdolecek  * modification, are permitted provided that the following conditions
     12  1.26  jdolecek  * are met:
     13  1.26  jdolecek  * 1. Redistributions of source code must retain the above copyright
     14  1.26  jdolecek  *    notice, this list of conditions and the following disclaimer.
     15  1.26  jdolecek  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.26  jdolecek  *    notice, this list of conditions and the following disclaimer in the
     17  1.26  jdolecek  *    documentation and/or other materials provided with the distribution.
     18  1.26  jdolecek  *
     19  1.26  jdolecek  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.26  jdolecek  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.26  jdolecek  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.26  jdolecek  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.26  jdolecek  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.26  jdolecek  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.26  jdolecek  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.26  jdolecek  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.26  jdolecek  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.26  jdolecek  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.26  jdolecek  * POSSIBILITY OF SUCH DAMAGE.
     30  1.26  jdolecek  */
     31   1.2     bjh21 
     32   1.1     bjh21 /*-
     33   1.1     bjh21  * Copyright (c) 1993
     34   1.1     bjh21  *	The Regents of the University of California.  All rights reserved.
     35   1.1     bjh21  *
     36   1.1     bjh21  * This code is derived from software contributed to Berkeley by
     37   1.1     bjh21  * Peter McIlroy.
     38   1.1     bjh21  *
     39   1.1     bjh21  * Redistribution and use in source and binary forms, with or without
     40   1.1     bjh21  * modification, are permitted provided that the following conditions
     41   1.1     bjh21  * are met:
     42   1.1     bjh21  * 1. Redistributions of source code must retain the above copyright
     43   1.1     bjh21  *    notice, this list of conditions and the following disclaimer.
     44   1.1     bjh21  * 2. Redistributions in binary form must reproduce the above copyright
     45   1.1     bjh21  *    notice, this list of conditions and the following disclaimer in the
     46   1.1     bjh21  *    documentation and/or other materials provided with the distribution.
     47  1.25       agc  * 3. Neither the name of the University nor the names of its contributors
     48   1.1     bjh21  *    may be used to endorse or promote products derived from this software
     49   1.1     bjh21  *    without specific prior written permission.
     50   1.1     bjh21  *
     51   1.1     bjh21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52   1.1     bjh21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53   1.1     bjh21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54   1.1     bjh21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55   1.1     bjh21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56   1.1     bjh21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57   1.1     bjh21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58   1.1     bjh21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59   1.1     bjh21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60   1.1     bjh21  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61   1.1     bjh21  * SUCH DAMAGE.
     62   1.1     bjh21  */
     63   1.1     bjh21 
     64   1.1     bjh21 /*
     65  1.37       dsl  * Read in a block of records (until 'enough').
     66  1.37       dsl  * sort, write to temp file.
     67  1.37       dsl  * Merge sort temp files into output file
     68  1.37       dsl  * Small files miss out the temp file stage.
     69  1.37       dsl  * Large files might get multiple merges.
     70  1.16  jdolecek  */
     71   1.1     bjh21 #include "sort.h"
     72   1.1     bjh21 #include "fsort.h"
     73   1.1     bjh21 
     74   1.2     bjh21 #ifndef lint
     75  1.37       dsl __RCSID("$NetBSD: fsort.c,v 1.37 2009/08/18 18:00:28 dsl Exp $");
     76   1.2     bjh21 __SCCSID("@(#)fsort.c	8.1 (Berkeley) 6/6/93");
     77   1.2     bjh21 #endif /* not lint */
     78   1.2     bjh21 
     79   1.1     bjh21 #include <stdlib.h>
     80   1.1     bjh21 #include <string.h>
     81   1.1     bjh21 
     82  1.13  jdolecek static const u_char **keylist = 0;
     83  1.34       dsl u_char *buffer = 0;
     84  1.18  jdolecek size_t bufsize = DEFBUFSIZE;
     85   1.1     bjh21 #define FSORTMAX 4
     86   1.1     bjh21 int PANIC = FSORTMAX;
     87   1.1     bjh21 
     88  1.23  jdolecek struct tempfile fstack[MAXFCT];
     89  1.11  jdolecek #define MSTART		(MAXFCT - MERGE_FNUM)
     90  1.23  jdolecek #define	CHECKFSTACK(n)					\
     91  1.24  jdolecek 	if (n >= MAXFCT)				\
     92  1.23  jdolecek 		errx(2, "fstack: too many temporary files; use -H or sort in pieces")
     93  1.23  jdolecek 
     94  1.13  jdolecek #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
     95   1.8  jdolecek 
     96   1.1     bjh21 void
     97  1.33       dsl fsort(int binno, int depth, int top, struct filelist *filelist, int nfiles,
     98  1.33       dsl     FILE *outfp, struct field *ftbl)
     99   1.1     bjh21 {
    100   1.4  jdolecek 	const u_char **keypos;
    101  1.30  jdolecek 	u_char *bufend;
    102   1.1     bjh21 	u_char *weights;
    103  1.37       dsl 	int ntfiles, mfct = 0;
    104  1.37       dsl 	int c, nelem;
    105   1.8  jdolecek 	get_func_t get;
    106   1.4  jdolecek 	struct recheader *crec;
    107   1.1     bjh21 	struct field tfield[2];
    108  1.27    itojun 	u_char *nbuffer;
    109   1.1     bjh21 
    110   1.1     bjh21 	memset(tfield, 0, sizeof(tfield));
    111   1.1     bjh21 	if (ftbl[0].flags & R)
    112   1.1     bjh21 		tfield[0].weights = Rascii;
    113   1.1     bjh21 	else
    114   1.1     bjh21 		tfield[0].weights = ascii;
    115   1.1     bjh21 	tfield[0].icol.num = 1;
    116   1.1     bjh21 	weights = ftbl[0].weights;
    117   1.1     bjh21 	if (!buffer) {
    118   1.5  jdolecek 		buffer = malloc(bufsize);
    119   1.1     bjh21 		keylist = malloc(MAXNUM * sizeof(u_char *));
    120  1.12    itojun 		memset(keylist, 0, MAXNUM * sizeof(u_char *));
    121   1.1     bjh21 	}
    122   1.5  jdolecek 	bufend = buffer + bufsize;
    123  1.37       dsl 	if (SINGL_FLD)
    124  1.37       dsl 		get = makeline;
    125  1.37       dsl 	else
    126  1.37       dsl 		get = makekey;
    127  1.37       dsl 
    128  1.37       dsl 	c = nelem = ntfiles = 0; /* XXXGCC -Wuninitialized m68k */
    129  1.37       dsl 	keypos = keylist;	     /* XXXGCC -Wuninitialized m68k */
    130  1.37       dsl 	crec = (RECHEADER *) buffer; /* XXXGCC -Wuninitialized m68k */
    131  1.37       dsl 	while (c != EOF) {
    132  1.37       dsl 		keypos = keylist;
    133  1.37       dsl 		nelem = 0;
    134  1.37       dsl 		crec = (RECHEADER *) buffer;
    135  1.37       dsl 
    136  1.37       dsl 	   do_read:
    137  1.37       dsl 		while ((c = get(-1, top, filelist, nfiles, crec,
    138  1.37       dsl 		    bufend, ftbl)) == 0) {
    139  1.37       dsl 			*keypos++ = crec->data + depth;
    140  1.37       dsl 			if (++nelem == MAXNUM) {
    141  1.37       dsl 				c = BUFFEND;
    142  1.37       dsl 				break;
    143  1.37       dsl 			}
    144  1.37       dsl 			crec =(RECHEADER *)((char *) crec +
    145  1.37       dsl 			    SALIGN(crec->length) + REC_DATA_OFFSET);
    146   1.1     bjh21 		}
    147  1.13  jdolecek 
    148  1.37       dsl 		if (c == BUFFEND && nelem < MAXNUM
    149  1.37       dsl 		    && bufsize < MAXBUFSIZE) {
    150  1.37       dsl 			const u_char **keyp;
    151  1.37       dsl 			u_char *oldb = buffer;
    152  1.37       dsl 
    153  1.37       dsl 			/* buffer was too small for data, allocate
    154  1.37       dsl 			 * bigger buffer */
    155  1.37       dsl 			nbuffer = realloc(buffer, bufsize * 2);
    156  1.37       dsl 			if (!nbuffer) {
    157  1.37       dsl 				err(2, "failed to realloc buffer to %lu bytes",
    158  1.37       dsl 					(unsigned long) bufsize * 2);
    159   1.5  jdolecek 			}
    160  1.37       dsl 			buffer = nbuffer;
    161  1.37       dsl 			bufsize *= 2;
    162  1.37       dsl 			bufend = buffer + bufsize;
    163  1.19  jdolecek 
    164  1.37       dsl 			/* patch up keylist[] */
    165  1.37       dsl 			for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
    166  1.37       dsl 				*keyp = buffer + (*keyp - oldb);
    167  1.19  jdolecek 
    168  1.37       dsl 			crec = (RECHEADER *) (buffer + ((u_char *)crec - oldb));
    169  1.37       dsl 			goto do_read;
    170   1.1     bjh21 		}
    171  1.37       dsl 
    172  1.37       dsl 		if (c != BUFFEND && !ntfiles && !mfct) {
    173  1.37       dsl 			/* do not push */
    174  1.37       dsl 			continue;
    175   1.1     bjh21 		}
    176  1.37       dsl 
    177  1.37       dsl 		/* push */
    178  1.37       dsl 		fstack[MSTART + mfct].fp = ftmp();
    179  1.37       dsl 		if (radix_sort(keylist, nelem, weights, REC_D))
    180  1.37       dsl 			err(2, NULL);
    181  1.37       dsl 		append(keylist, nelem, depth, fstack[MSTART + mfct].fp, putrec,
    182  1.37       dsl 		    ftbl);
    183  1.37       dsl 		mfct++;
    184  1.37       dsl 		/* reduce number of open files */
    185  1.37       dsl 		if (mfct == MERGE_FNUM ||(c == EOF && ntfiles)) {
    186  1.37       dsl 			/*
    187  1.37       dsl 			 * Only copy extra incomplete crec
    188  1.37       dsl 			 * data if there are any.
    189  1.37       dsl 			 */
    190  1.37       dsl 			int nodata = (bufend >= (u_char *)crec
    191  1.37       dsl 			    && bufend <= crec->data);
    192  1.37       dsl 			size_t sz=0;
    193  1.37       dsl 			u_char *tmpbuf=NULL;
    194  1.37       dsl 
    195  1.37       dsl 			if (!nodata) {
    196  1.37       dsl 				sz = bufend - crec->data;
    197  1.37       dsl 				tmpbuf = malloc(sz);
    198  1.37       dsl 				memmove(tmpbuf, crec->data, sz);
    199   1.9    itojun 			}
    200   1.8  jdolecek 
    201  1.37       dsl 			CHECKFSTACK(ntfiles);
    202  1.37       dsl 			fstack[ntfiles].fp = ftmp();
    203  1.37       dsl 			fmerge(0, MSTART, filelist, mfct, geteasy,
    204  1.37       dsl 			    fstack[ntfiles].fp, putrec, ftbl);
    205  1.37       dsl 			ntfiles++;
    206  1.37       dsl 			mfct = 0;
    207  1.37       dsl 
    208  1.37       dsl 			if (!nodata) {
    209  1.37       dsl 				memmove(crec->data, tmpbuf, sz);
    210  1.37       dsl 				free(tmpbuf);
    211  1.37       dsl 			}
    212   1.1     bjh21 		}
    213   1.1     bjh21 	}
    214  1.19  jdolecek 
    215  1.37       dsl 	if (!ntfiles && !mfct) {	/* everything in memory--pop */
    216  1.37       dsl 		if (nelem > 1 && radix_sort(keylist, nelem, weights, REC_D))
    217  1.37       dsl 			err(2, NULL);
    218  1.37       dsl 		if (nelem > 0)
    219  1.37       dsl 			append(keylist, nelem, depth, outfp, putline, ftbl);
    220  1.37       dsl 	}
    221  1.37       dsl 	if (!ntfiles)
    222  1.37       dsl 		fmerge(0, MSTART, filelist, mfct, geteasy,
    223  1.37       dsl 		    outfp, putline, ftbl);
    224  1.37       dsl 	else
    225  1.37       dsl 		fmerge(0, 0, filelist, ntfiles, geteasy,
    226  1.37       dsl 		    outfp, putline, ftbl);
    227   1.1     bjh21 
    228  1.37       dsl 	free(keylist);
    229  1.37       dsl 	keylist = NULL;
    230  1.37       dsl 	free(buffer);
    231  1.37       dsl 	buffer = NULL;
    232   1.1     bjh21 }
    233