Home | History | Annotate | Line # | Download | only in sort
append.c revision 1.12
      1 /*	$NetBSD: append.c,v 1.12 2003/08/07 11:32:34 jdolecek Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Ben Harris and Jaromir Dolecek.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*-
     40  * Copyright (c) 1993
     41  *	The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * Peter McIlroy.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  */
     70 
     71 #include "sort.h"
     72 
     73 #ifndef lint
     74 __RCSID("$NetBSD: append.c,v 1.12 2003/08/07 11:32:34 jdolecek Exp $");
     75 __SCCSID("@(#)append.c	8.1 (Berkeley) 6/6/93");
     76 #endif /* not lint */
     77 
     78 #include <stdlib.h>
     79 #include <string.h>
     80 
     81 #define OUTPUT {							\
     82 	if ((n = cpos - ppos) > 1) {					\
     83 		for (; ppos < cpos; ++ppos)				\
     84 			*ppos -= odepth;				\
     85 		ppos -= n;						\
     86 		if (stable_sort)					\
     87 			sradixsort(ppos, n, wts1, REC_D);		\
     88 		else							\
     89 			radixsort(ppos, n, wts1, REC_D);		\
     90 		for (; ppos < cpos; ppos++) {				\
     91 			prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\
     92 			put(prec, fp);					\
     93 		}							\
     94 	} else put(prec, fp);						\
     95 }
     96 
     97 /*
     98  * copy sorted lines to output; check for uniqueness
     99  */
    100 void
    101 append(keylist, nelem, depth, fp, put, ftbl)
    102 	const u_char **keylist;
    103 	int nelem;
    104 	int depth;
    105 	FILE *fp;
    106 	put_func_t put;
    107 	struct field *ftbl;
    108 {
    109 	u_char *wts, *wts1;
    110 	int n, odepth;
    111 	const u_char **cpos, **ppos, **lastkey;
    112 	const u_char *cend, *pend, *start;
    113 	const struct recheader *crec, *prec;
    114 
    115 	if (*keylist == '\0' && UNIQUE)
    116 		return;
    117 	wts1 = wts = ftbl[0].weights;
    118 	if ((!UNIQUE) && SINGL_FLD) {
    119 		if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
    120 			wts1 = Rascii;
    121 		else if (ftbl[0].flags & F)
    122 			wts1 = ascii;
    123 		odepth = depth;
    124 	}
    125 	lastkey = keylist + nelem;
    126 	depth += sizeof(TRECHEADER);
    127 	if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
    128 		ppos = keylist;
    129 		prec = (const RECHEADER *) (*ppos - depth);
    130 		if (UNIQUE)
    131 			put(prec, fp);
    132 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
    133 			crec = (const RECHEADER *) (*cpos - depth);
    134 			if (crec->length  == prec->length) {
    135 				/*
    136 				 * Set pend and cend so that trailing NUL and
    137 				 * record separator is ignored.
    138 				 */
    139 				pend = (const u_char *) &prec->data + prec->length - 2;
    140 				cend = (const u_char *) &crec->data + crec->length - 2;
    141 				for (start = *cpos; cend >= start; cend--) {
    142 					if (wts[*cend] != wts[*pend])
    143 						break;
    144 					pend--;
    145 				}
    146 				if (pend + 1 != *ppos) {
    147 					if (!UNIQUE) {
    148 						OUTPUT;
    149 					} else
    150 						put(crec, fp);
    151 					ppos = cpos;
    152 					prec = crec;
    153 				}
    154 			} else {
    155 				if (!UNIQUE) {
    156 					OUTPUT;
    157 				} else
    158 					put(crec, fp);
    159 				ppos = cpos;
    160 				prec = crec;
    161 			}
    162 		}
    163 		if (!UNIQUE)  { OUTPUT; }
    164 	} else if (UNIQUE) {
    165 		ppos = keylist;
    166 		prec = (const RECHEADER *) (*ppos - depth);
    167 		put(prec, fp);
    168 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
    169 			crec = (const RECHEADER *) (*cpos - depth);
    170 			if (crec->offset == prec->offset) {
    171 				/*
    172 				 * Set pend and cend so that trailing NUL and
    173 				 * record separator is ignored.
    174 				 */
    175 				pend = (const u_char *) &prec->data + prec->offset - 2;
    176 				cend = (const u_char *) &crec->data + crec->offset - 2;
    177 				for (start = *cpos; cend >= start; cend--) {
    178 					if (wts[*cend] != wts[*pend])
    179 						break;
    180 					pend--;
    181 				}
    182 				if (pend + 1 != *ppos) {
    183 					ppos = cpos;
    184 					prec = crec;
    185 					put(prec, fp);
    186 				}
    187 			} else {
    188 				ppos = cpos;
    189 				prec = crec;
    190 				put(prec, fp);
    191 			}
    192 		}
    193 	} else for (cpos = keylist; cpos < lastkey; cpos++) {
    194 		crec = (const RECHEADER *) (*cpos - depth);
    195 		put(crec, fp);
    196 	}
    197 }
    198 
    199 /*
    200  * output the already sorted eol bin.
    201  */
    202 void
    203 rd_append(binno, infl0, nfiles, outfp, buffer, bufend)
    204 	u_char *buffer;
    205 	int infl0;
    206 	int binno, nfiles;
    207 	FILE *outfp;
    208 	u_char *bufend;
    209 {
    210 	RECHEADER *rec;
    211 
    212 	rec = (RECHEADER *) buffer;
    213 	if (!getnext(binno, infl0, NULL, nfiles,
    214 			(RECHEADER *) buffer, bufend, 0)) {
    215 		putline(rec, outfp);
    216 		while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
    217 			bufend, 0) == 0) {
    218 			if (!UNIQUE)
    219 				putline(rec, outfp);
    220 		}
    221 	}
    222 }
    223 
    224 /*
    225  * append plain text--used after sorting the biggest bin.
    226  */
    227 void
    228 concat(a, b)
    229 	FILE *a, *b;
    230 {
    231         int nread;
    232         char buffer[4096];
    233 
    234 	rewind(b);
    235         while ((nread = fread(buffer, 1, 4096, b)) > 0)
    236                 EWRITE(buffer, 1, nread, a);
    237 }
    238