Home | History | Annotate | Line # | Download | only in sort
append.c revision 1.16
      1 /*	$NetBSD: append.c,v 1.16 2009/08/16 19:53:43 dsl Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Ben Harris and Jaromir Dolecek.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1993
     34  *	The Regents of the University of California.  All rights reserved.
     35  *
     36  * This code is derived from software contributed to Berkeley by
     37  * Peter McIlroy.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  */
     63 
     64 #include "sort.h"
     65 
     66 #ifndef lint
     67 __RCSID("$NetBSD: append.c,v 1.16 2009/08/16 19:53:43 dsl Exp $");
     68 __SCCSID("@(#)append.c	8.1 (Berkeley) 6/6/93");
     69 #endif /* not lint */
     70 
     71 #include <stdlib.h>
     72 #include <string.h>
     73 
     74 #define OUTPUT {							\
     75 	if ((n = cpos - ppos) > 1) {					\
     76 		for (; ppos < cpos; ++ppos)				\
     77 			*ppos -= odepth;				\
     78 		ppos -= n;						\
     79 		if (stable_sort)					\
     80 			sradixsort(ppos, n, wts1, REC_D);		\
     81 		else							\
     82 			radixsort(ppos, n, wts1, REC_D);		\
     83 		for (; ppos < cpos; ppos++) {				\
     84 			prec = (const RECHEADER *) (*ppos - REC_DATA_OFFSET);\
     85 			put(prec, fp);					\
     86 		}							\
     87 	} else put(prec, fp);						\
     88 }
     89 
     90 /*
     91  * copy sorted lines to output; check for uniqueness
     92  */
     93 void
     94 append(const u_char **keylist, int nelem, int depth, FILE *fp, put_func_t put,
     95     struct field *ftbl)
     96 {
     97 	u_char *wts, *wts1;
     98 	int n, odepth = depth;
     99 	const u_char **cpos, **ppos, **lastkey;
    100 	const u_char *cend, *pend, *start;
    101 	const struct recheader *crec, *prec;
    102 
    103 	if (*keylist == '\0' && UNIQUE)
    104 		return;
    105 	wts1 = wts = ftbl[0].weights;
    106 	if ((!UNIQUE) && SINGL_FLD) {
    107 		if ((ftbl[0].flags & F) && (ftbl[0].flags & R))
    108 			wts1 = Rascii;
    109 		else if (ftbl[0].flags & F)
    110 			wts1 = ascii;
    111 	}
    112 	lastkey = keylist + nelem;
    113 	depth += REC_DATA_OFFSET;
    114 	if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
    115 		ppos = keylist;
    116 		prec = (const RECHEADER *) (*ppos - depth);
    117 		if (UNIQUE)
    118 			put(prec, fp);
    119 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
    120 			crec = (const RECHEADER *) (*cpos - depth);
    121 			if (crec->length  == prec->length) {
    122 				/*
    123 				 * Set pend and cend so that trailing NUL and
    124 				 * record separator is ignored.
    125 				 */
    126 				pend = (const u_char *) &prec->data + prec->length - 2;
    127 				cend = (const u_char *) &crec->data + crec->length - 2;
    128 				for (start = *cpos; cend >= start; cend--) {
    129 					if (wts[*cend] != wts[*pend])
    130 						break;
    131 					pend--;
    132 				}
    133 				if (pend + 1 != *ppos) {
    134 					if (!UNIQUE) {
    135 						OUTPUT;
    136 					} else
    137 						put(crec, fp);
    138 					ppos = cpos;
    139 					prec = crec;
    140 				}
    141 			} else {
    142 				if (!UNIQUE) {
    143 					OUTPUT;
    144 				} else
    145 					put(crec, fp);
    146 				ppos = cpos;
    147 				prec = crec;
    148 			}
    149 		}
    150 		if (!UNIQUE)  { OUTPUT; }
    151 	} else if (UNIQUE) {
    152 		ppos = keylist;
    153 		prec = (const RECHEADER *) (*ppos - depth);
    154 		put(prec, fp);
    155 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
    156 			crec = (const RECHEADER *) (*cpos - depth);
    157 			if (crec->offset == prec->offset) {
    158 				/*
    159 				 * Set pend and cend so that trailing NUL and
    160 				 * record separator is ignored.
    161 				 */
    162 				pend = (const u_char *) &prec->data + prec->offset - 2;
    163 				cend = (const u_char *) &crec->data + crec->offset - 2;
    164 				for (start = *cpos; cend >= start; cend--) {
    165 					if (wts[*cend] != wts[*pend])
    166 						break;
    167 					pend--;
    168 				}
    169 				if (pend + 1 != *ppos) {
    170 					ppos = cpos;
    171 					prec = crec;
    172 					put(prec, fp);
    173 				}
    174 			} else {
    175 				ppos = cpos;
    176 				prec = crec;
    177 				put(prec, fp);
    178 			}
    179 		}
    180 	} else for (cpos = keylist; cpos < lastkey; cpos++) {
    181 		crec = (const RECHEADER *) (*cpos - depth);
    182 		put(crec, fp);
    183 	}
    184 }
    185 
    186 /*
    187  * output the already sorted eol bin.
    188  */
    189 void
    190 rd_append(int binno, int infl0, int nfiles, FILE *outfp, u_char *buffer,
    191     u_char *bufend)
    192 {
    193 	RECHEADER *rec;
    194 
    195 	rec = (RECHEADER *) buffer;
    196 	if (!getnext(binno, infl0, NULL, nfiles,
    197 			(RECHEADER *) buffer, bufend, 0)) {
    198 		putline(rec, outfp);
    199 		while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer,
    200 			bufend, 0) == 0) {
    201 			if (!UNIQUE)
    202 				putline(rec, outfp);
    203 		}
    204 	}
    205 }
    206 
    207 /*
    208  * append plain text--used after sorting the biggest bin.
    209  */
    210 void
    211 concat(FILE *a, FILE *b)
    212 {
    213         int nread;
    214         char buffer[4096];
    215 
    216 	rewind(b);
    217         while ((nread = fread(buffer, 1, 4096, b)) > 0)
    218                 EWRITE(buffer, 1, nread, a);
    219 }
    220