sort.h revision 1.19.12.1       1  1.19.12.1      matt /*	$NetBSD: sort.h,v 1.19.12.1 2010/04/21 05:27:12 matt Exp $	*/
      2       1.17  jdolecek 
      3       1.17  jdolecek /*-
      4       1.17  jdolecek  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5       1.17  jdolecek  * All rights reserved.
      6       1.17  jdolecek  *
      7       1.17  jdolecek  * This code is derived from software contributed to The NetBSD Foundation
      8       1.17  jdolecek  * by Ben Harris and Jaromir Dolecek.
      9       1.17  jdolecek  *
     10       1.17  jdolecek  * Redistribution and use in source and binary forms, with or without
     11       1.17  jdolecek  * modification, are permitted provided that the following conditions
     12       1.17  jdolecek  * are met:
     13       1.17  jdolecek  * 1. Redistributions of source code must retain the above copyright
     14       1.17  jdolecek  *    notice, this list of conditions and the following disclaimer.
     15       1.17  jdolecek  * 2. Redistributions in binary form must reproduce the above copyright
     16       1.17  jdolecek  *    notice, this list of conditions and the following disclaimer in the
     17       1.17  jdolecek  *    documentation and/or other materials provided with the distribution.
     18       1.17  jdolecek  *
     19       1.17  jdolecek  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20       1.17  jdolecek  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21       1.17  jdolecek  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22       1.17  jdolecek  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23       1.17  jdolecek  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24       1.17  jdolecek  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25       1.17  jdolecek  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26       1.17  jdolecek  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27       1.17  jdolecek  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28       1.17  jdolecek  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29       1.17  jdolecek  * POSSIBILITY OF SUCH DAMAGE.
     30       1.17  jdolecek  */
     31        1.2     bjh21 
     32        1.1     bjh21 /*-
     33        1.1     bjh21  * Copyright (c) 1993
     34        1.1     bjh21  *	The Regents of the University of California.  All rights reserved.
     35        1.1     bjh21  *
     36        1.1     bjh21  * This code is derived from software contributed to Berkeley by
     37        1.1     bjh21  * Peter McIlroy.
     38        1.1     bjh21  *
     39        1.1     bjh21  * Redistribution and use in source and binary forms, with or without
     40        1.1     bjh21  * modification, are permitted provided that the following conditions
     41        1.1     bjh21  * are met:
     42        1.1     bjh21  * 1. Redistributions of source code must retain the above copyright
     43        1.1     bjh21  *    notice, this list of conditions and the following disclaimer.
     44        1.1     bjh21  * 2. Redistributions in binary form must reproduce the above copyright
     45        1.1     bjh21  *    notice, this list of conditions and the following disclaimer in the
     46        1.1     bjh21  *    documentation and/or other materials provided with the distribution.
     47       1.16       agc  * 3. Neither the name of the University nor the names of its contributors
     48        1.1     bjh21  *    may be used to endorse or promote products derived from this software
     49        1.1     bjh21  *    without specific prior written permission.
     50        1.1     bjh21  *
     51        1.1     bjh21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52        1.1     bjh21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53        1.1     bjh21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54        1.1     bjh21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55        1.1     bjh21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56        1.1     bjh21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57        1.1     bjh21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58        1.1     bjh21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59        1.1     bjh21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60        1.1     bjh21  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61        1.1     bjh21  * SUCH DAMAGE.
     62        1.1     bjh21  *
     63        1.1     bjh21  *	@(#)sort.h	8.1 (Berkeley) 6/6/93
     64        1.1     bjh21  */
     65        1.1     bjh21 
     66        1.1     bjh21 #include <sys/param.h>
     67        1.1     bjh21 
     68        1.1     bjh21 #include <err.h>
     69        1.1     bjh21 #include <errno.h>
     70        1.1     bjh21 #include <fcntl.h>
     71        1.1     bjh21 #include <limits.h>
     72  1.19.12.1      matt #include <stddef.h>
     73        1.1     bjh21 #include <stdio.h>
     74        1.1     bjh21 #include <stdlib.h>
     75        1.4    simonb #include <string.h>
     76        1.1     bjh21 
/* Number of entries in each per-byte table declared in this header. */
#define NBINS		256
     78        1.1     bjh21 
/*
 * Values for masks, weights, and other flags.
 * Each flag is a distinct bit so that they can be OR-ed together.
 * R and F are also used to index weight_tables[], which is why they
 * occupy the two lowest bits.
 */
#define R 1		/* Field is reversed */
#define F 2		/* weight lower and upper case the same */
#define I 4		/* mask out non-printable characters */
#define D 8		/* sort alphanumeric characters only */
#define N 16		/* Field is a number */
#define BI 32		/* ignore blanks in icol */
#define BT 64		/* ignore blanks in tcol */
     88        1.1     bjh21 
/*
 * Masks for delimiters: blanks, fields, and termination.
 * Each is a separate bit, so one byte can belong to several classes.
 */
#define BLANK 1		/* ' ', '\t'; '\n' if -R is invoked */
#define FLD_D 2		/* ' ', '\t' default; from -t otherwise */
#define REC_D_F 4	/* '\n' default; from -R otherwise */
     93        1.1     bjh21 
/*
 * Smaller / larger of two values.
 * These are macros: the selected argument is evaluated twice, so do not
 * pass expressions with side effects (e.g. min(i++, j)).
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))
     96        1.1     bjh21 
/*
 * Close a stdio stream; if fclose() reports failure, exit with status 2
 * through err(), identifying the stream by its pointer value.
 *
 * The body is wrapped in do { } while (0) so that "FCLOSE(fp);" is exactly
 * one statement and remains correct as the body of an unbraced if/else.
 * `file` is evaluated more than once; pass an expression without side
 * effects.  The argument is cast to void * because that is the type %p
 * requires.
 */
#define	FCLOSE(file) do {						\
	if (EOF == fclose(file))					\
		err(2, "%p", (void *)(file));				\
} while (/*CONSTCOND*/0)
    101        1.1     bjh21 
/*
 * fwrite() that exits with status 2 (via err()) unless all `n` items were
 * written.
 *
 * The result is compared with the requested item count rather than with
 * zero: a short write is an error, and a request to write zero items is not.
 * `n` is captured once in a local so that it is not evaluated twice.
 * Wrapped in do { } while (0) so that "EWRITE(...);" is a single statement
 * and is safe inside an unbraced if/else.
 */
#define	EWRITE(ptr, size, n, f) do {					\
	size_t ewrite_nitems_ = (n);					\
	if (fwrite(ptr, size, ewrite_nitems_, f) != ewrite_nitems_)	\
		err(2, NULL);						\
} while (/*CONSTCOND*/0)
    106        1.1     bjh21 
/*
 * Type used for record lengths and offsets.
 *
 * Records are limited to MAXBUFSIZE (8MB), and less if you want to sort
 * in a sane way.  Anyone who wants to sort data records longer than 2GB
 * definitely needs a different program!
 */
typedef unsigned int length_t;
    112        1.1     bjh21 
    113  1.19.12.1      matt /* A record is a key/line pair starting at rec.data. It has a total length
    114        1.1     bjh21  * and an offset to the start of the line half of the pair.
    115        1.1     bjh21  */
    116        1.1     bjh21 typedef struct recheader {
    117  1.19.12.1      matt 	length_t length;	/* total length of key and line */
    118  1.19.12.1      matt 	length_t offset;	/* to line */
    119  1.19.12.1      matt 	int      keylen;	/* length of key */
    120  1.19.12.1      matt 	u_char   data[];	/* key then line */
    121        1.1     bjh21 } RECHEADER;
    122        1.1     bjh21 
/*
 * This is the column as seen by struct field.  It is used by enterfield.
 * Columns are matched with their corresponding coldescs during
 * initialization.
 */
struct column {
	struct coldesc *p;	/* the matching coldesc */
	int num;		/* presumably the column number - confirm in setfield() */
	int indent;		/* presumably an offset within the column - confirm */
};
    131        1.1     bjh21 
    132        1.1     bjh21 /* a coldesc has a number and pointers to the beginning and end of the
    133        1.1     bjh21  * corresponding column in the current line.  This is determined in enterkey.
    134        1.1     bjh21  */
    135        1.1     bjh21 typedef struct coldesc {
    136        1.1     bjh21 	u_char *start;
    137        1.1     bjh21 	u_char *end;
    138        1.1     bjh21 	int num;
    139        1.1     bjh21 } COLDESC;
    140        1.1     bjh21 
    141        1.1     bjh21 /* A field has an initial and final column; an omitted final column
    142        1.1     bjh21  * implies the end of the line.  Flags regulate omission of blanks and
    143        1.1     bjh21  * numerical sorts; mask determines which characters are ignored (from -i, -d);
    144        1.1     bjh21  * weights determines the sort weights of a character (from -f, -r).
    145  1.19.12.1      matt  *
    146  1.19.12.1      matt  * The first field contain the global flags etc.
    147  1.19.12.1      matt  * The list terminates when icol = 0.
    148        1.1     bjh21  */
    149        1.1     bjh21 struct field {
    150        1.1     bjh21 	struct column icol;
    151        1.1     bjh21 	struct column tcol;
    152        1.1     bjh21 	u_int flags;
    153        1.1     bjh21 	u_char *mask;
    154        1.1     bjh21 	u_char *weights;
    155        1.1     bjh21 };
    156        1.1     bjh21 
/*
 * The set of input file names handed to fmerge(), fsort() and order().
 * Neither the array nor the strings may be modified through this pointer.
 * NOTE(review): the struct carries no element count; fmerge() and fsort()
 * take an int alongside it, which is presumably the count - confirm.
 */
struct filelist {
	const char * const * names;
};
    160        1.8  jdolecek 
    161  1.19.12.1      matt typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
    162  1.19.12.1      matt typedef void (*put_func_t)(const RECHEADER *, FILE *);
    163        1.8  jdolecek 
    164        1.1     bjh21 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
    165  1.19.12.1      matt extern u_char *const weight_tables[4];   /* ascii, Rascii, Ftable, RFtable */
    166        1.1     bjh21 extern u_char d_mask[NBINS];
    167  1.19.12.1      matt extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
    168  1.19.12.1      matt extern int posix_sort;
    169        1.1     bjh21 extern int REC_D;
    170        1.5  jdolecek extern const char *tmpdir;
    171       1.18  jdolecek extern struct coldesc *clist;
    172       1.13  jdolecek extern int ncols;
    173        1.1     bjh21 
/*
 * Non-zero when the debug flag selected by character `ch` is set in
 * debug_flags.  Only the low five bits of `ch` choose the bit, so characters
 * that differ by a multiple of 32 (for example 'a' and 'A') share a flag.
 *
 * The shifted constant is unsigned: with a plain int 1, an index of 31
 * would shift into the sign bit, which is undefined behaviour.
 */
#define DEBUG(ch) (debug_flags & (1u << ((ch) & 31)))
extern unsigned int debug_flags;
    176  1.19.12.1      matt 
    177  1.19.12.1      matt void	 append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
    178       1.14  jdolecek void	 concat(FILE *, FILE *);
    179  1.19.12.1      matt length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
    180       1.14  jdolecek void	 fixit(int *, char **);
    181       1.14  jdolecek void	 fldreset(struct field *);
    182       1.14  jdolecek FILE	*ftmp(void);
    183  1.19.12.1      matt void	 fmerge(struct filelist *, int, FILE *, struct field *);
    184  1.19.12.1      matt void	 save_for_merge(FILE *, get_func_t, struct field *);
    185  1.19.12.1      matt void	 merge_sort(FILE *, put_func_t, struct field *);
    186  1.19.12.1      matt void	 fsort(struct filelist *, int, FILE *, struct field *);
    187  1.19.12.1      matt int	 geteasy(FILE *, RECHEADER *, u_char *, struct field *);
    188  1.19.12.1      matt int	 makekey(FILE *, RECHEADER *, u_char *, struct field *);
    189  1.19.12.1      matt int	 makeline(FILE *, RECHEADER *, u_char *, struct field *);
    190  1.19.12.1      matt void	 makeline_copydown(RECHEADER *);
    191       1.14  jdolecek int	 optval(int, int);
    192  1.19.12.1      matt void	 order(struct filelist *, struct field *);
    193       1.14  jdolecek void	 putline(const RECHEADER *, FILE *);
    194       1.14  jdolecek void	 putrec(const RECHEADER *, FILE *);
    195  1.19.12.1      matt void	 putkeydump(const RECHEADER *, FILE *);
    196       1.14  jdolecek void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
    197  1.19.12.1      matt void	 radix_sort(RECHEADER **, RECHEADER **, int);
    198       1.14  jdolecek int	 setfield(const char *, struct field *, int);
    199  1.19.12.1      matt void	 settables(void);
    200