Home | History | Annotate | Line # | Download | only in sort
sort.h revision 1.20
      1 /*	$NetBSD: sort.h,v 1.20 2009/04/13 11:07:59 lukem Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Ben Harris and Jaromir Dolecek.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1993
     34  *	The Regents of the University of California.  All rights reserved.
     35  *
     36  * This code is derived from software contributed to Berkeley by
     37  * Peter McIlroy.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  * 3. Neither the name of the University nor the names of its contributors
     48  *    may be used to endorse or promote products derived from this software
     49  *    without specific prior written permission.
     50  *
     51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     61  * SUCH DAMAGE.
     62  *
     63  *	@(#)sort.h	8.1 (Berkeley) 6/6/93
     64  */
     65 
     66 #include <sys/param.h>
     67 
     68 #include <db.h>
     69 #include <err.h>
     70 #include <errno.h>
     71 #include <fcntl.h>
     72 #include <limits.h>
     73 #include <stddef.h>
     74 #include <stdio.h>
     75 #include <stdlib.h>
     76 #include <string.h>
     77 
/*
 * Dimension of the u_char tables declared in this header
 * (ascii, Rascii, Ftable, RFtable, d_mask, gweights).
 */
#define	NBINS	256
     79 
/*
 * Values for masks, weights, and other flags.
 * Each constant occupies its own bit.
 */
#define	I	0x01	/* mask out non-printable characters */
#define	D	0x02	/* sort alphanumeric characters only */
#define	N	0x04	/* field is a number */
#define	F	0x08	/* weight lower and upper case the same */
#define	R	0x10	/* field is reversed with respect to the global weight */
#define	BI	0x20	/* ignore blanks in icol */
#define	BT	0x40	/* ignore blanks in tcol */
     88 
/*
 * Masks for delimiters: blanks, fields, and termination.
 * Each constant occupies its own bit.
 */
#define	BLANK	0x01	/* ' ', '\t'; '\n' if -T is invoked */
#define	FLD_D	0x02	/* ' ', '\t' default; from -t otherwise */
#define	REC_D_F	0x04	/* '\n' default; from -T otherwise */
     93 
/*
 * Smaller / larger of two values.  These are function-like macros: the
 * selected argument is evaluated twice, so do not pass expressions that
 * have side effects.
 */
#define	min(a, b)	(((a) < (b)) ? (a) : (b))
#define	max(a, b)	(((a) > (b)) ? (a) : (b))
     96 
/*
 * Close the stream `file'.  If fclose() reports failure, terminate through
 * err() with exit status 2; the message contains the stream pointer and
 * the errno text.
 *
 * The body is wrapped in do { } while (0) so that "FCLOSE(fp);" is exactly
 * one statement and can be used as an unbraced if/else branch.  The
 * argument is evaluated only once, and the pointer is cast to void * as
 * the %p conversion requires.
 */
#define	FCLOSE(file) do {						\
	FILE *fclose_fp_ = (file);					\
	if (EOF == fclose(fclose_fp_))					\
		err(2, "%p", (void *)fclose_fp_);			\
} while (0)
    101 
/*
 * Write `n' items of `size' bytes from `ptr' to stream `f'.  If fwrite()
 * transfers fewer than `n' items, terminate through err() with exit
 * status 2.
 *
 * Comparing the result against the requested count catches short writes
 * (a partial write returns a non-zero count) and lets a zero-item request
 * succeed instead of being reported as an error.  `n' is evaluated once.
 * The body is wrapped in do { } while (0) so that "EWRITE(...);" is exactly
 * one statement and can be used as an unbraced if/else branch.
 */
#define	EWRITE(ptr, size, n, f) do {					\
	size_t ewrite_n_ = (n);						\
	if (fwrite((ptr), (size), ewrite_n_, (f)) != ewrite_n_)		\
		err(2, NULL);						\
} while (0)
    106 
/* Record lengths are held in a size_t, which currently bounds record size. */
typedef size_t length_t;

/*
 * A record is a key/line pair whose bytes start at `data'.  `length' is the
 * total length and `offset' locates the start of the line half of the pair.
 *
 * NOTE(review): data[1] looks like the pre-C99 trailing-array idiom, i.e.
 * records are presumably allocated larger than sizeof(RECHEADER) -- confirm
 * against the allocation sites before converting it to a flexible array
 * member, since that would change sizeof(RECHEADER).
 */
struct recheader {
	length_t length;
	length_t offset;
	u_char data[1];
};
typedef struct recheader RECHEADER;

/* The same two leading members as struct recheader, without data bytes. */
struct trecheader {
	length_t length;
	length_t offset;
};
typedef struct trecheader TRECHEADER;
    123 
/*
 * A column as struct field sees it (its icol and tcol members); used by
 * enterfield.  Columns are matched with their corresponding coldescs
 * during initialization (presumably through `p' -- confirm).
 */
struct column {
	struct coldesc	*p;
	int		 num;
	int		 indent;
};
    132 
/*
 * Column descriptor: a column number plus pointers to the beginning and
 * end of that column within the current line.  The pointers are
 * determined in enterkey.
 */
struct coldesc {
	u_char	*start;
	u_char	*end;
	int	 num;
};
typedef struct coldesc COLDESC;
    141 
    142 /* A field has an initial and final column; an omitted final column
    143  * implies the end of the line.  Flags regulate omission of blanks and
    144  * numerical sorts; mask determines which characters are ignored (from -i, -d);
    145  * weights determines the sort weights of a character (from -f, -r).
    146  */
    147 struct field {
    148 	struct column icol;
    149 	struct column tcol;
    150 	u_int flags;
    151 	u_char *mask;
    152 	u_char *weights;
    153 };
    154 
/*
 * Wraps a pointer to read-only pointers to read-only chars: neither the
 * pointers nor the characters can be modified through `names'.
 * (Presumably the input file names -- confirm against the callers.)
 */
struct filelist {
	const char * const *names;
};
    158 
    159 typedef int (*get_func_t)(int, int, struct filelist *, int,
    160 		RECHEADER *, u_char *, struct field *);
    161 typedef void (*put_func_t)(const struct recheader *, FILE *);
    162 
    163 extern int PANIC;	/* maximum depth of fsort before fmerge is called */
    164 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
    165 extern u_char d_mask[NBINS];
    166 extern int SINGL_FLD, SEP_FLAG, UNIQUE;
    167 extern int REC_D;
    168 extern const char *tmpdir;
    169 extern int stable_sort;
    170 extern u_char gweights[NBINS];
    171 extern struct coldesc *clist;
    172 extern int ncols;
    173 
    174 void	 append(const u_char **, int, int, FILE *,
    175 	    void (*)(const RECHEADER *, FILE *), struct field *);
    176 void	 concat(FILE *, FILE *);
    177 length_t enterkey(RECHEADER *, DBT *, int, struct field *);
    178 void	 fixit(int *, char **);
    179 void	 fldreset(struct field *);
    180 FILE	*ftmp(void);
    181 void	 fmerge(int, int, struct filelist *, int,
    182 		get_func_t, FILE *, put_func_t, struct field *);
    183 void	 fsort(int, int, int, struct filelist *, int, FILE *,
    184 		struct field *);
    185 int	 geteasy(int, int, struct filelist *,
    186 	    int, RECHEADER *, u_char *, struct field *);
    187 int	 getnext(int, int, struct filelist *,
    188 	    int, RECHEADER *, u_char *, struct field *);
    189 int	 makekey(int, int, struct filelist *,
    190 	    int, RECHEADER *, u_char *, struct field *);
    191 int	 makeline(int, int, struct filelist *,
    192 	    int, RECHEADER *, u_char *, struct field *);
    193 void	 num_init(void);
    194 void	 onepass(const u_char **, int, long, long *, u_char *, FILE *);
    195 int	 optval(int, int);
    196 void	 order(struct filelist *, get_func_t, struct field *);
    197 void	 putline(const RECHEADER *, FILE *);
    198 void	 putrec(const RECHEADER *, FILE *);
    199 void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
    200 int	 setfield(const char *, struct field *, int);
    201 void	 settables(int);
    202