sort.h revision 1.29 1 1.29 dsl /* $NetBSD: sort.h,v 1.29 2009/09/26 21:16:55 dsl Exp $ */
2 1.17 jdolecek
3 1.17 jdolecek /*-
4 1.17 jdolecek * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 1.17 jdolecek * All rights reserved.
6 1.17 jdolecek *
7 1.17 jdolecek * This code is derived from software contributed to The NetBSD Foundation
8 1.17 jdolecek * by Ben Harris and Jaromir Dolecek.
9 1.17 jdolecek *
10 1.17 jdolecek * Redistribution and use in source and binary forms, with or without
11 1.17 jdolecek * modification, are permitted provided that the following conditions
12 1.17 jdolecek * are met:
13 1.17 jdolecek * 1. Redistributions of source code must retain the above copyright
14 1.17 jdolecek * notice, this list of conditions and the following disclaimer.
15 1.17 jdolecek * 2. Redistributions in binary form must reproduce the above copyright
16 1.17 jdolecek * notice, this list of conditions and the following disclaimer in the
17 1.17 jdolecek * documentation and/or other materials provided with the distribution.
18 1.17 jdolecek *
19 1.17 jdolecek * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.17 jdolecek * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.17 jdolecek * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.17 jdolecek * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.17 jdolecek * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.17 jdolecek * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.17 jdolecek * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.17 jdolecek * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.17 jdolecek * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.17 jdolecek * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.17 jdolecek * POSSIBILITY OF SUCH DAMAGE.
30 1.17 jdolecek */
31 1.2 bjh21
32 1.1 bjh21 /*-
33 1.1 bjh21 * Copyright (c) 1993
34 1.1 bjh21 * The Regents of the University of California. All rights reserved.
35 1.1 bjh21 *
36 1.1 bjh21 * This code is derived from software contributed to Berkeley by
37 1.1 bjh21 * Peter McIlroy.
38 1.1 bjh21 *
39 1.1 bjh21 * Redistribution and use in source and binary forms, with or without
40 1.1 bjh21 * modification, are permitted provided that the following conditions
41 1.1 bjh21 * are met:
42 1.1 bjh21 * 1. Redistributions of source code must retain the above copyright
43 1.1 bjh21 * notice, this list of conditions and the following disclaimer.
44 1.1 bjh21 * 2. Redistributions in binary form must reproduce the above copyright
45 1.1 bjh21 * notice, this list of conditions and the following disclaimer in the
46 1.1 bjh21 * documentation and/or other materials provided with the distribution.
47 1.16 agc * 3. Neither the name of the University nor the names of its contributors
48 1.1 bjh21 * may be used to endorse or promote products derived from this software
49 1.1 bjh21 * without specific prior written permission.
50 1.1 bjh21 *
51 1.1 bjh21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 1.1 bjh21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 1.1 bjh21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 1.1 bjh21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 1.1 bjh21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 1.1 bjh21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 1.1 bjh21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 1.1 bjh21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 1.1 bjh21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 1.1 bjh21 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 1.1 bjh21 * SUCH DAMAGE.
62 1.1 bjh21 *
63 1.1 bjh21 * @(#)sort.h 8.1 (Berkeley) 6/6/93
64 1.1 bjh21 */
65 1.1 bjh21
66 1.1 bjh21 #include <sys/param.h>
67 1.1 bjh21
68 1.1 bjh21 #include <err.h>
69 1.1 bjh21 #include <errno.h>
70 1.1 bjh21 #include <fcntl.h>
71 1.1 bjh21 #include <limits.h>
72 1.20 lukem #include <stddef.h>
73 1.1 bjh21 #include <stdio.h>
74 1.1 bjh21 #include <stdlib.h>
75 1.4 simonb #include <string.h>
76 1.1 bjh21
/* Number of entries in each of the byte-indexed tables declared further down
 * in this header (ascii, Rascii, Ftable, RFtable, d_mask). */
77 1.11 jdolecek #define NBINS 256
78 1.1 bjh21
79 1.1 bjh21 /* values for masks, weights, and other flags. */
80 1.25 dsl /* R and F get used to index weight_tables[] */
/* NOTE(review): these one- and two-letter names are object-like macros, so
 * they are replaced in every file that includes this header; an identifier
 * spelled R, F, I, D, N, BI or BT anywhere after the #include is affected. */
81 1.25 dsl #define R 1 /* Field is reversed */
82 1.25 dsl #define F 2 /* weight lower and upper case the same */
83 1.25 dsl #define I 4 /* mask out non-printable characters */
84 1.25 dsl #define D 8 /* sort alphanumeric characters only */
85 1.25 dsl #define N 16 /* Field is a number */
86 1.1 bjh21 #define BI 32 /* ignore blanks in icol */
87 1.1 bjh21 #define BT 64 /* ignore blanks in tcol */
88 1.1 bjh21
89 1.1 bjh21 /* masks for delimiters: blanks, fields, and termination. */
/* Each value is a distinct bit, so the three can be OR-ed together. */
90 1.25 dsl #define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */
91 1.1 bjh21 #define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
92 1.25 dsl #define REC_D_F 4 /* '\n' default; from -R otherwise */
93 1.1 bjh21
/* Smaller / larger of two values.
 * Each argument appears twice in the expansion, so an argument with side
 * effects (e.g. min(i++, j)) may be evaluated twice; pass only
 * side-effect-free expressions. */
94 1.1 bjh21 #define min(a, b) ((a) < (b) ? (a) : (b))
95 1.1 bjh21 #define max(a, b) ((a) > (b) ? (a) : (b))
96 1.1 bjh21
/*
 * FCLOSE(file): close the stdio stream `file'.  If fclose() returns EOF the
 * program exits with status 2 through err(), which prints the stream pointer
 * followed by the errno text.
 *
 * The body is wrapped in do { } while (0) so that `FCLOSE(fp);' is exactly
 * one statement and can be used as the unbraced branch of an if/else.
 * The argument is evaluated once, and the pointer is cast to void * because
 * that is the type the %p conversion requires.
 */
#define FCLOSE(file) do {						\
	FILE *fclose_fp_ = (file);					\
	if (EOF == fclose(fclose_fp_))					\
		err(2, "%p", (void *)fclose_fp_);			\
} while (0)
101 1.1 bjh21
/*
 * EWRITE(ptr, size, n, f): write `n' items of `size' bytes from `ptr' to the
 * stdio stream `f'.  If fwrite() transfers fewer than `n' items the program
 * exits with status 2 through err(), which prints the errno text.
 *
 * - A short (partial) write is treated as a failure, not only a write that
 *   transferred nothing at all.
 * - A request for zero items (n == 0) succeeds silently; fwrite() returns 0
 *   in that case without having failed.
 * - With size == 0 and n != 0, fwrite() returns 0 and this is still reported
 *   as an error.
 *
 * The body is wrapped in do { } while (0) so that `EWRITE(...);' is exactly
 * one statement and can be used as the unbraced branch of an if/else.
 * Every argument is evaluated exactly once.
 */
#define EWRITE(ptr, size, n, f) do {					\
	size_t ewrite_n_ = (n);						\
	if (fwrite((ptr), (size), ewrite_n_, (f)) != ewrite_n_)	\
		err(2, NULL);						\
} while (0)
106 1.1 bjh21
107 1.28 dsl /* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort
108 1.28 dsl * in a sane way.
109 1.28 dsl * Anyone who wants to sort data records longer than 2GB definitely needs a
110 1.28 dsl * different program! */
111 1.28 dsl typedef unsigned int length_t; /* type of RECHEADER's length/offset fields and of enterkey()'s return value */
112 1.1 bjh21
113 1.22 dsl /* A record is a key/line pair starting at rec.data. It has a total length
114 1.1 bjh21 * and an offset to the start of the line half of the pair.
115 1.1 bjh21 */
/* NOTE: data[] is a C99 flexible array member, so sizeof(RECHEADER) covers
 * only the three header fields; storage for the key and line bytes has to be
 * provided in addition to it by whoever allocates a record.
 * keylen is a plain (signed) int, while length and offset are length_t
 * (unsigned int). */
116 1.1 bjh21 typedef struct recheader {
117 1.28 dsl length_t length; /* total length of key and line */
118 1.28 dsl length_t offset; /* to line */
119 1.28 dsl int keylen; /* length of key */
120 1.28 dsl u_char data[]; /* key then line */
121 1.1 bjh21 } RECHEADER;
122 1.1 bjh21
123 1.1 bjh21 /* This is the column as seen by struct field. It is used by enterfield.
124 1.1 bjh21 * They are matched with corresponding coldescs during initialization.
125 1.1 bjh21 */
126 1.1 bjh21 struct column {
127 1.1 bjh21 struct coldesc *p; /* the coldesc this column is matched with (see comment above) */
128 1.1 bjh21 int num; /* presumably the column number -- TODO confirm against setfield() */
129 1.1 bjh21 int indent; /* presumably a character offset within the column -- TODO confirm */
130 1.1 bjh21 };
131 1.1 bjh21
132 1.1 bjh21 /* a coldesc has a number and pointers to the beginning and end of the
133 1.1 bjh21 * corresponding column in the current line. This is determined in enterkey.
134 1.1 bjh21 */
135 1.1 bjh21 typedef struct coldesc {
136 1.1 bjh21 u_char *start; /* beginning of this column in the current line */
137 1.1 bjh21 u_char *end; /* end of this column in the current line; NOTE(review): inclusive vs. one-past is not visible here */
138 1.1 bjh21 int num; /* the column's number */
139 1.1 bjh21 } COLDESC;
140 1.1 bjh21
141 1.1 bjh21 /* A field has an initial and final column; an omitted final column
142 1.1 bjh21 * implies the end of the line. Flags regulate omission of blanks and
143 1.1 bjh21 * numerical sorts; mask determines which characters are ignored (from -i, -d);
144 1.1 bjh21 * weights determines the sort weights of a character (from -f, -r).
145 1.25 dsl *
146 1.25 dsl * The first field contains the global flags etc.
147 1.25 dsl * The list terminates when icol = 0.
148 1.1 bjh21 */
149 1.1 bjh21 struct field {
150 1.1 bjh21 struct column icol; /* initial column of the field */
151 1.1 bjh21 struct column tcol; /* final column; omitted means end of line */
152 1.1 bjh21 u_int flags; /* presumably an OR of the R/F/I/D/N/BI/BT bits defined above -- confirm */
153 1.1 bjh21 u_char *mask; /* which characters are ignored (from -i, -d) */
154 1.1 bjh21 u_char *weights; /* sort weight of each character (from -f, -r) */
155 1.1 bjh21 };
156 1.1 bjh21
/* Wrapper around a read-only array of read-only strings.
 * NOTE(review): presumably the input file names; how the array's length is
 * conveyed is not visible in this header (several functions below take a
 * struct filelist * together with an int) -- confirm. */
157 1.8 jdolecek struct filelist {
158 1.7 jdolecek const char * const * names;
159 1.1 bjh21 };
160 1.8 jdolecek
/* Callback types.
 * get_func_t has the same signature as geteasy(), makekey() and makeline()
 * declared below; put_func_t has the same signature as putline(), putrec()
 * and putkeydump(). */
161 1.29 dsl typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
162 1.27 dsl typedef void (*put_func_t)(const RECHEADER *, FILE *);
163 1.8 jdolecek
/* Global state shared between the sort source files.  These are declarations
 * only; the definitions live outside this header. */
/* Byte-indexed tables, NBINS entries each. */
164 1.1 bjh21 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
165 1.25 dsl extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */
166 1.1 bjh21 extern u_char d_mask[NBINS];
167 1.28 dsl extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
168 1.28 dsl extern int posix_sort;
169 1.1 bjh21 extern int REC_D; /* presumably the record delimiter ('\n' default, from -R otherwise) -- confirm */
170 1.5 jdolecek extern const char *tmpdir;
171 1.18 jdolecek extern struct coldesc *clist;
172 1.13 jdolecek extern int ncols; /* presumably the number of entries in clist -- confirm */
173 1.1 bjh21
/*
 * DEBUG(ch): non-zero when the debug flag selected by character `ch' is set
 * in debug_flags.  Only the low five bits of `ch' pick the bit, so characters
 * that differ only in their upper bits (e.g. 'a' and 'A') share a flag.
 *
 * The shifted constant is unsigned: (ch) & 31 can be 31, and shifting a
 * signed 1 left by 31 is undefined behaviour where int is 32 bits wide.
 */
#define DEBUG(ch) (debug_flags & (1u << ((ch) & 31)))
extern unsigned int debug_flags;
176 1.25 dsl
/* Functions shared between the sort source files (defined outside this
 * header).
 *  - geteasy(), makekey() and makeline() all match get_func_t.
 *  - putline(), putrec() and putkeydump() all match put_func_t.
 *  - save_for_merge() takes a get_func_t and merge_sort() a put_func_t. */
/* The last parameter of append() has the same type as put_func_t. */
177 1.28 dsl void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
178 1.14 jdolecek void concat(FILE *, FILE *);
179 1.21 dsl length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
180 1.14 jdolecek void fixit(int *, char **);
181 1.14 jdolecek void fldreset(struct field *);
182 1.14 jdolecek FILE *ftmp(void);
183 1.29 dsl void fmerge(struct filelist *, int, FILE *, struct field *);
184 1.29 dsl void save_for_merge(FILE *, get_func_t, struct field *);
185 1.29 dsl void merge_sort(FILE *, put_func_t, struct field *);
186 1.24 dsl void fsort(struct filelist *, int, FILE *, struct field *);
187 1.29 dsl int geteasy(FILE *, RECHEADER *, u_char *, struct field *);
188 1.29 dsl int makekey(FILE *, RECHEADER *, u_char *, struct field *);
189 1.29 dsl int makeline(FILE *, RECHEADER *, u_char *, struct field *);
190 1.14 jdolecek int optval(int, int);
191 1.29 dsl void order(struct filelist *, struct field *);
192 1.14 jdolecek void putline(const RECHEADER *, FILE *);
193 1.14 jdolecek void putrec(const RECHEADER *, FILE *);
194 1.25 dsl void putkeydump(const RECHEADER *, FILE *);
195 1.14 jdolecek void rd_append(int, int, int, FILE *, u_char *, u_char *);
196 1.28 dsl void radix_sort(RECHEADER **, RECHEADER **, int);
197 1.14 jdolecek int setfield(const char *, struct field *, int);
198 1.26 dsl void settables(void);
199