sort.h revision 1.18 1 /* $NetBSD: sort.h,v 1.18 2004/02/15 14:22:55 jdolecek Exp $ */
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*-
40 * Copyright (c) 1993
41 * The Regents of the University of California. All rights reserved.
42 *
43 * This code is derived from software contributed to Berkeley by
44 * Peter McIlroy.
45 *
46 * Redistribution and use in source and binary forms, with or without
47 * modification, are permitted provided that the following conditions
48 * are met:
49 * 1. Redistributions of source code must retain the above copyright
50 * notice, this list of conditions and the following disclaimer.
51 * 2. Redistributions in binary form must reproduce the above copyright
52 * notice, this list of conditions and the following disclaimer in the
53 * documentation and/or other materials provided with the distribution.
54 * 3. Neither the name of the University nor the names of its contributors
55 * may be used to endorse or promote products derived from this software
56 * without specific prior written permission.
57 *
58 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68 * SUCH DAMAGE.
69 *
70 * @(#)sort.h 8.1 (Berkeley) 6/6/93
71 */
72
73 #include <sys/param.h>
74
75 #include <db.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <limits.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83
84 #define NBINS 256
85
86 /* values for masks, weights, and other flags. */
87 #define I 1 /* mask out non-printable characters */
88 #define D 2 /* sort alphanumeric characters only */
89 #define N 4 /* Field is a number */
90 #define F 8 /* weight lower and upper case the same */
91 #define R 16 /* Field is reversed with respect to the global weight */
92 #define BI 32 /* ignore blanks in icol */
93 #define BT 64 /* ignore blanks in tcol */
94
95 /* masks for delimiters: blanks, fields, and termination. */
96 #define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */
97 #define FLD_D 2 /* ' ', '\t' default; from -t otherwise */
98 #define REC_D_F 4 /* '\n' default; from -T otherwise */
99
100 #define min(a, b) ((a) < (b) ? (a) : (b))
101 #define max(a, b) ((a) > (b) ? (a) : (b))
102
103 #define FCLOSE(file) { \
104 if (EOF == fclose(file)) \
105 err(2, "%p", file); \
106 }
107
108 #define EWRITE(ptr, size, n, f) { \
109 if (!fwrite(ptr, size, n, f)) \
110 err(2, NULL); \
111 }
112
113 /* length of record is currently limited to maximum string length (size_t) */
114 typedef size_t length_t;
115
116 /* a record is a key/line pair starting at rec.data. It has a total length
117 * and an offset to the start of the line half of the pair.
118 */
119 typedef struct recheader {
120 length_t length;
121 length_t offset;
122 u_char data[1];
123 } RECHEADER;
124
125 typedef struct trecheader {
126 length_t length;
127 length_t offset;
128 } TRECHEADER;
129
130 /* This is the column as seen by struct field. It is used by enterfield.
131 * They are matched with corresponding coldescs during initialization.
132 */
133 struct column {
134 struct coldesc *p;
135 int num;
136 int indent;
137 };
138
139 /* a coldesc has a number and pointers to the beginning and end of the
140 * corresponding column in the current line. This is determined in enterkey.
141 */
142 typedef struct coldesc {
143 u_char *start;
144 u_char *end;
145 int num;
146 } COLDESC;
147
148 /* A field has an initial and final column; an omitted final column
149 * implies the end of the line. Flags regulate omission of blanks and
150 * numerical sorts; mask determines which characters are ignored (from -i, -d);
151 * weights determines the sort weights of a character (from -f, -r).
152 */
153 struct field {
154 struct column icol;
155 struct column tcol;
156 u_int flags;
157 u_char *mask;
158 u_char *weights;
159 };
160
161 struct filelist {
162 const char * const * names;
163 };
164
165 typedef int (*get_func_t)(int, int, struct filelist *, int,
166 RECHEADER *, u_char *, struct field *);
167 typedef void (*put_func_t)(const struct recheader *, FILE *);
168
169 extern int PANIC; /* maximum depth of fsort before fmerge is called */
170 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
171 extern u_char d_mask[NBINS];
172 extern int SINGL_FLD, SEP_FLAG, UNIQUE;
173 extern int REC_D;
174 extern const char *tmpdir;
175 extern int stable_sort;
176 extern u_char gweights[NBINS];
177 extern struct coldesc *clist;
178 extern int ncols;
179
180 void append(const u_char **, int, int, FILE *,
181 void (*)(const RECHEADER *, FILE *), struct field *);
182 void concat(FILE *, FILE *);
183 length_t enterkey(RECHEADER *, DBT *, int, struct field *);
184 void fixit(int *, char **);
185 void fldreset(struct field *);
186 FILE *ftmp(void);
187 void fmerge(int, int, struct filelist *, int,
188 get_func_t, FILE *, put_func_t, struct field *);
189 void fsort(int, int, int, struct filelist *, int, FILE *,
190 struct field *);
191 int geteasy(int, int, struct filelist *,
192 int, RECHEADER *, u_char *, struct field *);
193 int getnext(int, int, struct filelist *,
194 int, RECHEADER *, u_char *, struct field *);
195 int makekey(int, int, struct filelist *,
196 int, RECHEADER *, u_char *, struct field *);
197 int makeline(int, int, struct filelist *,
198 int, RECHEADER *, u_char *, struct field *);
199 void num_init(void);
200 void onepass(const u_char **, int, long, long *, u_char *, FILE *);
201 int optval(int, int);
202 void order(struct filelist *, get_func_t, struct field *);
203 void putline(const RECHEADER *, FILE *);
204 void putrec(const RECHEADER *, FILE *);
205 void rd_append(int, int, int, FILE *, u_char *, u_char *);
206 int setfield(const char *, struct field *, int);
207 void settables(int);
208