/*	$NetBSD: sort.h,v 1.19 2008/04/28 20:24:15 martin Exp $	*/
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)sort.h 8.1 (Berkeley) 6/6/93
64 */
65
66 #include <sys/param.h>
67
68 #include <db.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <fcntl.h>
72 #include <limits.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76
/*
 * Number of entries in each of the per-byte tables declared in this header
 * (ascii, Rascii, Ftable, RFtable, d_mask, gweights).
 */
#define NBINS 256
78
/*
 * Values for masks, weights, and other flags.  Each value is a distinct
 * single bit, so several of them can be OR-ed into one word.
 * NOTE(review): presumably these are the bits kept in struct field.flags --
 * confirm against the code that sets and tests them.
 */
#define I	0x01	/* mask out non-printable characters */
#define D	0x02	/* sort alphanumeric characters only */
#define N	0x04	/* Field is a number */
#define F	0x08	/* weight lower and upper case the same */
#define R	0x10	/* Field is reversed with respect to the global weight */
#define BI	0x20	/* ignore blanks in icol */
#define BT	0x40	/* ignore blanks in tcol */
87
/*
 * Masks for delimiters: blanks, fields, and termination.  One bit each, so
 * a single character may belong to more than one class.
 */
#define BLANK	0x01	/* ' ', '\t'; '\n' if -T is invoked */
#define FLD_D	0x02	/* ' ', '\t' default; from -t otherwise */
#define REC_D_F	0x04	/* '\n' default; from -T otherwise */
92
/*
 * Smaller / larger of two values.
 *
 * These are plain macros: the comparison expands both arguments and the
 * selected one is expanded a second time, so never pass an expression with
 * side effects (e.g. min(i++, j)).
 */
#define min(a, b)	((b) > (a) ? (a) : (b))
#define max(a, b)	((b) < (a) ? (a) : (b))
95
/*
 * FCLOSE(file): close the stdio stream `file'.  If fclose(3) fails (returns
 * EOF), print a diagnostic containing the stream pointer and exit with
 * status 2 via err(3).
 *
 * The body is wrapped in do { ... } while (0) so that "FCLOSE(fp);" is a
 * single statement and is safe in an unbraced if/else.  `file' is evaluated
 * exactly once, and the pointer is cast to void * as %p requires.
 */
#define FCLOSE(file) do {						\
	FILE *fclose_fp_ = (file);					\
	if (EOF == fclose(fclose_fp_))					\
		err(2, "%p", (void *)fclose_fp_);			\
} while (0)
100
/*
 * EWRITE(ptr, size, n, f): write `n' items of `size' bytes each from `ptr'
 * to stream `f'; on failure exit with status 2 via err(3).
 *
 * fwrite(3) returns the number of items actually written, so anything other
 * than `n' (including a short, non-zero count) is treated as a write error.
 * A request for zero items succeeds trivially.  `n' is evaluated once.
 *
 * Wrapped in do { ... } while (0) so that "EWRITE(...);" is a single
 * statement and is safe in an unbraced if/else.
 */
#define EWRITE(ptr, size, n, f) do {					\
	size_t ewrite_n_ = (n);						\
	if (fwrite((ptr), (size), ewrite_n_, (f)) != ewrite_n_)		\
		err(2, NULL);						\
} while (0)
105
/*
 * Type used for the length and offset members of the record headers below.
 * The length of a record is currently limited to the maximum string length
 * (size_t).
 */
typedef size_t length_t;
108
109 /* a record is a key/line pair starting at rec.data. It has a total length
110 * and an offset to the start of the line half of the pair.
111 */
112 typedef struct recheader {
113 length_t length;
114 length_t offset;
115 u_char data[1];
116 } RECHEADER;
117
118 typedef struct trecheader {
119 length_t length;
120 length_t offset;
121 } TRECHEADER;
122
/*
 * A column as seen by struct field; used by enterfield.  Columns are matched
 * with their corresponding coldescs during initialization.
 */
struct column {
	struct coldesc *p;	/* the corresponding coldesc */
	int num, indent;
};
131
132 /* a coldesc has a number and pointers to the beginning and end of the
133 * corresponding column in the current line. This is determined in enterkey.
134 */
135 typedef struct coldesc {
136 u_char *start;
137 u_char *end;
138 int num;
139 } COLDESC;
140
141 /* A field has an initial and final column; an omitted final column
142 * implies the end of the line. Flags regulate omission of blanks and
143 * numerical sorts; mask determines which characters are ignored (from -i, -d);
144 * weights determines the sort weights of a character (from -f, -r).
145 */
146 struct field {
147 struct column icol;
148 struct column tcol;
149 u_int flags;
150 u_char *mask;
151 u_char *weights;
152 };
153
/*
 * A list of file names.  Neither the array of pointers nor the strings can
 * be modified through this structure.
 */
struct filelist {
	const char *const *names;
};
157
158 typedef int (*get_func_t)(int, int, struct filelist *, int,
159 RECHEADER *, u_char *, struct field *);
160 typedef void (*put_func_t)(const struct recheader *, FILE *);
161
162 extern int PANIC; /* maximum depth of fsort before fmerge is called */
163 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
164 extern u_char d_mask[NBINS];
165 extern int SINGL_FLD, SEP_FLAG, UNIQUE;
166 extern int REC_D;
167 extern const char *tmpdir;
168 extern int stable_sort;
169 extern u_char gweights[NBINS];
170 extern struct coldesc *clist;
171 extern int ncols;
172
173 void append(const u_char **, int, int, FILE *,
174 void (*)(const RECHEADER *, FILE *), struct field *);
175 void concat(FILE *, FILE *);
176 length_t enterkey(RECHEADER *, DBT *, int, struct field *);
177 void fixit(int *, char **);
178 void fldreset(struct field *);
179 FILE *ftmp(void);
180 void fmerge(int, int, struct filelist *, int,
181 get_func_t, FILE *, put_func_t, struct field *);
182 void fsort(int, int, int, struct filelist *, int, FILE *,
183 struct field *);
184 int geteasy(int, int, struct filelist *,
185 int, RECHEADER *, u_char *, struct field *);
186 int getnext(int, int, struct filelist *,
187 int, RECHEADER *, u_char *, struct field *);
188 int makekey(int, int, struct filelist *,
189 int, RECHEADER *, u_char *, struct field *);
190 int makeline(int, int, struct filelist *,
191 int, RECHEADER *, u_char *, struct field *);
192 void num_init(void);
193 void onepass(const u_char **, int, long, long *, u_char *, FILE *);
194 int optval(int, int);
195 void order(struct filelist *, get_func_t, struct field *);
196 void putline(const RECHEADER *, FILE *);
197 void putrec(const RECHEADER *, FILE *);
198 void rd_append(int, int, int, FILE *, u_char *, u_char *);
199 int setfield(const char *, struct field *, int);
200 void settables(int);
201