init.c revision 1.25 1 /* $NetBSD: init.c,v 1.25 2010/05/27 05:52:29 dholland Exp $ */
2
3 /*-
4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Ben Harris and Jaromir Dolecek.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Peter McIlroy.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64 #include "sort.h"
65
66 __RCSID("$NetBSD: init.c,v 1.25 2010/05/27 05:52:29 dholland Exp $");
67
68 #include <ctype.h>
69 #include <string.h>
70
71 static void insertcol(struct field *);
72 static const char *setcolumn(const char *, struct field *);
73
74 /*
75 * masks of ignored characters.
76 */
77 static u_char dtable[NBINS], itable[NBINS];
78
79 /*
80 * parsed key options
81 */
82 struct coldesc *clist = NULL;
83 int ncols = 0;
84
85 /*
86 * clist (list of columns which correspond to one or more icol or tcol)
87 * is in increasing order of columns.
88 * Fields are kept in increasing order of fields.
89 */
90
91 /*
92 * keep clist in order--inserts a column in a sorted array
93 */
94 static void
95 insertcol(struct field *field)
96 {
97 int i;
98 struct coldesc *p;
99
100 /* Make space for new item */
101 p = realloc(clist, (ncols + 2) * sizeof(*clist));
102 if (!p)
103 err(1, "realloc");
104 clist = p;
105 memset(&clist[ncols], 0, sizeof(clist[ncols]));
106
107 for (i = 0; i < ncols; i++)
108 if (field->icol.num <= clist[i].num)
109 break;
110 if (field->icol.num != clist[i].num) {
111 memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
112 clist[i].num = field->icol.num;
113 ncols++;
114 }
115 if (field->tcol.num && field->tcol.num != field->icol.num) {
116 for (i = 0; i < ncols; i++)
117 if (field->tcol.num <= clist[i].num)
118 break;
119 if (field->tcol.num != clist[i].num) {
120 memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
121 clist[i].num = field->tcol.num;
122 ncols++;
123 }
124 }
125 }
126
127 /*
128 * matches fields with the appropriate columns--n^2 but who cares?
129 */
130 void
131 fldreset(struct field *fldtab)
132 {
133 int i;
134
135 fldtab[0].tcol.p = clist + ncols - 1;
136 for (++fldtab; fldtab->icol.num; ++fldtab) {
137 for (i = 0; fldtab->icol.num != clist[i].num; i++)
138 ;
139 fldtab->icol.p = clist + i;
140 if (!fldtab->tcol.num)
141 continue;
142 for (i = 0; fldtab->tcol.num != clist[i].num; i++)
143 ;
144 fldtab->tcol.p = clist + i;
145 }
146 }
147
148 /*
149 * interprets a column in a -k field
150 */
151 static const char *
152 setcolumn(const char *pos, struct field *cur_fld)
153 {
154 struct column *col;
155 char *npos;
156 int tmp;
157 col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol);
158 col->num = (int) strtol(pos, &npos, 10);
159 pos = npos;
160 if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
161 errx(2, "field numbers must be positive");
162 if (*pos == '.') {
163 if (!col->num)
164 errx(2, "cannot indent end of line");
165 ++pos;
166 col->indent = (int) strtol(pos, &npos, 10);
167 pos = npos;
168 if (&cur_fld->icol == col)
169 col->indent--;
170 if (col->indent < 0)
171 errx(2, "illegal offset");
172 }
173 for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++)
174 cur_fld->flags |= tmp;
175 if (cur_fld->icol.num == 0)
176 cur_fld->icol.num = 1;
177 return (pos);
178 }
179
180 int
181 setfield(const char *pos, struct field *cur_fld, int gflag)
182 {
183 cur_fld->mask = NULL;
184
185 pos = setcolumn(pos, cur_fld);
186 if (*pos == '\0') /* key extends to EOL. */
187 cur_fld->tcol.num = 0;
188 else {
189 if (*pos != ',')
190 errx(2, "illegal field descriptor");
191 setcolumn((++pos), cur_fld);
192 }
193 if (!cur_fld->flags)
194 cur_fld->flags = gflag;
195 if (REVERSE)
196 /* A local 'r' doesn't invert the global one */
197 cur_fld->flags &= ~R;
198
199 /* Assign appropriate mask table and weight table. */
200 cur_fld->weights = weight_tables[cur_fld->flags & (R | F)];
201 if (cur_fld->flags & I)
202 cur_fld->mask = itable;
203 else if (cur_fld->flags & D)
204 cur_fld->mask = dtable;
205
206 cur_fld->flags |= (gflag & (BI | BT));
207 if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */
208 cur_fld->flags &= ~BT;
209
210 if (cur_fld->tcol.num
211 && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
212 && (cur_fld->tcol.num <= cur_fld->icol.num
213 /* indent if 0 -> end of field, i.e. okay */
214 && cur_fld->tcol.indent != 0
215 && cur_fld->tcol.indent < cur_fld->icol.indent))
216 errx(2, "fields out of order");
217
218 insertcol(cur_fld);
219 return (cur_fld->tcol.num);
220 }
221
222 int
223 optval(int desc, int tcolflag)
224 {
225 switch(desc) {
226 case 'b':
227 if (!tcolflag)
228 return (BI);
229 else
230 return (BT);
231 case 'd': return (D);
232 case 'f': return (F);
233 case 'i': return (I);
234 case 'n': return (N);
235 case 'r': return (R);
236 default: return (0);
237 }
238 }
239
240 /*
241 * Replace historic +SPEC arguments with appropriate -kSPEC.
242 */
243 void
244 fixit(int *argc, char **argv)
245 {
246 int i, j, fplus=0;
247 char *vpos, *tpos, spec[20];
248 int col, indent;
249 size_t sz;
250
251 for (i = 1; i < *argc; i++) {
252 /*
253 * Stop where getopt will stop, to avoid turning e.g.
254 * "sort x +3" into "sort x -k4.1" which will croak if
255 * +3 was in fact really a file name.
256 */
257 if (argv[i][0] == '-' && argv[i][1] == '-')
258 break;
259 if (argv[i][0] != '-' && argv[i][0] != '+')
260 break;
261
262 if (argv[i][0] != '+' && !fplus)
263 continue;
264
265 if (fplus && (argv[i][0] != '-' || !isdigit((unsigned char)argv[i][1]))) {
266 fplus = 0;
267 if (argv[i][0] != '+') {
268 /* not a -POS argument, skip */
269 continue;
270 }
271 }
272
273 /* parse spec */
274 tpos = argv[i]+1;
275 col = (int)strtol(tpos, &tpos, 10);
276 if (*tpos == '.') {
277 ++tpos;
278 indent = (int) strtol(tpos, &tpos, 10);
279 } else
280 indent = 0;
281 /* tpos points to optional flags now */
282
283 /*
284 * For x.y, the obsolescent variant assumed 0 == beginning
285 * of line, while the new form uses 0 == end of line.
286 * Convert accordingly.
287 */
288 if (!fplus) {
289 /* +POS */
290 col += 1;
291 indent += 1;
292 } else {
293 /* -POS */
294 if (indent > 0)
295 col += 1;
296 }
297
298 /* new style spec */
299 sz = snprintf(spec, sizeof(spec), "%d.%d%s", col, indent,
300 tpos);
301
302 if (!fplus) {
303 /* Replace the +POS argument with new-style -kSPEC */
304 asprintf(&vpos, "-k%s", spec);
305 argv[i] = vpos;
306 fplus = 1;
307 } else {
308 /*
309 * Append the spec to one previously generated from
310 * +POS argument, and remove the argv element.
311 */
312 asprintf(&vpos, "%s,%s", argv[i-1], spec);
313 free(argv[i-1]);
314 argv[i-1] = vpos;
315 for (j=i; j < *argc; j++)
316 argv[j] = argv[j+1];
317 *argc -= 1;
318 i--;
319 }
320 }
321 }
322
323 /*
324 * ascii, Rascii, Ftable, and RFtable map
325 *
326 * Sorting 'weight' tables.
327 * Convert 'ascii' characters into their sort order.
328 * The 'F' variants fold lower case to upper equivalent
329 * The 'R' variants are for reverse sorting.
330 *
331 * The record separator (REC_D) never needs a weight, this frees one
332 * byte value as an 'end of key' marker. This must be 0 for normal
333 * weight tables, and 0xff for reverse weight tables - and is used
334 * to terminate keys so that short keys sort before (after if reverse)
335 * longer keys.
336 *
337 * The field separator has a normal weight - although it cannot occur
338 * within a key unless it is the default (space+tab).
339 *
340 * All other bytes map to the appropriate value for the sort order.
341 * Numeric sorts don't need any tables, they are reversed by negation.
342 *
343 * Global reverse sorts are done by writing the sorted keys in reverse
344 * order - the sort itself is stil forwards.
345 * This means that weights are only ever used when generating keys, any
346 * sort of the original data bytes is always forwards and unweighted.
347 *
348 * Note: this is only good for ASCII sorting. For different LC 's,
349 * all bets are off.
350 *
351 * itable[] and dtable[] are the masks for -i (ignore non-printables)
352 * and -d (only sort blank and alphanumerics).
353 */
354 void
355 settables(void)
356 {
357 int i;
358 int next_weight = 1;
359 int rev_weight = 254;
360
361 ascii[REC_D] = 0;
362 Rascii[REC_D] = 255;
363 Ftable[REC_D] = 0;
364 RFtable[REC_D] = 255;
365
366 for (i = 0; i < 256; i++) {
367 if (i == REC_D)
368 continue;
369 ascii[i] = next_weight;
370 Rascii[i] = rev_weight;
371 if (Ftable[i] == 0) {
372 Ftable[i] = next_weight;
373 RFtable[i] = rev_weight;
374 Ftable[tolower(i)] = next_weight;
375 RFtable[tolower(i)] = rev_weight;
376 }
377 next_weight++;
378 rev_weight--;
379
380 if (i == '\n' || isprint(i))
381 itable[i] = 1;
382
383 if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
384 dtable[i] = 1;
385 }
386 }
387