Home | History | Annotate | Line # | Download | only in sort
init.c revision 1.9
      1 /*	$NetBSD: init.c,v 1.9 2003/08/07 11:32:34 jdolecek Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Ben Harris and Jaromir Dolecek.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*-
     40  * Copyright (c) 1993
     41  *	The Regents of the University of California.  All rights reserved.
     42  *
     43  * This code is derived from software contributed to Berkeley by
     44  * Peter McIlroy.
     45  *
     46  * Redistribution and use in source and binary forms, with or without
     47  * modification, are permitted provided that the following conditions
     48  * are met:
     49  * 1. Redistributions of source code must retain the above copyright
     50  *    notice, this list of conditions and the following disclaimer.
     51  * 2. Redistributions in binary form must reproduce the above copyright
     52  *    notice, this list of conditions and the following disclaimer in the
     53  *    documentation and/or other materials provided with the distribution.
     54  * 3. Neither the name of the University nor the names of its contributors
     55  *    may be used to endorse or promote products derived from this software
     56  *    without specific prior written permission.
     57  *
     58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     68  * SUCH DAMAGE.
     69  */
     70 
     71 #include "sort.h"
     72 
     73 #ifndef lint
     74 __RCSID("$NetBSD: init.c,v 1.9 2003/08/07 11:32:34 jdolecek Exp $");
     75 __SCCSID("@(#)init.c	8.1 (Berkeley) 6/6/93");
     76 #endif /* not lint */
     77 
#include <ctype.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
     80 
/* Prototypes for the key-parsing routines defined later in this file. */
static void insertcol __P((struct field *));
static const char *setcolumn __P((const char *, struct field *, int));
int setfield __P((const char *, struct field *, int));

/*
 * Global weight table.  settables() fills it from one of the
 * ascii/Rascii/Ftable/RFtable tables and then adjusts the entries
 * up to and including REC_D.
 */
u_char gweights[NBINS];

/*
 * masks of ignored characters.  Alltable is 256 ones.
 * settables() sets itable[c] to 1 for newline and printable characters,
 * and dtable[c] to 1 for newline, tab, space and alphanumerics; every
 * other entry of those two tables is 0.
 */
static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
     91 
     92 /*
     93  * clist (list of columns which correspond to one or more icol or tcol)
     94  * is in increasing order of columns.
     95  * Fields are kept in increasing order of fields.
     96  */
     97 
     98 /*
     99  * keep clist in order--inserts a column in a sorted array
    100  */
    101 static void
    102 insertcol(field)
    103 	struct field *field;
    104 {
    105 	int i;
    106 	for (i = 0; i < ncols; i++)
    107 		if (field->icol.num <= clist[i].num)
    108 			break;
    109 	if (field->icol.num != clist[i].num) {
    110 		memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i));
    111 		clist[i].num = field->icol.num;
    112 		ncols++;
    113 	}
    114 	if (field->tcol.num && field->tcol.num != field->icol.num) {
    115 		for (i = 0; i < ncols; i++)
    116 			if (field->tcol.num <= clist[i].num)
    117 				break;
    118 		if (field->tcol.num != clist[i].num) {
    119 			memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i));
    120 			clist[i].num = field->tcol.num;
    121 			ncols++;
    122 		}
    123 	}
    124 }
    125 
    126 /*
    127  * matches fields with the appropriate columns--n^2 but who cares?
    128  */
    129 void
    130 fldreset(fldtab)
    131 	struct field *fldtab;
    132 {
    133 	int i;
    134 	fldtab[0].tcol.p = clist+ncols-1;
    135 	for (++fldtab; fldtab->icol.num; ++fldtab) {
    136 		for (i = 0; fldtab->icol.num != clist[i].num; i++)
    137 			;
    138 		fldtab->icol.p = clist + i;
    139 		if (!fldtab->tcol.num)
    140 			continue;
    141 		for (i = 0; fldtab->tcol.num != clist[i].num; i++)
    142 			;
    143 		fldtab->tcol.p = clist + i;
    144 	}
    145 }
    146 
    147 /*
    148  * interprets a column in a -k field
    149  */
    150 static const char *
    151 setcolumn(pos, cur_fld, gflag)
    152 	const char *pos;
    153 	struct field *cur_fld;
    154 	int gflag;
    155 {
    156 	struct column *col;
    157 	int tmp;
    158 	col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol);
    159 	pos += sscanf(pos, "%d", &(col->num));
    160 	while (isdigit(*pos))
    161 		pos++;
    162 	if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol)))
    163 		errx(2, "field numbers must be positive");
    164 	if (*pos == '.') {
    165 		if (!col->num)
    166 			errx(2, "cannot indent end of line");
    167 		++pos;
    168 		pos += sscanf(pos, "%d", &(col->indent));
    169 		while (isdigit(*pos))
    170 			pos++;
    171 		if (&cur_fld->icol == col)
    172 			col->indent--;
    173 		if (col->indent < 0)
    174 			errx(2, "illegal offset");
    175 	}
    176 	if (optval(*pos, cur_fld->tcol.num))
    177 		while ((tmp = optval(*pos, cur_fld->tcol.num))) {
    178 			cur_fld->flags |= tmp;
    179 			pos++;
    180 	}
    181 	if (cur_fld->icol.num == 0)
    182 		cur_fld->icol.num = 1;
    183 	return (pos);
    184 }
    185 
    186 int
    187 setfield(pos, cur_fld, gflag)
    188 	const char *pos;
    189 	struct field *cur_fld;
    190 	int gflag;
    191 {
    192 	static int nfields = 0;
    193 	int tmp;
    194 
    195 	if (++nfields == ND)
    196 		errx(2, "too many sort keys. (Limit is %d)", ND-1);
    197 
    198 	cur_fld->weights = ascii;
    199 	cur_fld->mask = alltable;
    200 
    201 	pos = setcolumn(pos, cur_fld, gflag);
    202 	if (*pos == '\0')			/* key extends to EOL. */
    203 		cur_fld->tcol.num = 0;
    204 	else {
    205 		if (*pos != ',')
    206 			errx(2, "illegal field descriptor");
    207 		setcolumn((++pos), cur_fld, gflag);
    208 	}
    209 	if (!cur_fld->flags)
    210 		cur_fld->flags = gflag;
    211 	tmp = cur_fld->flags;
    212 
    213 	/*
    214 	 * Assign appropriate mask table and weight table.
    215 	 * If the global weights are reversed, the local field
    216 	 * must be "re-reversed".
    217 	 */
    218 	if (((tmp & R) ^ (gflag & R)) && (tmp & F))
    219 		cur_fld->weights = RFtable;
    220 	else if (tmp & F)
    221 		cur_fld->weights = Ftable;
    222 	else if ((tmp & R) ^ (gflag & R))
    223 		cur_fld->weights = Rascii;
    224 
    225 	if (tmp & I)
    226 		cur_fld->mask = itable;
    227 	else if (tmp & D)
    228 		cur_fld->mask = dtable;
    229 
    230 	cur_fld->flags |= (gflag & (BI | BT));
    231 	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
    232 		cur_fld->flags &= ~BT;
    233 
    234 	if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
    235 	    && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
    236 	    && cur_fld->tcol.indent < cur_fld->icol.indent))
    237 		errx(2, "fields out of order");
    238 	insertcol(cur_fld);
    239 	return (cur_fld->tcol.num);
    240 }
    241 
    242 int
    243 optval(desc, tcolflag)
    244 	int desc, tcolflag;
    245 {
    246 	switch(desc) {
    247 		case 'b':
    248 			if (!tcolflag)
    249 				return (BI);
    250 			else
    251 				return (BT);
    252 		case 'd': return (D);
    253 		case 'f': return (F);
    254 		case 'i': return (I);
    255 		case 'n': return (N);
    256 		case 'r': return (R);
    257 		default:  return (0);
    258 	}
    259 }
    260 
/*
 * Read a run of decimal digits at *pp and advance *pp past it.
 * Returns 0 when there are no digits.  The value saturates at
 * INT_MAX - 1 so that the caller can add one without overflowing.
 */
static int
fixit_number(pp)
	char **pp;
{
	char *p = *pp;
	int val = 0, digit;

	while (isdigit((unsigned char)*p)) {
		digit = *p++ - '0';
		if (val > (INT_MAX - 1 - digit) / 10)
			val = INT_MAX - 1;
		else
			val = val * 10 + digit;
	}
	*pp = p;
	return (val);
}

/*
 * Rewrite old-style "+pos1 [-pos2]" key arguments as "-k" keys.
 *
 * Every argv[i] (i >= 1) that starts with '+' followed by a digit is
 * replaced by a newly allocated string "-kF[.C][flags],".  F is the
 * given field number plus one, and C the given offset plus one.  If the
 * next argument is '-' followed by a digit, it is folded into the same
 * key as the end position and removed from argv, decrementing *argc.
 * An end position "w.x" with non-zero x becomes "w+1.x"; otherwise it
 * becomes "w".  Any text following the numbers is copied through
 * unchanged.  '+' arguments not followed by a digit are left untouched.
 *
 * Each key gets its own exactly-sized, NUL-terminated allocation, which
 * is never freed.  This keeps one key from running into the next and
 * removes the fixed-size buffer that long arguments could overflow.
 * Exits via err(2, ...) if memory cannot be allocated.
 */
void
fixit(argc, argv)
	int *argc;
	char **argv;
{
	int i, j;

	for (i = 1; i < *argc; i++) {
		char head[32], tail[32];
		char *tpos, *sflags, *eflags, *spec;
		size_t len;
		int fld, off, has_end;

		if (argv[i][0] != '+' || !isdigit((unsigned char)argv[i][1]))
			continue;

		/* start position: zero-based field[.offset] -> one-based */
		tpos = argv[i] + 1;
		fld = fixit_number(&tpos);
		if (*tpos == '.') {
			++tpos;
			off = fixit_number(&tpos);
			snprintf(head, sizeof(head), "-k%d.%d",
			    fld + 1, off + 1);
		} else
			snprintf(head, sizeof(head), "-k%d", fld + 1);
		sflags = tpos;

		/* optional end position, taken from the following argument */
		tail[0] = '\0';
		eflags = tail;
		has_end = argv[i+1] != NULL && argv[i+1][0] == '-' &&
		    isdigit((unsigned char)argv[i+1][1]);
		if (has_end) {
			tpos = argv[i+1] + 1;
			fld = fixit_number(&tpos);
			off = 0;
			if (*tpos == '.') {
				++tpos;
				off = fixit_number(&tpos);
			}
			if (off)
				snprintf(tail, sizeof(tail), "%d.%d",
				    fld + 1, off);
			else
				snprintf(tail, sizeof(tail), "%d", fld);
			eflags = tpos;
		}

		/* head + start flags + ',' + tail + end flags + NUL */
		len = strlen(head) + strlen(sflags) + 1 +
		    strlen(tail) + strlen(eflags) + 1;
		spec = malloc(len);
		if (spec == NULL)
			err(2, NULL);
		snprintf(spec, len, "%s%s,%s%s", head, sflags, tail, eflags);
		argv[i] = spec;

		if (has_end) {
			/* drop the consumed -pos2; also moves the NULL */
			for (j = i+1; j < *argc; j++)
				argv[j] = argv[j+1];
			*argc -= 1;
		}
	}
}
    314 
    315 /*
    316  * ascii, Rascii, Ftable, and RFtable map
    317  * REC_D -> REC_D;  {not REC_D} -> {not REC_D}.
    318  * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
    319  * Note: when sorting in forward order, to encode character zero in a key,
    320  * use \001\001; character 1 becomes \001\002.  In this case, character 0
    321  * is reserved for the field delimiter.  Analagously for -r (fld_d = 255).
    322  * Note: this is only good for ASCII sorting.  For different LC 's,
    323  * all bets are off.  See also num_init in number.c
    324  */
    325 void
    326 settables(gflags)
    327 	int gflags;
    328 {
    329 	u_char *wts;
    330 	int i, incr;
    331 	for (i=0; i < 256; i++) {
    332 		ascii[i] = i;
    333 		if (i > REC_D && i < 255 - REC_D+1)
    334 			Rascii[i] = 255 - i + 1;
    335 		else
    336 			Rascii[i] = 255 - i;
    337 		if (islower(i)) {
    338 			Ftable[i] = Ftable[toupper(i)];
    339 			RFtable[i] = RFtable[toupper(i)];
    340 		} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
    341 			Ftable[i] = i + 1;
    342 			RFtable[i] = Rascii[i] - 1;
    343 		} else {
    344 			Ftable[i] = i;
    345 			RFtable[i] = Rascii[i];
    346 		}
    347 		alltable[i] = 1;
    348 
    349 		if (i == '\n' || isprint(i))
    350 			itable[i] = 1;
    351 		else
    352 			itable[i] = 0;
    353 
    354 		if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
    355 			dtable[i] = 1;
    356 		else
    357 			dtable[i] = 0;
    358 	}
    359 
    360 	Rascii[REC_D] = RFtable[REC_D] = REC_D;
    361 	if (isupper(REC_D))
    362 		Ftable[tolower(REC_D)]++;
    363 
    364 	if ((gflags & R) && !((gflags & F) && SINGL_FLD))
    365 		wts = Rascii;
    366 	else if (!((gflags & F) && SINGL_FLD))
    367 		wts = ascii;
    368 	else if (gflags & R)
    369 		wts = RFtable;
    370 	else
    371 		wts = Ftable;
    372 
    373 	memmove(gweights, wts, sizeof(gweights));
    374 	incr = (gflags & R) ? -1 : 1;
    375 	for (i = 0; i < REC_D; i++)
    376 		gweights[i] += incr;
    377 	gweights[REC_D] = ((gflags & R) ? 255 : 0);
    378 	if (SINGL_FLD && (gflags & F)) {
    379 		for (i = 0; i < REC_D; i++) {
    380 			ascii[i] += incr;
    381 			Rascii[i] += incr;
    382 		}
    383 		ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
    384 	}
    385 }
    386