Home | History | Annotate | Line # | Download | only in sed
compile.c revision 1.25.2.3.2.1
      1  1.25.2.3.2.1       jmc /*	$NetBSD: compile.c,v 1.25.2.3.2.1 2005/02/06 08:31:40 jmc Exp $	*/
      2          1.15       tls 
      3           1.1       alm /*-
      4           1.9       cgd  * Copyright (c) 1992, 1993
      5           1.9       cgd  *	The Regents of the University of California.  All rights reserved.
      6           1.1       alm  *
      7           1.1       alm  * This code is derived from software contributed to Berkeley by
      8           1.1       alm  * Diomidis Spinellis of Imperial College, University of London.
      9           1.1       alm  *
     10           1.1       alm  * Redistribution and use in source and binary forms, with or without
     11           1.1       alm  * modification, are permitted provided that the following conditions
     12           1.1       alm  * are met:
     13           1.1       alm  * 1. Redistributions of source code must retain the above copyright
     14           1.1       alm  *    notice, this list of conditions and the following disclaimer.
     15           1.1       alm  * 2. Redistributions in binary form must reproduce the above copyright
     16           1.1       alm  *    notice, this list of conditions and the following disclaimer in the
     17           1.1       alm  *    documentation and/or other materials provided with the distribution.
     18          1.25       agc  * 3. Neither the name of the University nor the names of its contributors
     19          1.25       agc  *    may be used to endorse or promote products derived from this software
     20          1.25       agc  *    without specific prior written permission.
     21          1.25       agc  *
     22          1.25       agc  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23          1.25       agc  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24          1.25       agc  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25          1.25       agc  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26          1.25       agc  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27          1.25       agc  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28          1.25       agc  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29          1.25       agc  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30          1.25       agc  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31          1.25       agc  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32          1.25       agc  * SUCH DAMAGE.
     33          1.25       agc  */
     34          1.25       agc 
     35          1.25       agc /*-
     36          1.25       agc  * Copyright (c) 1992 Diomidis Spinellis.
     37          1.25       agc  *
     38          1.25       agc  * This code is derived from software contributed to Berkeley by
     39          1.25       agc  * Diomidis Spinellis of Imperial College, University of London.
     40          1.25       agc  *
     41          1.25       agc  * Redistribution and use in source and binary forms, with or without
     42          1.25       agc  * modification, are permitted provided that the following conditions
     43          1.25       agc  * are met:
     44          1.25       agc  * 1. Redistributions of source code must retain the above copyright
     45          1.25       agc  *    notice, this list of conditions and the following disclaimer.
     46          1.25       agc  * 2. Redistributions in binary form must reproduce the above copyright
     47          1.25       agc  *    notice, this list of conditions and the following disclaimer in the
     48          1.25       agc  *    documentation and/or other materials provided with the distribution.
     49           1.1       alm  * 3. All advertising materials mentioning features or use of this software
     50           1.1       alm  *    must display the following acknowledgement:
     51           1.1       alm  *	This product includes software developed by the University of
     52           1.1       alm  *	California, Berkeley and its contributors.
     53           1.1       alm  * 4. Neither the name of the University nor the names of its contributors
     54           1.1       alm  *    may be used to endorse or promote products derived from this software
     55           1.1       alm  *    without specific prior written permission.
     56           1.1       alm  *
     57           1.1       alm  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     58           1.1       alm  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     59           1.1       alm  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     60           1.1       alm  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     61           1.1       alm  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     62           1.1       alm  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     63           1.1       alm  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     64           1.1       alm  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     65           1.1       alm  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     66           1.1       alm  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     67           1.1       alm  * SUCH DAMAGE.
     68           1.1       alm  */
     69           1.1       alm 
     70          1.17     lukem #include <sys/cdefs.h>
     71           1.1       alm #ifndef lint
     72          1.16       mrg #if 0
     73          1.16       mrg static char sccsid[] = "@(#)compile.c	8.2 (Berkeley) 4/28/95";
     74          1.16       mrg #else
     75  1.25.2.3.2.1       jmc __RCSID("$NetBSD: compile.c,v 1.25.2.3.2.1 2005/02/06 08:31:40 jmc Exp $");
     76          1.16       mrg #endif
     77           1.1       alm #endif /* not lint */
     78           1.1       alm 
     79           1.1       alm #include <sys/types.h>
     80           1.1       alm #include <sys/stat.h>
     81           1.1       alm 
     82           1.1       alm #include <ctype.h>
     83           1.1       alm #include <errno.h>
     84           1.1       alm #include <fcntl.h>
     85           1.1       alm #include <limits.h>
     86           1.1       alm #include <regex.h>
     87           1.1       alm #include <stdio.h>
     88           1.1       alm #include <stdlib.h>
     89           1.1       alm #include <string.h>
     90           1.1       alm 
     91           1.1       alm #include "defs.h"
     92           1.1       alm #include "extern.h"
     93           1.1       alm 
     94           1.9       cgd #define LHSZ	128
     95           1.9       cgd #define	LHMASK	(LHSZ - 1)
     96           1.9       cgd static struct labhash {
     97           1.9       cgd 	struct	labhash *lh_next;
     98           1.9       cgd 	u_int	lh_hash;
     99           1.9       cgd 	struct	s_command *lh_cmd;
    100           1.9       cgd 	int	lh_ref;
    101           1.9       cgd } *labels[LHSZ];
    102           1.9       cgd 
/*
 * File-local helpers.  Parameter names are given where the definition is
 * visible in this part of the file.
 */
static char *compile_addr(char *, struct s_addr *);
static char *compile_ccl(char **sp, char *t);
static char *compile_delimited(char *p, char *d);
static char *compile_flags(char *p, struct s_subst *s);
static char *compile_re(char *p, regex_t **repp);
static char *compile_subst(char *p, struct s_subst *s);
static char *compile_text(void);
static char *compile_tr(char *, char **);
static struct s_command **compile_stream(struct s_command **link);
static char *duptoeol(char *, char *);
static void enterlabel(struct s_command *);
static struct s_command *findlabel(char *);
static void fixuplabel(struct s_command *, struct s_command *);
static void uselabel(void);
    119           1.1       alm 
    120           1.1       alm /*
    121           1.1       alm  * Command specification.  This is used to drive the command parser.
    122           1.1       alm  */
    123           1.1       alm struct s_format {
    124           1.1       alm 	char code;				/* Command code */
    125           1.1       alm 	int naddr;				/* Number of address args */
    126           1.1       alm 	enum e_args args;			/* Argument type */
    127           1.1       alm };
    128           1.1       alm 
    129           1.1       alm static struct s_format cmd_fmts[] = {
    130           1.1       alm 	{'{', 2, GROUP},
    131          1.14   mycroft 	{'}', 0, ENDGROUP},
    132           1.1       alm 	{'a', 1, TEXT},
    133           1.1       alm 	{'b', 2, BRANCH},
    134           1.1       alm 	{'c', 2, TEXT},
    135           1.1       alm 	{'d', 2, EMPTY},
    136           1.1       alm 	{'D', 2, EMPTY},
    137           1.1       alm 	{'g', 2, EMPTY},
    138           1.1       alm 	{'G', 2, EMPTY},
    139           1.1       alm 	{'h', 2, EMPTY},
    140           1.1       alm 	{'H', 2, EMPTY},
    141           1.1       alm 	{'i', 1, TEXT},
    142           1.1       alm 	{'l', 2, EMPTY},
    143           1.1       alm 	{'n', 2, EMPTY},
    144           1.1       alm 	{'N', 2, EMPTY},
    145           1.1       alm 	{'p', 2, EMPTY},
    146           1.1       alm 	{'P', 2, EMPTY},
    147           1.1       alm 	{'q', 1, EMPTY},
    148           1.1       alm 	{'r', 1, RFILE},
    149           1.1       alm 	{'s', 2, SUBST},
    150           1.1       alm 	{'t', 2, BRANCH},
    151           1.1       alm 	{'w', 2, WFILE},
    152           1.1       alm 	{'x', 2, EMPTY},
    153           1.1       alm 	{'y', 2, TR},
    154           1.1       alm 	{'!', 2, NONSEL},
    155           1.1       alm 	{':', 0, LABEL},
    156           1.1       alm 	{'#', 0, COMMENT},
    157           1.1       alm 	{'=', 1, EMPTY},
    158           1.1       alm 	{'\0', 0, COMMENT},
    159           1.1       alm };
    160           1.1       alm 
/* Head of the compiled command list; filled in by compile(). */
struct s_command *prog;
    163           1.1       alm 
    164           1.1       alm /*
    165           1.1       alm  * Compile the program into prog.
    166           1.1       alm  * Initialise appends.
    167           1.1       alm  */
    168           1.1       alm void
    169          1.24       wiz compile(void)
    170           1.1       alm {
    171          1.14   mycroft 	*compile_stream(&prog) = NULL;
    172           1.9       cgd 	fixuplabel(prog, NULL);
    173           1.9       cgd 	uselabel();
    174          1.18  drochner 	if (appendnum > 0)
    175          1.18  drochner 		appends = xmalloc(sizeof(struct s_appends) * appendnum);
    176           1.1       alm 	match = xmalloc((maxnsub + 1) * sizeof(regmatch_t));
    177           1.1       alm }
    178           1.1       alm 
/*
 * Advance the caller's `p' over any run of ASCII whitespace.  A NULL `p'
 * is left untouched.  The character is converted to unsigned char before
 * it is handed to the <ctype.h> classifiers.
 */
#define EATSPACE() do {							\
	if (p != NULL) {						\
		for (; *p != '\0'; p++) {				\
			unsigned char eat_c = (unsigned char)*p;	\
			if (!isascii(eat_c) || !isspace(eat_c))		\
				break;					\
		}							\
	}								\
} while (0)
    185           1.1       alm 
    186           1.1       alm static struct s_command **
    187          1.24       wiz compile_stream(struct s_command **link)
    188          1.14   mycroft {
    189          1.17     lukem 	char *p;
    190           1.1       alm 	static char lbuf[_POSIX2_LINE_MAX + 1];	/* To save stack */
    191          1.14   mycroft 	struct s_command *cmd, *cmd2, *stack;
    192           1.1       alm 	struct s_format *fp;
    193           1.1       alm 	int naddr;				/* Number of addresses */
    194           1.1       alm 
    195          1.14   mycroft 	stack = 0;
    196           1.1       alm 	for (;;) {
    197           1.1       alm 		if ((p = cu_fgets(lbuf, sizeof(lbuf))) == NULL) {
    198          1.14   mycroft 			if (stack != 0)
    199           1.1       alm 				err(COMPILE, "unexpected EOF (pending }'s)");
    200           1.1       alm 			return (link);
    201           1.1       alm 		}
    202           1.1       alm 
    203           1.1       alm semicolon:	EATSPACE();
    204          1.20    kleink 		if (p) {
    205          1.20    kleink 			if (*p == '#' || *p == '\0')
    206          1.20    kleink 				continue;
    207          1.20    kleink 			else if (*p == ';') {
    208          1.20    kleink 				p++;
    209          1.20    kleink 				goto semicolon;
    210          1.20    kleink 			}
    211          1.20    kleink 		}
    212           1.1       alm 		*link = cmd = xmalloc(sizeof(struct s_command));
    213           1.1       alm 		link = &cmd->next;
    214           1.1       alm 		cmd->nonsel = cmd->inrange = 0;
    215           1.1       alm 		/* First parse the addresses */
    216           1.1       alm 		naddr = 0;
    217           1.1       alm 
    218           1.1       alm /* Valid characters to start an address */
    219           1.1       alm #define	addrchar(c)	(strchr("0123456789/\\$", (c)))
    220           1.1       alm 		if (addrchar(*p)) {
    221           1.1       alm 			naddr++;
    222           1.1       alm 			cmd->a1 = xmalloc(sizeof(struct s_addr));
    223           1.1       alm 			p = compile_addr(p, cmd->a1);
    224           1.1       alm 			EATSPACE();				/* EXTENSION */
    225           1.1       alm 			if (*p == ',') {
    226           1.1       alm 				p++;
    227           1.1       alm 				EATSPACE();			/* EXTENSION */
    228          1.13   mycroft 				naddr++;
    229           1.1       alm 				cmd->a2 = xmalloc(sizeof(struct s_addr));
    230           1.1       alm 				p = compile_addr(p, cmd->a2);
    231          1.13   mycroft 				EATSPACE();
    232          1.12   mycroft 			} else
    233          1.12   mycroft 				cmd->a2 = 0;
    234          1.12   mycroft 		} else
    235          1.13   mycroft 			cmd->a1 = cmd->a2 = 0;
    236           1.1       alm 
    237           1.1       alm nonsel:		/* Now parse the command */
    238           1.1       alm 		if (!*p)
    239           1.1       alm 			err(COMPILE, "command expected");
    240           1.1       alm 		cmd->code = *p;
    241           1.1       alm 		for (fp = cmd_fmts; fp->code; fp++)
    242           1.1       alm 			if (fp->code == *p)
    243           1.1       alm 				break;
    244           1.1       alm 		if (!fp->code)
    245           1.1       alm 			err(COMPILE, "invalid command code %c", *p);
    246           1.1       alm 		if (naddr > fp->naddr)
    247           1.1       alm 			err(COMPILE,
    248           1.1       alm "command %c expects up to %d address(es), found %d", *p, fp->naddr, naddr);
    249           1.1       alm 		switch (fp->args) {
    250           1.1       alm 		case NONSEL:			/* ! */
    251          1.13   mycroft 			p++;
    252          1.13   mycroft 			EATSPACE();
    253           1.1       alm 			cmd->nonsel = ! cmd->nonsel;
    254           1.1       alm 			goto nonsel;
    255           1.1       alm 		case GROUP:			/* { */
    256           1.1       alm 			p++;
    257           1.1       alm 			EATSPACE();
    258          1.14   mycroft 			cmd->next = stack;
    259          1.14   mycroft 			stack = cmd;
    260          1.14   mycroft 			link = &cmd->u.c;
    261          1.14   mycroft 			if (*p)
    262          1.14   mycroft 				goto semicolon;
    263          1.14   mycroft 			break;
    264          1.14   mycroft 		case ENDGROUP:
    265          1.12   mycroft 			/*
    266          1.12   mycroft 			 * Short-circuit command processing, since end of
    267          1.12   mycroft 			 * group is really just a noop.
    268          1.12   mycroft 			 */
    269          1.14   mycroft 			cmd->nonsel = 1;
    270          1.14   mycroft 			if (stack == 0)
    271          1.14   mycroft 				err(COMPILE, "unexpected }");
    272          1.14   mycroft 			cmd2 = stack;
    273          1.14   mycroft 			stack = cmd2->next;
    274          1.14   mycroft 			cmd2->next = cmd;
    275          1.14   mycroft 			/*FALLTHROUGH*/
    276           1.1       alm 		case EMPTY:		/* d D g G h H l n N p P q x = \0 */
    277           1.1       alm 			p++;
    278           1.1       alm 			EATSPACE();
    279           1.1       alm 			if (*p == ';') {
    280           1.1       alm 				p++;
    281           1.1       alm 				link = &cmd->next;
    282           1.1       alm 				goto semicolon;
    283           1.1       alm 			}
    284           1.1       alm 			if (*p)
    285           1.1       alm 				err(COMPILE,
    286           1.1       alm "extra characters at the end of %c command", cmd->code);
    287           1.1       alm 			break;
    288           1.1       alm 		case TEXT:			/* a c i */
    289           1.1       alm 			p++;
    290           1.1       alm 			EATSPACE();
    291           1.1       alm 			if (*p != '\\')
    292           1.1       alm 				err(COMPILE,
    293           1.1       alm "command %c expects \\ followed by text", cmd->code);
    294           1.1       alm 			p++;
    295           1.1       alm 			EATSPACE();
    296           1.1       alm 			if (*p)
    297           1.1       alm 				err(COMPILE,
    298           1.1       alm "extra characters after \\ at the end of %c command", cmd->code);
    299           1.1       alm 			cmd->t = compile_text();
    300           1.1       alm 			break;
    301           1.1       alm 		case COMMENT:			/* \0 # */
    302           1.1       alm 			break;
    303           1.1       alm 		case WFILE:			/* w */
    304           1.1       alm 			p++;
    305           1.1       alm 			EATSPACE();
    306           1.1       alm 			if (*p == '\0')
    307           1.1       alm 				err(COMPILE, "filename expected");
    308           1.9       cgd 			cmd->t = duptoeol(p, "w command");
    309           1.1       alm 			if (aflag)
    310           1.1       alm 				cmd->u.fd = -1;
    311           1.1       alm 			else if ((cmd->u.fd = open(p,
    312           1.1       alm 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
    313           1.1       alm 			    DEFFILEMODE)) == -1)
    314          1.23    itojun 				err(FATAL, "%s: %s", p, strerror(errno));
    315           1.1       alm 			break;
    316           1.1       alm 		case RFILE:			/* r */
    317           1.1       alm 			p++;
    318           1.1       alm 			EATSPACE();
    319           1.1       alm 			if (*p == '\0')
    320           1.1       alm 				err(COMPILE, "filename expected");
    321           1.1       alm 			else
    322           1.9       cgd 				cmd->t = duptoeol(p, "read command");
    323           1.1       alm 			break;
    324           1.1       alm 		case BRANCH:			/* b t */
    325           1.1       alm 			p++;
    326           1.1       alm 			EATSPACE();
    327           1.1       alm 			if (*p == '\0')
    328           1.1       alm 				cmd->t = NULL;
    329           1.1       alm 			else
    330           1.9       cgd 				cmd->t = duptoeol(p, "branch");
    331           1.1       alm 			break;
    332           1.1       alm 		case LABEL:			/* : */
    333           1.1       alm 			p++;
    334           1.1       alm 			EATSPACE();
    335           1.9       cgd 			cmd->t = duptoeol(p, "label");
    336           1.1       alm 			if (strlen(p) == 0)
    337           1.1       alm 				err(COMPILE, "empty label");
    338           1.9       cgd 			enterlabel(cmd);
    339           1.1       alm 			break;
    340           1.1       alm 		case SUBST:			/* s */
    341           1.1       alm 			p++;
    342           1.1       alm 			if (*p == '\0' || *p == '\\')
    343           1.1       alm 				err(COMPILE,
    344           1.1       alm "substitute pattern can not be delimited by newline or backslash");
    345           1.1       alm 			cmd->u.s = xmalloc(sizeof(struct s_subst));
    346           1.1       alm 			p = compile_re(p, &cmd->u.s->re);
    347           1.1       alm 			if (p == NULL)
    348           1.1       alm 				err(COMPILE, "unterminated substitute pattern");
    349           1.1       alm 			--p;
    350           1.1       alm 			p = compile_subst(p, cmd->u.s);
    351           1.1       alm 			p = compile_flags(p, cmd->u.s);
    352           1.1       alm 			EATSPACE();
    353           1.1       alm 			if (*p == ';') {
    354           1.1       alm 				p++;
    355           1.1       alm 				link = &cmd->next;
    356           1.1       alm 				goto semicolon;
    357           1.1       alm 			}
    358           1.1       alm 			break;
    359           1.1       alm 		case TR:			/* y */
    360           1.1       alm 			p++;
    361           1.1       alm 			p = compile_tr(p, (char **)&cmd->u.y);
    362           1.1       alm 			EATSPACE();
    363           1.1       alm 			if (*p == ';') {
    364           1.1       alm 				p++;
    365           1.1       alm 				link = &cmd->next;
    366           1.1       alm 				goto semicolon;
    367           1.1       alm 			}
    368           1.1       alm 			if (*p)
    369           1.1       alm 				err(COMPILE,
    370           1.1       alm "extra text at the end of a transform command");
    371           1.1       alm 			break;
    372           1.1       alm 		}
    373           1.1       alm 	}
    374           1.1       alm }
    375           1.1       alm 
    376           1.1       alm /*
    377          1.22       wiz  * Get a delimited string.  P points to the delimiter of the string; d points
    378           1.1       alm  * to a buffer area.  Newline and delimiter escapes are processed; other
    379           1.1       alm  * escapes are ignored.
    380           1.1       alm  *
    381           1.1       alm  * Returns a pointer to the first character after the final delimiter or NULL
    382           1.1       alm  * in the case of a non-terminated string.  The character array d is filled
    383           1.1       alm  * with the processed string.
    384           1.1       alm  */
    385           1.1       alm static char *
    386          1.24       wiz compile_delimited(char *p, char *d)
    387           1.1       alm {
    388           1.1       alm 	char c;
    389           1.1       alm 
    390           1.1       alm 	c = *p++;
    391           1.1       alm 	if (c == '\0')
    392           1.1       alm 		return (NULL);
    393           1.1       alm 	else if (c == '\\')
    394           1.1       alm 		err(COMPILE, "\\ can not be used as a string delimiter");
    395           1.1       alm 	else if (c == '\n')
    396           1.1       alm 		err(COMPILE, "newline can not be used as a string delimiter");
    397           1.1       alm 	while (*p) {
    398          1.11       alm 		if (*p == '[') {
    399          1.11       alm 			if ((d = compile_ccl(&p, d)) == NULL)
    400          1.11       alm 				err(COMPILE, "unbalanced brackets ([])");
    401          1.11       alm 			continue;
    402          1.11       alm 		} else if (*p == '\\' && p[1] == '[') {
    403          1.11       alm 			*d++ = *p++;
    404          1.11       alm 		} else if (*p == '\\' && p[1] == c)
    405           1.1       alm 			p++;
    406           1.1       alm 		else if (*p == '\\' && p[1] == 'n') {
    407           1.1       alm 			*d++ = '\n';
    408           1.1       alm 			p += 2;
    409           1.1       alm 			continue;
    410           1.1       alm 		} else if (*p == '\\' && p[1] == '\\')
    411           1.1       alm 			*d++ = *p++;
    412           1.1       alm 		else if (*p == c) {
    413           1.1       alm 			*d = '\0';
    414           1.1       alm 			return (p + 1);
    415           1.1       alm 		}
    416           1.1       alm 		*d++ = *p++;
    417           1.1       alm 	}
    418           1.1       alm 	return (NULL);
    419          1.11       alm }
    420          1.11       alm 
    421          1.11       alm 
/*
 * compile_ccl: copy one bracket expression.
 *
 * *sp points at the opening `['.  The expression is copied to t up to and
 * including the closing `]'.  A `^' right after the `[', and a `]' in the
 * first position after that, are copied literally.  Nested `[.' ... `.]',
 * `[:' ... `:]' and `[=' ... `=]' elements are copied whole, so a `]'
 * inside them does not end the expression.  Backslash + `n' is replaced
 * by a newline character.
 *
 * On success *sp is advanced past the closing `]' and the position in t
 * just after the copied `]' is returned.  If the expression is not
 * terminated, NULL is returned and *sp is left unchanged.
 */
static char *
compile_ccl(char **sp, char *t)
{
	char *src = *sp;
	char *dst = t;
	int kind, prev;

	*dst++ = *src++;			/* the opening bracket */
	if (*src == '^')
		*dst++ = *src++;
	if (*src == ']')
		*dst++ = *src++;

	while (*src != '\0') {
		*dst = *src;
		if (*dst == ']')
			break;

		if (*src == '[' &&
		    (src[1] == '.' || src[1] == ':' || src[1] == '=')) {
			kind = src[1];
			*++dst = *++src;	/* the `.', `:' or `=' */
			dst++;
			src++;
			/* Copy until a `]' directly preceded by `kind'. */
			prev = *src;
			for (;;) {
				*dst = *src;
				if (*dst == ']' && prev == kind)
					break;
				prev = *src;
				if (prev == '\0')
					return NULL;
				src++;
				dst++;
			}
		} else if (*src == '\\' && src[1] == 'n') {
			*dst = '\n';
			src++;
		}
		src++;
		dst++;
	}

	if (*src != ']')
		return NULL;
	*sp = src + 1;
	return dst + 1;
}
    444           1.2       alm 
    445           1.1       alm /*
    446           1.1       alm  * Get a regular expression.  P points to the delimiter of the regular
    447           1.1       alm  * expression; repp points to the address of a regexp pointer.  Newline
    448           1.1       alm  * and delimiter escapes are processed; other escapes are ignored.
    449           1.1       alm  * Returns a pointer to the first character after the final delimiter
    450           1.1       alm  * or NULL in the case of a non terminated regular expression.  The regexp
    451           1.1       alm  * pointer is set to the compiled regular expression.
    452           1.1       alm  * Cflags are passed to regcomp.
    453           1.1       alm  */
    454           1.1       alm static char *
    455          1.24       wiz compile_re(char *p, regex_t **repp)
    456           1.1       alm {
    457           1.1       alm 	int eval;
    458           1.1       alm 	char re[_POSIX2_LINE_MAX + 1];
    459           1.1       alm 
    460           1.1       alm 	p = compile_delimited(p, re);
    461           1.1       alm 	if (p && strlen(re) == 0) {
    462           1.1       alm 		*repp = NULL;
    463           1.1       alm 		return (p);
    464           1.1       alm 	}
    465           1.1       alm 	*repp = xmalloc(sizeof(regex_t));
    466          1.21    atatat 	if (p && (eval = regcomp(*repp, re, ere)) != 0)
    467           1.1       alm 		err(COMPILE, "RE error: %s", strregerror(eval, *repp));
    468           1.1       alm 	if (maxnsub < (*repp)->re_nsub)
    469           1.1       alm 		maxnsub = (*repp)->re_nsub;
    470           1.1       alm 	return (p);
    471           1.1       alm }
    472           1.1       alm 
    473           1.1       alm /*
    474           1.1       alm  * Compile the substitution string of a regular expression and set res to
    475           1.1       alm  * point to a saved copy of it.  Nsub is the number of parenthesized regular
    476           1.1       alm  * expressions.
    477           1.1       alm  */
    478           1.1       alm static char *
    479          1.24       wiz compile_subst(char *p, struct s_subst *s)
    480           1.1       alm {
    481           1.1       alm 	static char lbuf[_POSIX2_LINE_MAX + 1];
    482           1.1       alm 	int asize, ref, size;
    483           1.1       alm 	char c, *text, *op, *sp;
    484      1.25.2.1       jmc 	int sawesc = 0;
    485           1.1       alm 
    486           1.1       alm 	c = *p++;			/* Terminator character */
    487           1.1       alm 	if (c == '\0')
    488           1.1       alm 		return (NULL);
    489           1.1       alm 
    490           1.1       alm 	s->maxbref = 0;
    491           1.1       alm 	s->linenum = linenum;
    492           1.1       alm 	asize = 2 * _POSIX2_LINE_MAX + 1;
    493           1.1       alm 	text = xmalloc(asize);
    494           1.1       alm 	size = 0;
    495           1.1       alm 	do {
    496           1.1       alm 		op = sp = text + size;
    497           1.1       alm 		for (; *p; p++) {
    498      1.25.2.1       jmc 			if (*p == '\\' || sawesc) {
    499      1.25.2.1       jmc 				/*
    500      1.25.2.1       jmc 				 * If this is a continuation from the last
    501      1.25.2.1       jmc 				 * buffer, we won't have a character to
    502      1.25.2.1       jmc 				 * skip over.
    503      1.25.2.1       jmc 				 */
    504      1.25.2.1       jmc 				if (sawesc)
    505      1.25.2.1       jmc 					sawesc = 0;
    506      1.25.2.1       jmc 				else
    507      1.25.2.1       jmc 					p++;
    508      1.25.2.1       jmc 
    509      1.25.2.1       jmc 				if (*p == '\0') {
    510      1.25.2.1       jmc 					/*
    511      1.25.2.1       jmc 					 * This escaped character is continued
    512      1.25.2.1       jmc 					 * in the next part of the line.  Note
    513      1.25.2.1       jmc 					 * this fact, then cause the loop to
    514      1.25.2.1       jmc 					 * exit w/ normal EOL case and reenter
    515      1.25.2.1       jmc 					 * above with the new buffer.
    516      1.25.2.1       jmc 					 */
    517      1.25.2.1       jmc 					sawesc = 1;
    518      1.25.2.1       jmc 					p--;
    519      1.25.2.1       jmc 					continue;
    520      1.25.2.1       jmc 				} else if (strchr("123456789", *p) != NULL) {
    521           1.1       alm 					*sp++ = '\\';
    522           1.1       alm 					ref = *p - '0';
    523           1.1       alm 					if (s->re != NULL &&
    524           1.1       alm 					    ref > s->re->re_nsub)
    525           1.1       alm 						err(COMPILE,
    526           1.1       alm "\\%c not defined in the RE", *p);
    527           1.1       alm 					if (s->maxbref < ref)
    528           1.1       alm 						s->maxbref = ref;
    529           1.1       alm 				} else if (*p == '&' || *p == '\\')
    530           1.1       alm 					*sp++ = '\\';
    531           1.1       alm 			} else if (*p == c) {
    532           1.1       alm 				p++;
    533           1.1       alm 				*sp++ = '\0';
    534           1.1       alm 				size += sp - op;
    535           1.1       alm 				s->new = xrealloc(text, size);
    536           1.1       alm 				return (p);
    537           1.1       alm 			} else if (*p == '\n') {
    538           1.1       alm 				err(COMPILE,
    539           1.1       alm "unescaped newline inside substitute pattern");
    540           1.1       alm 				/* NOTREACHED */
    541           1.1       alm 			}
    542           1.1       alm 			*sp++ = *p;
    543           1.1       alm 		}
    544           1.1       alm 		size += sp - op;
    545           1.1       alm 		if (asize - size < _POSIX2_LINE_MAX + 1) {
    546           1.1       alm 			asize *= 2;
    547      1.25.2.2      tron 			text = xrealloc(text, asize);
    548           1.1       alm 		}
    549           1.1       alm 	} while (cu_fgets(p = lbuf, sizeof(lbuf)));
    550           1.1       alm 	err(COMPILE, "unterminated substitute in regular expression");
    551           1.1       alm 	/* NOTREACHED */
    552          1.17     lukem 	return (NULL);
    553           1.1       alm }
    554           1.1       alm 
    555           1.1       alm /*
    556           1.1       alm  * Compile the flags of the s command
    557           1.1       alm  */
    558           1.1       alm static char *
    559          1.24       wiz compile_flags(char *p, struct s_subst *s)
    560           1.1       alm {
    561           1.1       alm 	int gn;			/* True if we have seen g or n */
    562           1.1       alm 	char wfile[_POSIX2_LINE_MAX + 1], *q;
    563           1.1       alm 
    564           1.1       alm 	s->n = 1;				/* Default */
    565           1.1       alm 	s->p = 0;
    566           1.1       alm 	s->wfile = NULL;
    567           1.1       alm 	s->wfd = -1;
    568           1.1       alm 	for (gn = 0;;) {
    569           1.1       alm 		EATSPACE();			/* EXTENSION */
    570           1.1       alm 		switch (*p) {
    571           1.1       alm 		case 'g':
    572           1.1       alm 			if (gn)
    573           1.1       alm 				err(COMPILE,
    574           1.1       alm "more than one number or 'g' in substitute flags");
    575           1.1       alm 			gn = 1;
    576           1.1       alm 			s->n = 0;
    577           1.1       alm 			break;
    578           1.1       alm 		case '\0':
    579           1.1       alm 		case '\n':
    580           1.1       alm 		case ';':
    581           1.1       alm 			return (p);
    582           1.1       alm 		case 'p':
    583           1.1       alm 			s->p = 1;
    584           1.1       alm 			break;
    585           1.1       alm 		case '1': case '2': case '3':
    586           1.1       alm 		case '4': case '5': case '6':
    587           1.1       alm 		case '7': case '8': case '9':
    588           1.1       alm 			if (gn)
    589           1.1       alm 				err(COMPILE,
    590           1.1       alm "more than one number or 'g' in substitute flags");
    591           1.1       alm 			gn = 1;
    592           1.1       alm 			/* XXX Check for overflow */
    593           1.1       alm 			s->n = (int)strtol(p, &p, 10);
    594      1.25.2.1       jmc 			p--;
    595           1.1       alm 			break;
    596           1.1       alm 		case 'w':
    597           1.1       alm 			p++;
    598           1.9       cgd #ifdef HISTORIC_PRACTICE
    599           1.1       alm 			if (*p != ' ') {
    600           1.1       alm 				err(WARNING, "space missing before w wfile");
    601           1.1       alm 				return (p);
    602           1.1       alm 			}
    603           1.1       alm #endif
    604           1.1       alm 			EATSPACE();
    605           1.1       alm 			q = wfile;
    606           1.1       alm 			while (*p) {
    607           1.1       alm 				if (*p == '\n')
    608           1.1       alm 					break;
    609           1.1       alm 				*q++ = *p++;
    610           1.1       alm 			}
    611           1.1       alm 			*q = '\0';
    612           1.1       alm 			if (q == wfile)
    613           1.1       alm 				err(COMPILE, "no wfile specified");
    614           1.1       alm 			s->wfile = strdup(wfile);
    615           1.1       alm 			if (!aflag && (s->wfd = open(wfile,
    616           1.1       alm 			    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
    617           1.1       alm 			    DEFFILEMODE)) == -1)
    618          1.23    itojun 				err(FATAL, "%s: %s", wfile, strerror(errno));
    619           1.1       alm 			return (p);
    620           1.1       alm 		default:
    621           1.1       alm 			err(COMPILE,
    622           1.1       alm 			    "bad flag in substitute command: '%c'", *p);
    623           1.1       alm 			break;
    624           1.1       alm 		}
    625           1.1       alm 		p++;
    626           1.1       alm 	}
    627           1.1       alm }
    628           1.1       alm 
    629           1.1       alm /*
    630           1.1       alm  * Compile a translation set of strings into a lookup table.
    631           1.1       alm  */
    632           1.1       alm static char *
    633          1.24       wiz compile_tr(char *p, char **transtab)
    634           1.1       alm {
    635           1.1       alm 	int i;
    636           1.1       alm 	char *lt, *op, *np;
    637           1.1       alm 	char old[_POSIX2_LINE_MAX + 1];
    638           1.1       alm 	char new[_POSIX2_LINE_MAX + 1];
    639           1.1       alm 
    640           1.1       alm 	if (*p == '\0' || *p == '\\')
    641           1.1       alm 		err(COMPILE,
    642           1.1       alm "transform pattern can not be delimited by newline or backslash");
    643           1.1       alm 	p = compile_delimited(p, old);
    644           1.1       alm 	if (p == NULL) {
    645           1.1       alm 		err(COMPILE, "unterminated transform source string");
    646           1.1       alm 		return (NULL);
    647           1.1       alm 	}
    648           1.1       alm 	p = compile_delimited(--p, new);
    649           1.1       alm 	if (p == NULL) {
    650           1.1       alm 		err(COMPILE, "unterminated transform target string");
    651           1.1       alm 		return (NULL);
    652           1.1       alm 	}
    653           1.1       alm 	EATSPACE();
    654           1.1       alm 	if (strlen(new) != strlen(old)) {
    655           1.1       alm 		err(COMPILE, "transform strings are not the same length");
    656           1.1       alm 		return (NULL);
    657           1.1       alm 	}
    658           1.1       alm 	/* We assume characters are 8 bits */
    659      1.25.2.1       jmc 	lt = xmalloc(UCHAR_MAX+1);
    660           1.1       alm 	for (i = 0; i <= UCHAR_MAX; i++)
    661           1.1       alm 		lt[i] = (char)i;
    662           1.1       alm 	for (op = old, np = new; *op; op++, np++)
    663           1.1       alm 		lt[(u_char)*op] = *np;
    664           1.1       alm 	*transtab = lt;
    665           1.1       alm 	return (p);
    666           1.1       alm }
    667           1.1       alm 
    668           1.1       alm /*
    669  1.25.2.3.2.1       jmc  * Compile the text following an a, c, or i command.
    670           1.1       alm  */
    671           1.1       alm static char *
    672          1.24       wiz compile_text(void)
    673           1.1       alm {
    674           1.1       alm 	int asize, size;
    675           1.1       alm 	char *text, *p, *op, *s;
    676           1.1       alm 	char lbuf[_POSIX2_LINE_MAX + 1];
    677           1.1       alm 
    678           1.1       alm 	asize = 2 * _POSIX2_LINE_MAX + 1;
    679           1.1       alm 	text = xmalloc(asize);
    680           1.1       alm 	size = 0;
    681           1.1       alm 	while (cu_fgets(lbuf, sizeof(lbuf))) {
    682           1.1       alm 		op = s = text + size;
    683           1.1       alm 		p = lbuf;
    684           1.1       alm 		for (; *p; p++) {
    685           1.1       alm 			if (*p == '\\')
    686           1.1       alm 				p++;
    687           1.1       alm 			*s++ = *p;
    688           1.1       alm 		}
    689           1.1       alm 		size += s - op;
    690           1.1       alm 		if (p[-2] != '\\') {
    691           1.1       alm 			*s = '\0';
    692           1.1       alm 			break;
    693           1.1       alm 		}
    694           1.1       alm 		if (asize - size < _POSIX2_LINE_MAX + 1) {
    695           1.1       alm 			asize *= 2;
    696      1.25.2.3      tron 			text = xrealloc(text, asize);
    697           1.1       alm 		}
    698           1.1       alm 	}
    699           1.1       alm 	return (xrealloc(text, size + 1));
    700           1.1       alm }
    701           1.1       alm 
    702           1.1       alm /*
    703           1.1       alm  * Get an address and return a pointer to the first character after
    704           1.1       alm  * it.  Fill the structure pointed to according to the address.
    705           1.1       alm  */
    706           1.1       alm static char *
    707          1.24       wiz compile_addr(char *p, struct s_addr *a)
    708           1.1       alm {
    709           1.1       alm 	char *end;
    710           1.1       alm 
    711           1.1       alm 	switch (*p) {
    712           1.1       alm 	case '\\':				/* Context address */
    713           1.1       alm 		++p;
    714           1.1       alm 		/* FALLTHROUGH */
    715           1.1       alm 	case '/':				/* Context address */
    716           1.1       alm 		p = compile_re(p, &a->u.r);
    717           1.1       alm 		if (p == NULL)
    718           1.1       alm 			err(COMPILE, "unterminated regular expression");
    719           1.1       alm 		a->type = AT_RE;
    720           1.1       alm 		return (p);
    721           1.1       alm 
    722           1.1       alm 	case '$':				/* Last line */
    723           1.1       alm 		a->type = AT_LAST;
    724           1.1       alm 		return (p + 1);
    725           1.1       alm 						/* Line number */
    726           1.1       alm 	case '0': case '1': case '2': case '3': case '4':
    727           1.1       alm 	case '5': case '6': case '7': case '8': case '9':
    728           1.1       alm 		a->type = AT_LINE;
    729           1.1       alm 		a->u.l = strtol(p, &end, 10);
    730           1.1       alm 		return (end);
    731           1.1       alm 	default:
    732           1.1       alm 		err(COMPILE, "expected context address");
    733           1.1       alm 		return (NULL);
    734           1.1       alm 	}
    735           1.1       alm }
    736           1.1       alm 
    737           1.1       alm /*
    738           1.9       cgd  * duptoeol --
    739           1.9       cgd  *	Return a copy of all the characters up to \n or \0.
    740           1.1       alm  */
    741           1.1       alm static char *
    742          1.24       wiz duptoeol(char *s, char *ctype)
    743           1.1       alm {
    744           1.1       alm 	size_t len;
    745           1.9       cgd 	int ws;
    746           1.1       alm 	char *start;
    747           1.1       alm 
    748           1.9       cgd 	ws = 0;
    749           1.9       cgd 	for (start = s; *s != '\0' && *s != '\n'; ++s)
    750          1.19  christos 		ws = isspace((unsigned char)*s);
    751           1.1       alm 	*s = '\0';
    752           1.9       cgd 	if (ws)
    753           1.9       cgd 		err(WARNING, "whitespace after %s", ctype);
    754           1.1       alm 	len = s - start + 1;
    755           1.1       alm 	return (memmove(xmalloc(len), start, len));
    756           1.1       alm }
    757           1.1       alm 
    758           1.1       alm /*
    759           1.9       cgd  * Convert goto label names to addresses, and count a and r commands, in
    760           1.9       cgd  * the given subset of the script.  Free the memory used by labels in b
    761           1.9       cgd  * and t commands (but not by :).
    762           1.9       cgd  *
    763           1.1       alm  * TODO: Remove } nodes
    764           1.1       alm  */
    765           1.1       alm static void
    766          1.24       wiz fixuplabel(struct s_command *cp, struct s_command *end)
    767           1.1       alm {
    768           1.1       alm 
    769           1.1       alm 	for (; cp != end; cp = cp->next)
    770           1.1       alm 		switch (cp->code) {
    771           1.1       alm 		case 'a':
    772           1.1       alm 		case 'r':
    773           1.1       alm 			appendnum++;
    774           1.1       alm 			break;
    775           1.1       alm 		case 'b':
    776           1.1       alm 		case 't':
    777           1.9       cgd 			/* Resolve branch target. */
    778           1.1       alm 			if (cp->t == NULL) {
    779           1.1       alm 				cp->u.c = NULL;
    780           1.1       alm 				break;
    781           1.1       alm 			}
    782           1.9       cgd 			if ((cp->u.c = findlabel(cp->t)) == NULL)
    783           1.1       alm 				err(COMPILE2, "undefined label '%s'", cp->t);
    784           1.1       alm 			free(cp->t);
    785           1.1       alm 			break;
    786           1.1       alm 		case '{':
    787           1.9       cgd 			/* Do interior commands. */
    788           1.9       cgd 			fixuplabel(cp->u.c, cp->next);
    789           1.1       alm 			break;
    790           1.1       alm 		}
    791           1.9       cgd }
    792           1.9       cgd 
    793           1.9       cgd /*
    794           1.9       cgd  * Associate the given command label for later lookup.
    795           1.9       cgd  */
    796           1.9       cgd static void
    797          1.24       wiz enterlabel(struct s_command *cp)
    798           1.9       cgd {
    799          1.17     lukem 	struct labhash **lhp, *lh;
    800          1.17     lukem 	u_char *p;
    801          1.17     lukem 	u_int h, c;
    802           1.9       cgd 
    803           1.9       cgd 	for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++)
    804           1.9       cgd 		h = (h << 5) + h + c;
    805           1.9       cgd 	lhp = &labels[h & LHMASK];
    806           1.9       cgd 	for (lh = *lhp; lh != NULL; lh = lh->lh_next)
    807           1.9       cgd 		if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0)
    808           1.9       cgd 			err(COMPILE2, "duplicate label '%s'", cp->t);
    809           1.9       cgd 	lh = xmalloc(sizeof *lh);
    810           1.9       cgd 	lh->lh_next = *lhp;
    811           1.9       cgd 	lh->lh_hash = h;
    812           1.9       cgd 	lh->lh_cmd = cp;
    813           1.9       cgd 	lh->lh_ref = 0;
    814           1.9       cgd 	*lhp = lh;
    815           1.9       cgd }
    816           1.9       cgd 
    817           1.9       cgd /*
    818           1.9       cgd  * Find the label contained in the command l in the command linked
    819           1.9       cgd  * list cp.  L is excluded from the search.  Return NULL if not found.
    820           1.9       cgd  */
    821           1.9       cgd static struct s_command *
    822          1.24       wiz findlabel(char *name)
    823           1.9       cgd {
    824          1.17     lukem 	struct labhash *lh;
    825          1.17     lukem 	u_char *p;
    826          1.17     lukem 	u_int h, c;
    827           1.9       cgd 
    828           1.9       cgd 	for (h = 0, p = (u_char *)name; (c = *p) != 0; p++)
    829           1.9       cgd 		h = (h << 5) + h + c;
    830           1.9       cgd 	for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) {
    831           1.9       cgd 		if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) {
    832           1.9       cgd 			lh->lh_ref = 1;
    833           1.9       cgd 			return (lh->lh_cmd);
    834           1.9       cgd 		}
    835           1.9       cgd 	}
    836           1.9       cgd 	return (NULL);
    837           1.9       cgd }
    838           1.9       cgd 
    839           1.9       cgd /*
    840           1.9       cgd  * Warn about any unused labels.  As a side effect, release the label hash
    841           1.9       cgd  * table space.
    842           1.9       cgd  */
    843           1.9       cgd static void
    844          1.24       wiz uselabel(void)
    845           1.9       cgd {
    846          1.17     lukem 	struct labhash *lh, *next;
    847          1.17     lukem 	int i;
    848           1.9       cgd 
    849           1.9       cgd 	for (i = 0; i < LHSZ; i++) {
    850           1.9       cgd 		for (lh = labels[i]; lh != NULL; lh = next) {
    851           1.9       cgd 			next = lh->lh_next;
    852           1.9       cgd 			if (!lh->lh_ref)
    853           1.9       cgd 				err(WARNING, "unused label '%s'",
    854           1.9       cgd 				    lh->lh_cmd->t);
    855           1.9       cgd 			free(lh);
    856           1.9       cgd 		}
    857           1.9       cgd 	}
    858           1.1       alm }
    859