Home | History | Annotate | Line # | Download | only in src
      1 /*	$NetBSD: filter.c,v 1.4 2018/12/23 16:27:17 christos Exp $	*/
      2 
      3 /* filter - postprocessing of flex output through filters */
      4 
      5 /*  This file is part of flex. */
      6 
      7 /*  Redistribution and use in source and binary forms, with or without */
      8 /*  modification, are permitted provided that the following conditions */
      9 /*  are met: */
     10 
     11 /*  1. Redistributions of source code must retain the above copyright */
     12 /*     notice, this list of conditions and the following disclaimer. */
     13 /*  2. Redistributions in binary form must reproduce the above copyright */
     14 /*     notice, this list of conditions and the following disclaimer in the */
     15 /*     documentation and/or other materials provided with the distribution. */
     16 
     17 /*  Neither the name of the University nor the names of its contributors */
     18 /*  may be used to endorse or promote products derived from this software */
     19 /*  without specific prior written permission. */
     20 
     21 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
     22 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
     23 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
     24 /*  PURPOSE. */
     25 #include "flexdef.h"
     26 __RCSID("$NetBSD: filter.c,v 1.4 2018/12/23 16:27:17 christos Exp $");
     27 
/* m4 fragment written at the top of each output stream: when the m4 macro
 * `__gnu__' is not defined it prints an error through errprint and calls
 * m4exit(2).
 */
static const char *check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,`errprint("
	"Flex requires GNU M4. "
	"Set the PATH or set the M4 environment variable to its path name."
	") m4exit(2)')\n";

/** Global filter chain; starts out empty. */
struct filter *output_chain = NULL;
     36 
     37 /* Allocate and initialize an external filter.
     38  * @param chain the current chain or NULL for new chain
     39  * @param cmd the command to execute.
     40  * @param ... a NULL terminated list of (const char*) arguments to command,
     41  *            not including argv[0].
     42  * @return newest filter in chain
     43  */
     44 struct filter *filter_create_ext (struct filter *chain, const char *cmd,
     45 				  ...)
     46 {
     47 	struct filter *f;
     48 	int     max_args;
     49 	const char *s;
     50 	va_list ap;
     51 
     52 	/* allocate and initialize new filter */
     53 	f = malloc(sizeof(struct filter));
     54 	if (!f)
     55 		flexerror(_("malloc failed (f) in filter_create_ext"));
     56 	memset (f, 0, sizeof (*f));
     57 	f->filter_func = NULL;
     58 	f->extra = NULL;
     59 	f->next = NULL;
     60 	f->argc = 0;
     61 
     62 	if (chain != NULL) {
     63 		/* append f to end of chain */
     64 		while (chain->next)
     65 			chain = chain->next;
     66 		chain->next = f;
     67 	}
     68 
     69 
     70 	/* allocate argv, and populate it with the argument list. */
     71 	max_args = 8;
     72 	f->argv = malloc(sizeof(char *) * (size_t) (max_args + 1));
     73 	if (!f->argv)
     74 		flexerror(_("malloc failed (f->argv) in filter_create_ext"));
     75 	f->argv[f->argc++] = cmd;
     76 
     77 	va_start (ap, cmd);
     78 	while ((s = va_arg (ap, const char *)) != NULL) {
     79 		if (f->argc >= max_args) {
     80 			max_args += 8;
     81 			f->argv = realloc(f->argv, sizeof(char*) * (size_t) (max_args + 1));
     82 		}
     83 		f->argv[f->argc++] = s;
     84 	}
     85 	f->argv[f->argc] = NULL;
     86 
     87 	va_end (ap);
     88 	return f;
     89 }
     90 
     91 /* Allocate and initialize an internal filter.
     92  * @param chain the current chain or NULL for new chain
     93  * @param filter_func The function that will perform the filtering.
     94  *        filter_func should return 0 if successful, and -1
     95  *        if an error occurs -- or it can simply exit().
     96  * @param extra optional user-defined data to pass to the filter.
     97  * @return newest filter in chain
     98  */
     99 struct filter *filter_create_int (struct filter *chain,
    100 				  int (*filter_func) (struct filter *),
    101 				  void *extra)
    102 {
    103 	struct filter *f;
    104 
    105 	/* allocate and initialize new filter */
    106 	f = malloc(sizeof(struct filter));
    107 	if (!f)
    108 		flexerror(_("malloc failed in filter_create_int"));
    109 	memset (f, 0, sizeof (*f));
    110 	f->next = NULL;
    111 	f->argc = 0;
    112 	f->argv = NULL;
    113 
    114 	f->filter_func = filter_func;
    115 	f->extra = extra;
    116 
    117 	if (chain != NULL) {
    118 		/* append f to end of chain */
    119 		while (chain->next)
    120 			chain = chain->next;
    121 		chain->next = f;
    122 	}
    123 
    124 	return f;
    125 }
    126 
    127 /** Fork and exec entire filter chain.
    128  *  @param chain The head of the chain.
    129  *  @return true on success.
    130  */
    131 bool filter_apply_chain (struct filter * chain)
    132 {
    133 	int     pid, pipes[2];
    134 
    135 
    136 	/* Tricky recursion, since we want to begin the chain
    137 	 * at the END. Why? Because we need all the forked processes
    138 	 * to be children of the main flex process.
    139 	 */
    140 	if (chain)
    141 		filter_apply_chain (chain->next);
    142 	else
    143 		return true;
    144 
    145 	/* Now we are the right-most unprocessed link in the chain.
    146 	 */
    147 
    148 	fflush (stdout);
    149 	fflush (stderr);
    150 
    151 
    152 	if (pipe (pipes) == -1)
    153 		flexerror (_("pipe failed"));
    154 
    155 	if ((pid = fork ()) == -1)
    156 		flexerror (_("fork failed"));
    157 
    158 	if (pid == 0) {
    159 		/* child */
    160 
    161         /* We need stdin (the FILE* stdin) to connect to this new pipe.
    162          * There is no portable way to set stdin to a new file descriptor,
    163          * as stdin is not an lvalue on some systems (BSD).
    164          * So we dup the new pipe onto the stdin descriptor and use a no-op fseek
    165          * to sync the stream. This is a Hail Mary situation. It seems to work.
    166          */
    167 		close (pipes[1]);
    168 clearerr(stdin);
    169 		if (dup2 (pipes[0], fileno (stdin)) == -1)
    170 			flexfatal (_("dup2(pipes[0],0)"));
    171 		close (pipes[0]);
    172         fseek (stdin, 0, SEEK_CUR);
    173         ungetc(' ', stdin); /* still an evil hack, but one that works better */
    174         (void)fgetc(stdin); /* on NetBSD than the fseek attempt does */
    175 
    176 		/* run as a filter, either internally or by exec */
    177 		if (chain->filter_func) {
    178 			int     r;
    179 
    180 			if ((r = chain->filter_func (chain)) == -1)
    181 				flexfatal (_("filter_func failed"));
    182 			FLEX_EXIT (0);
    183 		}
    184 		else {
    185 			execvp (chain->argv[0],
    186 				(char **const) (chain->argv));
    187             lerr_fatal ( _("exec of %s failed"),
    188                     chain->argv[0]);
    189 		}
    190 
    191 		FLEX_EXIT (1);
    192 	}
    193 
    194 	/* Parent */
    195 	close (pipes[0]);
    196 	if (dup2 (pipes[1], fileno (stdout)) == -1)
    197 		flexfatal (_("dup2(pipes[1],1)"));
    198 	close (pipes[1]);
    199     fseek (stdout, 0, SEEK_CUR);
    200 
    201 	return true;
    202 }
    203 
    204 /** Truncate the chain to max_len number of filters.
    205  * @param chain the current chain.
    206  * @param max_len the maximum length of the chain.
    207  * @return the resulting length of the chain.
    208  */
    209 int filter_truncate (struct filter *chain, int max_len)
    210 {
    211 	int     len = 1;
    212 
    213 	if (!chain)
    214 		return 0;
    215 
    216 	while (chain->next && len < max_len) {
    217 		chain = chain->next;
    218 		++len;
    219 	}
    220 
    221 	chain->next = NULL;
    222 	return len;
    223 }
    224 
    225 /** Splits the chain in order to write to a header file.
    226  *  Similar in spirit to the 'tee' program.
    227  *  The header file name is in extra.
    228  *  @return 0 (zero) on success, and -1 on failure.
    229  */
    230 int filter_tee_header (struct filter *chain)
    231 {
    232 	/* This function reads from stdin and writes to both the C file and the
    233 	 * header file at the same time.
    234 	 */
    235 
    236 	const int readsz = 512;
    237 	char   *buf;
    238 	int     to_cfd = -1;
    239 	FILE   *to_c = NULL, *to_h = NULL;
    240 	bool    write_header;
    241 
    242 	write_header = (chain->extra != NULL);
    243 
    244 	/* Store a copy of the stdout pipe, which is already piped to C file
    245 	 * through the running chain. Then create a new pipe to the H file as
    246 	 * stdout, and fork the rest of the chain again.
    247 	 */
    248 
    249 	if ((to_cfd = dup (1)) == -1)
    250 		flexfatal (_("dup(1) failed"));
    251 	to_c = fdopen (to_cfd, "w");
    252 
    253 	if (write_header) {
    254 		if (freopen ((char *) chain->extra, "w", stdout) == NULL)
    255 			flexfatal (_("freopen(headerfilename) failed"));
    256 
    257 		filter_apply_chain (chain->next);
    258 		to_h = stdout;
    259 	}
    260 
    261 	/* Now to_c is a pipe to the C branch, and to_h is a pipe to the H branch.
    262 	 */
    263 
    264 	if (write_header) {
    265         fputs (check_4_gnu_m4, to_h);
    266 		fputs ("m4_changecom`'m4_dnl\n", to_h);
    267 		fputs ("m4_changequote`'m4_dnl\n", to_h);
    268 		fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_h);
    269 	    fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h);
    270 		fputs ("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n",
    271 		       to_h);
    272 		fprintf (to_h, "#ifndef %sHEADER_H\n", prefix);
    273 		fprintf (to_h, "#define %sHEADER_H 1\n", prefix);
    274 		fprintf (to_h, "#define %sIN_HEADER 1\n\n", prefix);
    275 		fprintf (to_h,
    276 			 "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
    277 			 headerfilename ? headerfilename : "<stdout>");
    278 
    279 	}
    280 
    281     fputs (check_4_gnu_m4, to_c);
    282 	fputs ("m4_changecom`'m4_dnl\n", to_c);
    283 	fputs ("m4_changequote`'m4_dnl\n", to_c);
    284 	fputs ("m4_changequote([[,]])[[]]m4_dnl\n", to_c);
    285 	fputs ("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c);
    286 	fprintf (to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n",
    287 		 outfilename ? outfilename : "<stdout>");
    288 
    289 	buf = malloc((size_t) readsz);
    290 	if (!buf)
    291 		flexerror(_("malloc failed in filter_tee_header"));
    292 	while (fgets (buf, readsz, stdin)) {
    293 		fputs (buf, to_c);
    294 		if (write_header)
    295 			fputs (buf, to_h);
    296 	}
    297 
    298 	if (write_header) {
    299 		fprintf (to_h, "\n");
    300 
    301 		/* write a fake line number. It will get fixed by the linedir filter. */
    302 		if (gen_line_dirs)
    303 			fprintf (to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n");
    304 
    305 		fprintf (to_h, "#undef %sIN_HEADER\n", prefix);
    306 		fprintf (to_h, "#endif /* %sHEADER_H */\n", prefix);
    307 		fputs ("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h);
    308 
    309 		fflush (to_h);
    310 		if (ferror (to_h))
    311 			lerr (_("error writing output file %s"),
    312 				(char *) chain->extra);
    313 
    314 		else if (fclose (to_h))
    315 			lerr (_("error closing output file %s"),
    316 				(char *) chain->extra);
    317 	}
    318 
    319 	fflush (to_c);
    320 	if (ferror (to_c))
    321 		lerr (_("error writing output file %s"),
    322 			outfilename ? outfilename : "<stdout>");
    323 
    324 	else if (fclose (to_c))
    325 		lerr (_("error closing output file %s"),
    326 			outfilename ? outfilename : "<stdout>");
    327 
    328 	while (wait (0) > 0) ;
    329 
    330 	FLEX_EXIT (0);
    331 	return 0;
    332 }
    333 
    334 /** Adjust the line numbers in the #line directives of the generated scanner.
    335  * After the m4 expansion, the line numbers are incorrect since the m4 macros
    336  * can add or remove lines.  This only adjusts line numbers for generated code,
    337  * not user code. This also happens to be a good place to squeeze multiple
    338  * blank lines into a single blank line.
    339  */
    340 int filter_fix_linedirs (struct filter *chain)
    341 {
    342 	char   *buf;
    343 	const size_t readsz = 512;
    344 	int     lineno = 1;
    345 	bool    in_gen = true;	/* in generated code */
    346 	bool    last_was_blank = false;
    347 
    348 	if (!chain)
    349 		return 0;
    350 
    351 	buf = malloc(readsz);
    352 	if (!buf)
    353 		flexerror(_("malloc failed in filter_fix_linedirs"));
    354 
    355 	while (fgets (buf, (int) readsz, stdin)) {
    356 
    357 		regmatch_t m[10];
    358 
    359 		/* Check for #line directive. */
    360 		if (buf[0] == '#'
    361 			&& regexec (&regex_linedir, buf, 3, m, 0) == 0) {
    362 
    363 			char   *fname;
    364 
    365 			/* extract the line number and filename */
    366 			fname = regmatch_dup (&m[2], buf);
    367 
    368 			if (strcmp (fname,
    369 				outfilename ? outfilename : "<stdout>")
    370 					== 0
    371 			 || strcmp (fname,
    372 			 	headerfilename ? headerfilename : "<stdout>")
    373 					== 0) {
    374 
    375 				char    *s1, *s2;
    376 				char	filename[MAXLINE];
    377 
    378 				s1 = fname;
    379 				s2 = filename;
    380 
    381 				while ((s2 - filename) < (MAXLINE - 1) && *s1) {
    382 					/* Escape the backslash */
    383 					if (*s1 == '\\')
    384 						*s2++ = '\\';
    385 					/* Escape the double quote */
    386 					if (*s1 == '\"')
    387 						*s2++ = '\\';
    388 					/* Copy the character as usual */
    389 					*s2++ = *s1++;
    390 				}
    391 
    392 				*s2 = '\0';
    393 
    394 				/* Adjust the line directives. */
    395 				in_gen = true;
    396 				snprintf (buf, readsz, "#line %d \"%s\"\n",
    397 					  lineno, filename);
    398 			}
    399 			else {
    400 				/* it's a #line directive for code we didn't write */
    401 				in_gen = false;
    402 			}
    403 
    404 			free (fname);
    405 			last_was_blank = false;
    406 		}
    407 
    408 		/* squeeze blank lines from generated code */
    409 		else if (in_gen
    410 			 && regexec (&regex_blank_line, buf, 0, NULL,
    411 				     0) == 0) {
    412 			if (last_was_blank)
    413 				continue;
    414 			else
    415 				last_was_blank = true;
    416 		}
    417 
    418 		else {
    419 			/* it's a line of normal, non-empty code. */
    420 			last_was_blank = false;
    421 		}
    422 
    423 		fputs (buf, stdout);
    424 		lineno++;
    425 	}
    426 	fflush (stdout);
    427 	if (ferror (stdout))
    428 		lerr (_("error writing output file %s"),
    429 			outfilename ? outfilename : "<stdout>");
    430 
    431 	else if (fclose (stdout))
    432 		lerr (_("error closing output file %s"),
    433 			outfilename ? outfilename : "<stdout>");
    434 
    435 	return 0;
    436 }
    437 
    438 /* vim:set expandtab cindent tabstop=4 softtabstop=4 shiftwidth=4 textwidth=0: */
    439