indent.c revision 1.23 1 /* $NetBSD: indent.c,v 1.23 2016/09/05 00:40:29 sevan Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
34 * Copyright (c) 1985 Sun Microsystems, Inc.
35 * All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65
66 #include <sys/cdefs.h>
67 #ifndef lint
68 __COPYRIGHT("@(#) Copyright (c) 1985 Sun Microsystems, Inc.\
69 Copyright (c) 1976 Board of Trustees of the University of Illinois.\
70 Copyright (c) 1980, 1993\
71 The Regents of the University of California. All rights reserved.");
72 #endif /* not lint */
73
74 #ifndef lint
75 #if 0
76 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
77 #else
78 __RCSID("$NetBSD: indent.c,v 1.23 2016/09/05 00:40:29 sevan Exp $");
79 #endif
80 #endif /* not lint */
81
82 #include <sys/param.h>
83 #include <ctype.h>
84 #include <err.h>
85 #include <errno.h>
86 #include <fcntl.h>
87 #include <stdio.h>
88 #include <stdlib.h>
89 #include <string.h>
90 #include <unistd.h>
91 #include <locale.h>
92 #define EXTERN
93 #include "indent_globs.h"
94 #undef EXTERN
95 #include "indent_codes.h"
96
97 const char *in_name = "Standard Input"; /* name of the current input file; stays
98 * at this placeholder unless main() is given an input file operand */
99 const char *out_name = "Standard Output"; /* name of the current output file; stays
100 * at this placeholder unless main() picks an output file */
101 char bakfile[MAXPATHLEN] = ""; /* starts empty; NOTE(review): presumably receives the backup path made by bakcopy() -- confirm */
102
103 int
104 main(int argc, char **argv)
105 {
106
107 extern int found_err; /* flag set in diag() on error */
108 int dec_ind; /* current indentation for declarations */
109 int di_stack[20]; /* a stack of structure indentation levels */
110 int flushed_nl; /* used when buffering up comments to remember
111 * that a newline was passed over */
112 int force_nl; /* when true, code must be broken */
113 int hd_type; /* used to store type of stmt for if (...),
114 * for (...), etc */
115 int i; /* local loop counter */
116 int scase; /* set to true when we see a case, so we will
117 * know what to do with the following colon */
118 int sp_sw; /* when true, we are in the expression of
119 * if(...), while(...), etc. */
120 int squest; /* when this is positive, we have seen a ?
121 * without the matching : in a <c>?<s>:<s>
122 * construct */
123 const char *t_ptr; /* used for copying tokens */
124 int tabs_to_var = 0; /* true if using tabs to indent to var name */
125 int type_code; /* the type of token, returned by lexi */
126
127 int last_else = 0; /* true iff last keyword was an else */
128
129
130 /*-----------------------------------------------*\
131 | INITIALIZATION |
132 \*-----------------------------------------------*/
133
134 if (!setlocale(LC_ALL, ""))
135 warnx("can't set locale.");
136
137 hd_type = 0;
138 ps.p_stack[0] = stmt; /* this is the parser's stack */
139 ps.last_nl = true; /* this is true if the last thing scanned was
140 * a newline */
141 ps.last_token = semicolon;
142 combuf = (char *) malloc(bufsize);
143 labbuf = (char *) malloc(bufsize);
144 codebuf = (char *) malloc(bufsize);
145 tokenbuf = (char *) malloc(bufsize);
146 l_com = combuf + bufsize - 5;
147 l_lab = labbuf + bufsize - 5;
148 l_code = codebuf + bufsize - 5;
149 l_token = tokenbuf + bufsize - 5;
150 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label,
151 * and comment buffers */
152 combuf[1] = codebuf[1] = labbuf[1] = '\0';
153 ps.else_if = 1; /* Default else-if special processing to on */
154 s_lab = e_lab = labbuf + 1;
155 s_code = e_code = codebuf + 1;
156 s_com = e_com = combuf + 1;
157 s_token = e_token = tokenbuf + 1;
158
159 in_buffer = (char *) malloc(10);
160 in_buffer_limit = in_buffer + 8;
161 buf_ptr = buf_end = in_buffer;
162 line_no = 1;
163 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
164 sp_sw = force_nl = false;
165 ps.in_or_st = false;
166 ps.bl_line = true;
167 dec_ind = 0;
168 di_stack[ps.dec_nest = 0] = 0;
169 ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
170
171
172 scase = ps.pcase = false;
173 squest = 0;
174 sc_end = 0;
175 bp_save = 0;
176 be_save = 0;
177
178 output = 0;
179
180
181
182 /*--------------------------------------------------*\
183 | COMMAND LINE SCAN |
184 \*--------------------------------------------------*/
185
186 #ifdef undef
187 max_col = 78; /* -l78 */
188 lineup_to_parens = 1; /* -lp */
189 ps.ljust_decl = 0; /* -ndj */
190 ps.com_ind = 33; /* -c33 */
191 star_comment_cont = 1; /* -sc */
192 ps.ind_size = 8; /* -i8 */
193 verbose = 0;
194 ps.decl_indent = 16; /* -di16 */
195 ps.indent_parameters = 1; /* -ip */
196 ps.decl_com_ind = 0; /* if this is not set to some positive value
197 * by an arg, we will set this equal to
198 * ps.com_ind */
199 btype_2 = 1; /* -br */
200 cuddle_else = 1; /* -ce */
201 ps.unindent_displace = 0; /* -d0 */
202 ps.case_indent = 0; /* -cli0 */
203 format_col1_comments = 1; /* -fc1 */
204 procnames_start_line = 1; /* -psl */
205 proc_calls_space = 0; /* -npcs */
206 comment_delimiter_on_blankline = 1; /* -cdb */
207 ps.leave_comma = 1; /* -nbc */
208 #endif
209
210 for (i = 1; i < argc; ++i)
211 if (strcmp(argv[i], "-npro") == 0)
212 break;
213 set_defaults();
214 if (i >= argc)
215 set_profile();
216
217 for (i = 1; i < argc; ++i) {
218
219 /*
220 * look thru args (if any) for changes to defaults
221 */
222 if (argv[i][0] != '-') { /* no flag on parameter */
223 if (input == 0) { /* we must have the input file */
224 in_name = argv[i]; /* remember name of
225 * input file */
226 input = fopen(in_name, "r");
227 if (input == 0) /* check for open error */
228 err(1, "%s", in_name);
229 continue;
230 } else
231 if (output == 0) { /* we have the output
232 * file */
233 out_name = argv[i]; /* remember name of
234 * output file */
235 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite
236 * the file */
237 errx(1, "input and output files must be different");
238 }
239 output = fopen(out_name, "w");
240 if (output == 0) /* check for create
241 * error */
242 err(1, "%s", out_name);
243 continue;
244 }
245 errx(1, "unknown parameter: %s", argv[i]);
246 } else
247 set_option(argv[i]);
248 } /* end of for */
249 if (input == 0) {
250 input = stdin;
251 }
252 if (output == 0) {
253 if (troff || input == stdin)
254 output = stdout;
255 else {
256 out_name = in_name;
257 bakcopy();
258 }
259 }
260 if (ps.com_ind <= 1)
261 ps.com_ind = 2; /* don't put normal comments before column 2 */
262 if (troff) {
263 if (bodyf.font[0] == 0)
264 parsefont(&bodyf, "R");
265 if (scomf.font[0] == 0)
266 parsefont(&scomf, "I");
267 if (blkcomf.font[0] == 0)
268 blkcomf = scomf, blkcomf.size += 2;
269 if (boxcomf.font[0] == 0)
270 boxcomf = blkcomf;
271 if (stringf.font[0] == 0)
272 parsefont(&stringf, "L");
273 if (keywordf.font[0] == 0)
274 parsefont(&keywordf, "B");
275 writefdef(&bodyf, 'B');
276 writefdef(&scomf, 'C');
277 writefdef(&blkcomf, 'L');
278 writefdef(&boxcomf, 'X');
279 writefdef(&stringf, 'S');
280 writefdef(&keywordf, 'K');
281 }
282 if (block_comment_max_col <= 0)
283 block_comment_max_col = max_col;
284 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */
285 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind;
286 if (continuation_indent == 0)
287 continuation_indent = ps.ind_size;
288 fill_buffer(); /* get first batch of stuff into input buffer */
289
290 parse(semicolon);
291 {
292 char *p = buf_ptr;
293 int col = 1;
294
295 while (1) {
296 if (*p == ' ')
297 col++;
298 else
299 if (*p == '\t')
300 col = ((col - 1) & ~7) + 9;
301 else
302 break;
303 p++;
304 }
305 if (col > ps.ind_size)
306 ps.ind_level = ps.i_l_follow = col / ps.ind_size;
307 }
308 if (troff) {
309 const char *p = in_name, *beg = in_name;
310
311 while (*p)
312 if (*p++ == '/')
313 beg = p;
314 fprintf(output, ".Fn \"%s\"\n", beg);
315 }
316 /*
317 * START OF MAIN LOOP
318 */
319
320 while (1) { /* this is the main loop. it will go until we
321 * reach eof */
322 int is_procname;
323
324 type_code = lexi(); /* lexi reads one token. The actual
325 * characters read are stored in
326 * "token". lexi returns a code
327 * indicating the type of token */
328 is_procname = ps.procname[0];
329
330 /*
331 * The following code moves everything following an if (), while (),
332 * else, etc. up to the start of the following stmt to a buffer. This
333 * allows proper handling of both kinds of brace placement.
334 */
335
336 flushed_nl = false;
337 while (ps.search_brace) { /* if we scanned an if(),
338 * while(), etc., we might
339 * need to copy stuff into a
340 * buffer we must loop,
341 * copying stuff into
342 * save_com, until we find the
343 * start of the stmt which
344 * follows the if, or whatever */
345 switch (type_code) {
346 case newline:
347 ++line_no;
348 flushed_nl = true;
349 case form_feed:
350 break; /* form feeds and newlines found here
351 * will be ignored */
352
353 case lbrace: /* this is a brace that starts the
354 * compound stmt */
355 if (sc_end == 0) { /* ignore buffering if a
356 * comment wasn't stored
357 * up */
358 ps.search_brace = false;
359 goto check_type;
360 }
361 if (btype_2) {
362 save_com[0] = '{'; /* we either want to put
363 * the brace right after
364 * the if */
365 goto sw_buffer; /* go to common code to
366 * get out of this loop */
367 }
368 case comment: /* we have a comment, so we must copy
369 * it into the buffer */
370 if (!flushed_nl || sc_end != 0) {
371 if (sc_end == 0) { /* if this is the first
372 * comment, we must set
373 * up the buffer */
374 save_com[0] = save_com[1] = ' ';
375 sc_end = &(save_com[2]);
376 } else {
377 *sc_end++ = '\n'; /* add newline between
378 * comments */
379 *sc_end++ = ' ';
380 --line_no;
381 }
382 *sc_end++ = '/'; /* copy in start of
383 * comment */
384 *sc_end++ = '*';
385
386 for (;;) { /* loop until we get to
387 * the end of the
388 * comment */
389 *sc_end = *buf_ptr++;
390 if (buf_ptr >= buf_end)
391 fill_buffer();
392
393 if (*sc_end++ == '*' && *buf_ptr == '/')
394 break; /* we are at end of
395 * comment */
396
397 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer
398 * overflow */
399 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever.");
400 fflush(output);
401 exit(1);
402 }
403 }
404 *sc_end++ = '/'; /* add ending slash */
405 if (++buf_ptr >= buf_end) /* get past / in buffer */
406 fill_buffer();
407 break;
408 }
409 default: /* it is the start of a normal
410 * statement */
411 if (flushed_nl) /* if we flushed a newline,
412 * make sure it is put back */
413 force_nl = true;
414 if ((type_code == sp_paren && *token == 'i'
415 && last_else && ps.else_if) ||
416 (type_code == sp_nparen && *token == 'e'
417 && e_code != s_code && e_code[-1] == '}'))
418 force_nl = false;
419
420 if (sc_end == 0) { /* ignore buffering if
421 * comment wasn't saved
422 * up */
423 ps.search_brace = false;
424 goto check_type;
425 }
426 if (force_nl) { /* if we should insert a nl
427 * here, put it into the
428 * buffer */
429 force_nl = false;
430 --line_no; /* this will be
431 * re-increased when the
432 * nl is read from the
433 * buffer */
434 *sc_end++ = '\n';
435 *sc_end++ = ' ';
436 if (verbose && !flushed_nl) /* print error msg if
437 * the line was not
438 * already broken */
439 diag(0, "Line broken");
440 flushed_nl = false;
441 }
442 for (t_ptr = token; *t_ptr; ++t_ptr)
443 *sc_end++ = *t_ptr; /* copy token into temp
444 * buffer */
445 ps.procname[0] = 0;
446
447 sw_buffer:
448 ps.search_brace = false; /* stop looking for
449 * start of stmt */
450 bp_save = buf_ptr; /* save current input
451 * buffer */
452 be_save = buf_end;
453 buf_ptr = save_com; /* fix so that
454 * subsequent calls to
455 * lexi will take tokens
456 * out of save_com */
457 *sc_end++ = ' '; /* add trailing blank,
458 * just in case */
459 buf_end = sc_end;
460 sc_end = 0;
461 break;
462 } /* end of switch */
463 if (type_code != 0) /* we must make this check,
464 * just in case there was an
465 * unexpected EOF */
466 type_code = lexi(); /* read another token */
467 /* if (ps.search_brace) ps.procname[0] = 0; */
468 if ((is_procname = ps.procname[0]) && flushed_nl
469 && !procnames_start_line && ps.in_decl
470 && type_code == ident)
471 flushed_nl = 0;
472 } /* end of while (search_brace) */
473 last_else = 0;
474 check_type:
475 if (type_code == 0) { /* we got eof */
476 if (s_lab != e_lab || s_code != e_code
477 || s_com != e_com) /* must dump end of line */
478 dump_line();
479 if (ps.tos > 1) /* check for balanced braces */
480 diag(1, "Stuff missing from end of file.");
481
482 if (verbose) {
483 printf("There were %d output lines and %d comments\n",
484 ps.out_lines, ps.out_coms);
485 printf("(Lines with comments)/(Lines with code): %6.3f\n",
486 (1.0 * ps.com_lines) / code_lines);
487 }
488 fflush(output);
489 exit(found_err);
490 }
491 if (
492 (type_code != comment) &&
493 (type_code != newline) &&
494 (type_code != preesc) &&
495 (type_code != form_feed)) {
496 if (force_nl &&
497 (type_code != semicolon) &&
498 (type_code != lbrace || !btype_2)) {
499 /* we should force a broken line here */
500 if (verbose && !flushed_nl)
501 diag(0, "Line broken");
502 flushed_nl = false;
503 dump_line();
504 ps.want_blank = false; /* don't insert blank at
505 * line start */
506 force_nl = false;
507 }
508 ps.in_stmt = true; /* turn on flag which causes
509 * an extra level of
510 * indentation. this is turned
511 * off by a ; or '}' */
512 if (s_com != e_com) { /* the turkey has embedded a
513 * comment in a line. fix it */
514 *e_code++ = ' ';
515 for (t_ptr = s_com; *t_ptr; ++t_ptr) {
516 CHECK_SIZE_CODE;
517 *e_code++ = *t_ptr;
518 }
519 *e_code++ = ' ';
520 *e_code = '\0'; /* null terminate code sect */
521 ps.want_blank = false;
522 e_com = s_com;
523 }
524 } else
525 if (type_code != comment) /* preserve force_nl
526 * thru a comment */
527 force_nl = false; /* cancel forced newline
528 * after newline, form
529 * feed, etc */
530
531
532
533 /*-----------------------------------------------------*\
534 | do switch on type of token scanned |
535 \*-----------------------------------------------------*/
536 CHECK_SIZE_CODE;
537 switch (type_code) { /* now, decide what to do with the
538 * token */
539
540 case form_feed:/* found a form feed in line */
541 ps.use_ff = true; /* a form feed is treated much
542 * like a newline */
543 dump_line();
544 ps.want_blank = false;
545 break;
546
547 case newline:
548 if (ps.last_token != comma || ps.p_l_follow > 0
549 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
550 dump_line();
551 ps.want_blank = false;
552 }
553 ++line_no; /* keep track of input line number */
554 break;
555
556 case lparen: /* got a '(' or '[' */
557 ++ps.p_l_follow; /* count parens to make Healy
558 * happy */
559 if (ps.want_blank && *token != '[' &&
560 (ps.last_token != ident || proc_calls_space
561 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon))))
562 *e_code++ = ' ';
563 if (ps.in_decl && !ps.block_init) {
564 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) {
565 ps.dumped_decl_indent = 1;
566 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
567 e_code += strlen(e_code);
568 } else {
569 while ((e_code - s_code) < dec_ind) {
570 CHECK_SIZE_CODE;
571 *e_code++ = ' ';
572 }
573 *e_code++ = token[0];
574 }
575 } else
576 *e_code++ = token[0];
577 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code;
578 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent
579 && ps.paren_indents[0] < 2 * ps.ind_size)
580 ps.paren_indents[0] = 2 * ps.ind_size;
581 ps.want_blank = false;
582 if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
583 /*
584 * this is a kluge to make sure that declarations will be
585 * aligned right if proc decl has an explicit type on it, i.e.
586 * "int a(x) {..."
587 */
588 parse(semicolon); /* I said this was a
589 * kluge... */
590 ps.in_or_st = false; /* turn off flag for
591 * structure decl or
592 * initialization */
593 }
594 if (ps.sizeof_keyword)
595 ps.sizeof_mask |= 1 << ps.p_l_follow;
596 break;
597
598 case rparen: /* got a ')' or ']' */
599 rparen_count--;
600 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) {
601 ps.last_u_d = true;
602 ps.cast_mask &= (1 << ps.p_l_follow) - 1;
603 }
604 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1;
605 if (--ps.p_l_follow < 0) {
606 ps.p_l_follow = 0;
607 diag(0, "Extra %c", *token);
608 }
609 if (e_code == s_code) /* if the paren starts the
610 * line */
611 ps.paren_level = ps.p_l_follow; /* then indent it */
612
613 *e_code++ = token[0];
614 ps.want_blank = true;
615
616 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
617 * (...), or some such */
618 sp_sw = false;
619 force_nl = true; /* must force newline
620 * after if */
621 ps.last_u_d = true; /* inform lexi that a
622 * following operator is
623 * unary */
624 ps.in_stmt = false; /* don't use stmt
625 * continuation
626 * indentation */
627
628 parse(hd_type); /* let parser worry about if,
629 * or whatever */
630 }
631 ps.search_brace = btype_2; /* this should insure
632 * that constructs such
633 * as main(){...} and
634 * int[]{...} have their
635 * braces put in the
636 * right place */
637 break;
638
639 case unary_op: /* this could be any unary operation */
640 if (ps.want_blank)
641 *e_code++ = ' ';
642
643 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) {
644 sprintf(e_code, "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
645 ps.dumped_decl_indent = 1;
646 e_code += strlen(e_code);
647 } else {
648 const char *res = token;
649
650 if (ps.in_decl && !ps.block_init) { /* if this is a unary op
651 * in a declaration, we
652 * should indent this
653 * token */
654 for (i = 0; token[i]; ++i); /* find length of token */
655 while ((e_code - s_code) < (dec_ind - i)) {
656 CHECK_SIZE_CODE;
657 *e_code++ = ' '; /* pad it */
658 }
659 }
660 if (troff && token[0] == '-' && token[1] == '>')
661 res = "\\(->";
662 for (t_ptr = res; *t_ptr; ++t_ptr) {
663 CHECK_SIZE_CODE;
664 *e_code++ = *t_ptr;
665 }
666 }
667 ps.want_blank = false;
668 break;
669
670 case binary_op:/* any binary operation */
671 if (ps.want_blank)
672 *e_code++ = ' ';
673 {
674 const char *res = token;
675
676 if (troff)
677 switch (token[0]) {
678 case '<':
679 if (token[1] == '=')
680 res = "\\(<=";
681 break;
682 case '>':
683 if (token[1] == '=')
684 res = "\\(>=";
685 break;
686 case '!':
687 if (token[1] == '=')
688 res = "\\(!=";
689 break;
690 case '|':
691 if (token[1] == '|')
692 res = "\\(br\\(br";
693 else
694 if (token[1] == 0)
695 res = "\\(br";
696 break;
697 }
698 for (t_ptr = res; *t_ptr; ++t_ptr) {
699 CHECK_SIZE_CODE;
700 *e_code++ = *t_ptr; /* move the operator */
701 }
702 }
703 ps.want_blank = true;
704 break;
705
706 case postop: /* got a trailing ++ or -- */
707 *e_code++ = token[0];
708 *e_code++ = token[1];
709 ps.want_blank = true;
710 break;
711
712 case question: /* got a ? */
713 squest++; /* this will be used when a later
714 * colon appears so we can distinguish
715 * the <c>?<n>:<n> construct */
716 if (ps.want_blank)
717 *e_code++ = ' ';
718 *e_code++ = '?';
719 ps.want_blank = true;
720 break;
721
722 case casestmt: /* got word 'case' or 'default' */
723 scase = true; /* so we can process the later colon
724 * properly */
725 goto copy_id;
726
727 case colon: /* got a ':' */
728 if (squest > 0) { /* it is part of the <c>?<n>:
729 * <n> construct */
730 --squest;
731 if (ps.want_blank)
732 *e_code++ = ' ';
733 *e_code++ = ':';
734 ps.want_blank = true;
735 break;
736 }
737 if (ps.in_or_st) {
738 *e_code++ = ':';
739 ps.want_blank = false;
740 break;
741 }
742 ps.in_stmt = false; /* seeing a label does not
743 * imply we are in a stmt */
744 for (t_ptr = s_code; *t_ptr; ++t_ptr)
745 *e_lab++ = *t_ptr; /* turn everything so
746 * far into a label */
747 e_code = s_code;
748 *e_lab++ = ':';
749 *e_lab++ = ' ';
750 *e_lab = '\0';
751
752 force_nl = ps.pcase = scase; /* ps.pcase will be used
753 * by dump_line to
754 * decide how to indent
755 * the label. force_nl
756 * will force a case n:
757 * to be on a line by
758 * itself */
759 scase = false;
760 ps.want_blank = false;
761 break;
762
763 case semicolon:/* got a ';' */
764 ps.in_or_st = false; /* we are not in an
765 * initialization or structure
766 * declaration */
767 scase = false; /* these will only need resetting in a
768 * error */
769 squest = 0;
770 if (ps.last_token == rparen && rparen_count == 0)
771 ps.in_parameter_declaration = 0;
772 ps.cast_mask = 0;
773 ps.sizeof_mask = 0;
774 ps.block_init = 0;
775 ps.block_init_level = 0;
776 ps.just_saw_decl--;
777
778 if (ps.in_decl && s_code == e_code && !ps.block_init)
779 while ((e_code - s_code) < (dec_ind - 1)) {
780 CHECK_SIZE_CODE;
781 *e_code++ = ' ';
782 }
783
784 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first
785 * level structure
786 * declaration, we
787 * aren't any more */
788
789 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
790
791 /*
792 * This should be true iff there were unbalanced parens in the
793 * stmt. It is a bit complicated, because the semicolon might
794 * be in a for stmt
795 */
796 diag(1, "Unbalanced parens");
797 ps.p_l_follow = 0;
798 if (sp_sw) { /* this is a check for a if,
799 * while, etc. with unbalanced
800 * parens */
801 sp_sw = false;
802 parse(hd_type); /* don't lose the if,
803 * or whatever */
804 }
805 }
806 *e_code++ = ';';
807 ps.want_blank = true;
808 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in
809 * the middle of a stmt */
810
811 if (!sp_sw) { /* if not if for (;;) */
812 parse(semicolon); /* let parser know about
813 * end of stmt */
814 force_nl = true; /* force newline after a
815 * end of stmt */
816 }
817 break;
818
819 case lbrace: /* got a '{' */
820 ps.in_stmt = false; /* don't indent the {} */
821 if (!ps.block_init)
822 force_nl = true; /* force other stuff on
823 * same line as '{' onto
824 * new line */
825 else
826 if (ps.block_init_level <= 0)
827 ps.block_init_level = 1;
828 else
829 ps.block_init_level++;
830
831 if (s_code != e_code && !ps.block_init) {
832 if (!btype_2) {
833 dump_line();
834 ps.want_blank = false;
835 } else
836 if (ps.in_parameter_declaration && !ps.in_or_st) {
837 ps.i_l_follow = 0;
838 dump_line();
839 ps.want_blank = false;
840 }
841 }
842 if (ps.in_parameter_declaration)
843 prefix_blankline_requested = 0;
844
845 if (ps.p_l_follow > 0) { /* check for preceding
846 * unbalanced parens */
847 diag(1, "Unbalanced parens");
848 ps.p_l_follow = 0;
849 if (sp_sw) { /* check for unclosed if, for,
850 * etc. */
851 sp_sw = false;
852 parse(hd_type);
853 ps.ind_level = ps.i_l_follow;
854 }
855 }
856 if (s_code == e_code)
857 ps.ind_stmt = false; /* don't put extra
858 * indentation on line
859 * with '{' */
860 if (ps.in_decl && ps.in_or_st) { /* this is either a
861 * structure declaration
862 * or an init */
863 di_stack[ps.dec_nest++] = dec_ind;
864 /* ? dec_ind = 0; */
865 } else {
866 ps.decl_on_line = false; /* we can't be in the
867 * middle of a
868 * declaration, so don't
869 * do special
870 * indentation of
871 * comments */
872 if (blanklines_after_declarations_at_proctop
873 && ps.in_parameter_declaration)
874 postfix_blankline_requested = 1;
875 ps.in_parameter_declaration = 0;
876 }
877 dec_ind = 0;
878 parse(lbrace); /* let parser know about this */
879 if (ps.want_blank) /* put a blank before '{' if
880 * '{' is not at start of line */
881 *e_code++ = ' ';
882 ps.want_blank = false;
883 *e_code++ = '{';
884 ps.just_saw_decl = 0;
885 break;
886
887 case rbrace: /* got a '}' */
888 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
889 * omitted in
890 * declarations */
891 parse(semicolon);
892 if (ps.p_l_follow) { /* check for unclosed if, for,
893 * else. */
894 diag(1, "Unbalanced parens");
895 ps.p_l_follow = 0;
896 sp_sw = false;
897 }
898 ps.just_saw_decl = 0;
899 ps.block_init_level--;
900 if (s_code != e_code && !ps.block_init) { /* '}' must be first on
901 * line */
902 if (verbose)
903 diag(0, "Line broken");
904 dump_line();
905 }
906 *e_code++ = '}';
907 ps.want_blank = true;
908 ps.in_stmt = ps.ind_stmt = false;
909 if (ps.dec_nest > 0) { /* we are in multi-level
910 * structure declaration */
911 dec_ind = di_stack[--ps.dec_nest];
912 if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
913 ps.just_saw_decl = 2;
914 ps.in_decl = true;
915 }
916 prefix_blankline_requested = 0;
917 parse(rbrace); /* let parser know about this */
918 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead
919 && ps.il[ps.tos] >= ps.ind_level;
920 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0)
921 postfix_blankline_requested = 1;
922 break;
923
924 case swstmt: /* got keyword "switch" */
925 sp_sw = true;
926 hd_type = swstmt; /* keep this for when we have
927 * seen the expression */
928 goto copy_id; /* go move the token into buffer */
929
930 case sp_paren: /* token is if, while, for */
931 sp_sw = true; /* the interesting stuff is done after
932 * the expression is scanned */
933 hd_type = (*token == 'i' ? ifstmt :
934 (*token == 'w' ? whilestmt : forstmt));
935
936 /*
937 * remember the type of header for later use by parser
938 */
939 goto copy_id; /* copy the token into line */
940
941 case sp_nparen:/* got else, do */
942 ps.in_stmt = false;
943 if (*token == 'e') {
944 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) {
945 if (verbose)
946 diag(0, "Line broken");
947 dump_line(); /* make sure this starts
948 * a line */
949 ps.want_blank = false;
950 }
951 force_nl = true; /* also, following stuff
952 * must go onto new line */
953 last_else = 1;
954 parse(elselit);
955 } else {
956 if (e_code != s_code) { /* make sure this starts
957 * a line */
958 if (verbose)
959 diag(0, "Line broken");
960 dump_line();
961 ps.want_blank = false;
962 }
963 force_nl = true; /* also, following stuff
964 * must go onto new line */
965 last_else = 0;
966 parse(dolit);
967 }
968 goto copy_id; /* move the token into line */
969
970 case decl: /* we have a declaration type (int, register,
971 * etc.) */
972 parse(decl); /* let parser worry about indentation */
973 if (ps.last_token == rparen && ps.tos <= 1) {
974 ps.in_parameter_declaration = 1;
975 if (s_code != e_code) {
976 dump_line();
977 ps.want_blank = 0;
978 }
979 }
980 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) {
981 ps.ind_level = ps.i_l_follow = 1;
982 ps.ind_stmt = 0;
983 }
984 ps.in_or_st = true; /* this might be a structure
985 * or initialization
986 * declaration */
987 ps.in_decl = ps.decl_on_line = true;
988 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
989 ps.just_saw_decl = 2;
990 prefix_blankline_requested = 0;
991 for (i = 0; token[i++];); /* get length of token */
992
993 /*
994 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent
995 * : i);
996 */
997 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i;
998 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0);
999 goto copy_id;
1000
1001 case ident: /* got an identifier or constant */
1002 if (ps.in_decl) { /* if we are in a declaration,
1003 * we must indent identifier */
1004 if (ps.want_blank)
1005 *e_code++ = ' ';
1006 ps.want_blank = false;
1007 if (is_procname == 0 || !procnames_start_line) {
1008 if (!ps.block_init) {
1009 if (troff && !ps.dumped_decl_indent) {
1010 sprintf(e_code, "\n.De %dp+\200p\n", dec_ind * 7);
1011 ps.dumped_decl_indent = 1;
1012 e_code += strlen(e_code);
1013 CHECK_SIZE_CODE;
1014 } else {
1015 int cur_dec_ind;
1016 int pos, startpos;
1017
1018 /*
1019 * in order to get the tab math right for
1020 * indentations that are not multiples of 8 we
1021 * need to modify both startpos and dec_ind
1022 * (cur_dec_ind) here by eight minus the
1023 * remainder of the current starting column
1024 * divided by eight. This seems to be a
1025 * properly working fix
1026 */
1027 startpos = e_code - s_code;
1028 cur_dec_ind = dec_ind;
1029 pos = startpos;
1030 if ((ps.ind_level * ps.ind_size) % 8 != 0) {
1031 pos += (ps.ind_level * ps.ind_size) % 8;
1032 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8;
1033 }
1034
1035 if (tabs_to_var) {
1036 while ((pos & ~7) + 8 <= cur_dec_ind) {
1037 CHECK_SIZE_CODE;
1038 *e_code++ = '\t';
1039 pos = (pos & ~7) + 8;
1040 }
1041 }
1042 while (pos < cur_dec_ind) {
1043 CHECK_SIZE_CODE;
1044 *e_code++ = ' ';
1045 pos++;
1046 }
1047 if (ps.want_blank && e_code - s_code == startpos)
1048 *e_code++ = ' ';
1049 ps.want_blank = false;
1050 }
1051 }
1052 } else {
1053 if (dec_ind && s_code != e_code)
1054 dump_line();
1055 dec_ind = 0;
1056 ps.want_blank = false;
1057 }
1058 } else
1059 if (sp_sw && ps.p_l_follow == 0) {
1060 sp_sw = false;
1061 force_nl = true;
1062 ps.last_u_d = true;
1063 ps.in_stmt = false;
1064 parse(hd_type);
1065 }
1066 copy_id:
1067 if (ps.want_blank)
1068 *e_code++ = ' ';
1069 if (troff && ps.its_a_keyword) {
1070 e_code = chfont(&bodyf, &keywordf, e_code);
1071 for (t_ptr = token; *t_ptr; ++t_ptr) {
1072 CHECK_SIZE_CODE;
1073 *e_code++ = keywordf.allcaps
1074 ? toupper((unsigned char)*t_ptr)
1075 : *t_ptr;
1076 }
1077 e_code = chfont(&keywordf, &bodyf, e_code);
1078 } else
1079 for (t_ptr = token; *t_ptr; ++t_ptr) {
1080 CHECK_SIZE_CODE;
1081 *e_code++ = *t_ptr;
1082 }
1083 ps.want_blank = true;
1084 break;
1085
1086 case period: /* treat a period kind of like a binary
1087 * operation */
1088 *e_code++ = '.'; /* move the period into line */
1089 ps.want_blank = false; /* don't put a blank after a
1090 * period */
1091 break;
1092
1093 case comma:
1094 ps.want_blank = (s_code != e_code); /* only put blank after
1095 * comma if comma does
1096 * not start the line */
1097 if (ps.in_decl && is_procname == 0 && !ps.block_init)
1098 while ((e_code - s_code) < (dec_ind - 1)) {
1099 CHECK_SIZE_CODE;
1100 *e_code++ = ' ';
1101 }
1102
1103 *e_code++ = ',';
1104 if (ps.p_l_follow == 0) {
1105 if (ps.block_init_level <= 0)
1106 ps.block_init = 0;
1107 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8))
1108 force_nl = true;
1109 }
1110 break;
1111
1112 case preesc: /* got the character '#' */
1113 if ((s_com != e_com) ||
1114 (s_lab != e_lab) ||
1115 (s_code != e_code))
1116 dump_line();
1117 *e_lab++ = '#'; /* move whole line to 'label' buffer */
1118 {
1119 int in_comment = 0;
1120 int com_start = 0;
1121 char quote = 0;
1122 int com_end = 0;
1123
1124 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1125 buf_ptr++;
1126 if (buf_ptr >= buf_end)
1127 fill_buffer();
1128 }
1129 while (*buf_ptr != '\n' || in_comment) {
1130 CHECK_SIZE_LAB;
1131 *e_lab = *buf_ptr++;
1132 if (buf_ptr >= buf_end)
1133 fill_buffer();
1134 switch (*e_lab++) {
1135 case BACKSLASH:
1136 if (troff)
1137 *e_lab++ = BACKSLASH;
1138 if (!in_comment) {
1139 *e_lab++ = *buf_ptr++;
1140 if (buf_ptr >= buf_end)
1141 fill_buffer();
1142 }
1143 break;
1144 case '/':
1145 if (*buf_ptr == '*' && !in_comment && !quote) {
1146 in_comment = 1;
1147 *e_lab++ = *buf_ptr++;
1148 com_start = e_lab - s_lab - 2;
1149 }
1150 break;
1151 case '"':
1152 if (quote == '"')
1153 quote = 0;
1154 break;
1155 case '\'':
1156 if (quote == '\'')
1157 quote = 0;
1158 break;
1159 case '*':
1160 if (*buf_ptr == '/' && in_comment) {
1161 in_comment = 0;
1162 *e_lab++ = *buf_ptr++;
1163 com_end = e_lab - s_lab;
1164 }
1165 break;
1166 }
1167 }
1168
1169 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1170 e_lab--;
1171 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on
1172 * preprocessor line */
1173 if (sc_end == 0) /* if this is the first
1174 * comment, we must set
1175 * up the buffer */
1176 sc_end = &(save_com[0]);
1177 else {
1178 *sc_end++ = '\n'; /* add newline between
1179 * comments */
1180 *sc_end++ = ' ';
1181 --line_no;
1182 }
1183 memmove(sc_end, s_lab + com_start, com_end - com_start);
1184 sc_end += com_end - com_start;
1185 if (sc_end >= &save_com[sc_size])
1186 abort();
1187 e_lab = s_lab + com_start;
1188 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1189 e_lab--;
1190 bp_save = buf_ptr; /* save current input
1191 * buffer */
1192 be_save = buf_end;
1193 buf_ptr = save_com; /* fix so that
1194 * subsequent calls to
1195 * lexi will take tokens
1196 * out of save_com */
1197 *sc_end++ = ' '; /* add trailing blank,
1198 * just in case */
1199 buf_end = sc_end;
1200 sc_end = 0;
1201 }
1202 *e_lab = '\0'; /* null terminate line */
1203 ps.pcase = false;
1204 }
1205
1206 if (strncmp(s_lab, "#if", 3) == 0) {
1207 if (blanklines_around_conditional_compilation) {
1208 int c;
1209 prefix_blankline_requested++;
1210 while ((c = getc(input)) == '\n');
1211 ungetc(c, input);
1212 }
1213 if (ifdef_level < (int)(sizeof state_stack / sizeof state_stack[0])) {
1214 match_state[ifdef_level].tos = -1;
1215 state_stack[ifdef_level++] = ps;
1216 } else
1217 diag(1, "#if stack overflow");
1218 } else
1219 if (strncmp(s_lab, "#else", 5) == 0) {
1220 if (ifdef_level <= 0)
1221 diag(1, "Unmatched #else");
1222 else {
1223 match_state[ifdef_level - 1] = ps;
1224 ps = state_stack[ifdef_level - 1];
1225 }
1226 } else
1227 if (strncmp(s_lab, "#endif", 6) == 0) {
1228 if (ifdef_level <= 0)
1229 diag(1, "Unmatched #endif");
1230 else {
1231 ifdef_level--;
1232
1233 #ifdef undef
1234 /*
1235 * This match needs to be more intelligent before the
1236 * message is useful
1237 */
1238 if (match_state[ifdef_level].tos >= 0
1239 && memcmp(&ps, &match_state[ifdef_level], sizeof ps))
1240 diag(0, "Syntactically inconsistant #ifdef alternatives.");
1241 #endif
1242 }
1243 if (blanklines_around_conditional_compilation) {
1244 postfix_blankline_requested++;
1245 n_real_blanklines = 0;
1246 }
1247 }
1248 break; /* subsequent processing of the newline
1249 * character will cause the line to be printed */
1250
1251 case comment: /* we have gotten a start comment */
1252 /* this is a biggie */
1253 if (flushed_nl) { /* we should force a broken
1254 * line here */
1255 flushed_nl = false;
1256 dump_line();
1257 ps.want_blank = false; /* don't insert blank at
1258 * line start */
1259 force_nl = false;
1260 }
1261 pr_comment();
1262 break;
1263 } /* end of big switch stmt */
1264
1265 *e_code = '\0'; /* make sure code section is null terminated */
1266 if (type_code != comment && type_code != newline && type_code != preesc)
1267 ps.last_token = type_code;
1268 } /* end of main while (1) loop */
1269 }
1270 /*
1271 * copy input file to backup file if in_name is /blah/blah/blah/file, then
1272 * backup file will be ".Bfile" then make the backup file the input and
1273 * original input file the output
1274 */
1275 void
1276 bakcopy(void)
1277 {
1278 int n, bakchn;
1279 char buff[8 * 1024];
1280 const char *p;
1281
1282 /* construct file name .Bfile */
1283 for (p = in_name; *p; p++); /* skip to end of string */
1284 while (p > in_name && *p != '/') /* find last '/' */
1285 p--;
1286 if (*p == '/')
1287 p++;
1288 sprintf(bakfile, "%s.BAK", p);
1289
1290 /* copy in_name to backup file */
1291 bakchn = creat(bakfile, 0600);
1292 if (bakchn < 0)
1293 err(1, "%s", bakfile);
1294 while ((n = read(fileno(input), buff, sizeof buff)) > 0)
1295 if (write(bakchn, buff, n) != n)
1296 err(1, "%s", bakfile);
1297 if (n < 0)
1298 err(1, "%s", in_name);
1299 close(bakchn);
1300 fclose(input);
1301
1302 /* re-open backup file as the input file */
1303 input = fopen(bakfile, "r");
1304 if (input == 0)
1305 err(1, "%s", bakfile);
1306 /* now the original input file will be the output */
1307 output = fopen(in_name, "w");
1308 if (output == 0) {
1309 unlink(bakfile);
1310 err(1, "%s", in_name);
1311 }
1312 }
1313