indent.c revision 1.28 1 /* $NetBSD: indent.c,v 1.28 2021/03/06 20:30:06 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 #ifndef lint
42 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
43 #endif /* not lint */
44 #endif
45
46 #include <sys/cdefs.h>
47 #ifndef lint
48 #if defined(__NetBSD__)
49 __RCSID("$NetBSD: indent.c,v 1.28 2021/03/06 20:30:06 rillig Exp $");
50 #elif defined(__FreeBSD__)
51 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
52 #endif
53 #endif
54
55 #include <sys/param.h>
56 #if HAVE_CAPSICUM
57 #include <sys/capsicum.h>
58 #include <capsicum_helpers.h>
59 #endif
60 #include <err.h>
61 #include <errno.h>
62 #include <fcntl.h>
63 #include <unistd.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <ctype.h>
68 #include "indent_globs.h"
69 #include "indent_codes.h"
70 #include "indent.h"
71
72 struct options opt; /* formatting options; main() reads fields such as opt.btype_2, opt.ind_size and opt.verbose */
73 struct parser_state ps; /* parser state; main() passes it to lexi() and copies it to a transient state for look-ahead */
74
75 char *labbuf; /* buffer for the label part of the current line; main() allocates bufsize bytes */
76 char *s_lab; /* start of the label text; main() sets it to labbuf + 1 */
77 char *e_lab; /* end of the label text (next write position); no label is pending while s_lab == e_lab */
78 char *l_lab; /* limit of the label buffer; main() sets it to labbuf + bufsize - 5 */
79
80 char *codebuf; /* buffer for the code part of the current line; main() allocates bufsize bytes */
81 char *s_code; /* start of the code text; main() sets it to codebuf + 1 */
82 char *e_code; /* end of the code text; main() appends tokens here via *e_code++ and memcpy */
83 char *l_code; /* limit of the code buffer; main() sets it to codebuf + bufsize - 5 */
84
85 char *combuf; /* buffer for the comment part of the current line; main() allocates bufsize bytes */
86 char *s_com; /* start of the comment text; main() sets it to combuf + 1 */
87 char *e_com; /* end of the comment text; a comment is pending while s_com != e_com */
88 char *l_com; /* limit of the comment buffer; main() sets it to combuf + bufsize - 5 */
89
90 char *tokenbuf; /* buffer for the text of the current token; main() allocates bufsize bytes */
91 char *s_token; /* start of the token text; main() sets it to tokenbuf + 1 */
92 char *e_token; /* end of the token text; main() uses e_token - s_token as the token length */
93 char *l_token; /* limit of the token buffer; main() sets it to tokenbuf + bufsize - 5 */
94
95 char *in_buffer; /* input buffer filled by fill_buffer(); main() initially allocates 10 bytes */
96 char *in_buffer_limit; /* limit of in_buffer; main() sets it to in_buffer + 8 */
97 char *buf_ptr; /* next input character to be read */
98 char *buf_end; /* end of the available input; main() calls fill_buffer() once buf_ptr reaches it */
99
100 char sc_buf[sc_size]; /* holds the newlines and comments that main() saves up while ps.search_brace is set */
101 char *save_com; /* start of the saved text within sc_buf */
102 char *sc_end; /* where the next saved character goes; NULL while nothing is saved */
103
104 char *bp_save; /* buf_ptr of the real input, saved while tokens are re-read from save_com */
105 char *be_save; /* buf_end of the real input, saved while tokens are re-read from save_com */
106
107 int found_err; /* cleared at the start of main() and used as the exit status at end of input */
108 int n_real_blanklines; /* NOTE(review): not referenced in the visible part of main(); presumably counts blank input lines - confirm */
109 int prefix_blankline_requested; /* main() only clears this (lbrace, rbrace, storage, decl); NOTE(review): consumer is not visible here */
110 int postfix_blankline_requested; /* main() sets this after a procedure body and after declarations at a procedure top, if the options ask for it */
111 int break_comma; /* cleared in main(); while false, a newline token always ends the current output line */
112 float case_ind; /* NOTE(review): not referenced in the visible part of main(); presumably the indentation of case labels - confirm */
113 int code_lines; /* divisor of the "(Lines with comments)/(Lines with code)" figure printed in verbose mode */
114 int had_eof; /* cleared in main(); NOTE(review): presumably set once the input is exhausted - confirm */
115 int line_no; /* current input line number; starts at 1 and is incremented for every newline token */
116 int inhibit_formatting; /* NOTE(review): not referenced in the visible part of main() - confirm meaning against its users */
117 int suppress_blanklines; /* NOTE(review): not referenced in the visible part of main() - confirm meaning against its users */
118
119 int ifdef_level; /* NOTE(review): not referenced in the visible part of main(); presumably the #if nesting depth - confirm */
120 struct parser_state state_stack[5]; /* NOTE(review): presumably parser states saved per ifdef_level (at most 5) - confirm */
121 struct parser_state match_state[5]; /* NOTE(review): presumably parser states compared across #if branches - confirm */
122
123 FILE *input; /* stream being read; main() opens the first file argument or falls back to stdin */
124 FILE *output; /* stream being written; main() opens the second file argument, or uses stdout when reading stdin */
125
126 static void bakcopy(void); /* called by main() when an input file but no output file was named; defined outside the part shown here */
127 static void indent_declaration(int, int); /* main() calls it with (dec_ind, tabs_to_var) before the first declarator of a declaration */
128
129 const char *in_name = "Standard Input"; /* always points to the name of the input file; main() replaces
130 * the default with the first file argument */
131 const char *out_name = "Standard Output"; /* always points to the name of the output file; main() sets it to
132 * the second file argument, or to in_name when formatting in place */
133 const char *simple_backup_suffix = ".BAK"; /* suffix to use for backup files; main() overrides it from the
134 * SIMPLE_BACKUP_SUFFIX environment variable when that is set */
135 char bakfile[MAXPATHLEN] = ""; /* NOTE(review): not referenced in the visible part of main(); presumably the backup file path used by bakcopy() - confirm */
136
137 int
138 main(int argc, char **argv)
139 {
140 #if HAVE_CAPSICUM
141 cap_rights_t rights;
142 #endif
143
144 int dec_ind; /* current indentation for declarations */
145 int di_stack[20]; /* a stack of structure indentation levels */
146 int force_nl; /* when true, code must be broken */
147 int hd_type = 0; /* used to store type of stmt for if (...),
148 * for (...), etc */
149 int i; /* local loop counter */
150 int scase; /* set to true when we see a case, so we will
151 * know what to do with the following colon */
152 int sp_sw; /* when true, we are in the expression of
153 * if(...), while(...), etc. */
154 int squest; /* when this is positive, we have seen a ?
155 * without the matching : in a <c>?<s>:<s>
156 * construct */
157 const char *t_ptr; /* used for copying tokens */
158 int tabs_to_var; /* true if using tabs to indent to var name */
159 int type_code; /* the type of token, returned by lexi */
160
161 int last_else = 0; /* true iff last keyword was an else */
162 const char *profile_name = NULL;
163 const char *envval = NULL;
164 struct parser_state transient_state; /* a copy for lookup */
165
166 /*-----------------------------------------------*\
167 | INITIALIZATION |
168 \*-----------------------------------------------*/
169
170 found_err = 0;
171
172 ps.p_stack[0] = stmt; /* this is the parser's stack */
173 ps.last_nl = true; /* this is true if the last thing scanned was
174 * a newline */
175 ps.last_token = semicolon;
176 combuf = (char *) malloc(bufsize);
177 if (combuf == NULL)
178 err(1, NULL);
179 labbuf = (char *) malloc(bufsize);
180 if (labbuf == NULL)
181 err(1, NULL);
182 codebuf = (char *) malloc(bufsize);
183 if (codebuf == NULL)
184 err(1, NULL);
185 tokenbuf = (char *) malloc(bufsize);
186 if (tokenbuf == NULL)
187 err(1, NULL);
188 alloc_typenames();
189 init_constant_tt();
190 l_com = combuf + bufsize - 5;
191 l_lab = labbuf + bufsize - 5;
192 l_code = codebuf + bufsize - 5;
193 l_token = tokenbuf + bufsize - 5;
194 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and
195 * comment buffers */
196 combuf[1] = codebuf[1] = labbuf[1] = '\0';
197 opt.else_if = 1; /* Default else-if special processing to on */
198 s_lab = e_lab = labbuf + 1;
199 s_code = e_code = codebuf + 1;
200 s_com = e_com = combuf + 1;
201 s_token = e_token = tokenbuf + 1;
202
203 in_buffer = (char *) malloc(10);
204 if (in_buffer == NULL)
205 err(1, NULL);
206 in_buffer_limit = in_buffer + 8;
207 buf_ptr = buf_end = in_buffer;
208 line_no = 1;
209 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
210 sp_sw = force_nl = false;
211 ps.in_or_st = false;
212 ps.bl_line = true;
213 dec_ind = 0;
214 di_stack[ps.dec_nest = 0] = 0;
215 ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
216
217 scase = ps.pcase = false;
218 squest = 0;
219 sc_end = NULL;
220 bp_save = NULL;
221 be_save = NULL;
222
223 output = NULL;
224 tabs_to_var = 0;
225
226 envval = getenv("SIMPLE_BACKUP_SUFFIX");
227 if (envval)
228 simple_backup_suffix = envval;
229
230 /*--------------------------------------------------*\
231 | COMMAND LINE SCAN |
232 \*--------------------------------------------------*/
233
234 #ifdef undef
235 max_col = 78; /* -l78 */
236 lineup_to_parens = 1; /* -lp */
237 lineup_to_parens_always = 0; /* -nlpl */
238 ps.ljust_decl = 0; /* -ndj */
239 ps.com_ind = 33; /* -c33 */
240 star_comment_cont = 1; /* -sc */
241 ps.ind_size = 8; /* -i8 */
242 verbose = 0;
243 ps.decl_indent = 16; /* -di16 */
244 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value
245 * by an arg, we will set this equal to
246 * ps.decl_ind */
247 ps.indent_parameters = 1; /* -ip */
248 ps.decl_com_ind = 0; /* if this is not set to some positive value
249 * by an arg, we will set this equal to
250 * ps.com_ind */
251 btype_2 = 1; /* -br */
252 cuddle_else = 1; /* -ce */
253 ps.unindent_displace = 0; /* -d0 */
254 ps.case_indent = 0; /* -cli0 */
255 format_block_comments = 1; /* -fcb */
256 format_col1_comments = 1; /* -fc1 */
257 procnames_start_line = 1; /* -psl */
258 proc_calls_space = 0; /* -npcs */
259 comment_delimiter_on_blankline = 1; /* -cdb */
260 ps.leave_comma = 1; /* -nbc */
261 #endif
262
263 for (i = 1; i < argc; ++i)
264 if (strcmp(argv[i], "-npro") == 0)
265 break;
266 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0')
267 profile_name = argv[i]; /* non-empty -P (set profile) */
268 set_defaults();
269 if (i >= argc)
270 set_profile(profile_name);
271
272 for (i = 1; i < argc; ++i) {
273
274 /*
275 * look thru args (if any) for changes to defaults
276 */
277 if (argv[i][0] != '-') {/* no flag on parameter */
278 if (input == NULL) { /* we must have the input file */
279 in_name = argv[i]; /* remember name of input file */
280 input = fopen(in_name, "r");
281 if (input == NULL) /* check for open error */
282 err(1, "%s", in_name);
283 continue;
284 }
285 else if (output == NULL) { /* we have the output file */
286 out_name = argv[i]; /* remember name of output file */
287 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite
288 * the file */
289 errx(1, "input and output files must be different");
290 }
291 output = fopen(out_name, "w");
292 if (output == NULL) /* check for create error */
293 err(1, "%s", out_name);
294 continue;
295 }
296 errx(1, "unknown parameter: %s", argv[i]);
297 }
298 else
299 set_option(argv[i]);
300 } /* end of for */
301 if (input == NULL)
302 input = stdin;
303 if (output == NULL) {
304 if (input == stdin)
305 output = stdout;
306 else {
307 out_name = in_name;
308 bakcopy();
309 }
310 }
311
312 #if HAVE_CAPSICUM
313 /* Restrict input/output descriptors and enter Capsicum sandbox. */
314 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
315 if (caph_rights_limit(fileno(output), &rights) < 0)
316 err(EXIT_FAILURE, "unable to limit rights for %s", out_name);
317 cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
318 if (caph_rights_limit(fileno(input), &rights) < 0)
319 err(EXIT_FAILURE, "unable to limit rights for %s", in_name);
320 if (caph_enter() < 0)
321 err(EXIT_FAILURE, "unable to enter capability mode");
322 #endif
323
324 if (opt.com_ind <= 1)
325 opt.com_ind = 2; /* don't put normal comments before column 2 */
326 if (opt.block_comment_max_col <= 0)
327 opt.block_comment_max_col = opt.max_col;
328 if (opt.local_decl_indent < 0) /* if not specified by user, set this */
329 opt.local_decl_indent = opt.decl_indent;
330 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */
331 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind;
332 if (opt.continuation_indent == 0)
333 opt.continuation_indent = opt.ind_size;
334 fill_buffer(); /* get first batch of stuff into input buffer */
335
336 parse(semicolon);
337 {
338 char *p = buf_ptr;
339 int col = 1;
340
341 while (1) {
342 if (*p == ' ')
343 col++;
344 else if (*p == '\t')
345 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1;
346 else
347 break;
348 p++;
349 }
350 if (col > opt.ind_size)
351 ps.ind_level = ps.i_l_follow = col / opt.ind_size;
352 }
353
354 /*
355 * START OF MAIN LOOP
356 */
357
358 while (1) { /* this is the main loop. it will go until we
359 * reach eof */
360 int comment_buffered = false;
361
362 type_code = lexi(&ps); /* lexi reads one token. The actual
363 * characters read are stored in "token". lexi
364 * returns a code indicating the type of token */
365
366 /*
367 * The following code moves newlines and comments following an if (),
368 * while (), else, etc. up to the start of the following stmt to
369 * a buffer. This allows proper handling of both kinds of brace
370 * placement (-br, -bl) and cuddling "else" (-ce).
371 */
372
373 while (ps.search_brace) {
374 switch (type_code) {
375 case newline:
376 if (sc_end == NULL) {
377 save_com = sc_buf;
378 save_com[0] = save_com[1] = ' ';
379 sc_end = &save_com[2];
380 }
381 *sc_end++ = '\n';
382 /*
383 * We may have inherited a force_nl == true from the previous
384 * token (like a semicolon). But once we know that a newline
385 * has been scanned in this loop, force_nl should be false.
386 *
387 * However, the force_nl == true must be preserved if newline
388 * is never scanned in this loop, so this assignment cannot be
389 * done earlier.
390 */
391 force_nl = false;
392 case form_feed:
393 break;
394 case comment:
395 if (sc_end == NULL) {
396 /*
397 * Copy everything from the start of the line, because
398 * pr_comment() will use that to calculate original
399 * indentation of a boxed comment.
400 */
401 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4);
402 save_com = sc_buf + (buf_ptr - in_buffer - 4);
403 save_com[0] = save_com[1] = ' ';
404 sc_end = &save_com[2];
405 }
406 comment_buffered = true;
407 *sc_end++ = '/'; /* copy in start of comment */
408 *sc_end++ = '*';
409 for (;;) { /* loop until we get to the end of the comment */
410 *sc_end = *buf_ptr++;
411 if (buf_ptr >= buf_end)
412 fill_buffer();
413 if (*sc_end++ == '*' && *buf_ptr == '/')
414 break; /* we are at end of comment */
415 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer
416 * overflow */
417 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever");
418 fflush(output);
419 exit(1);
420 }
421 }
422 *sc_end++ = '/'; /* add ending slash */
423 if (++buf_ptr >= buf_end) /* get past / in buffer */
424 fill_buffer();
425 break;
426 case lbrace:
427 /*
428 * Put KNF-style lbraces before the buffered up tokens and
429 * jump out of this loop in order to avoid copying the token
430 * again under the default case of the switch below.
431 */
432 if (sc_end != NULL && opt.btype_2) {
433 save_com[0] = '{';
434 /*
435 * Originally the lbrace may have been alone on its own
436 * line, but it will be moved into "the else's line", so
437 * if there was a newline resulting from the "{" before,
438 * it must be scanned now and ignored.
439 */
440 while (isspace((unsigned char)*buf_ptr)) {
441 if (++buf_ptr >= buf_end)
442 fill_buffer();
443 if (*buf_ptr == '\n')
444 break;
445 }
446 goto sw_buffer;
447 }
448 /* FALLTHROUGH */
449 default: /* it is the start of a normal statement */
450 {
451 int remove_newlines;
452
453 remove_newlines =
454 /* "} else" */
455 (type_code == sp_nparen && *token == 'e' &&
456 e_code != s_code && e_code[-1] == '}')
457 /* "else if" */
458 || (type_code == sp_paren && *token == 'i' &&
459 last_else && opt.else_if);
460 if (remove_newlines)
461 force_nl = false;
462 if (sc_end == NULL) { /* ignore buffering if
463 * comment wasn't saved up */
464 ps.search_brace = false;
465 goto check_type;
466 }
467 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) {
468 sc_end--;
469 }
470 if (opt.swallow_optional_blanklines ||
471 (!comment_buffered && remove_newlines)) {
472 force_nl = !remove_newlines;
473 while (sc_end > save_com && sc_end[-1] == '\n') {
474 sc_end--;
475 }
476 }
477 if (force_nl) { /* if we should insert a nl here, put
478 * it into the buffer */
479 force_nl = false;
480 --line_no; /* this will be re-increased when the
481 * newline is read from the buffer */
482 *sc_end++ = '\n';
483 *sc_end++ = ' ';
484 if (opt.verbose) /* print error msg if the line was
485 * not already broken */
486 diag(0, "Line broken");
487 }
488 for (t_ptr = token; *t_ptr; ++t_ptr)
489 *sc_end++ = *t_ptr;
490
491 sw_buffer:
492 ps.search_brace = false; /* stop looking for start of
493 * stmt */
494 bp_save = buf_ptr; /* save current input buffer */
495 be_save = buf_end;
496 buf_ptr = save_com; /* fix so that subsequent calls to
497 * lexi will take tokens out of
498 * save_com */
499 *sc_end++ = ' ';/* add trailing blank, just in case */
500 buf_end = sc_end;
501 sc_end = NULL;
502 break;
503 }
504 } /* end of switch */
505 /*
506 * We must make this check, just in case there was an unexpected
507 * EOF.
508 */
509 if (type_code != 0) {
510 /*
511 * The only intended purpose of calling lexi() below is to
512 * categorize the next token in order to decide whether to
513 * continue buffering forthcoming tokens. Once the buffering
514 * is over, lexi() will be called again elsewhere on all of
515 * the tokens - this time for normal processing.
516 *
517 * Calling it for this purpose is a bug, because lexi() also
518 * changes the parser state and discards leading whitespace,
519 * which is needed mostly for comment-related considerations.
520 *
521 * Work around the former problem by giving lexi() a copy of
522 * the current parser state and discard it if the call turned
523 * out to be just a look ahead.
524 *
525 * Work around the latter problem by copying all whitespace
526 * characters into the buffer so that the later lexi() call
527 * will read them.
528 */
529 if (sc_end != NULL) {
530 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
531 *sc_end++ = *buf_ptr++;
532 if (sc_end >= &save_com[sc_size]) {
533 errx(1, "input too long");
534 }
535 }
536 if (buf_ptr >= buf_end) {
537 fill_buffer();
538 }
539 }
540 transient_state = ps;
541 type_code = lexi(&transient_state); /* read another token */
542 if (type_code != newline && type_code != form_feed &&
543 type_code != comment && !transient_state.search_brace) {
544 ps = transient_state;
545 }
546 }
547 } /* end of while (search_brace) */
548 last_else = 0;
549 check_type:
550 if (type_code == 0) { /* we got eof */
551 if (s_lab != e_lab || s_code != e_code
552 || s_com != e_com) /* must dump end of line */
553 dump_line();
554 if (ps.tos > 1) /* check for balanced braces */
555 diag(1, "Stuff missing from end of file");
556
557 if (opt.verbose) {
558 printf("There were %d output lines and %d comments\n",
559 ps.out_lines, ps.out_coms);
560 printf("(Lines with comments)/(Lines with code): %6.3f\n",
561 (1.0 * ps.com_lines) / code_lines);
562 }
563 fflush(output);
564 exit(found_err);
565 }
566 if (
567 (type_code != comment) &&
568 (type_code != newline) &&
569 (type_code != preesc) &&
570 (type_code != form_feed)) {
571 if (force_nl &&
572 (type_code != semicolon) &&
573 (type_code != lbrace || !opt.btype_2)) {
574 /* we should force a broken line here */
575 if (opt.verbose)
576 diag(0, "Line broken");
577 dump_line();
578 ps.want_blank = false; /* dont insert blank at line start */
579 force_nl = false;
580 }
581 ps.in_stmt = true; /* turn on flag which causes an extra level of
582 * indentation. this is turned off by a ; or
583 * '}' */
584 if (s_com != e_com) { /* the turkey has embedded a comment
585 * in a line. fix it */
586 int len = e_com - s_com;
587
588 CHECK_SIZE_CODE(len + 3);
589 *e_code++ = ' ';
590 memcpy(e_code, s_com, len);
591 e_code += len;
592 *e_code++ = ' ';
593 *e_code = '\0'; /* null terminate code sect */
594 ps.want_blank = false;
595 e_com = s_com;
596 }
597 }
598 else if (type_code != comment) /* preserve force_nl thru a comment */
599 force_nl = false; /* cancel forced newline after newline, form
600 * feed, etc */
601
602
603
604 /*-----------------------------------------------------*\
605 | do switch on type of token scanned |
606 \*-----------------------------------------------------*/
607 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code
608 * before the next CHECK_SIZE_CODE or
609 * dump_line() is 2. After that there's the
610 * final increment for the null character. */
611 switch (type_code) { /* now, decide what to do with the token */
612
613 case form_feed: /* found a form feed in line */
614 ps.use_ff = true; /* a form feed is treated much like a newline */
615 dump_line();
616 ps.want_blank = false;
617 break;
618
619 case newline:
620 if (ps.last_token != comma || ps.p_l_follow > 0
621 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
622 dump_line();
623 ps.want_blank = false;
624 }
625 ++line_no; /* keep track of input line number */
626 break;
627
628 case lparen: /* got a '(' or '[' */
629 /* count parens to make Healy happy */
630 if (++ps.p_l_follow == nitems(ps.paren_indents)) {
631 diag(0, "Reached internal limit of %zu unclosed parens",
632 nitems(ps.paren_indents));
633 ps.p_l_follow--;
634 }
635 if (*token == '[')
636 /* not a function pointer declaration or a function call */;
637 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent &&
638 ps.procname[0] == '\0' && ps.paren_level == 0) {
639 /* function pointer declarations */
640 indent_declaration(dec_ind, tabs_to_var);
641 ps.dumped_decl_indent = true;
642 }
643 else if (ps.want_blank &&
644 ((ps.last_token != ident && ps.last_token != funcname) ||
645 opt.proc_calls_space ||
646 /* offsetof (1) is never allowed a space; sizeof (2) gets
647 * one iff -bs; all other keywords (>2) always get a space
648 * before lparen */
649 ps.keyword + opt.Bill_Shannon > 2))
650 *e_code++ = ' ';
651 ps.want_blank = false;
652 *e_code++ = token[0];
653 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1;
654 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent
655 && ps.paren_indents[0] < 2 * opt.ind_size)
656 ps.paren_indents[0] = 2 * opt.ind_size;
657 if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
658 /*
659 * this is a kluge to make sure that declarations will be
660 * aligned right if proc decl has an explicit type on it, i.e.
661 * "int a(x) {..."
662 */
663 parse(semicolon); /* I said this was a kluge... */
664 ps.in_or_st = false; /* turn off flag for structure decl or
665 * initialization */
666 }
667 /* parenthesized type following sizeof or offsetof is not a cast */
668 if (ps.keyword == 1 || ps.keyword == 2)
669 ps.not_cast_mask |= 1 << ps.p_l_follow;
670 break;
671
672 case rparen: /* got a ')' or ']' */
673 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) {
674 ps.last_u_d = true;
675 ps.cast_mask &= (1 << ps.p_l_follow) - 1;
676 ps.want_blank = opt.space_after_cast;
677 } else
678 ps.want_blank = true;
679 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1;
680 if (--ps.p_l_follow < 0) {
681 ps.p_l_follow = 0;
682 diag(0, "Extra %c", *token);
683 }
684 if (e_code == s_code) /* if the paren starts the line */
685 ps.paren_level = ps.p_l_follow; /* then indent it */
686
687 *e_code++ = token[0];
688
689 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
690 * (...), or some such */
691 sp_sw = false;
692 force_nl = true;/* must force newline after if */
693 ps.last_u_d = true; /* inform lexi that a following
694 * operator is unary */
695 ps.in_stmt = false; /* dont use stmt continuation
696 * indentation */
697
698 parse(hd_type); /* let parser worry about if, or whatever */
699 }
700 ps.search_brace = opt.btype_2; /* this should ensure that
701 * constructs such as main(){...}
702 * and int[]{...} have their braces
703 * put in the right place */
704 break;
705
706 case unary_op: /* this could be any unary operation */
707 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init &&
708 ps.procname[0] == '\0' && ps.paren_level == 0) {
709 /* pointer declarations */
710
711 /*
712 * if this is a unary op in a declaration, we should indent
713 * this token
714 */
715 for (i = 0; token[i]; ++i)
716 /* find length of token */;
717 indent_declaration(dec_ind - i, tabs_to_var);
718 ps.dumped_decl_indent = true;
719 }
720 else if (ps.want_blank)
721 *e_code++ = ' ';
722
723 {
724 int len = e_token - s_token;
725
726 CHECK_SIZE_CODE(len);
727 memcpy(e_code, token, len);
728 e_code += len;
729 }
730 ps.want_blank = false;
731 break;
732
733 case binary_op: /* any binary operation */
734 {
735 int len = e_token - s_token;
736
737 CHECK_SIZE_CODE(len + 1);
738 if (ps.want_blank)
739 *e_code++ = ' ';
740 memcpy(e_code, token, len);
741 e_code += len;
742 }
743 ps.want_blank = true;
744 break;
745
746 case postop: /* got a trailing ++ or -- */
747 *e_code++ = token[0];
748 *e_code++ = token[1];
749 ps.want_blank = true;
750 break;
751
752 case question: /* got a ? */
753 squest++; /* this will be used when a later colon
754 * appears so we can distinguish the
755 * <c>?<n>:<n> construct */
756 if (ps.want_blank)
757 *e_code++ = ' ';
758 *e_code++ = '?';
759 ps.want_blank = true;
760 break;
761
762 case casestmt: /* got word 'case' or 'default' */
763 scase = true; /* so we can process the later colon properly */
764 goto copy_id;
765
766 case colon: /* got a ':' */
767 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */
768 --squest;
769 if (ps.want_blank)
770 *e_code++ = ' ';
771 *e_code++ = ':';
772 ps.want_blank = true;
773 break;
774 }
775 if (ps.in_or_st) {
776 *e_code++ = ':';
777 ps.want_blank = false;
778 break;
779 }
780 ps.in_stmt = false; /* seeing a label does not imply we are in a
781 * stmt */
782 /*
783 * turn everything so far into a label
784 */
785 {
786 int len = e_code - s_code;
787
788 CHECK_SIZE_LAB(len + 3);
789 memcpy(e_lab, s_code, len);
790 e_lab += len;
791 *e_lab++ = ':';
792 *e_lab = '\0';
793 e_code = s_code;
794 }
795 force_nl = ps.pcase = scase; /* ps.pcase will be used by
796 * dump_line to decide how to
797 * indent the label. force_nl
798 * will force a case n: to be
799 * on a line by itself */
800 scase = false;
801 ps.want_blank = false;
802 break;
803
804 case semicolon: /* got a ';' */
805 if (ps.dec_nest == 0)
806 ps.in_or_st = false;/* we are not in an initialization or
807 * structure declaration */
808 scase = false; /* these will only need resetting in an error */
809 squest = 0;
810 if (ps.last_token == rparen)
811 ps.in_parameter_declaration = 0;
812 ps.cast_mask = 0;
813 ps.not_cast_mask = 0;
814 ps.block_init = 0;
815 ps.block_init_level = 0;
816 ps.just_saw_decl--;
817
818 if (ps.in_decl && s_code == e_code && !ps.block_init &&
819 !ps.dumped_decl_indent && ps.paren_level == 0) {
820 /* indent stray semicolons in declarations */
821 indent_declaration(dec_ind - 1, tabs_to_var);
822 ps.dumped_decl_indent = true;
823 }
824
825 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level
826 * structure declaration, we
827 * arent any more */
828
829 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
830
831 /*
832 * This should be true iff there were unbalanced parens in the
833 * stmt. It is a bit complicated, because the semicolon might
834 * be in a for stmt
835 */
836 diag(1, "Unbalanced parens");
837 ps.p_l_follow = 0;
838 if (sp_sw) { /* this is a check for an if, while, etc. with
839 * unbalanced parens */
840 sp_sw = false;
841 parse(hd_type); /* dont lose the if, or whatever */
842 }
843 }
844 *e_code++ = ';';
845 ps.want_blank = true;
846 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the
847 * middle of a stmt */
848
849 if (!sp_sw) { /* if not if for (;;) */
850 parse(semicolon); /* let parser know about end of stmt */
851 force_nl = true;/* force newline after an end of stmt */
852 }
853 break;
854
855 case lbrace: /* got a '{' */
856 ps.in_stmt = false; /* dont indent the {} */
857 if (!ps.block_init)
858 force_nl = true;/* force other stuff on same line as '{' onto
859 * new line */
860 else if (ps.block_init_level <= 0)
861 ps.block_init_level = 1;
862 else
863 ps.block_init_level++;
864
865 if (s_code != e_code && !ps.block_init) {
866 if (!opt.btype_2) {
867 dump_line();
868 ps.want_blank = false;
869 }
870 else if (ps.in_parameter_declaration && !ps.in_or_st) {
871 ps.i_l_follow = 0;
872 if (opt.function_brace_split) { /* dump the line prior
873 * to the brace ... */
874 dump_line();
875 ps.want_blank = false;
876 } else /* add a space between the decl and brace */
877 ps.want_blank = true;
878 }
879 }
880 if (ps.in_parameter_declaration)
881 prefix_blankline_requested = 0;
882
883 if (ps.p_l_follow > 0) { /* check for preceding unbalanced
884 * parens */
885 diag(1, "Unbalanced parens");
886 ps.p_l_follow = 0;
887 if (sp_sw) { /* check for unclosed if, for, etc. */
888 sp_sw = false;
889 parse(hd_type);
890 ps.ind_level = ps.i_l_follow;
891 }
892 }
893 if (s_code == e_code)
894 ps.ind_stmt = false; /* dont put extra indentation on line
895 * with '{' */
896 if (ps.in_decl && ps.in_or_st) { /* this is either a structure
897 * declaration or an init */
898 di_stack[ps.dec_nest] = dec_ind;
899 if (++ps.dec_nest == nitems(di_stack)) {
900 diag(0, "Reached internal limit of %zu struct levels",
901 nitems(di_stack));
902 ps.dec_nest--;
903 }
904 /* ? dec_ind = 0; */
905 }
906 else {
907 ps.decl_on_line = false; /* we can't be in the middle of
908 * a declaration, so don't do
909 * special indentation of
910 * comments */
911 if (opt.blanklines_after_declarations_at_proctop
912 && ps.in_parameter_declaration)
913 postfix_blankline_requested = 1;
914 ps.in_parameter_declaration = 0;
915 ps.in_decl = false;
916 }
917 dec_ind = 0;
918 parse(lbrace); /* let parser know about this */
919 if (ps.want_blank) /* put a blank before '{' if '{' is not at
920 * start of line */
921 *e_code++ = ' ';
922 ps.want_blank = false;
923 *e_code++ = '{';
924 ps.just_saw_decl = 0;
925 break;
926
927 case rbrace: /* got a '}' */
928 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
929 * omitted in
930 * declarations */
931 parse(semicolon);
932 if (ps.p_l_follow) {/* check for unclosed if, for, else. */
933 diag(1, "Unbalanced parens");
934 ps.p_l_follow = 0;
935 sp_sw = false;
936 }
937 ps.just_saw_decl = 0;
938 ps.block_init_level--;
939 if (s_code != e_code && !ps.block_init) { /* '}' must be first on
940 * line */
941 if (opt.verbose)
942 diag(0, "Line broken");
943 dump_line();
944 }
945 *e_code++ = '}';
946 ps.want_blank = true;
947 ps.in_stmt = ps.ind_stmt = false;
948 if (ps.dec_nest > 0) { /* we are in multi-level structure
949 * declaration */
950 dec_ind = di_stack[--ps.dec_nest];
951 if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
952 ps.just_saw_decl = 2;
953 ps.in_decl = true;
954 }
955 prefix_blankline_requested = 0;
956 parse(rbrace); /* let parser know about this */
957 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead
958 && ps.il[ps.tos] >= ps.ind_level;
959 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0)
960 postfix_blankline_requested = 1;
961 break;
962
963 case swstmt: /* got keyword "switch" */
964 sp_sw = true;
965 hd_type = swstmt; /* keep this for when we have seen the
966 * expression */
967 goto copy_id; /* go move the token into buffer */
968
969 case sp_paren: /* token is if, while, for */
970 sp_sw = true; /* the interesting stuff is done after the
971 * expression is scanned */
972 hd_type = (*token == 'i' ? ifstmt :
973 (*token == 'w' ? whilestmt : forstmt));
974
975 /*
976 * remember the type of header for later use by parser
977 */
978 goto copy_id; /* copy the token into line */
979
980 case sp_nparen: /* got else, do */
981 ps.in_stmt = false;
982 if (*token == 'e') {
983 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) {
984 if (opt.verbose)
985 diag(0, "Line broken");
986 dump_line();/* make sure this starts a line */
987 ps.want_blank = false;
988 }
989 force_nl = true;/* also, following stuff must go onto new line */
990 last_else = 1;
991 parse(elselit);
992 }
993 else {
994 if (e_code != s_code) { /* make sure this starts a line */
995 if (opt.verbose)
996 diag(0, "Line broken");
997 dump_line();
998 ps.want_blank = false;
999 }
1000 force_nl = true;/* also, following stuff must go onto new line */
1001 last_else = 0;
1002 parse(dolit);
1003 }
1004 goto copy_id; /* move the token into line */
1005
1006 case type_def:
1007 case storage:
1008 prefix_blankline_requested = 0;
1009 goto copy_id;
1010
1011 case structure:
1012 if (ps.p_l_follow > 0)
1013 goto copy_id;
1014 /* FALLTHROUGH */
1015 case decl: /* we have a declaration type (int, etc.) */
1016 parse(decl); /* let parser worry about indentation */
1017 if (ps.last_token == rparen && ps.tos <= 1) {
1018 if (s_code != e_code) {
1019 dump_line();
1020 ps.want_blank = 0;
1021 }
1022 }
1023 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) {
1024 ps.ind_level = ps.i_l_follow = 1;
1025 ps.ind_stmt = 0;
1026 }
1027 ps.in_or_st = true; /* this might be a structure or initialization
1028 * declaration */
1029 ps.in_decl = ps.decl_on_line = ps.last_token != type_def;
1030 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
1031 ps.just_saw_decl = 2;
1032 prefix_blankline_requested = 0;
1033 for (i = 0; token[i++];); /* get length of token */
1034
1035 if (ps.ind_level == 0 || ps.dec_nest > 0) {
1036 /* global variable or struct member in local variable */
1037 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i;
1038 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0);
1039 } else {
1040 /* local variable */
1041 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i;
1042 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0);
1043 }
1044 goto copy_id;
1045
1046 case funcname:
1047 case ident: /* got an identifier or constant */
1048 if (ps.in_decl) {
1049 if (type_code == funcname) {
1050 ps.in_decl = false;
1051 if (opt.procnames_start_line && s_code != e_code) {
1052 *e_code = '\0';
1053 dump_line();
1054 }
1055 else if (ps.want_blank) {
1056 *e_code++ = ' ';
1057 }
1058 ps.want_blank = false;
1059 }
1060 else if (!ps.block_init && !ps.dumped_decl_indent &&
1061 ps.paren_level == 0) { /* if we are in a declaration, we
1062 * must indent identifier */
1063 indent_declaration(dec_ind, tabs_to_var);
1064 ps.dumped_decl_indent = true;
1065 ps.want_blank = false;
1066 }
1067 }
1068 else if (sp_sw && ps.p_l_follow == 0) {
1069 sp_sw = false;
1070 force_nl = true;
1071 ps.last_u_d = true;
1072 ps.in_stmt = false;
1073 parse(hd_type);
1074 }
1075 copy_id:
1076 {
1077 int len = e_token - s_token;
1078
1079 CHECK_SIZE_CODE(len + 1);
1080 if (ps.want_blank)
1081 *e_code++ = ' ';
1082 memcpy(e_code, s_token, len);
1083 e_code += len;
1084 }
1085 if (type_code != funcname)
1086 ps.want_blank = true;
1087 break;
1088
1089 case strpfx:
1090 {
1091 int len = e_token - s_token;
1092
1093 CHECK_SIZE_CODE(len + 1);
1094 if (ps.want_blank)
1095 *e_code++ = ' ';
1096 memcpy(e_code, token, len);
1097 e_code += len;
1098 }
1099 ps.want_blank = false;
1100 break;
1101
1102 case period: /* treat a period kind of like a binary
1103 * operation */
1104 *e_code++ = '.'; /* move the period into line */
1105 ps.want_blank = false; /* dont put a blank after a period */
1106 break;
1107
1108 case comma:
1109 ps.want_blank = (s_code != e_code); /* only put blank after comma
1110 * if comma does not start the
1111 * line */
1112 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init &&
1113 !ps.dumped_decl_indent && ps.paren_level == 0) {
1114 /* indent leading commas and not the actual identifiers */
1115 indent_declaration(dec_ind - 1, tabs_to_var);
1116 ps.dumped_decl_indent = true;
1117 }
1118 *e_code++ = ',';
1119 if (ps.p_l_follow == 0) {
1120 if (ps.block_init_level <= 0)
1121 ps.block_init = 0;
1122 if (break_comma && (!opt.leave_comma ||
1123 count_spaces_until(compute_code_target(), s_code, e_code) >
1124 opt.max_col - opt.tabsize))
1125 force_nl = true;
1126 }
1127 break;
1128
1129 case preesc: /* got the character '#' */
1130 if ((s_com != e_com) ||
1131 (s_lab != e_lab) ||
1132 (s_code != e_code))
1133 dump_line();
1134 CHECK_SIZE_LAB(1);
1135 *e_lab++ = '#'; /* move whole line to 'label' buffer */
1136 {
1137 int in_comment = 0;
1138 int com_start = 0;
1139 char quote = 0;
1140 int com_end = 0;
1141
1142 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1143 buf_ptr++;
1144 if (buf_ptr >= buf_end)
1145 fill_buffer();
1146 }
1147 while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1148 CHECK_SIZE_LAB(2);
1149 *e_lab = *buf_ptr++;
1150 if (buf_ptr >= buf_end)
1151 fill_buffer();
1152 switch (*e_lab++) {
1153 case BACKSLASH:
1154 if (!in_comment) {
1155 *e_lab++ = *buf_ptr++;
1156 if (buf_ptr >= buf_end)
1157 fill_buffer();
1158 }
1159 break;
1160 case '/':
1161 if (*buf_ptr == '*' && !in_comment && !quote) {
1162 in_comment = 1;
1163 *e_lab++ = *buf_ptr++;
1164 com_start = e_lab - s_lab - 2;
1165 }
1166 break;
1167 case '"':
1168 if (quote == '"')
1169 quote = 0;
1170 break;
1171 case '\'':
1172 if (quote == '\'')
1173 quote = 0;
1174 break;
1175 case '*':
1176 if (*buf_ptr == '/' && in_comment) {
1177 in_comment = 0;
1178 *e_lab++ = *buf_ptr++;
1179 com_end = e_lab - s_lab;
1180 }
1181 break;
1182 }
1183 }
1184
1185 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1186 e_lab--;
1187 if (e_lab - s_lab == com_end && bp_save == NULL) {
1188 /* comment on preprocessor line */
1189 if (sc_end == NULL) { /* if this is the first comment,
1190 * we must set up the buffer */
1191 save_com = sc_buf;
1192 sc_end = &save_com[0];
1193 }
1194 else {
1195 *sc_end++ = '\n'; /* add newline between
1196 * comments */
1197 *sc_end++ = ' ';
1198 --line_no;
1199 }
1200 if (sc_end - save_com + com_end - com_start > sc_size)
1201 errx(1, "input too long");
1202 memmove(sc_end, s_lab + com_start, com_end - com_start);
1203 sc_end += com_end - com_start;
1204 e_lab = s_lab + com_start;
1205 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1206 e_lab--;
1207 bp_save = buf_ptr; /* save current input buffer */
1208 be_save = buf_end;
1209 buf_ptr = save_com; /* fix so that subsequent calls to
1210 * lexi will take tokens out of
1211 * save_com */
1212 *sc_end++ = ' '; /* add trailing blank, just in case */
1213 buf_end = sc_end;
1214 sc_end = NULL;
1215 }
1216 CHECK_SIZE_LAB(1);
1217 *e_lab = '\0'; /* null terminate line */
1218 ps.pcase = false;
1219 }
1220
1221 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */
1222 if ((size_t)ifdef_level < nitems(state_stack)) {
1223 match_state[ifdef_level].tos = -1;
1224 state_stack[ifdef_level++] = ps;
1225 }
1226 else
1227 diag(1, "#if stack overflow");
1228 }
1229 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */
1230 if (ifdef_level <= 0)
1231 diag(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else");
1232 else {
1233 match_state[ifdef_level - 1] = ps;
1234 ps = state_stack[ifdef_level - 1];
1235 }
1236 }
1237 else if (strncmp(s_lab, "#endif", 6) == 0) {
1238 if (ifdef_level <= 0)
1239 diag(1, "Unmatched #endif");
1240 else
1241 ifdef_level--;
1242 } else {
1243 struct directives {
1244 int size;
1245 const char *string;
1246 }
1247 recognized[] = {
1248 {7, "include"},
1249 {6, "define"},
1250 {5, "undef"},
1251 {4, "line"},
1252 {5, "error"},
1253 {6, "pragma"}
1254 };
1255 int d = nitems(recognized);
1256 while (--d >= 0)
1257 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0)
1258 break;
1259 if (d < 0) {
1260 diag(1, "Unrecognized cpp directive");
1261 break;
1262 }
1263 }
1264 if (opt.blanklines_around_conditional_compilation) {
1265 postfix_blankline_requested++;
1266 n_real_blanklines = 0;
1267 }
1268 else {
1269 postfix_blankline_requested = 0;
1270 prefix_blankline_requested = 0;
1271 }
1272 break; /* subsequent processing of the newline
1273 * character will cause the line to be printed */
1274
1275 case comment: /* we have gotten a / followed by * this is a biggie */
1276 pr_comment();
1277 break;
1278 } /* end of big switch stmt */
1279
1280 *e_code = '\0'; /* make sure code section is null terminated */
1281 if (type_code != comment && type_code != newline && type_code != preesc)
1282 ps.last_token = type_code;
1283 } /* end of main while (1) loop */
1284 }
1285
1286 /*
1287 * copy input file to backup file if in_name is /blah/blah/blah/file, then
1288 * backup file will be ".Bfile" then make the backup file the input and
1289 * original input file the output
1290 */
1291 static void
1292 bakcopy(void)
1293 {
1294 int n,
1295 bakchn;
1296 char buff[8 * 1024];
1297 const char *p;
1298
1299 /* construct file name .Bfile */
1300 for (p = in_name; *p; p++); /* skip to end of string */
1301 while (p > in_name && *p != '/') /* find last '/' */
1302 p--;
1303 if (*p == '/')
1304 p++;
1305 sprintf(bakfile, "%s%s", p, simple_backup_suffix);
1306
1307 /* copy in_name to backup file */
1308 bakchn = creat(bakfile, 0600);
1309 if (bakchn < 0)
1310 err(1, "%s", bakfile);
1311 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
1312 if (write(bakchn, buff, n) != n)
1313 err(1, "%s", bakfile);
1314 if (n < 0)
1315 err(1, "%s", in_name);
1316 close(bakchn);
1317 fclose(input);
1318
1319 /* re-open backup file as the input file */
1320 input = fopen(bakfile, "r");
1321 if (input == NULL)
1322 err(1, "%s", bakfile);
1323 /* now the original input file will be the output */
1324 output = fopen(in_name, "w");
1325 if (output == NULL) {
1326 unlink(bakfile);
1327 err(1, "%s", in_name);
1328 }
1329 }
1330
1331 static void
1332 indent_declaration(int cur_dec_ind, int tabs_to_var)
1333 {
1334 int pos = e_code - s_code;
1335 char *startpos = e_code;
1336
1337 /*
1338 * get the tab math right for indentations that are not multiples of tabsize
1339 */
1340 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) {
1341 pos += (ps.ind_level * opt.ind_size) % opt.tabsize;
1342 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize;
1343 }
1344 if (tabs_to_var) {
1345 int tpos;
1346
1347 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize);
1348 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) {
1349 *e_code++ = '\t';
1350 pos = tpos;
1351 }
1352 }
1353 CHECK_SIZE_CODE(cur_dec_ind - pos + 1);
1354 while (pos < cur_dec_ind) {
1355 *e_code++ = ' ';
1356 pos++;
1357 }
1358 if (e_code == startpos && ps.want_blank) {
1359 *e_code++ = ' ';
1360 ps.want_blank = false;
1361 }
1362 }
1363