indent.c revision 1.31 1 /* $NetBSD: indent.c,v 1.31 2021/03/07 20:30:48 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 #ifndef lint
42 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
43 #endif /* not lint */
44 #endif
45
46 #include <sys/cdefs.h>
47 #ifndef lint
48 #if defined(__NetBSD__)
49 __RCSID("$NetBSD: indent.c,v 1.31 2021/03/07 20:30:48 rillig Exp $");
50 #elif defined(__FreeBSD__)
51 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
52 #endif
53 #endif
54
55 #include <sys/param.h>
56 #if HAVE_CAPSICUM
57 #include <sys/capsicum.h>
58 #include <capsicum_helpers.h>
59 #endif
60 #include <err.h>
61 #include <errno.h>
62 #include <fcntl.h>
63 #include <unistd.h>
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #include <ctype.h>
68
69 #include "indent.h"
70
71 struct options opt; /* formatting options; main() turns opt.else_if on by default and normalises several fields (com_ind, decl_com_ind, ...) after the command line scan */
72 struct parser_state ps; /* parser state for the input being formatted; main() initialises it and passes &ps to lexi() */
73
74 char *labbuf; /* label part of the line being built; allocated with malloc(bufsize) in main() */
75 char *s_lab; /* start of the label text; main() sets it to labbuf + 1 */
76 char *e_lab; /* one past the last label character stored; equals s_lab when there is no label */
77 char *l_lab; /* set to labbuf + bufsize - 5 in main(); presumably the fill limit checked by CHECK_SIZE_LAB (macro not visible here) */
78
79 char *codebuf; /* code part of the line being built; allocated with malloc(bufsize) in main() */
80 char *s_code; /* start of the code text; main() sets it to codebuf + 1 */
81 char *e_code; /* one past the last code character stored; main() appends tokens here, equals s_code when empty */
82 char *l_code; /* set to codebuf + bufsize - 5 in main(); presumably the fill limit checked by CHECK_SIZE_CODE (macro not visible here) */
83
84 char *combuf; /* comment part of the line being built; allocated with malloc(bufsize) in main() */
85 char *s_com; /* start of the comment text; main() sets it to combuf + 1 */
86 char *e_com; /* one past the last comment character stored; equals s_com when no comment is pending */
87 char *l_com; /* set to combuf + bufsize - 5 in main(); presumably the fill limit for combuf -- TODO confirm where it is checked */
88
89 char *tokenbuf; /* buffer for the text of the current token; allocated with malloc(bufsize) in main() */
90 char *s_token; /* start of the token text; main() sets it to tokenbuf + 1 */
91 char *e_token; /* one past the last token character; e_token - s_token is the length main() copies into codebuf */
92 char *l_token; /* set to tokenbuf + bufsize - 5 in main(); presumably the fill limit for tokenbuf -- TODO confirm in lexi() */
93
94 char *in_buffer; /* input buffer; initially a 10-byte allocation made in main() */
95 char *in_buffer_limit; /* set to in_buffer + 8 in main(); presumably the last usable position -- TODO confirm in fill_buffer() */
96 char *buf_ptr; /* next input character to read; points into in_buffer or, while saved text is replayed, into sc_buf */
97 char *buf_end; /* end of the data buf_ptr is reading; main() calls fill_buffer() once buf_ptr >= buf_end */
98
99 char sc_buf[sc_size]; /* storage for newlines and comments saved while main() searches for the statement after if (), while (), else, etc. */
100 char *save_com; /* start of the saved text inside sc_buf */
101 char *sc_end; /* next free position in the saved text; NULL when nothing is being saved */
102
103 char *bp_save; /* value of buf_ptr saved when main() switches input over to save_com */
104 char *be_save; /* value of buf_end saved when main() switches input over to save_com */
105
106 int found_err; /* cleared at the start of main() and passed to exit() at end of input; presumably set by diag() -- TODO confirm */
107 int n_real_blanklines; /* not used in the visible part of this file; presumably counts blank lines seen in the input -- TODO confirm */
108 int prefix_blankline_requested; /* main() only ever clears this (at '{', '}', declarations, typedef/storage); setter and consumer are not visible here */
109 int postfix_blankline_requested; /* set to 1 by main() after a procedure body or after declarations at the top of a procedure, depending on options */
110 int break_comma; /* when false, a newline after a comma always dumps the line (see the newline case in main()) */
111 float case_ind; /* not used in the visible part of this file; presumably the indentation applied to case labels -- TODO confirm */
112 int code_lines; /* divisor of the "(Lines with comments)/(Lines with code)" ratio printed in verbose mode */
113 int had_eof; /* initialised to false in main(); presumably set once the input is exhausted -- TODO confirm in fill_buffer() */
114 int line_no; /* current input line number; starts at 1 and is incremented for every newline token */
115 int inhibit_formatting; /* not used in the visible part of this file -- meaning to be confirmed at its point of use */
116 int suppress_blanklines; /* not used in the visible part of this file -- meaning to be confirmed at its point of use */
117
118 int ifdef_level; /* not used in the visible part of this file; presumably the current #if nesting depth -- TODO confirm */
119 struct parser_state state_stack[5]; /* not used in the visible part of this file; presumably parser states saved per #if level -- TODO confirm */
120 struct parser_state match_state[5]; /* not used in the visible part of this file; presumably parser states kept for matching #else branches -- TODO confirm */
121
122 FILE *input; /* stream being formatted; main() opens the first non-option argument or falls back to stdin */
123 FILE *output; /* destination stream; main() opens the second non-option argument, or uses stdout when reading stdin */
124
125 static void bakcopy(void); /* called by main() when an input file but no output file was given, after out_name is set to in_name; defined outside the visible part of this file */
126 static void indent_declaration(int, int); /* main() passes the declaration indentation (dec_ind, possibly reduced) and tabs_to_var; defined outside the visible part of this file */
127
128 const char *in_name = "Standard Input"; /* will always point to name of input
129 * file */
130 const char *out_name = "Standard Output"; /* will always point to name
131 * of output file */
132 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup
133 * files */
134 char bakfile[MAXPATHLEN] = ""; /* not used in the visible part of this file; presumably the path of the backup file written by bakcopy() -- TODO confirm */
135
136 int
137 main(int argc, char **argv)
138 {
139 #if HAVE_CAPSICUM
140 cap_rights_t rights;
141 #endif
142
143 int dec_ind; /* current indentation for declarations */
144 int di_stack[20]; /* a stack of structure indentation levels */
145 int force_nl; /* when true, code must be broken */
146 token_type hd_type = end_of_file; /* used to store type of stmt
147 * for if (...), for (...), etc */
148 int i; /* local loop counter */
149 int scase; /* set to true when we see a case, so we will
150 * know what to do with the following colon */
151 int sp_sw; /* when true, we are in the expression of
152 * if(...), while(...), etc. */
153 int squest; /* when this is positive, we have seen a ?
154 * without the matching : in a <c>?<s>:<s>
155 * construct */
156 const char *t_ptr; /* used for copying tokens */
157 int tabs_to_var; /* true if using tabs to indent to var name */
158 token_type type_code; /* returned by lexi */
159
160 int last_else = 0; /* true iff last keyword was an else */
161 const char *profile_name = NULL;
162 const char *envval = NULL;
163 struct parser_state transient_state; /* a copy for lookup */
164
165 /*-----------------------------------------------*\
166 | INITIALIZATION |
167 \*-----------------------------------------------*/
168
169 found_err = 0;
170
171 ps.p_stack[0] = stmt; /* this is the parser's stack */
172 ps.last_nl = true; /* this is true if the last thing scanned was
173 * a newline */
174 ps.last_token = semicolon;
175 combuf = (char *) malloc(bufsize);
176 if (combuf == NULL)
177 err(1, NULL);
178 labbuf = (char *) malloc(bufsize);
179 if (labbuf == NULL)
180 err(1, NULL);
181 codebuf = (char *) malloc(bufsize);
182 if (codebuf == NULL)
183 err(1, NULL);
184 tokenbuf = (char *) malloc(bufsize);
185 if (tokenbuf == NULL)
186 err(1, NULL);
187 alloc_typenames();
188 init_constant_tt();
189 l_com = combuf + bufsize - 5;
190 l_lab = labbuf + bufsize - 5;
191 l_code = codebuf + bufsize - 5;
192 l_token = tokenbuf + bufsize - 5;
193 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and
194 * comment buffers */
195 combuf[1] = codebuf[1] = labbuf[1] = '\0';
196 opt.else_if = 1; /* Default else-if special processing to on */
197 s_lab = e_lab = labbuf + 1;
198 s_code = e_code = codebuf + 1;
199 s_com = e_com = combuf + 1;
200 s_token = e_token = tokenbuf + 1;
201
202 in_buffer = (char *) malloc(10);
203 if (in_buffer == NULL)
204 err(1, NULL);
205 in_buffer_limit = in_buffer + 8;
206 buf_ptr = buf_end = in_buffer;
207 line_no = 1;
208 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
209 sp_sw = force_nl = false;
210 ps.in_or_st = false;
211 ps.bl_line = true;
212 dec_ind = 0;
213 di_stack[ps.dec_nest = 0] = 0;
214 ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
215
216 scase = ps.pcase = false;
217 squest = 0;
218 sc_end = NULL;
219 bp_save = NULL;
220 be_save = NULL;
221
222 output = NULL;
223 tabs_to_var = 0;
224
225 envval = getenv("SIMPLE_BACKUP_SUFFIX");
226 if (envval)
227 simple_backup_suffix = envval;
228
229 /*--------------------------------------------------*\
230 | COMMAND LINE SCAN |
231 \*--------------------------------------------------*/
232
233 #ifdef undef
234 max_col = 78; /* -l78 */
235 lineup_to_parens = 1; /* -lp */
236 lineup_to_parens_always = 0; /* -nlpl */
237 ps.ljust_decl = 0; /* -ndj */
238 ps.com_ind = 33; /* -c33 */
239 star_comment_cont = 1; /* -sc */
240 ps.ind_size = 8; /* -i8 */
241 verbose = 0;
242 ps.decl_indent = 16; /* -di16 */
243 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value
244 * by an arg, we will set this equal to
245 * ps.decl_ind */
246 ps.indent_parameters = 1; /* -ip */
247 ps.decl_com_ind = 0; /* if this is not set to some positive value
248 * by an arg, we will set this equal to
249 * ps.com_ind */
250 btype_2 = 1; /* -br */
251 cuddle_else = 1; /* -ce */
252 ps.unindent_displace = 0; /* -d0 */
253 ps.case_indent = 0; /* -cli0 */
254 format_block_comments = 1; /* -fcb */
255 format_col1_comments = 1; /* -fc1 */
256 procnames_start_line = 1; /* -psl */
257 proc_calls_space = 0; /* -npcs */
258 comment_delimiter_on_blankline = 1; /* -cdb */
259 ps.leave_comma = 1; /* -nbc */
260 #endif
261
262 for (i = 1; i < argc; ++i)
263 if (strcmp(argv[i], "-npro") == 0)
264 break;
265 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0')
266 profile_name = argv[i]; /* non-empty -P (set profile) */
267 set_defaults();
268 if (i >= argc)
269 set_profile(profile_name);
270
271 for (i = 1; i < argc; ++i) {
272
273 /*
274 * look thru args (if any) for changes to defaults
275 */
276 if (argv[i][0] != '-') {/* no flag on parameter */
277 if (input == NULL) { /* we must have the input file */
278 in_name = argv[i]; /* remember name of input file */
279 input = fopen(in_name, "r");
280 if (input == NULL) /* check for open error */
281 err(1, "%s", in_name);
282 continue;
283 }
284 else if (output == NULL) { /* we have the output file */
285 out_name = argv[i]; /* remember name of output file */
286 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite
287 * the file */
288 errx(1, "input and output files must be different");
289 }
290 output = fopen(out_name, "w");
291 if (output == NULL) /* check for create error */
292 err(1, "%s", out_name);
293 continue;
294 }
295 errx(1, "unknown parameter: %s", argv[i]);
296 }
297 else
298 set_option(argv[i]);
299 } /* end of for */
300 if (input == NULL)
301 input = stdin;
302 if (output == NULL) {
303 if (input == stdin)
304 output = stdout;
305 else {
306 out_name = in_name;
307 bakcopy();
308 }
309 }
310
311 #if HAVE_CAPSICUM
312 /* Restrict input/output descriptors and enter Capsicum sandbox. */
313 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
314 if (caph_rights_limit(fileno(output), &rights) < 0)
315 err(EXIT_FAILURE, "unable to limit rights for %s", out_name);
316 cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
317 if (caph_rights_limit(fileno(input), &rights) < 0)
318 err(EXIT_FAILURE, "unable to limit rights for %s", in_name);
319 if (caph_enter() < 0)
320 err(EXIT_FAILURE, "unable to enter capability mode");
321 #endif
322
323 if (opt.com_ind <= 1)
324 opt.com_ind = 2; /* don't put normal comments before column 2 */
325 if (opt.block_comment_max_col <= 0)
326 opt.block_comment_max_col = opt.max_col;
327 if (opt.local_decl_indent < 0) /* if not specified by user, set this */
328 opt.local_decl_indent = opt.decl_indent;
329 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */
330 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind;
331 if (opt.continuation_indent == 0)
332 opt.continuation_indent = opt.ind_size;
333 fill_buffer(); /* get first batch of stuff into input buffer */
334
335 parse(semicolon);
336 {
337 char *p = buf_ptr;
338 int col = 1;
339
340 while (1) {
341 if (*p == ' ')
342 col++;
343 else if (*p == '\t')
344 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1;
345 else
346 break;
347 p++;
348 }
349 if (col > opt.ind_size)
350 ps.ind_level = ps.i_l_follow = col / opt.ind_size;
351 }
352
353 /*
354 * START OF MAIN LOOP
355 */
356
357 while (1) { /* this is the main loop. it will go until we
358 * reach eof */
359 int comment_buffered = false;
360
361 type_code = lexi(&ps); /* lexi reads one token. The actual
362 * characters read are stored in "token". lexi
363 * returns a code indicating the type of token */
364
365 /*
366 * The following code moves newlines and comments following an if (),
367 * while (), else, etc. up to the start of the following stmt to
368 * a buffer. This allows proper handling of both kinds of brace
369 * placement (-br, -bl) and cuddling "else" (-ce).
370 */
371
372 while (ps.search_brace) {
373 switch (type_code) {
374 case newline:
375 if (sc_end == NULL) {
376 save_com = sc_buf;
377 save_com[0] = save_com[1] = ' ';
378 sc_end = &save_com[2];
379 }
380 *sc_end++ = '\n';
381 /*
382 * We may have inherited a force_nl == true from the previous
383 * token (like a semicolon). But once we know that a newline
384 * has been scanned in this loop, force_nl should be false.
385 *
386 * However, the force_nl == true must be preserved if newline
387 * is never scanned in this loop, so this assignment cannot be
388 * done earlier.
389 */
390 force_nl = false;
391 case form_feed:
392 break;
393 case comment:
394 if (sc_end == NULL) {
395 /*
396 * Copy everything from the start of the line, because
397 * pr_comment() will use that to calculate original
398 * indentation of a boxed comment.
399 */
400 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4);
401 save_com = sc_buf + (buf_ptr - in_buffer - 4);
402 save_com[0] = save_com[1] = ' ';
403 sc_end = &save_com[2];
404 }
405 comment_buffered = true;
406 *sc_end++ = '/'; /* copy in start of comment */
407 *sc_end++ = '*';
408 for (;;) { /* loop until we get to the end of the comment */
409 *sc_end = *buf_ptr++;
410 if (buf_ptr >= buf_end)
411 fill_buffer();
412 if (*sc_end++ == '*' && *buf_ptr == '/')
413 break; /* we are at end of comment */
414 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer
415 * overflow */
416 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever");
417 fflush(output);
418 exit(1);
419 }
420 }
421 *sc_end++ = '/'; /* add ending slash */
422 if (++buf_ptr >= buf_end) /* get past / in buffer */
423 fill_buffer();
424 break;
425 case lbrace:
426 /*
427 * Put KNF-style lbraces before the buffered up tokens and
428 * jump out of this loop in order to avoid copying the token
429 * again under the default case of the switch below.
430 */
431 if (sc_end != NULL && opt.btype_2) {
432 save_com[0] = '{';
433 /*
434 * Originally the lbrace may have been alone on its own
435 * line, but it will be moved into "the else's line", so
436 * if there was a newline resulting from the "{" before,
437 * it must be scanned now and ignored.
438 */
439 while (isspace((unsigned char)*buf_ptr)) {
440 if (++buf_ptr >= buf_end)
441 fill_buffer();
442 if (*buf_ptr == '\n')
443 break;
444 }
445 goto sw_buffer;
446 }
447 /* FALLTHROUGH */
448 default: /* it is the start of a normal statement */
449 {
450 int remove_newlines;
451
452 remove_newlines =
453 /* "} else" */
454 (type_code == sp_nparen && *token == 'e' &&
455 e_code != s_code && e_code[-1] == '}')
456 /* "else if" */
457 || (type_code == sp_paren && *token == 'i' &&
458 last_else && opt.else_if);
459 if (remove_newlines)
460 force_nl = false;
461 if (sc_end == NULL) { /* ignore buffering if
462 * comment wasn't saved up */
463 ps.search_brace = false;
464 goto check_type;
465 }
466 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) {
467 sc_end--;
468 }
469 if (opt.swallow_optional_blanklines ||
470 (!comment_buffered && remove_newlines)) {
471 force_nl = !remove_newlines;
472 while (sc_end > save_com && sc_end[-1] == '\n') {
473 sc_end--;
474 }
475 }
476 if (force_nl) { /* if we should insert a nl here, put
477 * it into the buffer */
478 force_nl = false;
479 --line_no; /* this will be re-increased when the
480 * newline is read from the buffer */
481 *sc_end++ = '\n';
482 *sc_end++ = ' ';
483 if (opt.verbose) /* print error msg if the line was
484 * not already broken */
485 diag(0, "Line broken");
486 }
487 for (t_ptr = token; *t_ptr; ++t_ptr)
488 *sc_end++ = *t_ptr;
489
490 sw_buffer:
491 ps.search_brace = false; /* stop looking for start of
492 * stmt */
493 bp_save = buf_ptr; /* save current input buffer */
494 be_save = buf_end;
495 buf_ptr = save_com; /* fix so that subsequent calls to
496 * lexi will take tokens out of
497 * save_com */
498 *sc_end++ = ' ';/* add trailing blank, just in case */
499 buf_end = sc_end;
500 sc_end = NULL;
501 break;
502 }
503 } /* end of switch */
504 /*
505 * We must make this check, just in case there was an unexpected
506 * EOF.
507 */
508 if (type_code != end_of_file) {
509 /*
510 * The only intended purpose of calling lexi() below is to
511 * categorize the next token in order to decide whether to
512 * continue buffering forthcoming tokens. Once the buffering
513 * is over, lexi() will be called again elsewhere on all of
514 * the tokens - this time for normal processing.
515 *
516 * Calling it for this purpose is a bug, because lexi() also
517 * changes the parser state and discards leading whitespace,
518 * which is needed mostly for comment-related considerations.
519 *
520 * Work around the former problem by giving lexi() a copy of
521 * the current parser state and discard it if the call turned
522 * out to be just a look ahead.
523 *
524 * Work around the latter problem by copying all whitespace
525 * characters into the buffer so that the later lexi() call
526 * will read them.
527 */
528 if (sc_end != NULL) {
529 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
530 *sc_end++ = *buf_ptr++;
531 if (sc_end >= &save_com[sc_size]) {
532 errx(1, "input too long");
533 }
534 }
535 if (buf_ptr >= buf_end) {
536 fill_buffer();
537 }
538 }
539 transient_state = ps;
540 type_code = lexi(&transient_state); /* read another token */
541 if (type_code != newline && type_code != form_feed &&
542 type_code != comment && !transient_state.search_brace) {
543 ps = transient_state;
544 }
545 }
546 } /* end of while (search_brace) */
547 last_else = 0;
548 check_type:
549 if (type_code == end_of_file) { /* we got eof */
550 if (s_lab != e_lab || s_code != e_code
551 || s_com != e_com) /* must dump end of line */
552 dump_line();
553 if (ps.tos > 1) /* check for balanced braces */
554 diag(1, "Stuff missing from end of file");
555
556 if (opt.verbose) {
557 printf("There were %d output lines and %d comments\n",
558 ps.out_lines, ps.out_coms);
559 printf("(Lines with comments)/(Lines with code): %6.3f\n",
560 (1.0 * ps.com_lines) / code_lines);
561 }
562 fflush(output);
563 exit(found_err);
564 }
565 if (
566 (type_code != comment) &&
567 (type_code != newline) &&
568 (type_code != preesc) &&
569 (type_code != form_feed)) {
570 if (force_nl &&
571 (type_code != semicolon) &&
572 (type_code != lbrace || !opt.btype_2)) {
573 /* we should force a broken line here */
574 if (opt.verbose)
575 diag(0, "Line broken");
576 dump_line();
577 ps.want_blank = false; /* dont insert blank at line start */
578 force_nl = false;
579 }
580 ps.in_stmt = true; /* turn on flag which causes an extra level of
581 * indentation. this is turned off by a ; or
582 * '}' */
583 if (s_com != e_com) { /* the turkey has embedded a comment
584 * in a line. fix it */
585 int len = e_com - s_com;
586
587 CHECK_SIZE_CODE(len + 3);
588 *e_code++ = ' ';
589 memcpy(e_code, s_com, len);
590 e_code += len;
591 *e_code++ = ' ';
592 *e_code = '\0'; /* null terminate code sect */
593 ps.want_blank = false;
594 e_com = s_com;
595 }
596 }
597 else if (type_code != comment) /* preserve force_nl thru a comment */
598 force_nl = false; /* cancel forced newline after newline, form
599 * feed, etc */
600
601
602
603 /*-----------------------------------------------------*\
604 | do switch on type of token scanned |
605 \*-----------------------------------------------------*/
606 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code
607 * before the next CHECK_SIZE_CODE or
608 * dump_line() is 2. After that there's the
609 * final increment for the null character. */
610 switch (type_code) { /* now, decide what to do with the token */
611
612 case form_feed: /* found a form feed in line */
613 ps.use_ff = true; /* a form feed is treated much like a newline */
614 dump_line();
615 ps.want_blank = false;
616 break;
617
618 case newline:
619 if (ps.last_token != comma || ps.p_l_follow > 0
620 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
621 dump_line();
622 ps.want_blank = false;
623 }
624 ++line_no; /* keep track of input line number */
625 break;
626
627 case lparen: /* got a '(' or '[' */
628 /* count parens to make Healy happy */
629 if (++ps.p_l_follow == nitems(ps.paren_indents)) {
630 diag(0, "Reached internal limit of %zu unclosed parens",
631 nitems(ps.paren_indents));
632 ps.p_l_follow--;
633 }
634 if (*token == '[')
635 /* not a function pointer declaration or a function call */;
636 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent &&
637 ps.procname[0] == '\0' && ps.paren_level == 0) {
638 /* function pointer declarations */
639 indent_declaration(dec_ind, tabs_to_var);
640 ps.dumped_decl_indent = true;
641 }
642 else if (ps.want_blank &&
643 ((ps.last_token != ident && ps.last_token != funcname) ||
644 opt.proc_calls_space ||
645 (ps.keyword == rw_sizeof ? opt.Bill_Shannon :
646 ps.keyword != rw_0 && ps.keyword != rw_offsetof)))
647 *e_code++ = ' ';
648 ps.want_blank = false;
649 *e_code++ = token[0];
650 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1;
651 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent
652 && ps.paren_indents[0] < 2 * opt.ind_size)
653 ps.paren_indents[0] = 2 * opt.ind_size;
654 if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
655 /*
656 * this is a kluge to make sure that declarations will be
657 * aligned right if proc decl has an explicit type on it, i.e.
658 * "int a(x) {..."
659 */
660 parse(semicolon); /* I said this was a kluge... */
661 ps.in_or_st = false; /* turn off flag for structure decl or
662 * initialization */
663 }
664 /* parenthesized type following sizeof or offsetof is not a cast */
665 if (ps.keyword == rw_offsetof || ps.keyword == rw_sizeof)
666 ps.not_cast_mask |= 1 << ps.p_l_follow;
667 break;
668
669 case rparen: /* got a ')' or ']' */
670 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) {
671 ps.last_u_d = true;
672 ps.cast_mask &= (1 << ps.p_l_follow) - 1;
673 ps.want_blank = opt.space_after_cast;
674 } else
675 ps.want_blank = true;
676 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1;
677 if (--ps.p_l_follow < 0) {
678 ps.p_l_follow = 0;
679 diag(0, "Extra %c", *token);
680 }
681 if (e_code == s_code) /* if the paren starts the line */
682 ps.paren_level = ps.p_l_follow; /* then indent it */
683
684 *e_code++ = token[0];
685
686 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
687 * (...), or some such */
688 sp_sw = false;
689 force_nl = true;/* must force newline after if */
690 ps.last_u_d = true; /* inform lexi that a following
691 * operator is unary */
692 ps.in_stmt = false; /* dont use stmt continuation
693 * indentation */
694
695 parse(hd_type); /* let parser worry about if, or whatever */
696 }
697 ps.search_brace = opt.btype_2; /* this should ensure that
698 * constructs such as main(){...}
699 * and int[]{...} have their braces
700 * put in the right place */
701 break;
702
703 case unary_op: /* this could be any unary operation */
704 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init &&
705 ps.procname[0] == '\0' && ps.paren_level == 0) {
706 /* pointer declarations */
707
708 /*
709 * if this is a unary op in a declaration, we should indent
710 * this token
711 */
712 for (i = 0; token[i]; ++i)
713 /* find length of token */;
714 indent_declaration(dec_ind - i, tabs_to_var);
715 ps.dumped_decl_indent = true;
716 }
717 else if (ps.want_blank)
718 *e_code++ = ' ';
719
720 {
721 int len = e_token - s_token;
722
723 CHECK_SIZE_CODE(len);
724 memcpy(e_code, token, len);
725 e_code += len;
726 }
727 ps.want_blank = false;
728 break;
729
730 case binary_op: /* any binary operation */
731 {
732 int len = e_token - s_token;
733
734 CHECK_SIZE_CODE(len + 1);
735 if (ps.want_blank)
736 *e_code++ = ' ';
737 memcpy(e_code, token, len);
738 e_code += len;
739 }
740 ps.want_blank = true;
741 break;
742
743 case postop: /* got a trailing ++ or -- */
744 *e_code++ = token[0];
745 *e_code++ = token[1];
746 ps.want_blank = true;
747 break;
748
749 case question: /* got a ? */
750 squest++; /* this will be used when a later colon
751 * appears so we can distinguish the
752 * <c>?<n>:<n> construct */
753 if (ps.want_blank)
754 *e_code++ = ' ';
755 *e_code++ = '?';
756 ps.want_blank = true;
757 break;
758
759 case casestmt: /* got word 'case' or 'default' */
760 scase = true; /* so we can process the later colon properly */
761 goto copy_id;
762
763 case colon: /* got a ':' */
764 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */
765 --squest;
766 if (ps.want_blank)
767 *e_code++ = ' ';
768 *e_code++ = ':';
769 ps.want_blank = true;
770 break;
771 }
772 if (ps.in_or_st) {
773 *e_code++ = ':';
774 ps.want_blank = false;
775 break;
776 }
777 ps.in_stmt = false; /* seeing a label does not imply we are in a
778 * stmt */
779 /*
780 * turn everything so far into a label
781 */
782 {
783 int len = e_code - s_code;
784
785 CHECK_SIZE_LAB(len + 3);
786 memcpy(e_lab, s_code, len);
787 e_lab += len;
788 *e_lab++ = ':';
789 *e_lab = '\0';
790 e_code = s_code;
791 }
792 force_nl = ps.pcase = scase; /* ps.pcase will be used by
793 * dump_line to decide how to
794 * indent the label. force_nl
795 * will force a case n: to be
796 * on a line by itself */
797 scase = false;
798 ps.want_blank = false;
799 break;
800
801 case semicolon: /* got a ';' */
802 if (ps.dec_nest == 0)
803 ps.in_or_st = false;/* we are not in an initialization or
804 * structure declaration */
805 scase = false; /* these will only need resetting in an error */
806 squest = 0;
807 if (ps.last_token == rparen)
808 ps.in_parameter_declaration = 0;
809 ps.cast_mask = 0;
810 ps.not_cast_mask = 0;
811 ps.block_init = 0;
812 ps.block_init_level = 0;
813 ps.just_saw_decl--;
814
815 if (ps.in_decl && s_code == e_code && !ps.block_init &&
816 !ps.dumped_decl_indent && ps.paren_level == 0) {
817 /* indent stray semicolons in declarations */
818 indent_declaration(dec_ind - 1, tabs_to_var);
819 ps.dumped_decl_indent = true;
820 }
821
822 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level
823 * structure declaration, we
824 * arent any more */
825
826 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
827
828 /*
829 * This should be true iff there were unbalanced parens in the
830 * stmt. It is a bit complicated, because the semicolon might
831 * be in a for stmt
832 */
833 diag(1, "Unbalanced parens");
834 ps.p_l_follow = 0;
835 if (sp_sw) { /* this is a check for an if, while, etc. with
836 * unbalanced parens */
837 sp_sw = false;
838 parse(hd_type); /* dont lose the if, or whatever */
839 }
840 }
841 *e_code++ = ';';
842 ps.want_blank = true;
843 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the
844 * middle of a stmt */
845
846 if (!sp_sw) { /* if not if for (;;) */
847 parse(semicolon); /* let parser know about end of stmt */
848 force_nl = true;/* force newline after an end of stmt */
849 }
850 break;
851
852 case lbrace: /* got a '{' */
853 ps.in_stmt = false; /* dont indent the {} */
854 if (!ps.block_init)
855 force_nl = true;/* force other stuff on same line as '{' onto
856 * new line */
857 else if (ps.block_init_level <= 0)
858 ps.block_init_level = 1;
859 else
860 ps.block_init_level++;
861
862 if (s_code != e_code && !ps.block_init) {
863 if (!opt.btype_2) {
864 dump_line();
865 ps.want_blank = false;
866 }
867 else if (ps.in_parameter_declaration && !ps.in_or_st) {
868 ps.i_l_follow = 0;
869 if (opt.function_brace_split) { /* dump the line prior
870 * to the brace ... */
871 dump_line();
872 ps.want_blank = false;
873 } else /* add a space between the decl and brace */
874 ps.want_blank = true;
875 }
876 }
877 if (ps.in_parameter_declaration)
878 prefix_blankline_requested = 0;
879
880 if (ps.p_l_follow > 0) { /* check for preceding unbalanced
881 * parens */
882 diag(1, "Unbalanced parens");
883 ps.p_l_follow = 0;
884 if (sp_sw) { /* check for unclosed if, for, etc. */
885 sp_sw = false;
886 parse(hd_type);
887 ps.ind_level = ps.i_l_follow;
888 }
889 }
890 if (s_code == e_code)
891 ps.ind_stmt = false; /* dont put extra indentation on line
892 * with '{' */
893 if (ps.in_decl && ps.in_or_st) { /* this is either a structure
894 * declaration or an init */
895 di_stack[ps.dec_nest] = dec_ind;
896 if (++ps.dec_nest == nitems(di_stack)) {
897 diag(0, "Reached internal limit of %zu struct levels",
898 nitems(di_stack));
899 ps.dec_nest--;
900 }
901 /* ? dec_ind = 0; */
902 }
903 else {
904 ps.decl_on_line = false; /* we can't be in the middle of
905 * a declaration, so don't do
906 * special indentation of
907 * comments */
908 if (opt.blanklines_after_declarations_at_proctop
909 && ps.in_parameter_declaration)
910 postfix_blankline_requested = 1;
911 ps.in_parameter_declaration = 0;
912 ps.in_decl = false;
913 }
914 dec_ind = 0;
915 parse(lbrace); /* let parser know about this */
916 if (ps.want_blank) /* put a blank before '{' if '{' is not at
917 * start of line */
918 *e_code++ = ' ';
919 ps.want_blank = false;
920 *e_code++ = '{';
921 ps.just_saw_decl = 0;
922 break;
923
924 case rbrace: /* got a '}' */
925 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
926 * omitted in
927 * declarations */
928 parse(semicolon);
929 if (ps.p_l_follow) {/* check for unclosed if, for, else. */
930 diag(1, "Unbalanced parens");
931 ps.p_l_follow = 0;
932 sp_sw = false;
933 }
934 ps.just_saw_decl = 0;
935 ps.block_init_level--;
936 if (s_code != e_code && !ps.block_init) { /* '}' must be first on
937 * line */
938 if (opt.verbose)
939 diag(0, "Line broken");
940 dump_line();
941 }
942 *e_code++ = '}';
943 ps.want_blank = true;
944 ps.in_stmt = ps.ind_stmt = false;
945 if (ps.dec_nest > 0) { /* we are in multi-level structure
946 * declaration */
947 dec_ind = di_stack[--ps.dec_nest];
948 if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
949 ps.just_saw_decl = 2;
950 ps.in_decl = true;
951 }
952 prefix_blankline_requested = 0;
953 parse(rbrace); /* let parser know about this */
954 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead
955 && ps.il[ps.tos] >= ps.ind_level;
956 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0)
957 postfix_blankline_requested = 1;
958 break;
959
960 case swstmt: /* got keyword "switch" */
961 sp_sw = true;
962 hd_type = swstmt; /* keep this for when we have seen the
963 * expression */
964 goto copy_id; /* go move the token into buffer */
965
966 case sp_paren: /* token is if, while, for */
967 sp_sw = true; /* the interesting stuff is done after the
968 * expression is scanned */
969 hd_type = (*token == 'i' ? ifstmt :
970 (*token == 'w' ? whilestmt : forstmt));
971
972 /*
973 * remember the type of header for later use by parser
974 */
975 goto copy_id; /* copy the token into line */
976
977 case sp_nparen: /* got else, do */
978 ps.in_stmt = false;
979 if (*token == 'e') {
980 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) {
981 if (opt.verbose)
982 diag(0, "Line broken");
983 dump_line();/* make sure this starts a line */
984 ps.want_blank = false;
985 }
986 force_nl = true;/* also, following stuff must go onto new line */
987 last_else = 1;
988 parse(elselit);
989 }
990 else {
991 if (e_code != s_code) { /* make sure this starts a line */
992 if (opt.verbose)
993 diag(0, "Line broken");
994 dump_line();
995 ps.want_blank = false;
996 }
997 force_nl = true;/* also, following stuff must go onto new line */
998 last_else = 0;
999 parse(dolit);
1000 }
1001 goto copy_id; /* move the token into line */
1002
1003 case type_def:
1004 case storage:
1005 prefix_blankline_requested = 0;
1006 goto copy_id;
1007
1008 case structure:
1009 if (ps.p_l_follow > 0)
1010 goto copy_id;
1011 /* FALLTHROUGH */
1012 case decl: /* we have a declaration type (int, etc.) */
1013 parse(decl); /* let parser worry about indentation */
1014 if (ps.last_token == rparen && ps.tos <= 1) {
1015 if (s_code != e_code) {
1016 dump_line();
1017 ps.want_blank = 0;
1018 }
1019 }
1020 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) {
1021 ps.ind_level = ps.i_l_follow = 1;
1022 ps.ind_stmt = 0;
1023 }
1024 ps.in_or_st = true; /* this might be a structure or initialization
1025 * declaration */
1026 ps.in_decl = ps.decl_on_line = ps.last_token != type_def;
1027 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
1028 ps.just_saw_decl = 2;
1029 prefix_blankline_requested = 0;
1030 for (i = 0; token[i++];); /* get length of token */
1031
1032 if (ps.ind_level == 0 || ps.dec_nest > 0) {
1033 /* global variable or struct member in local variable */
1034 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i;
1035 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0);
1036 } else {
1037 /* local variable */
1038 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i;
1039 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0);
1040 }
1041 goto copy_id;
1042
1043 case funcname:
1044 case ident: /* got an identifier or constant */
1045 if (ps.in_decl) {
1046 if (type_code == funcname) {
1047 ps.in_decl = false;
1048 if (opt.procnames_start_line && s_code != e_code) {
1049 *e_code = '\0';
1050 dump_line();
1051 }
1052 else if (ps.want_blank) {
1053 *e_code++ = ' ';
1054 }
1055 ps.want_blank = false;
1056 }
1057 else if (!ps.block_init && !ps.dumped_decl_indent &&
1058 ps.paren_level == 0) { /* if we are in a declaration, we
1059 * must indent identifier */
1060 indent_declaration(dec_ind, tabs_to_var);
1061 ps.dumped_decl_indent = true;
1062 ps.want_blank = false;
1063 }
1064 }
1065 else if (sp_sw && ps.p_l_follow == 0) {
1066 sp_sw = false;
1067 force_nl = true;
1068 ps.last_u_d = true;
1069 ps.in_stmt = false;
1070 parse(hd_type);
1071 }
1072 copy_id:
1073 {
1074 int len = e_token - s_token;
1075
1076 CHECK_SIZE_CODE(len + 1);
1077 if (ps.want_blank)
1078 *e_code++ = ' ';
1079 memcpy(e_code, s_token, len);
1080 e_code += len;
1081 }
1082 if (type_code != funcname)
1083 ps.want_blank = true;
1084 break;
1085
1086 case strpfx:
1087 {
1088 int len = e_token - s_token;
1089
1090 CHECK_SIZE_CODE(len + 1);
1091 if (ps.want_blank)
1092 *e_code++ = ' ';
1093 memcpy(e_code, token, len);
1094 e_code += len;
1095 }
1096 ps.want_blank = false;
1097 break;
1098
1099 case period: /* treat a period kind of like a binary
1100 * operation */
1101 *e_code++ = '.'; /* move the period into line */
1102 ps.want_blank = false; /* dont put a blank after a period */
1103 break;
1104
1105 case comma:
1106 ps.want_blank = (s_code != e_code); /* only put blank after comma
1107 * if comma does not start the
1108 * line */
1109 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init &&
1110 !ps.dumped_decl_indent && ps.paren_level == 0) {
1111 /* indent leading commas and not the actual identifiers */
1112 indent_declaration(dec_ind - 1, tabs_to_var);
1113 ps.dumped_decl_indent = true;
1114 }
1115 *e_code++ = ',';
1116 if (ps.p_l_follow == 0) {
1117 if (ps.block_init_level <= 0)
1118 ps.block_init = 0;
1119 if (break_comma && (!opt.leave_comma ||
1120 count_spaces_until(compute_code_target(), s_code, e_code) >
1121 opt.max_col - opt.tabsize))
1122 force_nl = true;
1123 }
1124 break;
1125
1126 case preesc: /* got the character '#' */
1127 if ((s_com != e_com) ||
1128 (s_lab != e_lab) ||
1129 (s_code != e_code))
1130 dump_line();
1131 CHECK_SIZE_LAB(1);
1132 *e_lab++ = '#'; /* move whole line to 'label' buffer */
1133 {
1134 int in_comment = 0;
1135 int com_start = 0;
1136 char quote = 0;
1137 int com_end = 0;
1138
1139 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1140 buf_ptr++;
1141 if (buf_ptr >= buf_end)
1142 fill_buffer();
1143 }
1144 while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1145 CHECK_SIZE_LAB(2);
1146 *e_lab = *buf_ptr++;
1147 if (buf_ptr >= buf_end)
1148 fill_buffer();
1149 switch (*e_lab++) {
1150 case BACKSLASH:
1151 if (!in_comment) {
1152 *e_lab++ = *buf_ptr++;
1153 if (buf_ptr >= buf_end)
1154 fill_buffer();
1155 }
1156 break;
1157 case '/':
1158 if (*buf_ptr == '*' && !in_comment && !quote) {
1159 in_comment = 1;
1160 *e_lab++ = *buf_ptr++;
1161 com_start = e_lab - s_lab - 2;
1162 }
1163 break;
1164 case '"':
1165 if (quote == '"')
1166 quote = 0;
1167 break;
1168 case '\'':
1169 if (quote == '\'')
1170 quote = 0;
1171 break;
1172 case '*':
1173 if (*buf_ptr == '/' && in_comment) {
1174 in_comment = 0;
1175 *e_lab++ = *buf_ptr++;
1176 com_end = e_lab - s_lab;
1177 }
1178 break;
1179 }
1180 }
1181
1182 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1183 e_lab--;
1184 if (e_lab - s_lab == com_end && bp_save == NULL) {
1185 /* comment on preprocessor line */
1186 if (sc_end == NULL) { /* if this is the first comment,
1187 * we must set up the buffer */
1188 save_com = sc_buf;
1189 sc_end = &save_com[0];
1190 }
1191 else {
1192 *sc_end++ = '\n'; /* add newline between
1193 * comments */
1194 *sc_end++ = ' ';
1195 --line_no;
1196 }
1197 if (sc_end - save_com + com_end - com_start > sc_size)
1198 errx(1, "input too long");
1199 memmove(sc_end, s_lab + com_start, com_end - com_start);
1200 sc_end += com_end - com_start;
1201 e_lab = s_lab + com_start;
1202 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1203 e_lab--;
1204 bp_save = buf_ptr; /* save current input buffer */
1205 be_save = buf_end;
1206 buf_ptr = save_com; /* fix so that subsequent calls to
1207 * lexi will take tokens out of
1208 * save_com */
1209 *sc_end++ = ' '; /* add trailing blank, just in case */
1210 buf_end = sc_end;
1211 sc_end = NULL;
1212 }
1213 CHECK_SIZE_LAB(1);
1214 *e_lab = '\0'; /* null terminate line */
1215 ps.pcase = false;
1216 }
1217
1218 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */
1219 if ((size_t)ifdef_level < nitems(state_stack)) {
1220 match_state[ifdef_level].tos = -1;
1221 state_stack[ifdef_level++] = ps;
1222 }
1223 else
1224 diag(1, "#if stack overflow");
1225 }
1226 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */
1227 if (ifdef_level <= 0)
1228 diag(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else");
1229 else {
1230 match_state[ifdef_level - 1] = ps;
1231 ps = state_stack[ifdef_level - 1];
1232 }
1233 }
1234 else if (strncmp(s_lab, "#endif", 6) == 0) {
1235 if (ifdef_level <= 0)
1236 diag(1, "Unmatched #endif");
1237 else
1238 ifdef_level--;
1239 } else {
1240 struct directives {
1241 int size;
1242 const char *string;
1243 }
1244 recognized[] = {
1245 {7, "include"},
1246 {6, "define"},
1247 {5, "undef"},
1248 {4, "line"},
1249 {5, "error"},
1250 {6, "pragma"}
1251 };
1252 int d = nitems(recognized);
1253 while (--d >= 0)
1254 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0)
1255 break;
1256 if (d < 0) {
1257 diag(1, "Unrecognized cpp directive");
1258 break;
1259 }
1260 }
1261 if (opt.blanklines_around_conditional_compilation) {
1262 postfix_blankline_requested++;
1263 n_real_blanklines = 0;
1264 }
1265 else {
1266 postfix_blankline_requested = 0;
1267 prefix_blankline_requested = 0;
1268 }
1269 break; /* subsequent processing of the newline
1270 * character will cause the line to be printed */
1271
1272 case comment: /* we have gotten a / followed by * this is a biggie */
1273 pr_comment();
1274 break;
1275
1276 default:
1277 break;
1278 } /* end of big switch stmt */
1279
1280 *e_code = '\0'; /* make sure code section is null terminated */
1281 if (type_code != comment && type_code != newline && type_code != preesc)
1282 ps.last_token = type_code;
1283 } /* end of main while (1) loop */
1284 }
1285
1286 /*
1287 * copy input file to backup file if in_name is /blah/blah/blah/file, then
1288 * backup file will be ".Bfile" then make the backup file the input and
1289 * original input file the output
1290 */
1291 static void
1292 bakcopy(void)
1293 {
1294 int n,
1295 bakchn;
1296 char buff[8 * 1024];
1297 const char *p;
1298
1299 /* construct file name .Bfile */
1300 for (p = in_name; *p; p++); /* skip to end of string */
1301 while (p > in_name && *p != '/') /* find last '/' */
1302 p--;
1303 if (*p == '/')
1304 p++;
1305 sprintf(bakfile, "%s%s", p, simple_backup_suffix);
1306
1307 /* copy in_name to backup file */
1308 bakchn = creat(bakfile, 0600);
1309 if (bakchn < 0)
1310 err(1, "%s", bakfile);
1311 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
1312 if (write(bakchn, buff, n) != n)
1313 err(1, "%s", bakfile);
1314 if (n < 0)
1315 err(1, "%s", in_name);
1316 close(bakchn);
1317 fclose(input);
1318
1319 /* re-open backup file as the input file */
1320 input = fopen(bakfile, "r");
1321 if (input == NULL)
1322 err(1, "%s", bakfile);
1323 /* now the original input file will be the output */
1324 output = fopen(in_name, "w");
1325 if (output == NULL) {
1326 unlink(bakfile);
1327 err(1, "%s", in_name);
1328 }
1329 }
1330
1331 static void
1332 indent_declaration(int cur_dec_ind, int tabs_to_var)
1333 {
1334 int pos = e_code - s_code;
1335 char *startpos = e_code;
1336
1337 /*
1338 * get the tab math right for indentations that are not multiples of tabsize
1339 */
1340 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) {
1341 pos += (ps.ind_level * opt.ind_size) % opt.tabsize;
1342 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize;
1343 }
1344 if (tabs_to_var) {
1345 int tpos;
1346
1347 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize);
1348 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) {
1349 *e_code++ = '\t';
1350 pos = tpos;
1351 }
1352 }
1353 CHECK_SIZE_CODE(cur_dec_ind - pos + 1);
1354 while (pos < cur_dec_ind) {
1355 *e_code++ = ' ';
1356 pos++;
1357 }
1358 if (e_code == startpos && ps.want_blank) {
1359 *e_code++ = ' ';
1360 ps.want_blank = false;
1361 }
1362 }
1363