indent.c revision 1.247 1 /* $NetBSD: indent.c,v 1.247 2023/05/11 10:39:25 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: indent.c,v 1.247 2023/05/11 10:39:25 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
49 #endif
50
51 #include <sys/param.h>
52 #include <assert.h>
53 #include <err.h>
54 #include <errno.h>
55 #include <fcntl.h>
56 #include <stdarg.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <unistd.h>
61
62 #include "indent.h"
63
/*
 * The formatting options with their initial values.  Members that are not
 * listed here start out as zero/false.  Profiles and command line options
 * are applied on top of these values during startup, and
 * main_parse_command_line() finally fills in a few derived defaults.
 */
struct options opt = {
	.brace_same_line = true,
	.comment_delimiter_on_blankline = true,
	.cuddle_else = true,
	.comment_column = 33,
	.decl_indent = 16,
	.else_if = true,
	.function_brace_split = true,
	.format_col1_comments = true,
	.format_block_comments = true,
	.indent_parameters = true,
	.indent_size = 8,
	.local_decl_indent = -1,	/* negative means: use decl_indent */
	.lineup_to_parens = true,
	.procnames_start_line = true,
	.star_comment_cont = true,
	.tabsize = 8,
	.max_line_length = 78,
	.use_tabs = true,
};
84
/* The current parser state; snapshots are kept in state_stack at '#if'. */
struct parser_state ps;

/* The text of the token that is currently being processed. */
struct buffer token;

/*
 * The parts of the output line that is being assembled: the label or
 * preprocessing directive, the code and the comment.
 */
struct buffer lab;
struct buffer code;
struct buffer com;

bool found_err;			/* set by diag() for any non-warning; makes
				 * the program exit with EXIT_FAILURE */
bool break_comma;		/* consulted when deciding whether a line may
				 * be broken after a comma; set elsewhere */
float case_ind;
bool had_eof;			/* consulted while reading a comment on a
				 * preprocessing line; set elsewhere */
int line_no = 1;		/* the current input line, as reported by
				 * diag() */
bool inhibit_formatting;

/* The number of parser states that are currently saved in state_stack. */
static int ifdef_level;
/* The parser states saved at each '#if', restored at '#else'/'#elif'. */
static struct parser_state state_stack[5];

FILE *input;
FILE *output;
struct output_control out;

/* The file names, only used for diagnostics and for creating the backup. */
static const char *in_name = "Standard Input";
static const char *out_name = "Standard Output";
/* May be overridden by the environment variable SIMPLE_BACKUP_SUFFIX. */
static const char *backup_suffix = ".BAK";
/* The name of the backup file, filled in by bakcopy(). */
static char bakfile[MAXPATHLEN] = "";
111
112
113 static void
114 buf_init(struct buffer *buf)
115 {
116 size_t size = 200;
117 buf->buf = xmalloc(size);
118 buf->l = buf->buf + size - 5 /* safety margin */;
119 buf->s = buf->buf + 1; /* allow accessing buf->e[-1] */
120 buf->e = buf->s;
121 buf->buf[0] = ' ';
122 buf->buf[1] = '\0';
123 }
124
125 static size_t
126 buf_len(const struct buffer *buf)
127 {
128 return (size_t)(buf->e - buf->s);
129 }
130
131 void
132 buf_expand(struct buffer *buf, size_t add_size)
133 {
134 size_t new_size = (size_t)(buf->l - buf->s) + 400 + add_size;
135 size_t len = buf_len(buf);
136 buf->buf = xrealloc(buf->buf, new_size);
137 buf->l = buf->buf + new_size - 5;
138 buf->s = buf->buf + 1;
139 buf->e = buf->s + len;
140 /* At this point, the buffer may not be null-terminated anymore. */
141 }
142
143 static void
144 buf_reserve(struct buffer *buf, size_t n)
145 {
146 if (n >= (size_t)(buf->l - buf->e))
147 buf_expand(buf, n);
148 }
149
150 void
151 buf_add_char(struct buffer *buf, char ch)
152 {
153 buf_reserve(buf, 1);
154 *buf->e++ = ch;
155 }
156
157 void
158 buf_add_range(struct buffer *buf, const char *s, const char *e)
159 {
160 size_t len = (size_t)(e - s);
161 buf_reserve(buf, len);
162 memcpy(buf->e, s, len);
163 buf->e += len;
164 }
165
166 static void
167 buf_add_buf(struct buffer *buf, const struct buffer *add)
168 {
169 buf_add_range(buf, add->s, add->e);
170 }
171
172 static void
173 buf_terminate(struct buffer *buf)
174 {
175 buf_reserve(buf, 1);
176 *buf->e = '\0';
177 }
178
179 static void
180 buf_reset(struct buffer *buf)
181 {
182 buf->e = buf->s;
183 }
184
185 void
186 diag(int level, const char *msg, ...)
187 {
188 va_list ap;
189
190 if (level != 0)
191 found_err = true;
192
193 va_start(ap, msg);
194 fprintf(stderr, "%s: %s:%d: ",
195 level == 0 ? "warning" : "error", in_name, line_no);
196 vfprintf(stderr, msg, ap);
197 fprintf(stderr, "\n");
198 va_end(ap);
199 }
200
/*
 * Return the indentation that results from starting at 'ind' and then
 * appending the text from 'start' up to but excluding 'end'.
 *
 * A newline or form feed resets the indentation to 0, a tab advances to the
 * next tabstop, a backspace moves one column back, every other character
 * moves one column forward.
 */
int
ind_add(int ind, const char *start, const char *end)
{
	const char *p = start;

	while (p != end) {
		switch (*p++) {
		case '\n':
		case '\f':
			ind = 0;
			break;
		case '\t':
			ind = next_tab(ind);
			break;
		case '\b':
			ind--;
			break;
		default:
			ind++;
			break;
		}
	}
	return ind;
}
220
221 static void
222 main_init_globals(void)
223 {
224 inp_init();
225
226 buf_init(&token);
227
228 buf_init(&lab);
229 buf_init(&code);
230 buf_init(&com);
231
232 ps.s_sym[0] = psym_stmt_list;
233 ps.prev_token = lsym_semicolon;
234 ps.next_col_1 = true;
235
236 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
237 if (suffix != NULL)
238 backup_suffix = suffix;
239 }
240
241 /*
242 * Copy the input file to the backup file, then make the backup file the input
243 * and the original input file the output.
244 */
245 static void
246 bakcopy(void)
247 {
248 ssize_t n;
249 int bak_fd;
250 char buff[8 * 1024];
251
252 const char *last_slash = strrchr(in_name, '/');
253 snprintf(bakfile, sizeof(bakfile), "%s%s",
254 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
255
256 /* copy in_name to backup file */
257 bak_fd = creat(bakfile, 0600);
258 if (bak_fd < 0)
259 err(1, "%s", bakfile);
260
261 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
262 if (write(bak_fd, buff, (size_t)n) != n)
263 err(1, "%s", bakfile);
264 if (n < 0)
265 err(1, "%s", in_name);
266
267 close(bak_fd);
268 (void)fclose(input);
269
270 /* re-open backup file as the input file */
271 input = fopen(bakfile, "r");
272 if (input == NULL)
273 err(1, "%s", bakfile);
274 /* now the original input file will be the output */
275 output = fopen(in_name, "w");
276 if (output == NULL) {
277 unlink(bakfile);
278 err(1, "%s", in_name);
279 }
280 }
281
/*
 * Scan the command line for the options that affect profile loading, then
 * load the profiles.
 *
 * "-npro" anywhere on the command line suppresses loading altogether.
 * "-Pname" selects the profile to load; the last such option wins.
 */
static void
main_load_profiles(int argc, char **argv)
{
	const char *profile_name = NULL;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i++];

		if (strcmp(arg, "-npro") == 0)
			return;
		if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
			profile_name = &arg[2];
	}

	load_profiles(profile_name);
}
297
298 static void
299 main_parse_command_line(int argc, char **argv)
300 {
301 for (int i = 1; i < argc; ++i) {
302 const char *arg = argv[i];
303
304 if (arg[0] == '-') {
305 set_option(arg, "Command line");
306
307 } else if (input == NULL) {
308 in_name = arg;
309 if ((input = fopen(in_name, "r")) == NULL)
310 err(1, "%s", in_name);
311
312 } else if (output == NULL) {
313 out_name = arg;
314 if (strcmp(in_name, out_name) == 0)
315 errx(1, "input and output files must be different");
316 if ((output = fopen(out_name, "w")) == NULL)
317 err(1, "%s", out_name);
318
319 } else
320 errx(1, "too many arguments: %s", arg);
321 }
322
323 if (input == NULL) {
324 input = stdin;
325 output = stdout;
326 } else if (output == NULL) {
327 out_name = in_name;
328 bakcopy();
329 }
330
331 if (opt.comment_column <= 1)
332 opt.comment_column = 2; /* don't put normal comments before column 2 */
333 if (opt.block_comment_max_line_length <= 0)
334 opt.block_comment_max_line_length = opt.max_line_length;
335 if (opt.local_decl_indent < 0) /* if not specified by user, set this */
336 opt.local_decl_indent = opt.decl_indent;
337 if (opt.decl_comment_column <= 0) /* if not specified by user, set this */
338 opt.decl_comment_column = opt.ljust_decl
339 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
340 : opt.comment_column;
341 if (opt.continuation_indent == 0)
342 opt.continuation_indent = opt.indent_size;
343 }
344
345 static void
346 main_prepare_parsing(void)
347 {
348 inp_read_line();
349
350 int ind = 0;
351 for (const char *p = inp_p();; p++) {
352 if (*p == ' ')
353 ind++;
354 else if (*p == '\t')
355 ind = next_tab(ind);
356 else
357 break;
358 }
359
360 if (ind >= opt.indent_size)
361 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
362 }
363
364 static void
365 code_add_decl_indent(int decl_ind, bool tabs_to_var)
366 {
367 int base_ind = ps.ind_level * opt.indent_size;
368 int ind = base_ind + (int)buf_len(&code);
369 int target_ind = base_ind + decl_ind;
370 char *orig_code_e = code.e;
371
372 if (tabs_to_var)
373 for (int next; (next = next_tab(ind)) <= target_ind; ind = next)
374 buf_add_char(&code, '\t');
375
376 for (; ind < target_ind; ind++)
377 buf_add_char(&code, ' ');
378
379 if (code.e == orig_code_e && ps.want_blank) {
380 buf_add_char(&code, ' ');
381 ps.want_blank = false;
382 }
383 }
384
385 static void __attribute__((__noreturn__))
386 process_eof(void)
387 {
388 if (lab.s != lab.e || code.s != code.e || com.s != com.e)
389 output_line();
390
391 if (ps.tos > 1) /* check for balanced braces */
392 diag(1, "Stuff missing from end of file");
393
394 if (opt.verbose) {
395 printf("There were %d output lines and %d comments\n",
396 ps.stats.lines, ps.stats.comments);
397 printf("(Lines with comments)/(Lines with code): %6.3f\n",
398 (1.0 * ps.stats.comment_lines) / ps.stats.code_lines);
399 }
400
401 fflush(output);
402 exit(found_err ? EXIT_FAILURE : EXIT_SUCCESS);
403 }
404
405 static void
406 maybe_break_line(lexer_symbol lsym, bool *force_nl)
407 {
408 if (!*force_nl)
409 return;
410 if (lsym == lsym_semicolon)
411 return;
412 if (lsym == lsym_lbrace && opt.brace_same_line)
413 return;
414
415 if (opt.verbose)
416 diag(0, "Line broken");
417 output_line();
418 ps.want_blank = false;
419 *force_nl = false;
420 }
421
/*
 * Append the pending comment to the code, surrounded by a blank on each
 * side, and empty the comment buffer.  The code is null-terminated
 * afterwards, and no further blank is wanted before the next token.
 */
static void
move_com_to_code(void)
{
	buf_add_char(&code, ' ');
	buf_add_buf(&code, &com);
	buf_add_char(&code, ' ');
	buf_terminate(&code);
	buf_reset(&com);
	ps.want_blank = false;
}
432
/*
 * Handle a form feed token: hand the pending line over to output_line_ff()
 * and suppress the blank before the next token.
 */
static void
process_form_feed(void)
{
	output_line_ff();
	ps.want_blank = false;
}
439
440 static void
441 process_newline(void)
442 {
443 if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init &&
444 !opt.break_after_comma && break_comma &&
445 com.s == com.e)
446 goto stay_in_line;
447
448 output_line();
449 ps.want_blank = false;
450
451 stay_in_line:
452 ++line_no;
453 }
454
455 static bool
456 want_blank_before_lparen(void)
457 {
458 if (!ps.want_blank)
459 return false;
460 if (opt.proc_calls_space)
461 return true;
462 if (ps.prev_token == lsym_rparen_or_rbracket)
463 return false;
464 if (ps.prev_token == lsym_offsetof)
465 return false;
466 if (ps.prev_token == lsym_sizeof)
467 return opt.blank_after_sizeof;
468 if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname)
469 return false;
470 return true;
471 }
472
473 static void
474 process_lparen_or_lbracket(int decl_ind, bool tabs_to_var, bool spaced_expr)
475 {
476 if (++ps.nparen == array_length(ps.paren)) {
477 diag(0, "Reached internal limit of %zu unclosed parentheses",
478 array_length(ps.paren));
479 ps.nparen--;
480 }
481
482 if (token.s[0] == '(' && ps.in_decl
483 && !ps.block_init && !ps.decl_indent_done &&
484 !ps.is_function_definition && ps.line_start_nparen == 0) {
485 /* function pointer declarations */
486 code_add_decl_indent(decl_ind, tabs_to_var);
487 ps.decl_indent_done = true;
488 } else if (want_blank_before_lparen())
489 *code.e++ = ' ';
490 ps.want_blank = false;
491 *code.e++ = token.s[0];
492
493 ps.paren[ps.nparen - 1].indent = (short)ind_add(0, code.s, code.e);
494 debug_println("paren_indents[%d] is now %d",
495 ps.nparen - 1, ps.paren[ps.nparen - 1].indent);
496
497 if (spaced_expr && ps.nparen == 1 && opt.extra_expr_indent
498 && ps.paren[0].indent < 2 * opt.indent_size) {
499 ps.paren[0].indent = (short)(2 * opt.indent_size);
500 debug_println("paren_indents[0] is now %d", ps.paren[0].indent);
501 }
502
503 if (ps.init_or_struct && *token.s == '(' && ps.tos <= 2) {
504 /*
505 * this is a kluge to make sure that declarations will be aligned
506 * right if proc decl has an explicit type on it, i.e. "int a(x) {..."
507 */
508 parse(psym_semicolon); /* I said this was a kluge... */
509 ps.init_or_struct = false;
510 }
511
512 /* parenthesized type following sizeof or offsetof is not a cast */
513 if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof)
514 ps.paren[ps.nparen - 1].no_cast = true;
515 }
516
517 static void
518 process_rparen_or_rbracket(bool *spaced_expr, bool *force_nl, stmt_head hd)
519 {
520 if (ps.paren[ps.nparen - 1].maybe_cast &&
521 !ps.paren[ps.nparen - 1].no_cast) {
522 ps.next_unary = true;
523 ps.paren[ps.nparen - 1].maybe_cast = false;
524 ps.want_blank = opt.space_after_cast;
525 } else
526 ps.want_blank = true;
527 ps.paren[ps.nparen - 1].no_cast = false;
528
529 if (ps.nparen > 0)
530 ps.nparen--;
531 else
532 diag(0, "Extra '%c'", *token.s);
533
534 if (code.e == code.s) /* if the paren starts the line */
535 ps.line_start_nparen = ps.nparen; /* then indent it */
536
537 *code.e++ = token.s[0];
538
539 if (*spaced_expr && ps.nparen == 0) { /* check for end of 'if
540 * (...)', or some such */
541 *spaced_expr = false;
542 *force_nl = true; /* must force newline after if */
543 ps.next_unary = true;
544 ps.in_stmt_or_decl = false; /* don't use stmt continuation
545 * indentation */
546
547 parse_stmt_head(hd);
548 }
549 }
550
551 static bool
552 want_blank_before_unary_op(void)
553 {
554 if (ps.want_blank)
555 return true;
556 if (token.s[0] == '+' || token.s[0] == '-')
557 return code.e > code.s && code.e[-1] == token.s[0];
558 return false;
559 }
560
561 static void
562 process_unary_op(int decl_ind, bool tabs_to_var)
563 {
564 if (!ps.decl_indent_done && ps.in_decl && !ps.block_init &&
565 !ps.is_function_definition && ps.line_start_nparen == 0) {
566 /* pointer declarations */
567 code_add_decl_indent(decl_ind - (int)buf_len(&token), tabs_to_var);
568 ps.decl_indent_done = true;
569 } else if (want_blank_before_unary_op())
570 *code.e++ = ' ';
571
572 buf_add_buf(&code, &token);
573 ps.want_blank = false;
574 }
575
576 static void
577 process_binary_op(void)
578 {
579 if (buf_len(&code) > 0)
580 buf_add_char(&code, ' ');
581 buf_add_buf(&code, &token);
582 ps.want_blank = true;
583 }
584
585 static void
586 process_postfix_op(void)
587 {
588 *code.e++ = token.s[0];
589 *code.e++ = token.s[1];
590 ps.want_blank = true;
591 }
592
593 static void
594 process_question(int *quest_level)
595 {
596 (*quest_level)++;
597 if (ps.want_blank)
598 *code.e++ = ' ';
599 *code.e++ = '?';
600 ps.want_blank = true;
601 }
602
603 static void
604 process_colon(int *quest_level, bool *force_nl, bool *seen_case)
605 {
606 if (*quest_level > 0) { /* part of a '?:' operator */
607 --*quest_level;
608 if (ps.want_blank)
609 *code.e++ = ' ';
610 *code.e++ = ':';
611 ps.want_blank = true;
612 return;
613 }
614
615 if (ps.init_or_struct) { /* bit-field */
616 *code.e++ = ':';
617 ps.want_blank = false;
618 return;
619 }
620
621 buf_add_buf(&lab, &code); /* 'case' or 'default' or named label */
622 buf_add_char(&lab, ':');
623 buf_terminate(&lab);
624 buf_reset(&code);
625
626 ps.in_stmt_or_decl = false;
627 ps.is_case_label = *seen_case;
628 *force_nl = *seen_case;
629 *seen_case = false;
630 ps.want_blank = false;
631 }
632
633 static void
634 process_semicolon(bool *seen_case, int *quest_level, int decl_ind,
635 bool tabs_to_var, bool *spaced_expr, stmt_head hd, bool *force_nl)
636 {
637 if (ps.decl_level == 0)
638 ps.init_or_struct = false;
639 *seen_case = false; /* these will only need resetting in an error */
640 *quest_level = 0;
641 if (ps.prev_token == lsym_rparen_or_rbracket)
642 ps.in_func_def_params = false;
643 ps.block_init = false;
644 ps.block_init_level = 0;
645 ps.just_saw_decl--;
646
647 if (ps.in_decl && code.s == code.e && !ps.block_init &&
648 !ps.decl_indent_done && ps.line_start_nparen == 0) {
649 /* indent stray semicolons in declarations */
650 code_add_decl_indent(decl_ind - 1, tabs_to_var);
651 ps.decl_indent_done = true;
652 }
653
654 ps.in_decl = ps.decl_level > 0; /* if we were in a first level
655 * structure declaration before, we
656 * aren't anymore */
657
658 if ((!*spaced_expr || hd != hd_for) && ps.nparen > 0) {
659
660 /*
661 * There were unbalanced parentheses in the statement. It is a bit
662 * complicated, because the semicolon might be in a for statement.
663 */
664 diag(1, "Unbalanced parentheses");
665 ps.nparen = 0;
666 if (*spaced_expr) { /* 'if', 'while', etc. */
667 *spaced_expr = false;
668 parse_stmt_head(hd);
669 }
670 }
671 *code.e++ = ';';
672 ps.want_blank = true;
673 ps.in_stmt_or_decl = ps.nparen > 0;
674
675 if (!*spaced_expr) { /* if not if for (;;) */
676 parse(psym_semicolon); /* let parser know about end of stmt */
677 *force_nl = true; /* force newline after an end of stmt */
678 }
679 }
680
681 static void
682 process_lbrace(bool *force_nl, bool *spaced_expr, stmt_head hd,
683 int *di_stack, int di_stack_cap, int *decl_ind)
684 {
685 ps.in_stmt_or_decl = false; /* don't indent the {} */
686
687 if (!ps.block_init)
688 *force_nl = true; /* force other stuff on same line as '{' onto
689 * new line */
690 else if (ps.block_init_level <= 0)
691 ps.block_init_level = 1;
692 else
693 ps.block_init_level++;
694
695 if (code.s != code.e && !ps.block_init) {
696 if (!opt.brace_same_line) {
697 output_line();
698 ps.want_blank = false;
699 } else if (ps.in_func_def_params && !ps.init_or_struct) {
700 ps.ind_level_follow = 0;
701 if (opt.function_brace_split) { /* dump the line prior to the
702 * brace ... */
703 output_line();
704 ps.want_blank = false;
705 } else /* add a space between the decl and brace */
706 ps.want_blank = true;
707 }
708 }
709
710 if (ps.in_func_def_params)
711 out.blank_line_before = false;
712
713 if (ps.nparen > 0) {
714 diag(1, "Unbalanced parentheses");
715 ps.nparen = 0;
716 if (*spaced_expr) { /* check for unclosed 'if', 'for', etc. */
717 *spaced_expr = false;
718 parse_stmt_head(hd);
719 ps.ind_level = ps.ind_level_follow;
720 }
721 }
722
723 if (code.s == code.e)
724 ps.in_stmt_cont = false; /* don't indent the '{' itself */
725 if (ps.in_decl && ps.init_or_struct) {
726 di_stack[ps.decl_level] = *decl_ind;
727 if (++ps.decl_level == di_stack_cap) {
728 diag(0, "Reached internal limit of %d struct levels",
729 di_stack_cap);
730 ps.decl_level--;
731 }
732 } else {
733 ps.decl_on_line = false; /* we can't be in the middle of a
734 * declaration, so don't do special
735 * indentation of comments */
736 if (opt.blanklines_after_decl_at_top && ps.in_func_def_params)
737 out.blank_line_after = true;
738 ps.in_func_def_params = false;
739 ps.in_decl = false;
740 }
741
742 *decl_ind = 0;
743 parse(psym_lbrace);
744 if (ps.want_blank)
745 *code.e++ = ' ';
746 ps.want_blank = false;
747 *code.e++ = '{';
748 ps.just_saw_decl = 0;
749 }
750
751 static void
752 process_rbrace(bool *spaced_expr, int *decl_ind, const int *di_stack)
753 {
754 if (ps.s_sym[ps.tos] == psym_decl && !ps.block_init) {
755 /* semicolons can be omitted in declarations */
756 parse(psym_semicolon);
757 }
758
759 if (ps.nparen > 0) { /* check for unclosed if, for, else. */
760 diag(1, "Unbalanced parentheses");
761 ps.nparen = 0;
762 *spaced_expr = false;
763 }
764
765 ps.just_saw_decl = 0;
766 ps.block_init_level--;
767
768 if (code.s != code.e && !ps.block_init) { /* '}' must be first on line */
769 if (opt.verbose)
770 diag(0, "Line broken");
771 output_line();
772 }
773
774 *code.e++ = '}';
775 ps.want_blank = true;
776 ps.in_stmt_or_decl = false;
777 ps.in_stmt_cont = false;
778
779 if (ps.decl_level > 0) { /* multi-level structure declaration */
780 *decl_ind = di_stack[--ps.decl_level];
781 if (ps.decl_level == 0 && !ps.in_func_def_params) {
782 ps.just_saw_decl = 2;
783 *decl_ind = ps.ind_level == 0
784 ? opt.decl_indent : opt.local_decl_indent;
785 }
786 ps.in_decl = true;
787 }
788
789 out.blank_line_before = false;
790 parse(psym_rbrace);
791
792 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.decl_level <= 0)
793 out.blank_line_after = true;
794 }
795
796 static void
797 process_do(bool *force_nl)
798 {
799 ps.in_stmt_or_decl = false;
800
801 if (code.e != code.s) { /* make sure this starts a line */
802 if (opt.verbose)
803 diag(0, "Line broken");
804 output_line();
805 ps.want_blank = false;
806 }
807
808 *force_nl = true; /* following stuff must go onto new line */
809 parse(psym_do);
810 }
811
812 static void
813 process_else(bool *force_nl)
814 {
815 ps.in_stmt_or_decl = false;
816
817 if (code.e > code.s && !(opt.cuddle_else && code.e[-1] == '}')) {
818 if (opt.verbose)
819 diag(0, "Line broken");
820 output_line(); /* make sure this starts a line */
821 ps.want_blank = false;
822 }
823
824 *force_nl = true; /* following stuff must go onto new line */
825 parse(psym_else);
826 }
827
828 static void
829 process_type(int *decl_ind, bool *tabs_to_var)
830 {
831 parse(psym_decl); /* let the parser worry about indentation */
832
833 if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) {
834 if (code.s != code.e) {
835 output_line();
836 ps.want_blank = false;
837 }
838 }
839
840 if (ps.in_func_def_params && opt.indent_parameters &&
841 ps.decl_level == 0) {
842 ps.ind_level = ps.ind_level_follow = 1;
843 ps.in_stmt_cont = false;
844 }
845
846 ps.init_or_struct = /* maybe */ true;
847 ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef;
848 if (ps.decl_level <= 0)
849 ps.just_saw_decl = 2;
850
851 out.blank_line_before = false;
852
853 int len = (int)buf_len(&token) + 1;
854 int ind = ps.ind_level == 0 || ps.decl_level > 0
855 ? opt.decl_indent /* global variable or local member */
856 : opt.local_decl_indent; /* local variable */
857 *decl_ind = ind > 0 ? ind : len;
858 *tabs_to_var = opt.use_tabs && ind > 0;
859 }
860
861 static void
862 process_ident(lexer_symbol lsym, int decl_ind, bool tabs_to_var,
863 bool *spaced_expr, bool *force_nl, stmt_head hd)
864 {
865 if (ps.in_decl) {
866 if (lsym == lsym_funcname) {
867 ps.in_decl = false;
868 if (opt.procnames_start_line && code.s != code.e) {
869 *code.e = '\0';
870 output_line();
871 } else if (ps.want_blank) {
872 *code.e++ = ' ';
873 }
874 ps.want_blank = false;
875
876 } else if (!ps.block_init && !ps.decl_indent_done &&
877 ps.line_start_nparen == 0) {
878 code_add_decl_indent(decl_ind, tabs_to_var);
879 ps.decl_indent_done = true;
880 ps.want_blank = false;
881 }
882
883 } else if (*spaced_expr && ps.nparen == 0) {
884 *spaced_expr = false;
885 *force_nl = true;
886 ps.next_unary = true;
887 ps.in_stmt_or_decl = false;
888 parse_stmt_head(hd);
889 }
890 }
891
/*
 * Append the current token to the code, preceded by a blank if one is
 * wanted.  Whether a blank is wanted after the token is up to the caller.
 */
static void
copy_token(void)
{
	if (ps.want_blank)
		buf_add_char(&code, ' ');
	buf_add_buf(&code, &token);
}
899
900 static void
901 process_period(void)
902 {
903 if (code.e > code.s && code.e[-1] == ',')
904 *code.e++ = ' ';
905 *code.e++ = '.';
906 ps.want_blank = false;
907 }
908
909 static void
910 process_comma(int decl_ind, bool tabs_to_var, bool *force_nl)
911 {
912 ps.want_blank = code.s != code.e; /* only put blank after comma if comma
913 * does not start the line */
914
915 if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
916 !ps.decl_indent_done && ps.line_start_nparen == 0) {
917 /* indent leading commas and not the actual identifiers */
918 code_add_decl_indent(decl_ind - 1, tabs_to_var);
919 ps.decl_indent_done = true;
920 }
921
922 *code.e++ = ',';
923
924 if (ps.nparen == 0) {
925 if (ps.block_init_level <= 0)
926 ps.block_init = false;
927 int varname_len = 8; /* rough estimate for the length of a typical
928 * variable name */
929 if (break_comma && (opt.break_after_comma ||
930 ind_add(compute_code_indent(), code.s, code.e)
931 >= opt.max_line_length - varname_len))
932 *force_nl = true;
933 }
934 }
935
/*
 * Move the whole preprocessing line to the 'label' buffer, starting with a
 * '#' and skipping the blanks that directly follow in the input.
 *
 * While copying, keep track of string literals, character constants and
 * block comments, so that comment delimiters inside literals are not
 * mistaken for comments.  A comment may span several lines, in which case
 * reading continues across the newlines.
 *
 * If the line ends with a comment, that comment is removed from the label
 * and handed over to the input module (the inp_comment_* functions), unless
 * inp_comment_seen() reports that this is not possible right now.
 */
static void
read_preprocessing_line(void)
{
	enum {
		PLAIN, STR, CHR, COMM
	} state;

	buf_add_char(&lab, '#');

	state = PLAIN;
	/* offsets of the most recent comment, relative to lab.s */
	int com_start = 0, com_end = 0;

	while (ch_isblank(inp_peek()))
		inp_skip();

	/* a newline only ends the line if it is not inside a comment */
	while (inp_peek() != '\n' || (state == COMM && !had_eof)) {
		/* each iteration adds at most 2 characters */
		buf_reserve(&lab, 2);
		*lab.e++ = inp_next();
		switch (lab.e[-1]) {
		case '\\':
			/* copy the escaped character without interpreting it */
			if (state != COMM)
				*lab.e++ = inp_next();
			break;
		case '/':
			if (inp_peek() == '*' && state == PLAIN) {
				state = COMM;
				*lab.e++ = inp_next();
				/* the comment starts at the '/' */
				com_start = (int)buf_len(&lab) - 2;
			}
			break;
		case '"':
			if (state == STR)
				state = PLAIN;
			else if (state == PLAIN)
				state = STR;
			break;
		case '\'':
			if (state == CHR)
				state = PLAIN;
			else if (state == PLAIN)
				state = CHR;
			break;
		case '*':
			if (inp_peek() == '/' && state == COMM) {
				state = PLAIN;
				*lab.e++ = inp_next();
				/* the comment ends after the '/' */
				com_end = (int)buf_len(&lab);
			}
			break;
		}
	}

	/* remove trailing blanks */
	while (lab.e > lab.s && ch_isblank(lab.e[-1]))
		lab.e--;
	/* only a comment that reaches up to the end of the line is split off */
	if (lab.e - lab.s == com_end && !inp_comment_seen()) {
		/* comment on preprocessor line */
		inp_comment_init_preproc();
		inp_comment_add_range(lab.s + com_start, lab.s + com_end);
		lab.e = lab.s + com_start;
		while (lab.e > lab.s && ch_isblank(lab.e[-1]))
			lab.e--;
		inp_comment_add_char(' ');	/* add trailing blank, just in case */
		inp_from_comment();
	}
	buf_terminate(&lab);
}
1003
1004 static void
1005 process_preprocessing(void)
1006 {
1007 if (com.s != com.e || lab.s != lab.e || code.s != code.e)
1008 output_line();
1009
1010 read_preprocessing_line();
1011
1012 ps.is_case_label = false;
1013
1014 if (strncmp(lab.s, "#if", 3) == 0) { /* also ifdef, ifndef */
1015 if ((size_t)ifdef_level < array_length(state_stack))
1016 state_stack[ifdef_level++] = ps;
1017 else
1018 diag(1, "#if stack overflow");
1019
1020 } else if (strncmp(lab.s, "#el", 3) == 0) { /* else, elif */
1021 if (ifdef_level <= 0)
1022 diag(1, lab.s[3] == 'i' ? "Unmatched #elif" : "Unmatched #else");
1023 else
1024 ps = state_stack[ifdef_level - 1];
1025
1026 } else if (strncmp(lab.s, "#endif", 6) == 0) {
1027 if (ifdef_level <= 0)
1028 diag(1, "Unmatched #endif");
1029 else
1030 ifdef_level--;
1031
1032 } else {
1033 if (strncmp(lab.s + 1, "pragma", 6) != 0 &&
1034 strncmp(lab.s + 1, "error", 5) != 0 &&
1035 strncmp(lab.s + 1, "line", 4) != 0 &&
1036 strncmp(lab.s + 1, "undef", 5) != 0 &&
1037 strncmp(lab.s + 1, "define", 6) != 0 &&
1038 strncmp(lab.s + 1, "include", 7) != 0) {
1039 diag(1, "Unrecognized cpp directive");
1040 return;
1041 }
1042 }
1043
1044 if (opt.blanklines_around_conditional_compilation) {
1045 out.blank_line_after = true;
1046 out.blank_lines_to_output = 0;
1047 } else {
1048 out.blank_line_after = false;
1049 out.blank_line_before = false;
1050 }
1051
1052 /*
1053 * subsequent processing of the newline character will cause the line to
1054 * be printed
1055 */
1056 }
1057
/*
 * The main loop of the program: fetch one token after another from lexi()
 * and dispatch on its kind, until the end of the input is reached, at which
 * point process_eof() exits the program.
 *
 * The local variables carry the formatting state that spans several tokens;
 * they are passed to the process_* functions as needed.
 */
__dead static void
main_loop(void)
{
	bool force_nl = false;	/* when true, code must be broken */
	bool last_else = false;	/* true iff last keyword was an else */
	int decl_ind = 0;	/* current indentation for declarations */
	int di_stack[20];	/* a stack of structure indentation levels */
	bool tabs_to_var = false;	/* true if using tabs to indent to var name */
	bool spaced_expr = false;	/* whether we are in the expression of
					 * if(...), while(...), etc. */
	stmt_head hd = hd_0;	/* the type of statement for 'if (...)', 'for
				 * (...)', etc */
	int quest_level = 0;	/* when this is positive, we have seen a '?'
				 * without the matching ':' in a '?:'
				 * expression */
	bool seen_case = false;	/* set to true when we see a 'case', so we
				 * know what to do with the following colon */

	di_stack[ps.decl_level = 0] = 0;

	for (;;) {		/* loop until we reach eof */
		lexer_symbol lsym = lexi();

		/* with the option else_if, keep 'else if' on a single line */
		if (lsym == lsym_if && last_else && opt.else_if)
			force_nl = false;
		last_else = false;

		if (lsym == lsym_eof) {
			process_eof();
			/* NOTREACHED */
		}

		/*
		 * These tokens end the line on their own, so a pending
		 * request for a line break is obsolete.
		 */
		if (lsym == lsym_newline || lsym == lsym_form_feed ||
		    lsym == lsym_preprocessing)
			force_nl = false;
		else if (lsym != lsym_comment) {
			maybe_break_line(lsym, &force_nl);
			ps.in_stmt_or_decl = true;	/* add an extra level of indentation;
							 * turned off again by a ';' or '}' */
			if (com.s != com.e)
				move_com_to_code();
		}

		/*
		 * The process_* functions may write up to 2 characters
		 * directly to the code buffer, without reserving space
		 * themselves.
		 */
		buf_reserve(&code, 3);	/* space for 2 characters plus '\0' */

		switch (lsym) {

		case lsym_form_feed:
			process_form_feed();
			break;

		case lsym_newline:
			process_newline();
			break;

		case lsym_lparen_or_lbracket:
			process_lparen_or_lbracket(decl_ind, tabs_to_var, spaced_expr);
			break;

		case lsym_rparen_or_rbracket:
			process_rparen_or_rbracket(&spaced_expr, &force_nl, hd);
			break;

		case lsym_unary_op:
			process_unary_op(decl_ind, tabs_to_var);
			break;

		case lsym_binary_op:
			process_binary_op();
			break;

		case lsym_postfix_op:
			process_postfix_op();
			break;

		case lsym_question:
			process_question(&quest_level);
			break;

		case lsym_case_label:
			seen_case = true;
			goto copy_token;

		case lsym_colon:
			process_colon(&quest_level, &force_nl, &seen_case);
			break;

		case lsym_semicolon:
			process_semicolon(&seen_case, &quest_level, decl_ind, tabs_to_var,
			    &spaced_expr, hd, &force_nl);
			break;

		case lsym_lbrace:
			process_lbrace(&force_nl, &spaced_expr, hd, di_stack,
			    (int)array_length(di_stack), &decl_ind);
			break;

		case lsym_rbrace:
			process_rbrace(&spaced_expr, &decl_ind, di_stack);
			break;

		case lsym_switch:
			spaced_expr = true;	/* the interesting stuff is done after the
						 * expressions are scanned */
			hd = hd_switch;	/* remember the type of header for later use
					 * by the parser */
			goto copy_token;

		case lsym_for:
			spaced_expr = true;
			hd = hd_for;
			goto copy_token;

		case lsym_if:
			spaced_expr = true;
			hd = hd_if;
			goto copy_token;

		case lsym_while:
			spaced_expr = true;
			hd = hd_while;
			goto copy_token;

		case lsym_do:
			process_do(&force_nl);
			goto copy_token;

		case lsym_else:
			process_else(&force_nl);
			last_else = true;
			goto copy_token;

		case lsym_typedef:
		case lsym_storage_class:
			out.blank_line_before = false;
			goto copy_token;

		case lsym_tag:
			/* inside parentheses, a tag does not start a declaration */
			if (ps.nparen > 0)
				goto copy_token;
			/* FALLTHROUGH */
		case lsym_type_outside_parentheses:
			process_type(&decl_ind, &tabs_to_var);
			goto copy_token;

		case lsym_type_in_parentheses:
		case lsym_offsetof:
		case lsym_sizeof:
		case lsym_word:
		case lsym_funcname:
		case lsym_return:
			process_ident(lsym, decl_ind, tabs_to_var, &spaced_expr,
			    &force_nl, hd);
		/* shared by all tokens that are copied verbatim to the code */
	copy_token:
			copy_token();
			if (lsym != lsym_funcname)
				ps.want_blank = true;
			break;

		case lsym_period:
			process_period();
			break;

		case lsym_comma:
			process_comma(decl_ind, tabs_to_var, &force_nl);
			break;

		case lsym_preprocessing:
			process_preprocessing();
			break;

		case lsym_comment:
			process_comment();
			break;

		default:
			break;
		}

		*code.e = '\0';
		/* these tokens are transparent to the tokens that follow them */
		if (lsym != lsym_comment && lsym != lsym_newline &&
		    lsym != lsym_preprocessing)
			ps.prev_token = lsym;
	}
}
1243
/*
 * The entry point of the program: set up the global state, apply the
 * options from the profiles and then from the command line, and format the
 * input.  main_loop() is declared as not returning; the program exits from
 * process_eof().
 */
int
main(int argc, char **argv)
{
	main_init_globals();
	main_load_profiles(argc, argv);
	main_parse_command_line(argc, argv);
	main_prepare_parsing();
	main_loop();
}
1253
1254 #ifdef debug
1255 void
1256 debug_printf(const char *fmt, ...)
1257 {
1258 FILE *f = output == stdout ? stderr : stdout;
1259 va_list ap;
1260
1261 va_start(ap, fmt);
1262 vfprintf(f, fmt, ap);
1263 va_end(ap);
1264 }
1265
1266 void
1267 debug_println(const char *fmt, ...)
1268 {
1269 FILE *f = output == stdout ? stderr : stdout;
1270 va_list ap;
1271
1272 va_start(ap, fmt);
1273 vfprintf(f, fmt, ap);
1274 va_end(ap);
1275 fprintf(f, "\n");
1276 }
1277
/*
 * Print the text from 's' up to but excluding 'e' in a readable form,
 * enclosed in 'prefix' and 'suffix'.
 *
 * Backslashes and double quotes are escaped with a backslash, newline and
 * tab are written as \n and \t, all other non-printable characters as
 * hexadecimal escapes.
 */
void
debug_vis_range(const char *prefix, const char *s, const char *e,
    const char *suffix)
{
	const char *p = s;

	debug_printf("%s", prefix);
	while (p < e) {
		char ch = *p++;
		unsigned char uch = (unsigned char)ch;

		if (ch == '\\' || ch == '"')
			debug_printf("\\%c", ch);
		else if (isprint(uch))
			debug_printf("%c", ch);
		else if (ch == '\n')
			debug_printf("\\n");
		else if (ch == '\t')
			debug_printf("\\t");
		else
			debug_printf("\\x%02x", uch);
	}
	debug_printf("%s", suffix);
}
1297 #endif
1298
/*
 * Return the given pointer if it is not null.  Otherwise terminate the
 * program with EXIT_FAILURE, reporting the current errno.
 */
static void *
nonnull(void *p)
{
	if (p != NULL)
		return p;
	err(EXIT_FAILURE, NULL);
	/* NOTREACHED */
}
1306
/* Like malloc, but terminate the program if the allocation fails. */
void *
xmalloc(size_t size)
{
	void *mem = malloc(size);

	return nonnull(mem);
}
1312
/* Like realloc, but terminate the program if the allocation fails. */
void *
xrealloc(void *p, size_t new_size)
{
	void *mem = realloc(p, new_size);

	return nonnull(mem);
}
1318
/* Like strdup, but terminate the program if the allocation fails. */
char *
xstrdup(const char *s)
{
	char *copy = strdup(s);

	return nonnull(copy);
}
1324