indent.c revision 1.338 1 /* $NetBSD: indent.c,v 1.338 2023/06/07 15:46:11 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: indent.c,v 1.338 2023/06/07 15:46:11 rillig Exp $");
42
43 #include <sys/param.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include <stdarg.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51
52 #include "indent.h"
53
54 struct options opt = {
55 .brace_same_line = true,
56 .comment_delimiter_on_blankline = true,
57 .cuddle_else = true,
58 .comment_column = 33,
59 .decl_indent = 16,
60 .else_if_in_same_line = true,
61 .function_brace_split = true,
62 .format_col1_comments = true,
63 .format_block_comments = true,
64 .indent_parameters = true,
65 .indent_size = 8,
66 .local_decl_indent = -1,
67 .lineup_to_parens = true,
68 .procnames_start_line = true,
69 .star_comment_cont = true,
70 .tabsize = 8,
71 .max_line_length = 78,
72 .use_tabs = true,
73 };
74
75 struct parser_state ps;
76
77 struct buffer token;
78
79 struct buffer lab;
80 struct buffer code;
81 struct buffer com;
82
83 bool found_err;
84 bool had_eof;
85 int line_no = 1;
86 enum indent_enabled indent_enabled;
87
88 static int ifdef_level;
89 static struct parser_state state_stack[5];
90
91 FILE *input;
92 FILE *output;
93
94 static const char *in_name = "Standard Input";
95 static const char *out_name = "Standard Output";
96 static const char *backup_suffix = ".BAK";
97 static char bakfile[MAXPATHLEN] = "";
98
99
/*
 * Pass through a pointer that must not be NULL.  A NULL pointer terminates
 * the program via err() with EXIT_FAILURE, which makes this a convenient
 * wrapper around allocation functions.
 */
void *
nonnull(void *p)
{
	if (p != NULL)
		return p;
	err(EXIT_FAILURE, NULL);
	/* NOTREACHED */
}
107
108 static void
109 buf_expand(struct buffer *buf, size_t add_size)
110 {
111 buf->cap = buf->cap + add_size + 400;
112 buf->s = nonnull(realloc(buf->s, buf->cap));
113 }
114
115 void
116 buf_add_char(struct buffer *buf, char ch)
117 {
118 if (buf->len == buf->cap)
119 buf_expand(buf, 1);
120 buf->s[buf->len++] = ch;
121 }
122
123 void
124 buf_add_chars(struct buffer *buf, const char *s, size_t len)
125 {
126 if (len == 0)
127 return;
128 if (len > buf->cap - buf->len)
129 buf_expand(buf, len);
130 memcpy(buf->s + buf->len, s, len);
131 buf->len += len;
132 }
133
134 static void
135 buf_add_buf(struct buffer *buf, const struct buffer *add)
136 {
137 buf_add_chars(buf, add->s, add->len);
138 }
139
140 void
141 diag(int level, const char *msg, ...)
142 {
143 va_list ap;
144
145 if (level != 0)
146 found_err = true;
147
148 va_start(ap, msg);
149 fprintf(stderr, "%s: %s:%d: ",
150 level == 0 ? "warning" : "error", in_name, line_no);
151 vfprintf(stderr, msg, ap);
152 fprintf(stderr, "\n");
153 va_end(ap);
154 }
155
/*
 * Return the indentation that results from starting at indentation 'ind'
 * and then writing the 'len' characters at 's'.  A newline resets the
 * indentation to 0, a tab advances to the next tab stop, a backspace
 * steps back by one, and every other character counts as one column.
 */
int
ind_add(int ind, const char *s, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		switch (s[i]) {
		case '\n':
			ind = 0;
			break;
		case '\t':
			ind = next_tab(ind);
			break;
		case '\b':
			ind--;
			break;
		default:
			ind++;
			break;
		}
	}
	return ind;
}
175
176 static void
177 init_globals(void)
178 {
179 ps.psyms.sym[0] = psym_stmt_list;
180 ps.prev_lsym = lsym_semicolon;
181 ps.next_col_1 = true;
182 ps.lbrace_kind = psym_lbrace_block;
183
184 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
185 if (suffix != NULL)
186 backup_suffix = suffix;
187 }
188
189 /*
190 * Copy the input file to the backup file, then make the backup file the input
191 * and the original input file the output.
192 */
193 static void
194 bakcopy(void)
195 {
196 ssize_t n;
197 int bak_fd;
198 char buff[8 * 1024];
199
200 const char *last_slash = strrchr(in_name, '/');
201 snprintf(bakfile, sizeof(bakfile), "%s%s",
202 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
203
204 /* copy in_name to backup file */
205 bak_fd = creat(bakfile, 0600);
206 if (bak_fd < 0)
207 err(1, "%s", bakfile);
208
209 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
210 if (write(bak_fd, buff, (size_t)n) != n)
211 err(1, "%s", bakfile);
212 if (n < 0)
213 err(1, "%s", in_name);
214
215 close(bak_fd);
216 (void)fclose(input);
217
218 /* re-open backup file as the input file */
219 input = fopen(bakfile, "r");
220 if (input == NULL)
221 err(1, "%s", bakfile);
222 /* now the original input file will be the output */
223 output = fopen(in_name, "w");
224 if (output == NULL) {
225 unlink(bakfile);
226 err(1, "%s", in_name);
227 }
228 }
229
/*
 * Scan the command line for the options that affect the profile files and
 * then load the profiles.  If "-npro" is given, no profile is loaded at
 * all.  The last option of the form "-Pname" selects the profile name that
 * is passed to load_profile_files(); without such an option, NULL is
 * passed.
 */
static void
load_profiles(int argc, char **argv)
{
	const char *profile_name = NULL;

	for (int i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (strcmp(arg, "-npro") == 0)
			return;
		if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
			profile_name = &arg[2];
	}

	load_profile_files(profile_name);
}
246
247 static void
248 parse_command_line(int argc, char **argv)
249 {
250 for (int i = 1; i < argc; ++i) {
251 const char *arg = argv[i];
252
253 if (arg[0] == '-') {
254 set_option(arg, "Command line");
255
256 } else if (input == NULL) {
257 in_name = arg;
258 if ((input = fopen(in_name, "r")) == NULL)
259 err(1, "%s", in_name);
260
261 } else if (output == NULL) {
262 out_name = arg;
263 if (strcmp(in_name, out_name) == 0)
264 errx(1, "input and output files "
265 "must be different");
266 if ((output = fopen(out_name, "w")) == NULL)
267 err(1, "%s", out_name);
268
269 } else
270 errx(1, "too many arguments: %s", arg);
271 }
272
273 if (input == NULL) {
274 input = stdin;
275 output = stdout;
276 } else if (output == NULL) {
277 out_name = in_name;
278 bakcopy();
279 }
280
281 if (opt.comment_column <= 1)
282 opt.comment_column = 2; /* don't put normal comments in column
283 * 1, see opt.format_col1_comments */
284 if (opt.block_comment_max_line_length <= 0)
285 opt.block_comment_max_line_length = opt.max_line_length;
286 if (opt.local_decl_indent < 0)
287 opt.local_decl_indent = opt.decl_indent;
288 if (opt.decl_comment_column <= 0)
289 opt.decl_comment_column = opt.left_justify_decl
290 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
291 : opt.comment_column;
292 if (opt.continuation_indent == 0)
293 opt.continuation_indent = opt.indent_size;
294 }
295
296 static void
297 set_initial_indentation(void)
298 {
299 inp_read_line();
300
301 int ind = 0;
302 for (const char *p = inp_p;; p++) {
303 if (*p == ' ')
304 ind++;
305 else if (*p == '\t')
306 ind = next_tab(ind);
307 else
308 break;
309 }
310
311 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
312 }
313
314 static void
315 code_add_decl_indent(int decl_ind, bool tabs_to_var)
316 {
317 int base = ps.ind_level * opt.indent_size;
318 int ind = base + (int)code.len;
319 int target = base + decl_ind;
320 size_t orig_code_len = code.len;
321
322 if (tabs_to_var)
323 for (int next; (next = next_tab(ind)) <= target; ind = next)
324 buf_add_char(&code, '\t');
325
326 for (; ind < target; ind++)
327 buf_add_char(&code, ' ');
328
329 if (code.len == orig_code_len && ps.want_blank) {
330 buf_add_char(&code, ' ');
331 ps.want_blank = false;
332 }
333 }
334
335 static void
336 update_ps_decl_ptr(lexer_symbol lsym)
337 {
338 if (lsym == lsym_semicolon
339 || lsym == lsym_lbrace
340 || lsym == lsym_rbrace
341 || (lsym == lsym_lparen && ps.prev_lsym != lsym_sizeof)
342 || (lsym == lsym_comma && ps.in_decl)
343 || lsym == lsym_modifier)
344 ps.decl_ptr = dp_start;
345 else if (ps.decl_ptr == dp_start && lsym == lsym_word)
346 ps.decl_ptr = dp_word;
347 else if ((ps.decl_ptr == dp_word || ps.decl_ptr == dp_word_asterisk)
348 && (lsym == lsym_unary_op && token.s[0] == '*'))
349 ps.decl_ptr = dp_word_asterisk;
350 else
351 ps.decl_ptr = dp_other;
352 }
353
354 static void
355 update_ps_prev_tag(lexer_symbol lsym)
356 {
357 if (lsym == lsym_tag) {
358 ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct :
359 token.s[0] == 'u' ? psym_lbrace_union :
360 psym_lbrace_enum;
361 } else if (lsym != lsym_type_outside_parentheses
362 && lsym != lsym_word
363 && lsym != lsym_lbrace)
364 ps.lbrace_kind = psym_lbrace_block;
365 }
366
367 static int
368 process_eof(void)
369 {
370 if (lab.len > 0 || code.len > 0 || com.len > 0)
371 output_line();
372 if (indent_enabled != indent_on) {
373 indent_enabled = indent_last_off_line;
374 output_line();
375 }
376
377 if (ps.psyms.top > 1) /* check for balanced braces */
378 diag(1, "Stuff missing from end of file");
379
380 fflush(output);
381 return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
382 }
383
384 static void
385 maybe_break_line(lexer_symbol lsym)
386 {
387 if (!ps.force_nl)
388 return;
389 if (lsym == lsym_semicolon)
390 return;
391 if (lsym == lsym_lbrace && opt.brace_same_line
392 && ps.prev_lsym != lsym_lbrace)
393 return;
394
395 output_line();
396 ps.force_nl = false;
397 }
398
399 static void
400 move_com_to_code(lexer_symbol lsym)
401 {
402 if (ps.want_blank)
403 buf_add_char(&code, ' ');
404 buf_add_buf(&code, &com);
405 com.len = 0;
406 ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket;
407 }
408
409 static void
410 process_newline(void)
411 {
412 if (ps.prev_lsym == lsym_comma
413 && ps.nparen == 0 && !ps.block_init
414 && !opt.break_after_comma && ps.break_after_comma
415 && lab.len == 0 /* for preprocessing lines */
416 && com.len == 0)
417 goto stay_in_line;
418 if (ps.psyms.sym[ps.psyms.top] == psym_switch_expr
419 && opt.brace_same_line) {
420 ps.force_nl = true;
421 goto stay_in_line;
422 }
423
424 output_line();
425
426 stay_in_line:
427 ++line_no;
428 }
429
430 static bool
431 is_function_pointer_declaration(void)
432 {
433 return ps.in_decl
434 && !ps.block_init
435 && !ps.decl_indent_done
436 && !ps.is_function_definition
437 && ps.line_start_nparen == 0;
438 }
439
440 static bool
441 want_blank_before_lparen(void)
442 {
443 if (!ps.want_blank)
444 return false;
445 if (opt.proc_calls_space)
446 return true;
447 if (ps.prev_lsym == lsym_rparen || ps.prev_lsym == lsym_rbracket)
448 return false;
449 if (ps.prev_lsym == lsym_offsetof)
450 return false;
451 if (ps.prev_lsym == lsym_sizeof)
452 return opt.blank_after_sizeof;
453 if (ps.prev_lsym == lsym_word || ps.prev_lsym == lsym_funcname)
454 return false;
455 return true;
456 }
457
458 static void
459 process_lparen(void)
460 {
461 if (++ps.nparen == array_length(ps.paren)) {
462 diag(0, "Reached internal limit of %zu unclosed parentheses",
463 array_length(ps.paren));
464 ps.nparen--;
465 }
466
467 if (is_function_pointer_declaration()) {
468 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
469 ps.decl_indent_done = true;
470 } else if (want_blank_before_lparen())
471 buf_add_char(&code, ' ');
472 ps.want_blank = false;
473 buf_add_char(&code, token.s[0]);
474
475 if (opt.extra_expr_indent && !opt.lineup_to_parens
476 && ps.spaced_expr_psym != psym_0 && ps.nparen == 1
477 && opt.continuation_indent == opt.indent_size)
478 ps.extra_expr_indent = eei_yes;
479
480 if (ps.init_or_struct && ps.psyms.top <= 2) {
481 /* A kludge to correctly align function definitions. */
482 parse(psym_stmt);
483 ps.init_or_struct = false;
484 }
485
486 int indent = ind_add(0, code.s, code.len);
487 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0
488 && ps.nparen == 1 && indent < 2 * opt.indent_size)
489 indent = 2 * opt.indent_size;
490
491 enum paren_level_cast cast = cast_unknown;
492 if (ps.prev_lsym == lsym_offsetof
493 || ps.prev_lsym == lsym_sizeof
494 || ps.prev_lsym == lsym_for
495 || ps.prev_lsym == lsym_if
496 || ps.prev_lsym == lsym_switch
497 || ps.prev_lsym == lsym_while
498 || ps.is_function_definition)
499 cast = cast_no;
500
501 ps.paren[ps.nparen - 1].indent = indent;
502 ps.paren[ps.nparen - 1].cast = cast;
503 debug_println("paren_indents[%d] is now %s%d",
504 ps.nparen - 1, paren_level_cast_name[cast], indent);
505 }
506
507 static bool
508 want_blank_before_lbracket(void)
509 {
510 if (code.len == 0)
511 return false;
512 if (ps.prev_lsym == lsym_comma)
513 return true;
514 if (ps.prev_lsym == lsym_binary_op)
515 return true;
516 return false;
517 }
518
519 static void
520 process_lbracket(void)
521 {
522 if (++ps.nparen == array_length(ps.paren)) {
523 diag(0, "Reached internal limit of %zu unclosed parentheses",
524 array_length(ps.paren));
525 ps.nparen--;
526 }
527
528 if (want_blank_before_lbracket())
529 buf_add_char(&code, ' ');
530 ps.want_blank = false;
531 buf_add_char(&code, token.s[0]);
532
533 int indent = ind_add(0, code.s, code.len);
534
535 ps.paren[ps.nparen - 1].indent = indent;
536 ps.paren[ps.nparen - 1].cast = cast_no;
537 debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent);
538 }
539
540 static void
541 process_rparen(void)
542 {
543 if (ps.nparen == 0) {
544 diag(0, "Extra '%c'", *token.s);
545 goto unbalanced;
546 }
547
548 enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
549 if (ps.decl_on_line && !ps.block_init)
550 cast = cast_no;
551
552 if (cast == cast_maybe) {
553 ps.next_unary = true;
554 ps.want_blank = opt.space_after_cast;
555 } else
556 ps.want_blank = true;
557
558 if (code.len == 0)
559 ps.line_start_nparen = ps.nparen;
560
561 unbalanced:
562 buf_add_char(&code, token.s[0]);
563
564 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
565 if (ps.extra_expr_indent == eei_yes)
566 ps.extra_expr_indent = eei_last;
567 ps.force_nl = true;
568 ps.next_unary = true;
569 ps.in_stmt_or_decl = false;
570 parse(ps.spaced_expr_psym);
571 ps.spaced_expr_psym = psym_0;
572 ps.want_blank = true;
573 out.line_kind = lk_stmt_head;
574 }
575 }
576
577 static void
578 process_rbracket(void)
579 {
580 if (ps.nparen == 0) {
581 diag(0, "Extra '%c'", *token.s);
582 goto unbalanced;
583 }
584 --ps.nparen;
585
586 ps.want_blank = true;
587 if (code.len == 0)
588 ps.line_start_nparen = ps.nparen;
589
590 unbalanced:
591 buf_add_char(&code, token.s[0]);
592 }
593
594 static bool
595 want_blank_before_unary_op(void)
596 {
597 if (ps.want_blank)
598 return true;
599 if (token.s[0] == '+' || token.s[0] == '-')
600 return code.len > 0 && code.s[code.len - 1] == token.s[0];
601 return false;
602 }
603
604 static void
605 process_unary_op(void)
606 {
607 if (!ps.decl_indent_done && ps.in_decl && !ps.block_init &&
608 !ps.is_function_definition && ps.line_start_nparen == 0) {
609 /* pointer declarations */
610 code_add_decl_indent(ps.decl_ind - (int)token.len,
611 ps.tabs_to_var);
612 ps.decl_indent_done = true;
613 } else if (want_blank_before_unary_op())
614 buf_add_char(&code, ' ');
615
616 buf_add_buf(&code, &token);
617 ps.want_blank = false;
618 }
619
620 static void
621 process_binary_op(void)
622 {
623 if (code.len > 0 && ps.want_blank)
624 buf_add_char(&code, ' ');
625 buf_add_buf(&code, &token);
626 ps.want_blank = true;
627 }
628
629 static void
630 process_postfix_op(void)
631 {
632 buf_add_buf(&code, &token);
633 ps.want_blank = true;
634 }
635
636 static void
637 process_question(void)
638 {
639 ps.quest_level++;
640 if (code.len == 0) {
641 ps.in_stmt_cont = true;
642 ps.in_stmt_or_decl = true;
643 ps.in_decl = false;
644 }
645 if (ps.want_blank)
646 buf_add_char(&code, ' ');
647 buf_add_char(&code, '?');
648 ps.want_blank = true;
649 }
650
651 static void
652 process_colon_question(void)
653 {
654 if (code.len == 0) {
655 ps.in_stmt_cont = true;
656 ps.in_stmt_or_decl = true;
657 ps.in_decl = false;
658 }
659 if (ps.want_blank)
660 buf_add_char(&code, ' ');
661 buf_add_char(&code, ':');
662 ps.want_blank = true;
663 }
664
665 static void
666 process_colon_label(void)
667 {
668 buf_add_buf(&lab, &code);
669 buf_add_char(&lab, ':');
670 code.len = 0;
671
672 if (ps.seen_case)
673 out.line_kind = lk_case_or_default;
674 ps.in_stmt_or_decl = false;
675 ps.force_nl = ps.seen_case;
676 ps.seen_case = false;
677 ps.want_blank = false;
678 }
679
680 static void
681 process_colon_other(void)
682 {
683 buf_add_char(&code, ':');
684 ps.want_blank = false;
685 }
686
687 static void
688 process_semicolon(void)
689 {
690 if (out.line_kind == lk_stmt_head)
691 out.line_kind = lk_other;
692 if (ps.decl_level == 0)
693 ps.init_or_struct = false;
694 ps.seen_case = false; /* only needs to be reset on error */
695 ps.quest_level = 0; /* only needs to be reset on error */
696 if (ps.prev_lsym == lsym_rparen)
697 ps.in_func_def_params = false;
698 ps.block_init = false;
699 ps.block_init_level = 0;
700 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;
701
702 if (ps.in_decl && code.len == 0 && !ps.block_init &&
703 !ps.decl_indent_done && ps.line_start_nparen == 0) {
704 /* indent stray semicolons in declarations */
705 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
706 ps.decl_indent_done = true;
707 }
708
709 ps.in_decl = ps.decl_level > 0; /* if we were in a first level
710 * structure declaration before, we
711 * aren't anymore */
712
713 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
714 /* There were unbalanced parentheses in the statement. It is a
715 * bit complicated, because the semicolon might be in a for
716 * statement. */
717 diag(1, "Unbalanced parentheses");
718 ps.nparen = 0;
719 if (ps.spaced_expr_psym != psym_0) {
720 parse(ps.spaced_expr_psym);
721 ps.spaced_expr_psym = psym_0;
722 }
723 }
724 buf_add_char(&code, ';');
725 ps.want_blank = true;
726 ps.in_stmt_or_decl = ps.nparen > 0;
727 ps.decl_ind = 0;
728
729 if (ps.spaced_expr_psym == psym_0) {
730 parse(psym_stmt);
731 ps.force_nl = true;
732 }
733 }
734
735 static void
736 process_lbrace(void)
737 {
738 parser_symbol psym = ps.psyms.sym[ps.psyms.top];
739 if (ps.prev_lsym == lsym_rparen
740 && ps.psyms.top >= 2
741 && !(psym == psym_for_exprs || psym == psym_if_expr
742 || psym == psym_switch_expr || psym == psym_while_expr)) {
743 ps.block_init = true;
744 ps.init_or_struct = true;
745 }
746
747 if (out.line_kind == lk_stmt_head)
748 out.line_kind = lk_other;
749
750 ps.in_stmt_or_decl = false; /* don't indent the {} */
751
752 if (!ps.block_init)
753 ps.force_nl = true;
754 else
755 ps.block_init_level++;
756
757 if (code.len > 0 && !ps.block_init) {
758 if (!opt.brace_same_line ||
759 (code.len > 0 && code.s[code.len - 1] == '}'))
760 output_line();
761 else if (ps.in_func_def_params && !ps.init_or_struct) {
762 ps.ind_level_follow = 0;
763 if (opt.function_brace_split)
764 output_line();
765 else
766 ps.want_blank = true;
767 }
768 }
769
770 if (ps.nparen > 0) {
771 diag(1, "Unbalanced parentheses");
772 ps.nparen = 0;
773 if (ps.spaced_expr_psym != psym_0) {
774 parse(ps.spaced_expr_psym);
775 ps.spaced_expr_psym = psym_0;
776 ps.ind_level = ps.ind_level_follow;
777 }
778 }
779
780 if (code.len == 0)
781 ps.in_stmt_cont = false; /* don't indent the '{' itself
782 */
783 if (ps.in_decl && ps.init_or_struct) {
784 ps.di_stack[ps.decl_level] = ps.decl_ind;
785 if (++ps.decl_level == (int)array_length(ps.di_stack)) {
786 diag(0, "Reached internal limit of %d struct levels",
787 (int)array_length(ps.di_stack));
788 ps.decl_level--;
789 }
790 } else {
791 ps.decl_on_line = false; /* we can't be in the middle of
792 * a declaration, so don't do
793 * special indentation of
794 * comments */
795 ps.in_func_def_params = false;
796 ps.in_decl = false;
797 }
798
799 ps.decl_ind = 0;
800 parse(ps.lbrace_kind);
801 if (ps.want_blank)
802 buf_add_char(&code, ' ');
803 ps.want_blank = false;
804 buf_add_char(&code, '{');
805 ps.declaration = decl_no;
806 }
807
808 static void
809 process_rbrace(void)
810 {
811 if (ps.nparen > 0) { /* check for unclosed if, for, else. */
812 diag(1, "Unbalanced parentheses");
813 ps.nparen = 0;
814 ps.spaced_expr_psym = psym_0;
815 }
816
817 ps.declaration = decl_no;
818 if (ps.block_init_level > 0)
819 ps.block_init_level--;
820
821 if (code.len > 0 && !ps.block_init)
822 output_line();
823
824 buf_add_char(&code, '}');
825 ps.want_blank = true;
826 ps.in_stmt_or_decl = false;
827 ps.in_stmt_cont = false;
828
829 if (ps.decl_level > 0) { /* multi-level structure declaration */
830 ps.decl_ind = ps.di_stack[--ps.decl_level];
831 if (ps.decl_level == 0 && !ps.in_func_def_params) {
832 ps.declaration = decl_begin;
833 ps.decl_ind = ps.ind_level == 0
834 ? opt.decl_indent : opt.local_decl_indent;
835 }
836 ps.in_decl = true;
837 }
838
839 if (ps.psyms.top == 2)
840 out.line_kind = lk_func_end;
841
842 parse(psym_rbrace);
843
844 if (!ps.init_or_struct
845 && ps.psyms.sym[ps.psyms.top] != psym_do_stmt
846 && ps.psyms.sym[ps.psyms.top] != psym_if_expr_stmt)
847 ps.force_nl = true;
848 }
849
850 static void
851 process_do(void)
852 {
853 ps.in_stmt_or_decl = false;
854 ps.in_decl = false;
855
856 if (code.len > 0)
857 output_line();
858
859 ps.force_nl = true;
860 parse(psym_do);
861 }
862
863 static void
864 process_else(void)
865 {
866 ps.in_stmt_or_decl = false;
867
868 if (code.len > 0
869 && !(opt.cuddle_else && code.s[code.len - 1] == '}'))
870 output_line();
871
872 ps.force_nl = true;
873 parse(psym_else);
874 }
875
876 static void
877 process_type(void)
878 {
879 parse(psym_decl); /* let the parser worry about indentation */
880
881 if (ps.prev_lsym == lsym_rparen && ps.psyms.top <= 1) {
882 if (code.len > 0)
883 output_line();
884 }
885
886 if (ps.in_func_def_params && opt.indent_parameters &&
887 ps.decl_level == 0) {
888 ps.ind_level = ps.ind_level_follow = 1;
889 ps.in_stmt_cont = false;
890 }
891
892 ps.init_or_struct = /* maybe */ true;
893 ps.in_decl = ps.decl_on_line = ps.prev_lsym != lsym_typedef;
894 if (ps.decl_level <= 0)
895 ps.declaration = decl_begin;
896
897 int len = (int)token.len + 1;
898 int ind = ps.ind_level == 0 || ps.decl_level > 0
899 ? opt.decl_indent /* global variable or local member */
900 : opt.local_decl_indent; /* local variable */
901 ps.decl_ind = ind > 0 ? ind : len;
902 ps.tabs_to_var = opt.use_tabs && ind > 0;
903 }
904
905 static void
906 process_ident(lexer_symbol lsym)
907 {
908 if (ps.in_decl) {
909 if (lsym == lsym_funcname) {
910 ps.in_decl = false;
911 if (opt.procnames_start_line && code.len > 0)
912 output_line();
913 else if (ps.want_blank)
914 buf_add_char(&code, ' ');
915 ps.want_blank = false;
916
917 } else if (!ps.block_init && !ps.decl_indent_done &&
918 ps.line_start_nparen == 0) {
919 if (opt.decl_indent == 0
920 && code.len > 0 && code.s[code.len - 1] == '}')
921 ps.decl_ind =
922 ind_add(0, code.s, code.len) + 1;
923 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
924 ps.decl_indent_done = true;
925 ps.want_blank = false;
926 }
927
928 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
929 ps.force_nl = true;
930 ps.next_unary = true;
931 ps.in_stmt_or_decl = false;
932 parse(ps.spaced_expr_psym);
933 ps.spaced_expr_psym = psym_0;
934 }
935 }
936
937 static void
938 process_period(void)
939 {
940 if (code.len > 0 && code.s[code.len - 1] == ',')
941 buf_add_char(&code, ' ');
942 buf_add_char(&code, '.');
943 ps.want_blank = false;
944 }
945
946 static void
947 process_comma(void)
948 {
949 ps.want_blank = code.len > 0; /* only put blank after comma if comma
950 * does not start the line */
951
952 if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
953 !ps.decl_indent_done && ps.line_start_nparen == 0) {
954 /* indent leading commas and not the actual identifiers */
955 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
956 ps.decl_indent_done = true;
957 }
958
959 buf_add_char(&code, ',');
960
961 if (ps.nparen == 0) {
962 if (ps.block_init_level == 0)
963 ps.block_init = false;
964 int typical_varname_length = 8;
965 if (ps.break_after_comma && (opt.break_after_comma ||
966 ind_add(compute_code_indent(), code.s, code.len)
967 >= opt.max_line_length - typical_varname_length))
968 ps.force_nl = true;
969 }
970 }
971
972 /* move the whole line to the 'label' buffer */
973 static void
974 read_preprocessing_line(void)
975 {
976 enum {
977 PLAIN, STR, CHR, COMM
978 } state = PLAIN;
979
980 buf_add_char(&lab, '#');
981
982 while (inp_p[0] != '\n' || (state == COMM && !had_eof)) {
983 buf_add_char(&lab, inp_next());
984 switch (lab.s[lab.len - 1]) {
985 case '\\':
986 if (state != COMM)
987 buf_add_char(&lab, inp_next());
988 break;
989 case '/':
990 if (inp_p[0] == '*' && state == PLAIN) {
991 state = COMM;
992 buf_add_char(&lab, *inp_p++);
993 }
994 break;
995 case '"':
996 if (state == STR)
997 state = PLAIN;
998 else if (state == PLAIN)
999 state = STR;
1000 break;
1001 case '\'':
1002 if (state == CHR)
1003 state = PLAIN;
1004 else if (state == PLAIN)
1005 state = CHR;
1006 break;
1007 case '*':
1008 if (inp_p[0] == '/' && state == COMM) {
1009 state = PLAIN;
1010 buf_add_char(&lab, *inp_p++);
1011 }
1012 break;
1013 }
1014 }
1015
1016 while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1]))
1017 lab.len--;
1018 }
1019
1020 static void
1021 process_preprocessing(void)
1022 {
1023 if (lab.len > 0 || code.len > 0 || com.len > 0)
1024 output_line();
1025
1026 read_preprocessing_line();
1027
1028 const char *end = lab.s + lab.len;
1029 const char *dir = lab.s + 1;
1030 while (dir < end && ch_isblank(*dir))
1031 dir++;
1032 size_t dir_len = 0;
1033 while (dir + dir_len < end && ch_isalpha(dir[dir_len]))
1034 dir_len++;
1035
1036 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) {
1037 if ((size_t)ifdef_level < array_length(state_stack))
1038 state_stack[ifdef_level++] = ps;
1039 else
1040 diag(1, "#if stack overflow");
1041 out.line_kind = lk_if;
1042
1043 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) {
1044 if (ifdef_level <= 0)
1045 diag(1, dir[2] == 'i'
1046 ? "Unmatched #elif" : "Unmatched #else");
1047 else
1048 ps = state_stack[ifdef_level - 1];
1049
1050 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) {
1051 if (ifdef_level <= 0)
1052 diag(1, "Unmatched #endif");
1053 else
1054 ifdef_level--;
1055 out.line_kind = lk_endif;
1056 }
1057
1058 /* subsequent processing of the newline character will cause the line
1059 * to be printed */
1060 }
1061
1062 static void
1063 process_lsym(lexer_symbol lsym)
1064 {
1065 switch (lsym) {
1066
1067 case lsym_newline:
1068 process_newline();
1069 break;
1070
1071 case lsym_lparen:
1072 process_lparen();
1073 break;
1074
1075 case lsym_lbracket:
1076 process_lbracket();
1077 break;
1078
1079 case lsym_rparen:
1080 process_rparen();
1081 break;
1082
1083 case lsym_rbracket:
1084 process_rbracket();
1085 break;
1086
1087 case lsym_unary_op:
1088 process_unary_op();
1089 break;
1090
1091 case lsym_binary_op:
1092 process_binary_op();
1093 break;
1094
1095 case lsym_postfix_op:
1096 process_postfix_op();
1097 break;
1098
1099 case lsym_question:
1100 process_question();
1101 break;
1102
1103 case lsym_case:
1104 case lsym_default:
1105 ps.seen_case = true;
1106 goto copy_token;
1107
1108 case lsym_colon_question:
1109 process_colon_question();
1110 break;
1111
1112 case lsym_colon_label:
1113 process_colon_label();
1114 break;
1115
1116 case lsym_colon_other:
1117 process_colon_other();
1118 break;
1119
1120 case lsym_semicolon:
1121 process_semicolon();
1122 break;
1123
1124 case lsym_lbrace:
1125 process_lbrace();
1126 break;
1127
1128 case lsym_rbrace:
1129 process_rbrace();
1130 break;
1131
1132 case lsym_switch:
1133 ps.spaced_expr_psym = psym_switch_expr;
1134 goto copy_token;
1135
1136 case lsym_for:
1137 ps.spaced_expr_psym = psym_for_exprs;
1138 goto copy_token;
1139
1140 case lsym_if:
1141 ps.spaced_expr_psym = psym_if_expr;
1142 goto copy_token;
1143
1144 case lsym_while:
1145 ps.spaced_expr_psym = psym_while_expr;
1146 goto copy_token;
1147
1148 case lsym_do:
1149 process_do();
1150 goto copy_token;
1151
1152 case lsym_else:
1153 process_else();
1154 goto copy_token;
1155
1156 case lsym_typedef:
1157 case lsym_modifier:
1158 goto copy_token;
1159
1160 case lsym_tag:
1161 if (ps.nparen > 0)
1162 goto copy_token;
1163 /* FALLTHROUGH */
1164 case lsym_type_outside_parentheses:
1165 process_type();
1166 goto copy_token;
1167
1168 case lsym_type_in_parentheses:
1169 case lsym_offsetof:
1170 case lsym_sizeof:
1171 case lsym_word:
1172 case lsym_funcname:
1173 case lsym_return:
1174 process_ident(lsym);
1175 copy_token:
1176 if (ps.want_blank)
1177 buf_add_char(&code, ' ');
1178 buf_add_buf(&code, &token);
1179 if (lsym != lsym_funcname)
1180 ps.want_blank = true;
1181 break;
1182
1183 case lsym_period:
1184 process_period();
1185 break;
1186
1187 case lsym_comma:
1188 process_comma();
1189 break;
1190
1191 case lsym_preprocessing:
1192 process_preprocessing();
1193 break;
1194
1195 case lsym_comment:
1196 process_comment();
1197 break;
1198
1199 default:
1200 break;
1201 }
1202 }
1203
1204 static int
1205 indent(void)
1206 {
1207 debug_parser_state();
1208
1209 for (;;) { /* loop until we reach eof */
1210 lexer_symbol lsym = lexi();
1211
1212 debug_blank_line();
1213 debug_printf("line %d: %s", line_no, lsym_name[lsym]);
1214 debug_print_buf("token", &token);
1215 debug_buffers();
1216 debug_blank_line();
1217
1218 if (lsym == lsym_eof)
1219 return process_eof();
1220
1221 if (lsym == lsym_if && ps.prev_lsym == lsym_else
1222 && opt.else_if_in_same_line)
1223 ps.force_nl = false;
1224
1225 if (lsym == lsym_newline || lsym == lsym_preprocessing)
1226 ps.force_nl = false;
1227 else if (lsym == lsym_comment) {
1228 /* no special processing */
1229 } else {
1230 maybe_break_line(lsym);
1231 /*
1232 * Add an extra level of indentation; turned off again
1233 * by a ';' or '}'.
1234 */
1235 ps.in_stmt_or_decl = true;
1236 if (com.len > 0)
1237 move_com_to_code(lsym);
1238 update_ps_decl_ptr(lsym);
1239 update_ps_prev_tag(lsym);
1240 }
1241
1242 process_lsym(lsym);
1243
1244 debug_parser_state();
1245
1246 if (lsym != lsym_comment && lsym != lsym_newline &&
1247 lsym != lsym_preprocessing)
1248 ps.prev_lsym = lsym;
1249 }
1250 }
1251
/*
 * The entry point: set up the global state, load the profile files, apply
 * the command line, take the initial indentation from the first input
 * line and then format the input.  The exit status is the one that
 * indent() returns.
 */
int
main(int argc, char **argv)
{
	init_globals();
	load_profiles(argc, argv);
	parse_command_line(argc, argv);
	set_initial_indentation();

	int exit_status = indent();
	return exit_status;
}
1261