indent.c revision 1.342 1 /* $NetBSD: indent.c,v 1.342 2023/06/09 07:20:30 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: indent.c,v 1.342 2023/06/09 07:20:30 rillig Exp $");
42
43 #include <sys/param.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include <stdarg.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51
52 #include "indent.h"
53
/*
 * The effective formatting options, initialized to the built-in defaults.
 * Members that are not listed here start out as zero; several of those
 * (and the negative local_decl_indent) are replaced by derived values at the
 * end of parse_command_line.
 */
struct options opt = {
	.brace_same_line = true,
	.comment_delimiter_on_blankline = true,
	.cuddle_else = true,
	.comment_column = 33,
	.decl_indent = 16,
	.else_if_in_same_line = true,
	.function_brace_split = true,
	.format_col1_comments = true,
	.format_block_comments = true,
	.indent_parameters = true,
	.indent_size = 8,
	.local_decl_indent = -1,	/* negative: take over decl_indent */
	.lineup_to_parens = true,
	.procnames_start_line = true,
	.star_comment_cont = true,
	.tabsize = 8,
	.max_line_length = 78,
	.use_tabs = true,
};
74
/*
 * The current parser state.  A copy is saved in state_stack at each '#if'
 * and restored from there at '#else' and '#elif'.
 */
struct parser_state ps;

/* The text of the token that is currently being processed. */
struct buffer token;

/*
 * The parts of the line that is being assembled: 'lab' receives labels and
 * preprocessing lines, 'code' receives the ordinary tokens, and the content
 * of 'com' is moved to 'code' by move_com_to_code.
 */
struct buffer lab;
struct buffer code;
struct buffer com;

/* Set by diag for any non-zero level; determines the exit status. */
bool found_err;
/* Consulted while reading preprocessing lines; set outside this file. */
bool had_eof;
/* The current input line, used in diagnostics. */
int line_no = 1;
enum indent_enabled indent_enabled;

/* The number of parser states that are currently saved in state_stack. */
static int ifdef_level;
/* The parser states saved at the enclosing '#if' lines. */
static struct parser_state state_stack[5];

FILE *input;
FILE *output;

/* The names below are replaced when files are given on the command line. */
static const char *in_name = "Standard Input";
static const char *out_name = "Standard Output";
/* May be overridden by the environment variable SIMPLE_BACKUP_SUFFIX. */
static const char *backup_suffix = ".BAK";
/* The name of the backup file, filled in by bakcopy. */
static char bakfile[MAXPATHLEN] = "";
98
99
/*
 * Pass through a non-null pointer unchanged.  A null pointer terminates the
 * program via err(3), which makes this a convenient wrapper around
 * allocation calls whose failure cannot be handled.
 */
void *
nonnull(void *p)
{
	if (p != NULL)
		return p;

	err(EXIT_FAILURE, NULL);
	/* NOTREACHED */
	return p;
}
107
108 static void
109 buf_expand(struct buffer *buf, size_t add_size)
110 {
111 buf->cap = buf->cap + add_size + 400;
112 buf->s = nonnull(realloc(buf->s, buf->cap));
113 }
114
115 void
116 buf_add_char(struct buffer *buf, char ch)
117 {
118 if (buf->len == buf->cap)
119 buf_expand(buf, 1);
120 buf->s[buf->len++] = ch;
121 }
122
123 void
124 buf_add_chars(struct buffer *buf, const char *s, size_t len)
125 {
126 if (len == 0)
127 return;
128 if (len > buf->cap - buf->len)
129 buf_expand(buf, len);
130 memcpy(buf->s + buf->len, s, len);
131 buf->len += len;
132 }
133
134 static void
135 buf_add_buf(struct buffer *buf, const struct buffer *add)
136 {
137 buf_add_chars(buf, add->s, add->len);
138 }
139
140 void
141 diag(int level, const char *msg, ...)
142 {
143 va_list ap;
144
145 if (level != 0)
146 found_err = true;
147
148 va_start(ap, msg);
149 fprintf(stderr, "%s: %s:%d: ",
150 level == 0 ? "warning" : "error", in_name, line_no);
151 vfprintf(stderr, msg, ap);
152 fprintf(stderr, "\n");
153 va_end(ap);
154 }
155
/*
 * Return the indentation that results from starting at indentation 'ind'
 * and then appending the 'len' characters at 's'.  A newline resets the
 * indentation to 0, a tab advances to the next tab stop, a backspace
 * steps back by one, and every other character advances by one.
 */
int
ind_add(int ind, const char *s, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		switch (s[i]) {
		case '\n':
			ind = 0;
			break;
		case '\t':
			ind = next_tab(ind);
			break;
		case '\b':
			ind--;
			break;
		default:
			ind++;
			break;
		}
	}
	return ind;
}
175
176 static void
177 init_globals(void)
178 {
179 ps.psyms.sym[0] = psym_stmt_list;
180 ps.prev_lsym = lsym_semicolon;
181 ps.next_col_1 = true;
182 ps.lbrace_kind = psym_lbrace_block;
183
184 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
185 if (suffix != NULL)
186 backup_suffix = suffix;
187 }
188
189 /*
190 * Copy the input file to the backup file, then make the backup file the input
191 * and the original input file the output.
192 */
193 static void
194 bakcopy(void)
195 {
196 ssize_t n;
197 int bak_fd;
198 char buff[8 * 1024];
199
200 const char *last_slash = strrchr(in_name, '/');
201 snprintf(bakfile, sizeof(bakfile), "%s%s",
202 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
203
204 /* copy in_name to backup file */
205 bak_fd = creat(bakfile, 0600);
206 if (bak_fd < 0)
207 err(1, "%s", bakfile);
208
209 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
210 if (write(bak_fd, buff, (size_t)n) != n)
211 err(1, "%s", bakfile);
212 if (n < 0)
213 err(1, "%s", in_name);
214
215 close(bak_fd);
216 (void)fclose(input);
217
218 /* re-open backup file as the input file */
219 input = fopen(bakfile, "r");
220 if (input == NULL)
221 err(1, "%s", bakfile);
222 /* now the original input file will be the output */
223 output = fopen(in_name, "w");
224 if (output == NULL) {
225 unlink(bakfile);
226 err(1, "%s", in_name);
227 }
228 }
229
/*
 * Scan the command line for the options that affect profile loading, then
 * load the profile files.  The option "-npro" suppresses loading entirely;
 * the last "-P<name>" option with a non-empty name selects the profile name
 * that is handed to load_profile_files.
 */
static void
load_profiles(int argc, char **argv)
{
	const char *selected = NULL;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i++];

		if (strcmp(arg, "-npro") == 0)
			return;
		if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
			selected = &arg[2];
	}

	load_profile_files(selected);
}
246
247 static void
248 parse_command_line(int argc, char **argv)
249 {
250 for (int i = 1; i < argc; ++i) {
251 const char *arg = argv[i];
252
253 if (arg[0] == '-') {
254 set_option(arg, "Command line");
255
256 } else if (input == NULL) {
257 in_name = arg;
258 if ((input = fopen(in_name, "r")) == NULL)
259 err(1, "%s", in_name);
260
261 } else if (output == NULL) {
262 out_name = arg;
263 if (strcmp(in_name, out_name) == 0)
264 errx(1, "input and output files "
265 "must be different");
266 if ((output = fopen(out_name, "w")) == NULL)
267 err(1, "%s", out_name);
268
269 } else
270 errx(1, "too many arguments: %s", arg);
271 }
272
273 if (input == NULL) {
274 input = stdin;
275 output = stdout;
276 } else if (output == NULL) {
277 out_name = in_name;
278 bakcopy();
279 }
280
281 if (opt.comment_column <= 1)
282 opt.comment_column = 2; /* don't put normal comments in column
283 * 1, see opt.format_col1_comments */
284 if (opt.block_comment_max_line_length <= 0)
285 opt.block_comment_max_line_length = opt.max_line_length;
286 if (opt.local_decl_indent < 0)
287 opt.local_decl_indent = opt.decl_indent;
288 if (opt.decl_comment_column <= 0)
289 opt.decl_comment_column = opt.left_justify_decl
290 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
291 : opt.comment_column;
292 if (opt.continuation_indent == 0)
293 opt.continuation_indent = opt.indent_size;
294 }
295
296 static void
297 set_initial_indentation(void)
298 {
299 inp_read_line();
300
301 int ind = 0;
302 for (const char *p = inp_p;; p++) {
303 if (*p == ' ')
304 ind++;
305 else if (*p == '\t')
306 ind = next_tab(ind);
307 else
308 break;
309 }
310
311 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
312 }
313
314 static void
315 indent_declarator(int decl_ind, bool tabs_to_var)
316 {
317 int base = ps.ind_level * opt.indent_size;
318 int ind = base + (int)code.len;
319 int target = base + decl_ind;
320 size_t orig_code_len = code.len;
321
322 if (tabs_to_var)
323 for (int next; (next = next_tab(ind)) <= target; ind = next)
324 buf_add_char(&code, '\t');
325
326 for (; ind < target; ind++)
327 buf_add_char(&code, ' ');
328
329 if (code.len == orig_code_len && ps.want_blank) {
330 buf_add_char(&code, ' ');
331 ps.want_blank = false;
332 }
333 ps.decl_indent_done = true;
334 }
335
336 static void
337 update_ps_lbrace_kind(lexer_symbol lsym)
338 {
339 if (lsym == lsym_tag) {
340 ps.lbrace_kind = token.s[0] == 's' ? psym_lbrace_struct :
341 token.s[0] == 'u' ? psym_lbrace_union :
342 psym_lbrace_enum;
343 } else if (lsym != lsym_type_outside_parentheses
344 && lsym != lsym_word
345 && lsym != lsym_lbrace)
346 ps.lbrace_kind = psym_lbrace_block;
347 }
348
349 static int
350 process_eof(void)
351 {
352 if (lab.len > 0 || code.len > 0 || com.len > 0)
353 output_line();
354 if (indent_enabled != indent_on) {
355 indent_enabled = indent_last_off_line;
356 output_line();
357 }
358
359 if (ps.psyms.top > 1) /* check for balanced braces */
360 diag(1, "Stuff missing from end of file");
361
362 fflush(output);
363 return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
364 }
365
366 static void
367 maybe_break_line(lexer_symbol lsym)
368 {
369 if (!ps.force_nl)
370 return;
371 if (lsym == lsym_semicolon)
372 return;
373 if (lsym == lsym_lbrace && opt.brace_same_line
374 && ps.prev_lsym != lsym_lbrace)
375 return;
376
377 output_line();
378 ps.force_nl = false;
379 }
380
381 static void
382 move_com_to_code(lexer_symbol lsym)
383 {
384 if (ps.want_blank)
385 buf_add_char(&code, ' ');
386 buf_add_buf(&code, &com);
387 com.len = 0;
388 ps.want_blank = lsym != lsym_rparen && lsym != lsym_rbracket;
389 }
390
391 static void
392 process_newline(void)
393 {
394 if (ps.prev_lsym == lsym_comma
395 && ps.nparen == 0 && !ps.block_init
396 && !opt.break_after_comma && ps.break_after_comma
397 && lab.len == 0 /* for preprocessing lines */
398 && com.len == 0)
399 goto stay_in_line;
400 if (ps.psyms.sym[ps.psyms.top] == psym_switch_expr
401 && opt.brace_same_line) {
402 ps.force_nl = true;
403 goto stay_in_line;
404 }
405
406 output_line();
407
408 stay_in_line:
409 ++line_no;
410 }
411
412 static bool
413 is_function_pointer_declaration(void)
414 {
415 return ps.in_decl
416 && !ps.block_init
417 && !ps.decl_indent_done
418 && !ps.is_function_definition
419 && ps.line_start_nparen == 0;
420 }
421
422 static bool
423 want_blank_before_lparen(void)
424 {
425 if (!ps.want_blank)
426 return false;
427 if (opt.proc_calls_space)
428 return true;
429 if (ps.prev_lsym == lsym_rparen || ps.prev_lsym == lsym_rbracket)
430 return false;
431 if (ps.prev_lsym == lsym_offsetof)
432 return false;
433 if (ps.prev_lsym == lsym_sizeof)
434 return opt.blank_after_sizeof;
435 if (ps.prev_lsym == lsym_word || ps.prev_lsym == lsym_funcname)
436 return false;
437 return true;
438 }
439
/*
 * Handle a '(': open a new parenthesis level, add the character to the code
 * buffer with the appropriate spacing, and record for the new level the
 * indentation and whether its content may be a cast.
 */
static void
process_lparen(void)
{
	/*
	 * On reaching the limit, the increment is undone, so the data
	 * recorded below overwrites the entry of the enclosing level.
	 */
	if (++ps.nparen == array_length(ps.paren)) {
		diag(0, "Reached internal limit of %zu unclosed parentheses",
		    array_length(ps.paren));
		ps.nparen--;
	}

	if (is_function_pointer_declaration())
		indent_declarator(ps.decl_ind, ps.tabs_to_var);
	else if (want_blank_before_lparen())
		buf_add_char(&code, ' ');
	ps.want_blank = false;
	buf_add_char(&code, token.s[0]);

	if (opt.extra_expr_indent && !opt.lineup_to_parens
	    && ps.spaced_expr_psym != psym_0 && ps.nparen == 1
	    && opt.continuation_indent == opt.indent_size)
		ps.extra_expr_indent = eei_yes;

	if (ps.init_or_struct && ps.psyms.top <= 2) {
		/* A kludge to correctly align function definitions. */
		parse(psym_stmt);
		ps.init_or_struct = false;
	}

	/* the width of the code up to and including the '(' */
	int indent = ind_add(0, code.s, code.len);
	/*
	 * With extra_expr_indent, the outermost parenthesis of a pending
	 * spaced expression gets at least twice the indentation size.
	 */
	if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0
	    && ps.nparen == 1 && indent < 2 * opt.indent_size)
		indent = 2 * opt.indent_size;

	/*
	 * After these keywords and in a function definition, the
	 * parenthesized text is marked as not being a cast.
	 */
	enum paren_level_cast cast = cast_unknown;
	if (ps.prev_lsym == lsym_offsetof
	    || ps.prev_lsym == lsym_sizeof
	    || ps.prev_lsym == lsym_for
	    || ps.prev_lsym == lsym_if
	    || ps.prev_lsym == lsym_switch
	    || ps.prev_lsym == lsym_while
	    || ps.is_function_definition)
		cast = cast_no;

	ps.paren[ps.nparen - 1].indent = indent;
	ps.paren[ps.nparen - 1].cast = cast;
	debug_println("paren_indents[%d] is now %s%d",
	    ps.nparen - 1, paren_level_cast_name[cast], indent);
}
487
488 static void
489 process_lbracket(void)
490 {
491 if (++ps.nparen == array_length(ps.paren)) {
492 diag(0, "Reached internal limit of %zu unclosed parentheses",
493 array_length(ps.paren));
494 ps.nparen--;
495 }
496
497 if (code.len > 0
498 && (ps.prev_lsym == lsym_comma || ps.prev_lsym == lsym_binary_op))
499 buf_add_char(&code, ' ');
500 ps.want_blank = false;
501 buf_add_char(&code, token.s[0]);
502
503 int indent = ind_add(0, code.s, code.len);
504
505 ps.paren[ps.nparen - 1].indent = indent;
506 ps.paren[ps.nparen - 1].cast = cast_no;
507 debug_println("paren_indents[%d] is now %d", ps.nparen - 1, indent);
508 }
509
/*
 * Handle a ')': close the innermost parenthesis level, decide on the
 * spacing after it, add the character to the code buffer, and if this ends
 * the parenthesized part of a pending spaced expression, hand that
 * expression to the parser.
 */
static void
process_rparen(void)
{
	/* an unmatched ')' is still copied, only the bookkeeping is skipped */
	if (ps.nparen == 0) {
		diag(0, "Extra '%c'", *token.s);
		goto unbalanced;
	}

	enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
	/* on a declaration line, outside of initializers, nothing is a cast */
	if (ps.decl_on_line && !ps.block_init)
		cast = cast_no;

	if (cast == cast_maybe) {
		/* what follows the possible cast is its operand */
		ps.next_unary = true;
		ps.want_blank = opt.space_after_cast;
	} else
		ps.want_blank = true;

	/* the ')' is the first token of the line */
	if (code.len == 0)
		ps.line_start_nparen = ps.nparen;

unbalanced:
	buf_add_char(&code, token.s[0]);

	/* the outermost parenthesis of a pending spaced expression is closed */
	if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
		if (ps.extra_expr_indent == eei_yes)
			ps.extra_expr_indent = eei_last;
		ps.force_nl = true;
		ps.next_unary = true;
		ps.in_stmt_or_decl = false;
		parse(ps.spaced_expr_psym);
		ps.spaced_expr_psym = psym_0;
		ps.want_blank = true;
		out.line_kind = lk_stmt_head;
	}
}
546
547 static void
548 process_rbracket(void)
549 {
550 if (ps.nparen == 0) {
551 diag(0, "Extra '%c'", *token.s);
552 goto unbalanced;
553 }
554 --ps.nparen;
555
556 ps.want_blank = true;
557 if (code.len == 0)
558 ps.line_start_nparen = ps.nparen;
559
560 unbalanced:
561 buf_add_char(&code, token.s[0]);
562 }
563
564 static void
565 process_unary_op(void)
566 {
567 if (is_function_pointer_declaration()) {
568 int ind = ps.decl_ind - (int)token.len;
569 indent_declarator(ind, ps.tabs_to_var);
570 ps.want_blank = false;
571 } else if ((token.s[0] == '+' || token.s[0] == '-')
572 && code.len > 0 && code.s[code.len - 1] == token.s[0])
573 ps.want_blank = true;
574
575 if (ps.want_blank)
576 buf_add_char(&code, ' ');
577 buf_add_buf(&code, &token);
578 ps.want_blank = false;
579 }
580
581 static void
582 process_postfix_op(void)
583 {
584 buf_add_buf(&code, &token);
585 ps.want_blank = true;
586 }
587
588 static void
589 process_question(void)
590 {
591 ps.quest_level++;
592 if (code.len == 0) {
593 ps.in_stmt_cont = true;
594 ps.in_stmt_or_decl = true;
595 ps.in_decl = false;
596 }
597 }
598
599 static void
600 process_colon_question(void)
601 {
602 if (code.len == 0) {
603 ps.in_stmt_cont = true;
604 ps.in_stmt_or_decl = true;
605 ps.in_decl = false;
606 }
607 }
608
609 static void
610 process_colon_label(void)
611 {
612 buf_add_buf(&lab, &code);
613 buf_add_char(&lab, ':');
614 code.len = 0;
615
616 if (ps.seen_case)
617 out.line_kind = lk_case_or_default;
618 ps.in_stmt_or_decl = false;
619 ps.force_nl = ps.seen_case;
620 ps.seen_case = false;
621 ps.want_blank = false;
622 }
623
624 static void
625 process_colon_other(void)
626 {
627 buf_add_char(&code, ':');
628 ps.want_blank = false;
629 }
630
/*
 * Handle a ';': reset the parts of the parser state that only last for a
 * single statement or declaration, recover from unbalanced parentheses, add
 * the character to the code buffer, and unless the semicolon is inside the
 * header of a spaced expression such as 'for', finish the statement.
 */
static void
process_semicolon(void)
{
	if (out.line_kind == lk_stmt_head)
		out.line_kind = lk_other;
	if (ps.decl_level == 0)
		ps.init_or_struct = false;
	ps.seen_case = false;	/* only needs to be reset on error */
	ps.quest_level = 0;	/* only needs to be reset on error */
	if (ps.prev_lsym == lsym_rparen)
		ps.in_func_def_params = false;
	ps.block_init = false;
	ps.block_init_level = 0;
	ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;

	/*
	 * ps.block_init was cleared just above, therefore the test for
	 * !ps.block_init is always true here.
	 */
	if (ps.in_decl && code.len == 0 && !ps.block_init &&
	    !ps.decl_indent_done && ps.line_start_nparen == 0) {
		/* indent stray semicolons in declarations */
		indent_declarator(ps.decl_ind - 1, ps.tabs_to_var);
	}

	ps.in_decl = ps.decl_level > 0;	/* if we were in a first level
					 * structure declaration before, we
					 * aren't anymore */

	if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
		/* There were unbalanced parentheses in the statement. It is a
		 * bit complicated, because the semicolon might be in a for
		 * statement. */
		diag(1, "Unbalanced parentheses");
		ps.nparen = 0;
		if (ps.spaced_expr_psym != psym_0) {
			parse(ps.spaced_expr_psym);
			ps.spaced_expr_psym = psym_0;
		}
	}
	buf_add_char(&code, ';');
	ps.want_blank = true;
	/* still inside parentheses, as in the header of a 'for' */
	ps.in_stmt_or_decl = ps.nparen > 0;
	ps.decl_ind = 0;

	/* the statement is only complete outside of a spaced expression */
	if (ps.spaced_expr_psym == psym_0) {
		parse(psym_stmt);
		ps.force_nl = true;
	}
}
677
/*
 * Handle a '{': decide whether it starts an initializer or a block, break
 * the line before it if required, recover from unbalanced parentheses,
 * enter a new declaration level for struct-like declarations, notify the
 * parser and add the character to the code buffer.
 */
static void
process_lbrace(void)
{
	/*
	 * A '{' after ')' that is nested in the parser stack and does not
	 * follow the header of a control statement is treated as the start
	 * of an initializer.
	 */
	parser_symbol psym = ps.psyms.sym[ps.psyms.top];
	if (ps.prev_lsym == lsym_rparen
	    && ps.psyms.top >= 2
	    && !(psym == psym_for_exprs || psym == psym_if_expr
		|| psym == psym_switch_expr || psym == psym_while_expr)) {
		ps.block_init = true;
		ps.init_or_struct = true;
	}

	if (out.line_kind == lk_stmt_head)
		out.line_kind = lk_other;

	ps.in_stmt_or_decl = false;	/* don't indent the {} */

	if (!ps.block_init)
		ps.force_nl = true;
	else
		ps.block_init_level++;

	if (code.len > 0 && !ps.block_init) {
		/* the inner test for code.len > 0 repeats the outer one */
		if (!opt.brace_same_line ||
		    (code.len > 0 && code.s[code.len - 1] == '}'))
			output_line();
		else if (ps.in_func_def_params && !ps.init_or_struct) {
			ps.ind_level_follow = 0;
			if (opt.function_brace_split)
				output_line();
			else
				ps.want_blank = true;
		}
	}

	if (ps.nparen > 0) {
		diag(1, "Unbalanced parentheses");
		ps.nparen = 0;
		if (ps.spaced_expr_psym != psym_0) {
			parse(ps.spaced_expr_psym);
			ps.spaced_expr_psym = psym_0;
			ps.ind_level = ps.ind_level_follow;
		}
	}

	if (code.len == 0)
		ps.in_stmt_cont = false;	/* don't indent the '{' itself
						 */
	if (ps.in_decl && ps.init_or_struct) {
		/* remember the declarator indentation of the outer level */
		ps.di_stack[ps.decl_level] = ps.decl_ind;
		if (++ps.decl_level == (int)array_length(ps.di_stack)) {
			diag(0, "Reached internal limit of %d struct levels",
			    (int)array_length(ps.di_stack));
			ps.decl_level--;
		}
	} else {
		ps.decl_on_line = false;	/* we can't be in the middle of
						 * a declaration, so don't do
						 * special indentation of
						 * comments */
		ps.in_func_def_params = false;
		ps.in_decl = false;
	}

	ps.decl_ind = 0;
	/* the kind of brace is maintained by update_ps_lbrace_kind */
	parse(ps.lbrace_kind);
	if (ps.want_blank)
		buf_add_char(&code, ' ');
	ps.want_blank = false;
	buf_add_char(&code, '{');
	ps.declaration = decl_no;
}
750
/*
 * Handle a '}': recover from unbalanced parentheses, leave the current
 * initializer or declaration level, add the character to the code buffer
 * on a line of its own unless inside an initializer, notify the parser and
 * decide whether a line break has to follow.
 */
static void
process_rbrace(void)
{
	if (ps.nparen > 0) {	/* check for unclosed if, for, else. */
		diag(1, "Unbalanced parentheses");
		ps.nparen = 0;
		ps.spaced_expr_psym = psym_0;
	}

	ps.declaration = decl_no;
	if (ps.block_init_level > 0)
		ps.block_init_level--;

	/* outside of initializers, the '}' starts a new line */
	if (code.len > 0 && !ps.block_init)
		output_line();

	buf_add_char(&code, '}');
	ps.want_blank = true;
	ps.in_stmt_or_decl = false;	// XXX: Initializers don't end a stmt
	ps.in_stmt_cont = false;

	if (ps.decl_level > 0) {	/* multi-level structure declaration */
		/* restore the declarator indentation of the outer level */
		ps.decl_ind = ps.di_stack[--ps.decl_level];
		if (ps.decl_level == 0 && !ps.in_func_def_params) {
			ps.declaration = decl_begin;
			ps.decl_ind = ps.ind_level == 0
			    ? opt.decl_indent : opt.local_decl_indent;
		}
		ps.in_decl = true;
	}

	/* this brace is marked as the end of a function body */
	if (ps.psyms.top == 2)
		out.line_kind = lk_func_end;

	parse(psym_rbrace);

	/*
	 * No forced line break if the parser now has a 'do' statement or an
	 * 'if' statement on top, or after a struct-like declaration or
	 * initializer.
	 */
	if (!ps.init_or_struct
	    && ps.psyms.sym[ps.psyms.top] != psym_do_stmt
	    && ps.psyms.sym[ps.psyms.top] != psym_if_expr_stmt)
		ps.force_nl = true;
}
792
793 static void
794 process_do(void)
795 {
796 ps.in_stmt_or_decl = false;
797 ps.in_decl = false;
798
799 if (code.len > 0)
800 output_line();
801
802 ps.force_nl = true;
803 parse(psym_do);
804 }
805
806 static void
807 process_else(void)
808 {
809 ps.in_stmt_or_decl = false;
810
811 if (code.len > 0
812 && !(opt.cuddle_else && code.s[code.len - 1] == '}'))
813 output_line();
814
815 ps.force_nl = true;
816 parse(psym_else);
817 }
818
/*
 * Handle a type name or tag keyword that starts a declaration: notify the
 * parser, mark the parser state as being in a declaration, and determine
 * the indentation for the declarator.  The token itself is copied by the
 * caller.
 */
static void
process_type(void)
{
	parse(psym_decl);	/* let the parser worry about indentation */

	/* at the outermost parser level, code ending in ')' gets its own line */
	if (ps.prev_lsym == lsym_rparen && ps.psyms.top <= 1 && code.len > 0)
		output_line();

	/* parameter declarations of a function definition get one level */
	if (ps.in_func_def_params && opt.indent_parameters &&
	    ps.decl_level == 0) {
		ps.ind_level = ps.ind_level_follow = 1;
		ps.in_stmt_cont = false;
	}

	ps.init_or_struct = /* maybe */ true;
	/* a type directly after 'typedef' does not count as a declaration */
	ps.in_decl = ps.decl_on_line = ps.prev_lsym != lsym_typedef;
	if (ps.decl_level <= 0)
		ps.declaration = decl_begin;

	int len = (int)token.len + 1;
	int ind = ps.ind_level == 0 || ps.decl_level > 0
	    ? opt.decl_indent	/* global variable or local member */
	    : opt.local_decl_indent;	/* local variable */
	/* without a positive indentation, use the token length plus 1 */
	ps.decl_ind = ind > 0 ? ind : len;
	ps.tabs_to_var = opt.use_tabs && ind > 0;
}
845
846 static void
847 process_ident(lexer_symbol lsym)
848 {
849 if (ps.in_decl) {
850 if (lsym == lsym_funcname) {
851 ps.in_decl = false;
852 if (opt.procnames_start_line && code.len > 0)
853 output_line();
854 else if (ps.want_blank)
855 buf_add_char(&code, ' ');
856 ps.want_blank = false;
857
858 } else if (!ps.block_init && !ps.decl_indent_done &&
859 ps.line_start_nparen == 0) {
860 if (opt.decl_indent == 0
861 && code.len > 0 && code.s[code.len - 1] == '}')
862 ps.decl_ind = ind_add(0, code.s, code.len) + 1;
863 indent_declarator(ps.decl_ind, ps.tabs_to_var);
864 ps.want_blank = false;
865 }
866
867 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
868 ps.force_nl = true;
869 ps.next_unary = true;
870 ps.in_stmt_or_decl = false;
871 parse(ps.spaced_expr_psym);
872 ps.spaced_expr_psym = psym_0;
873 }
874 }
875
876 static void
877 process_period(void)
878 {
879 if (code.len > 0 && code.s[code.len - 1] == ',')
880 buf_add_char(&code, ' ');
881 buf_add_char(&code, '.');
882 ps.want_blank = false;
883 }
884
885 static void
886 process_comma(void)
887 {
888 ps.want_blank = code.len > 0; /* only put blank after comma if comma
889 * does not start the line */
890
891 if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
892 !ps.decl_indent_done && ps.line_start_nparen == 0) {
893 /* indent leading commas and not the actual identifiers */
894 indent_declarator(ps.decl_ind - 1, ps.tabs_to_var);
895 }
896
897 buf_add_char(&code, ',');
898
899 if (ps.nparen == 0) {
900 if (ps.block_init_level == 0)
901 ps.block_init = false;
902 int typical_varname_length = 8;
903 if (ps.break_after_comma && (opt.break_after_comma ||
904 ind_add(compute_code_indent(), code.s, code.len)
905 >= opt.max_line_length - typical_varname_length))
906 ps.force_nl = true;
907 }
908 }
909
/*
 * Move the whole line to the 'label' buffer.
 *
 * The line is copied up to, but not including, its newline.  While copying,
 * string literals, character constants and block comments are tracked: a
 * block comment may extend over several lines, and inside a string literal
 * or character constant no comment can start.  Trailing blanks are removed
 * from the result.
 */
static void
read_preprocessing_line(void)
{
	enum {
		PLAIN, STR, CHR, COMM
	} state = PLAIN;

	buf_add_char(&lab, '#');

	/* inside a block comment, newlines are copied as well, until EOF */
	while (inp_p[0] != '\n' || (state == COMM && !had_eof)) {
		buf_add_char(&lab, inp_next());
		switch (lab.s[lab.len - 1]) {
		case '\\':
			/*
			 * Outside of comments, copy the escaped character
			 * without interpreting it; this covers escaped
			 * quotes and line continuations.
			 */
			if (state != COMM)
				buf_add_char(&lab, inp_next());
			break;
		case '/':
			/* start of a block comment */
			if (inp_p[0] == '*' && state == PLAIN) {
				state = COMM;
				buf_add_char(&lab, *inp_p++);
			}
			break;
		case '"':
			if (state == STR)
				state = PLAIN;
			else if (state == PLAIN)
				state = STR;
			break;
		case '\'':
			if (state == CHR)
				state = PLAIN;
			else if (state == PLAIN)
				state = CHR;
			break;
		case '*':
			/* end of a block comment */
			if (inp_p[0] == '/' && state == COMM) {
				state = PLAIN;
				buf_add_char(&lab, *inp_p++);
			}
			break;
		}
	}

	while (lab.len > 0 && ch_isblank(lab.s[lab.len - 1]))
		lab.len--;
}
957
958 static void
959 process_preprocessing(void)
960 {
961 if (lab.len > 0 || code.len > 0 || com.len > 0)
962 output_line();
963
964 read_preprocessing_line();
965
966 const char *dir = lab.s + 1, *line_end = lab.s + lab.len;
967 while (dir < line_end && ch_isblank(*dir))
968 dir++;
969 size_t dir_len = 0;
970 while (dir + dir_len < line_end && ch_isalpha(dir[dir_len]))
971 dir_len++;
972
973 if (dir_len >= 2 && memcmp(dir, "if", 2) == 0) {
974 if ((size_t)ifdef_level < array_length(state_stack))
975 state_stack[ifdef_level++] = ps;
976 else
977 diag(1, "#if stack overflow");
978 out.line_kind = lk_if;
979
980 } else if (dir_len >= 2 && memcmp(dir, "el", 2) == 0) {
981 if (ifdef_level <= 0)
982 diag(1, dir[2] == 'i'
983 ? "Unmatched #elif" : "Unmatched #else");
984 else
985 ps = state_stack[ifdef_level - 1];
986
987 } else if (dir_len == 5 && memcmp(dir, "endif", 5) == 0) {
988 if (ifdef_level <= 0)
989 diag(1, "Unmatched #endif");
990 else
991 ifdef_level--;
992 out.line_kind = lk_endif;
993 }
994 }
995
/*
 * Dispatch the token 'lsym' to its handler.  Tokens that need no special
 * treatment, or only some preparation, end up at the label 'copy_token',
 * where the token text is appended to the code buffer, preceded by a blank
 * if one is pending.  Unlisted tokens are ignored.
 */
static void
process_lsym(lexer_symbol lsym)
{
	switch (lsym) {
		/* INDENT OFF */
	case lsym_preprocessing:	process_preprocessing();	break;
	case lsym_newline:		process_newline();		break;
	case lsym_comment:		process_comment();		break;
	case lsym_lparen:		process_lparen();		break;
	case lsym_lbracket:		process_lbracket();		break;
	case lsym_rparen:		process_rparen();		break;
	case lsym_rbracket:		process_rbracket();		break;
	case lsym_lbrace:		process_lbrace();		break;
	case lsym_rbrace:		process_rbrace();		break;
	case lsym_period:		process_period();		break;
	case lsym_unary_op:		process_unary_op();		break;
	case lsym_postfix_op:		process_postfix_op();		break;
	case lsym_binary_op:		goto copy_token;
	case lsym_question:		process_question();		goto copy_token;
	case lsym_colon_question:	process_colon_question();	goto copy_token;
	case lsym_colon_label:		process_colon_label();		break;
	case lsym_colon_other:		process_colon_other();		break;
	case lsym_comma:		process_comma();		break;
	case lsym_semicolon:		process_semicolon();		break;
	case lsym_typedef:		goto copy_token;
	case lsym_modifier:		goto copy_token;
	case lsym_case:			ps.seen_case = true;		goto copy_token;
	case lsym_default:		ps.seen_case = true;		goto copy_token;
	case lsym_do:			process_do();			goto copy_token;
	case lsym_else:			process_else();			goto copy_token;
	case lsym_for:			ps.spaced_expr_psym = psym_for_exprs;	goto copy_token;
	case lsym_if:			ps.spaced_expr_psym = psym_if_expr;	goto copy_token;
	case lsym_switch:		ps.spaced_expr_psym = psym_switch_expr;	goto copy_token;
	case lsym_while:		ps.spaced_expr_psym = psym_while_expr;	goto copy_token;
		/* INDENT ON */

	case lsym_tag:
		/* inside parentheses, a tag does not start a declaration */
		if (ps.nparen > 0)
			goto copy_token;
		/* FALLTHROUGH */
	case lsym_type_outside_parentheses:
		process_type();
		goto copy_token;

	case lsym_type_in_parentheses:
	case lsym_sizeof:
	case lsym_offsetof:
	case lsym_word:
	case lsym_funcname:
	case lsym_return:
		process_ident(lsym);
copy_token:
		if (ps.want_blank)
			buf_add_char(&code, ' ');
		buf_add_buf(&code, &token);
		/* no blank is requested after a function name */
		if (lsym != lsym_funcname)
			ps.want_blank = true;
		break;

	default:
		break;
	}
}
1059
/*
 * The main loop: fetch one token after another from the lexer and process
 * it, until the end of the input is reached.  Returns the exit status of
 * the program, as determined by process_eof.
 */
static int
indent(void)
{
	debug_parser_state();

	for (;;) {		/* loop until we reach eof */
		lexer_symbol lsym = lexi();

		debug_blank_line();
		debug_printf("line %d: %s", line_no, lsym_name[lsym]);
		debug_print_buf("token", &token);
		debug_buffers();
		debug_blank_line();

		if (lsym == lsym_eof)
			return process_eof();

		/* keep 'else if' together in one line */
		if (lsym == lsym_if && ps.prev_lsym == lsym_else
		    && opt.else_if_in_same_line)
			ps.force_nl = false;

		if (lsym == lsym_newline || lsym == lsym_preprocessing)
			ps.force_nl = false;
		else if (lsym == lsym_comment) {
			/* no special processing */
		} else {
			/* an ordinary token, which continues the code */
			maybe_break_line(lsym);
			ps.in_stmt_or_decl = true;
			if (com.len > 0)
				move_com_to_code(lsym);
			update_ps_lbrace_kind(lsym);
		}

		process_lsym(lsym);

		debug_parser_state();

		/* comments, newlines and preprocessing lines don't count */
		if (lsym != lsym_comment && lsym != lsym_newline &&
		    lsym != lsym_preprocessing)
			ps.prev_lsym = lsym;
	}
}
1102
/*
 * Program entry: set up the global state, load the profiles, process the
 * command line, determine the indentation of the first line and then
 * format the input.  The exit status is that of the formatting run.
 */
int
main(int argc, char **argv)
{
	int exit_status;

	init_globals();
	load_profiles(argc, argv);
	parse_command_line(argc, argv);
	set_initial_indentation();

	exit_status = indent();
	return exit_status;
}
1112