indent.c revision 1.292 1 /* $NetBSD: indent.c,v 1.292 2023/05/18 04:23:03 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: indent.c,v 1.292 2023/05/18 04:23:03 rillig Exp $");
42
43 #include <sys/param.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include <stdarg.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51
52 #include "indent.h"
53
54 struct options opt = {
55 .brace_same_line = true,
56 .comment_delimiter_on_blankline = true,
57 .cuddle_else = true,
58 .comment_column = 33,
59 .decl_indent = 16,
60 .else_if = true,
61 .function_brace_split = true,
62 .format_col1_comments = true,
63 .format_block_comments = true,
64 .indent_parameters = true,
65 .indent_size = 8,
66 .local_decl_indent = -1,
67 .lineup_to_parens = true,
68 .procnames_start_line = true,
69 .star_comment_cont = true,
70 .tabsize = 8,
71 .max_line_length = 78,
72 .use_tabs = true,
73 };
74
75 struct parser_state ps;
76
77 struct buffer token;
78
79 struct buffer lab;
80 struct buffer code;
81 struct buffer com;
82
83 bool found_err;
84 bool break_comma;
85 float case_ind;
86 bool had_eof;
87 int line_no = 1;
88 enum indent_enabled indent_enabled;
89
90 static int ifdef_level;
91 static struct parser_state state_stack[5];
92
93 FILE *input;
94 FILE *output;
95
96 static const char *in_name = "Standard Input";
97 static const char *out_name = "Standard Output";
98 static const char *backup_suffix = ".BAK";
99 static char bakfile[MAXPATHLEN] = "";
100
101
102 static void
103 buf_expand(struct buffer *buf, size_t add_size)
104 {
105 buf->cap = buf->cap + add_size + 400;
106 buf->mem = nonnull(realloc(buf->mem, buf->cap));
107 buf->st = buf->mem;
108 }
109
110 void
111 buf_add_char(struct buffer *buf, char ch)
112 {
113 if (buf->len == buf->cap)
114 buf_expand(buf, 1);
115 buf->mem[buf->len++] = ch;
116 }
117
118 void
119 buf_add_chars(struct buffer *buf, const char *s, size_t len)
120 {
121 if (len == 0)
122 return;
123 if (len > buf->cap - buf->len)
124 buf_expand(buf, len);
125 memcpy(buf->mem + buf->len, s, len);
126 buf->len += len;
127 }
128
129 static void
130 buf_add_buf(struct buffer *buf, const struct buffer *add)
131 {
132 buf_add_chars(buf, add->st, add->len);
133 }
134
135 void
136 diag(int level, const char *msg, ...)
137 {
138 va_list ap;
139
140 if (level != 0)
141 found_err = true;
142
143 va_start(ap, msg);
144 fprintf(stderr, "%s: %s:%d: ",
145 level == 0 ? "warning" : "error", in_name, line_no);
146 vfprintf(stderr, msg, ap);
147 fprintf(stderr, "\n");
148 va_end(ap);
149 }
150
/*
 * Compute the indentation that results from starting at 'ind' and then
 * adding the 'len' characters at 's': a newline resets it to 0, a tab
 * advances to the next tab stop, a backspace steps back by one, and every
 * other character counts as one column.
 */
int
ind_add(int ind, const char *s, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		switch (s[i]) {
		case '\n':
			ind = 0;
			break;
		case '\t':
			ind = next_tab(ind);
			break;
		case '\b':
			ind--;
			break;
		default:
			ind++;
			break;
		}
	}
	return ind;
}
170
171 static void
172 main_init_globals(void)
173 {
174 ps.s_sym[0] = psym_stmt_list;
175 ps.prev_token = lsym_semicolon;
176 ps.next_col_1 = true;
177
178 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
179 if (suffix != NULL)
180 backup_suffix = suffix;
181 }
182
183 /*
184 * Copy the input file to the backup file, then make the backup file the input
185 * and the original input file the output.
186 */
187 static void
188 bakcopy(void)
189 {
190 ssize_t n;
191 int bak_fd;
192 char buff[8 * 1024];
193
194 const char *last_slash = strrchr(in_name, '/');
195 snprintf(bakfile, sizeof(bakfile), "%s%s",
196 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
197
198 /* copy in_name to backup file */
199 bak_fd = creat(bakfile, 0600);
200 if (bak_fd < 0)
201 err(1, "%s", bakfile);
202
203 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
204 if (write(bak_fd, buff, (size_t)n) != n)
205 err(1, "%s", bakfile);
206 if (n < 0)
207 err(1, "%s", in_name);
208
209 close(bak_fd);
210 (void)fclose(input);
211
212 /* re-open backup file as the input file */
213 input = fopen(bakfile, "r");
214 if (input == NULL)
215 err(1, "%s", bakfile);
216 /* now the original input file will be the output */
217 output = fopen(in_name, "w");
218 if (output == NULL) {
219 unlink(bakfile);
220 err(1, "%s", in_name);
221 }
222 }
223
/*
 * Scan the command line for the options that affect profile loading:
 * "-npro" suppresses loading altogether, "-P<name>" selects the profile
 * name that is passed to load_profiles (the last one given wins).
 */
static void
main_load_profiles(int argc, char **argv)
{
	const char *profile_name = NULL;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i++];

		if (strcmp(arg, "-npro") == 0)
			return;
		if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
			profile_name = &arg[2];
	}
	load_profiles(profile_name);
}
239
240 static void
241 main_parse_command_line(int argc, char **argv)
242 {
243 for (int i = 1; i < argc; ++i) {
244 const char *arg = argv[i];
245
246 if (arg[0] == '-') {
247 set_option(arg, "Command line");
248
249 } else if (input == NULL) {
250 in_name = arg;
251 if ((input = fopen(in_name, "r")) == NULL)
252 err(1, "%s", in_name);
253
254 } else if (output == NULL) {
255 out_name = arg;
256 if (strcmp(in_name, out_name) == 0)
257 errx(1, "input and output files must be different");
258 if ((output = fopen(out_name, "w")) == NULL)
259 err(1, "%s", out_name);
260
261 } else
262 errx(1, "too many arguments: %s", arg);
263 }
264
265 if (input == NULL) {
266 input = stdin;
267 output = stdout;
268 } else if (output == NULL) {
269 out_name = in_name;
270 bakcopy();
271 }
272
273 if (opt.comment_column <= 1)
274 opt.comment_column = 2; /* don't put normal comments in column
275 * 1, see opt.format_col1_comments */
276 if (opt.block_comment_max_line_length <= 0)
277 opt.block_comment_max_line_length = opt.max_line_length;
278 if (opt.local_decl_indent < 0)
279 opt.local_decl_indent = opt.decl_indent;
280 if (opt.decl_comment_column <= 0)
281 opt.decl_comment_column = opt.ljust_decl
282 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
283 : opt.comment_column;
284 if (opt.continuation_indent == 0)
285 opt.continuation_indent = opt.indent_size;
286 }
287
288 static void
289 main_prepare_parsing(void)
290 {
291 inp_read_line();
292
293 int ind = 0;
294 for (const char *p = inp.st;; p++) {
295 if (*p == ' ')
296 ind++;
297 else if (*p == '\t')
298 ind = next_tab(ind);
299 else
300 break;
301 }
302
303 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
304 }
305
306 static void
307 code_add_decl_indent(int decl_ind, bool tabs_to_var)
308 {
309 int base_ind = ps.ind_level * opt.indent_size;
310 int ind = base_ind + (int)code.len;
311 int target_ind = base_ind + decl_ind;
312 size_t orig_code_len = code.len;
313
314 if (tabs_to_var)
315 for (int next; (next = next_tab(ind)) <= target_ind; ind = next)
316 buf_add_char(&code, '\t');
317
318 for (; ind < target_ind; ind++)
319 buf_add_char(&code, ' ');
320
321 if (code.len == orig_code_len && ps.want_blank) {
322 buf_add_char(&code, ' ');
323 ps.want_blank = false;
324 }
325 }
326
327 static int
328 process_eof(void)
329 {
330 if (lab.len > 0 || code.len > 0 || com.len > 0)
331 output_line();
332 if (indent_enabled != indent_on) {
333 indent_enabled = indent_last_off_line;
334 output_line();
335 }
336
337 if (ps.tos > 1) /* check for balanced braces */
338 diag(1, "Stuff missing from end of file");
339
340 fflush(output);
341 return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
342 }
343
344 static void
345 maybe_break_line(lexer_symbol lsym)
346 {
347 if (!ps.force_nl)
348 return;
349 if (lsym == lsym_semicolon)
350 return;
351 if (lsym == lsym_lbrace && opt.brace_same_line)
352 return;
353
354 if (opt.verbose)
355 diag(0, "Line broken");
356 output_line();
357 ps.force_nl = false;
358 }
359
360 static bool
361 want_blank_before_comment(void)
362 {
363 if (code.len > 0) {
364 char ch = code.mem[code.len - 1];
365 return ch != '[' && ch != '(';
366 }
367 return lab.len > 0;
368 }
369
370 static void
371 move_com_to_code(lexer_symbol lsym)
372 {
373 if (want_blank_before_comment())
374 buf_add_char(&code, ' ');
375 buf_add_buf(&code, &com);
376 if (lsym != lsym_rparen_or_rbracket)
377 buf_add_char(&code, ' ');
378 com.len = 0;
379 ps.want_blank = false;
380 }
381
382 static void
383 process_newline(void)
384 {
385 if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init &&
386 !opt.break_after_comma && break_comma &&
387 com.len == 0)
388 goto stay_in_line;
389
390 output_line();
391
392 stay_in_line:
393 ++line_no;
394 }
395
396 static bool
397 is_function_pointer_declaration(void)
398 {
399 return token.st[0] == '('
400 && ps.in_decl
401 && !ps.block_init
402 && !ps.decl_indent_done
403 && !ps.is_function_definition
404 && ps.line_start_nparen == 0;
405 }
406
407 static bool
408 want_blank_before_lparen(void)
409 {
410 if (!ps.want_blank)
411 return false;
412 if (opt.proc_calls_space)
413 return true;
414 if (ps.prev_token == lsym_rparen_or_rbracket)
415 return false;
416 if (ps.prev_token == lsym_offsetof)
417 return false;
418 if (ps.prev_token == lsym_sizeof)
419 return opt.blank_after_sizeof;
420 if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname)
421 return false;
422 return true;
423 }
424
425 static bool
426 want_blank_before_lbracket(void)
427 {
428 if (code.len == 0)
429 return false;
430 if (ps.prev_token == lsym_comma)
431 return true;
432 if (ps.prev_token == lsym_binary_op)
433 return true;
434 return false;
435 }
436
437 static void
438 process_lparen_or_lbracket(void)
439 {
440 if (++ps.nparen == array_length(ps.paren)) {
441 diag(0, "Reached internal limit of %zu unclosed parentheses",
442 array_length(ps.paren));
443 ps.nparen--;
444 }
445
446 if (is_function_pointer_declaration()) {
447 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
448 ps.decl_indent_done = true;
449 } else if (token.st[0] == '('
450 ? want_blank_before_lparen() : want_blank_before_lbracket())
451 buf_add_char(&code, ' ');
452 ps.want_blank = false;
453 buf_add_char(&code, token.st[0]);
454
455 int indent = ind_add(0, code.st, code.len);
456 enum paren_level_cast cast = cast_unknown;
457
458 if (opt.extra_expr_indent && !opt.lineup_to_parens
459 && ps.spaced_expr_psym != psym_0 && ps.nparen == 1
460 && opt.continuation_indent == opt.indent_size)
461 ps.extra_expr_indent = eei_yes;
462
463 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0
464 && ps.nparen == 1 && indent < 2 * opt.indent_size)
465 indent = 2 * opt.indent_size;
466
467 if (ps.init_or_struct && *token.st == '(' && ps.tos <= 2) {
468 /* this is a kluge to make sure that declarations will be
469 * aligned right if proc decl has an explicit type on it, i.e.
470 * "int a(x) {..." */
471 parse(psym_0);
472 ps.init_or_struct = false;
473 }
474
475 if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof
476 || ps.is_function_definition)
477 cast = cast_no;
478
479 ps.paren[ps.nparen - 1].indent = indent;
480 ps.paren[ps.nparen - 1].cast = cast;
481 debug_println("paren_indents[%d] is now %s%d",
482 ps.nparen - 1, paren_level_cast_name[cast], indent);
483 }
484
485 static void
486 process_rparen_or_rbracket(void)
487 {
488 if (ps.nparen == 0) {
489 diag(0, "Extra '%c'", *token.st);
490 goto unbalanced;
491 }
492
493 enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
494 if (ps.decl_on_line && !ps.block_init)
495 cast = cast_no;
496
497 if (cast == cast_maybe) {
498 ps.next_unary = true;
499 ps.want_blank = opt.space_after_cast;
500 } else
501 ps.want_blank = true;
502
503 if (code.len == 0) /* if the paren starts the line */
504 ps.line_start_nparen = ps.nparen; /* then indent it */
505
506 unbalanced:
507 buf_add_char(&code, token.st[0]);
508
509 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
510 if (ps.extra_expr_indent == eei_yes)
511 ps.extra_expr_indent = eei_last;
512 ps.force_nl = true;
513 ps.next_unary = true;
514 ps.in_stmt_or_decl = false;
515 parse(ps.spaced_expr_psym);
516 ps.spaced_expr_psym = psym_0;
517 ps.want_blank = true;
518 }
519 }
520
521 static bool
522 want_blank_before_unary_op(void)
523 {
524 if (ps.want_blank)
525 return true;
526 if (token.st[0] == '+' || token.st[0] == '-')
527 return code.len > 0 && code.mem[code.len - 1] == token.st[0];
528 return false;
529 }
530
531 static void
532 process_unary_op(void)
533 {
534 if (!ps.decl_indent_done && ps.in_decl && !ps.block_init &&
535 !ps.is_function_definition && ps.line_start_nparen == 0) {
536 /* pointer declarations */
537 code_add_decl_indent(ps.decl_ind - (int)token.len, ps.tabs_to_var);
538 ps.decl_indent_done = true;
539 } else if (want_blank_before_unary_op())
540 buf_add_char(&code, ' ');
541
542 buf_add_buf(&code, &token);
543 ps.want_blank = false;
544 }
545
546 static void
547 process_binary_op(void)
548 {
549 if (code.len > 0 && ps.want_blank)
550 buf_add_char(&code, ' ');
551 buf_add_buf(&code, &token);
552 ps.want_blank = true;
553 }
554
555 static void
556 process_postfix_op(void)
557 {
558 buf_add_buf(&code, &token);
559 ps.want_blank = true;
560 }
561
562 static void
563 process_question(void)
564 {
565 ps.quest_level++;
566 if (code.len == 0) {
567 ps.in_stmt_cont = true;
568 ps.in_stmt_or_decl = true;
569 ps.in_decl = false;
570 }
571 if (ps.want_blank)
572 buf_add_char(&code, ' ');
573 buf_add_char(&code, '?');
574 ps.want_blank = true;
575 }
576
577 static void
578 process_colon(void)
579 {
580 if (ps.quest_level > 0) { /* part of a '?:' operator */
581 ps.quest_level--;
582 if (code.len == 0) {
583 ps.in_stmt_cont = true;
584 ps.in_stmt_or_decl = true;
585 ps.in_decl = false;
586 }
587 if (ps.want_blank)
588 buf_add_char(&code, ' ');
589 buf_add_char(&code, ':');
590 ps.want_blank = true;
591 return;
592 }
593
594 if (ps.init_or_struct) { /* bit-field */
595 buf_add_char(&code, ':');
596 ps.want_blank = false;
597 return;
598 }
599
600 buf_add_buf(&lab, &code); /* 'case' or 'default' or named label
601 */
602 buf_add_char(&lab, ':');
603 code.len = 0;
604
605 ps.in_stmt_or_decl = false;
606 ps.is_case_label = ps.seen_case;
607 ps.force_nl = ps.seen_case;
608 ps.seen_case = false;
609 ps.want_blank = false;
610 }
611
612 static void
613 process_semicolon(void)
614 {
615 if (ps.decl_level == 0)
616 ps.init_or_struct = false;
617 ps.seen_case = false; /* only needs to be reset on error */
618 ps.quest_level = 0; /* only needs to be reset on error */
619 if (ps.prev_token == lsym_rparen_or_rbracket)
620 ps.in_func_def_params = false;
621 ps.block_init = false;
622 ps.block_init_level = 0;
623 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;
624
625 if (ps.in_decl && code.len == 0 && !ps.block_init &&
626 !ps.decl_indent_done && ps.line_start_nparen == 0) {
627 /* indent stray semicolons in declarations */
628 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
629 ps.decl_indent_done = true;
630 }
631
632 ps.in_decl = ps.decl_level > 0; /* if we were in a first level
633 * structure declaration before, we
634 * aren't anymore */
635
636 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
637 /* There were unbalanced parentheses in the statement. It is a
638 * bit complicated, because the semicolon might be in a for
639 * statement. */
640 diag(1, "Unbalanced parentheses");
641 ps.nparen = 0;
642 if (ps.spaced_expr_psym != psym_0) {
643 parse(ps.spaced_expr_psym);
644 ps.spaced_expr_psym = psym_0;
645 }
646 }
647 buf_add_char(&code, ';');
648 ps.want_blank = true;
649 ps.in_stmt_or_decl = ps.nparen > 0;
650
651 if (ps.spaced_expr_psym == psym_0) {
652 parse(psym_0); /* let parser know about end of stmt */
653 ps.force_nl = true;
654 }
655 }
656
657 static void
658 process_lbrace(void)
659 {
660 ps.in_stmt_or_decl = false; /* don't indent the {} */
661
662 if (!ps.block_init)
663 ps.force_nl = true;
664 else if (ps.block_init_level <= 0)
665 ps.block_init_level = 1;
666 else
667 ps.block_init_level++;
668
669 if (code.len > 0 && !ps.block_init) {
670 if (!opt.brace_same_line)
671 output_line();
672 else if (ps.in_func_def_params && !ps.init_or_struct) {
673 ps.ind_level_follow = 0;
674 if (opt.function_brace_split)
675 output_line();
676 else
677 ps.want_blank = true;
678 }
679 }
680
681 if (ps.nparen > 0) {
682 diag(1, "Unbalanced parentheses");
683 ps.nparen = 0;
684 if (ps.spaced_expr_psym != psym_0) {
685 parse(ps.spaced_expr_psym);
686 ps.spaced_expr_psym = psym_0;
687 ps.ind_level = ps.ind_level_follow;
688 }
689 }
690
691 if (code.len == 0)
692 ps.in_stmt_cont = false; /* don't indent the '{' itself
693 */
694 if (ps.in_decl && ps.init_or_struct) {
695 ps.di_stack[ps.decl_level] = ps.decl_ind;
696 if (++ps.decl_level == (int)array_length(ps.di_stack)) {
697 diag(0, "Reached internal limit of %d struct levels",
698 (int)array_length(ps.di_stack));
699 ps.decl_level--;
700 }
701 } else {
702 ps.decl_on_line = false; /* we can't be in the middle of
703 * a declaration, so don't do
704 * special indentation of
705 * comments */
706 ps.in_func_def_params = false;
707 ps.in_decl = false;
708 }
709
710 ps.decl_ind = 0;
711 parse(psym_lbrace);
712 if (ps.want_blank)
713 buf_add_char(&code, ' ');
714 ps.want_blank = false;
715 buf_add_char(&code, '{');
716 ps.declaration = decl_no;
717 }
718
719 static void
720 process_rbrace(void)
721 {
722 if (ps.nparen > 0) { /* check for unclosed if, for, else. */
723 diag(1, "Unbalanced parentheses");
724 ps.nparen = 0;
725 ps.spaced_expr_psym = psym_0;
726 }
727
728 ps.declaration = decl_no;
729 ps.block_init_level--;
730
731 if (code.len > 0 && !ps.block_init) {
732 if (opt.verbose)
733 diag(0, "Line broken");
734 output_line();
735 }
736
737 buf_add_char(&code, '}');
738 ps.want_blank = true;
739 ps.in_stmt_or_decl = false;
740 ps.in_stmt_cont = false;
741
742 if (ps.decl_level > 0) { /* multi-level structure declaration */
743 ps.decl_ind = ps.di_stack[--ps.decl_level];
744 if (ps.decl_level == 0 && !ps.in_func_def_params) {
745 ps.declaration = decl_begin;
746 ps.decl_ind = ps.ind_level == 0
747 ? opt.decl_indent : opt.local_decl_indent;
748 }
749 ps.in_decl = true;
750 }
751
752 parse(psym_rbrace);
753 }
754
755 static void
756 process_do(void)
757 {
758 ps.in_stmt_or_decl = false;
759
760 if (code.len > 0) { /* make sure this starts a line */
761 if (opt.verbose)
762 diag(0, "Line broken");
763 output_line();
764 }
765
766 ps.force_nl = true;
767 parse(psym_do);
768 }
769
770 static void
771 process_else(void)
772 {
773 ps.in_stmt_or_decl = false;
774
775 if (code.len > 0 && !(opt.cuddle_else && code.mem[code.len - 1] == '}')) {
776 if (opt.verbose)
777 diag(0, "Line broken");
778 output_line();
779 }
780
781 ps.force_nl = true;
782 parse(psym_else);
783 }
784
785 static void
786 process_type(void)
787 {
788 parse(psym_decl); /* let the parser worry about indentation */
789
790 if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) {
791 if (code.len > 0)
792 output_line();
793 }
794
795 if (ps.in_func_def_params && opt.indent_parameters &&
796 ps.decl_level == 0) {
797 ps.ind_level = ps.ind_level_follow = 1;
798 ps.in_stmt_cont = false;
799 }
800
801 ps.init_or_struct = /* maybe */ true;
802 ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef;
803 if (ps.decl_level <= 0)
804 ps.declaration = decl_begin;
805
806 int len = (int)token.len + 1;
807 int ind = ps.ind_level == 0 || ps.decl_level > 0
808 ? opt.decl_indent /* global variable or local member */
809 : opt.local_decl_indent; /* local variable */
810 ps.decl_ind = ind > 0 ? ind : len;
811 ps.tabs_to_var = opt.use_tabs && ind > 0;
812 }
813
814 static void
815 process_ident(lexer_symbol lsym)
816 {
817 if (ps.in_decl) {
818 if (lsym == lsym_funcname) {
819 ps.in_decl = false;
820 if (opt.procnames_start_line && code.len > 0)
821 output_line();
822 else if (ps.want_blank)
823 buf_add_char(&code, ' ');
824 ps.want_blank = false;
825
826 } else if (!ps.block_init && !ps.decl_indent_done &&
827 ps.line_start_nparen == 0) {
828 if (opt.decl_indent == 0
829 && code.len > 0 && code.mem[code.len - 1] == '}')
830 ps.decl_ind = ind_add(0, code.st, code.len) + 1;
831 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
832 ps.decl_indent_done = true;
833 ps.want_blank = false;
834 }
835
836 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
837 ps.force_nl = true;
838 ps.next_unary = true;
839 ps.in_stmt_or_decl = false;
840 parse(ps.spaced_expr_psym);
841 ps.spaced_expr_psym = psym_0;
842 }
843 }
844
845 static void
846 process_period(void)
847 {
848 if (code.len > 0 && code.mem[code.len - 1] == ',')
849 buf_add_char(&code, ' ');
850 buf_add_char(&code, '.');
851 ps.want_blank = false;
852 }
853
854 static void
855 process_comma(void)
856 {
857 ps.want_blank = code.len > 0; /* only put blank after comma if comma
858 * does not start the line */
859
860 if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
861 !ps.decl_indent_done && ps.line_start_nparen == 0) {
862 /* indent leading commas and not the actual identifiers */
863 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
864 ps.decl_indent_done = true;
865 }
866
867 buf_add_char(&code, ',');
868
869 if (ps.nparen == 0) {
870 if (ps.block_init_level <= 0)
871 ps.block_init = false;
872 int typical_varname_length = 8;
873 if (break_comma && (opt.break_after_comma ||
874 ind_add(compute_code_indent(), code.st, code.len)
875 >= opt.max_line_length - typical_varname_length))
876 ps.force_nl = true;
877 }
878 }
879
880 /* move the whole line to the 'label' buffer */
881 static void
882 read_preprocessing_line(void)
883 {
884 enum {
885 PLAIN, STR, CHR, COMM
886 } state = PLAIN;
887
888 buf_add_char(&lab, '#');
889
890 while (ch_isblank(inp.st[0]))
891 buf_add_char(&lab, *inp.st++);
892
893 while (inp.st[0] != '\n' || (state == COMM && !had_eof)) {
894 buf_add_char(&lab, inp_next());
895 switch (lab.mem[lab.len - 1]) {
896 case '\\':
897 if (state != COMM)
898 buf_add_char(&lab, inp_next());
899 break;
900 case '/':
901 if (inp.st[0] == '*' && state == PLAIN) {
902 state = COMM;
903 buf_add_char(&lab, *inp.st++);
904 }
905 break;
906 case '"':
907 if (state == STR)
908 state = PLAIN;
909 else if (state == PLAIN)
910 state = STR;
911 break;
912 case '\'':
913 if (state == CHR)
914 state = PLAIN;
915 else if (state == PLAIN)
916 state = CHR;
917 break;
918 case '*':
919 if (inp.st[0] == '/' && state == COMM) {
920 state = PLAIN;
921 buf_add_char(&lab, *inp.st++);
922 }
923 break;
924 }
925 }
926
927 while (lab.len > 0 && ch_isblank(lab.mem[lab.len - 1]))
928 lab.len--;
929 }
930
/* A piece of text that is not null-terminated. */
typedef struct {
	const char *s;		/* the first character */
	const char *e;		/* just past the last character */
} substring;

/* Whether the substring has exactly the same text as the string 'str'. */
static bool
substring_equals(substring ss, const char *str)
{
	size_t len = (size_t)(ss.e - ss.s);

	if (strlen(str) != len)
		return false;
	return memcmp(ss.s, str, len) == 0;
}

/* Whether the substring begins with the whole string 'prefix'. */
static bool
substring_starts_with(substring ss, const char *prefix)
{
	const char *p = ss.s;

	for (; *prefix != '\0'; p++, prefix++) {
		if (p >= ss.e || *p != *prefix)
			return false;
	}
	return true;
}
950
951 static void
952 process_preprocessing(void)
953 {
954 if (lab.len > 0 || code.len > 0 || com.len > 0)
955 output_line();
956
957 read_preprocessing_line();
958
959 ps.is_case_label = false;
960
961 const char *end = lab.mem + lab.len;
962 substring dir;
963 dir.s = lab.st + 1;
964 while (dir.s < end && ch_isblank(*dir.s))
965 dir.s++;
966 dir.e = dir.s;
967 while (dir.e < end && ch_isalpha(*dir.e))
968 dir.e++;
969
970 if (substring_starts_with(dir, "if")) { /* also ifdef, ifndef */
971 if ((size_t)ifdef_level < array_length(state_stack))
972 state_stack[ifdef_level++] = ps;
973 else
974 diag(1, "#if stack overflow");
975
976 } else if (substring_starts_with(dir, "el")) { /* else, elif */
977 if (ifdef_level <= 0)
978 diag(1, dir.s[2] == 'i' ? "Unmatched #elif" : "Unmatched #else");
979 else
980 ps = state_stack[ifdef_level - 1];
981
982 } else if (substring_equals(dir, "endif")) {
983 if (ifdef_level <= 0)
984 diag(1, "Unmatched #endif");
985 else
986 ifdef_level--;
987
988 } else {
989 if (!substring_equals(dir, "pragma") &&
990 !substring_equals(dir, "error") &&
991 !substring_equals(dir, "line") &&
992 !substring_equals(dir, "undef") &&
993 !substring_equals(dir, "define") &&
994 !substring_equals(dir, "include")) {
995 diag(1, "Unrecognized cpp directive \"%.*s\"",
996 (int)(dir.e - dir.s), dir.s);
997 return;
998 }
999 }
1000
1001 /* subsequent processing of the newline character will cause the line
1002 * to be printed */
1003 }
1004
1005 static int
1006 main_loop(void)
1007 {
1008
1009 ps.di_stack[ps.decl_level = 0] = 0;
1010
1011 for (;;) { /* loop until we reach eof */
1012 lexer_symbol lsym = lexi();
1013
1014 if (lsym == lsym_eof)
1015 return process_eof();
1016
1017 if (lsym == lsym_if && ps.prev_token == lsym_else && opt.else_if)
1018 ps.force_nl = false;
1019
1020 if (lsym == lsym_newline || lsym == lsym_preprocessing)
1021 ps.force_nl = false;
1022 else if (lsym != lsym_comment) {
1023 maybe_break_line(lsym);
1024 ps.in_stmt_or_decl = true; /* add an extra level of
1025 * indentation; turned
1026 * off again by a ';' or
1027 * '}' */
1028 if (com.len > 0)
1029 move_com_to_code(lsym);
1030 }
1031
1032 switch (lsym) {
1033
1034 case lsym_newline:
1035 process_newline();
1036 break;
1037
1038 case lsym_lparen_or_lbracket:
1039 process_lparen_or_lbracket();
1040 break;
1041
1042 case lsym_rparen_or_rbracket:
1043 process_rparen_or_rbracket();
1044 break;
1045
1046 case lsym_unary_op:
1047 process_unary_op();
1048 break;
1049
1050 case lsym_binary_op:
1051 process_binary_op();
1052 break;
1053
1054 case lsym_postfix_op:
1055 process_postfix_op();
1056 break;
1057
1058 case lsym_question:
1059 process_question();
1060 break;
1061
1062 case lsym_case_label:
1063 ps.seen_case = true;
1064 goto copy_token;
1065
1066 case lsym_colon:
1067 process_colon();
1068 break;
1069
1070 case lsym_semicolon:
1071 process_semicolon();
1072 break;
1073
1074 case lsym_lbrace:
1075 process_lbrace();
1076 break;
1077
1078 case lsym_rbrace:
1079 process_rbrace();
1080 break;
1081
1082 case lsym_switch:
1083 ps.spaced_expr_psym = psym_switch_expr;
1084 goto copy_token;
1085
1086 case lsym_for:
1087 ps.spaced_expr_psym = psym_for_exprs;
1088 goto copy_token;
1089
1090 case lsym_if:
1091 ps.spaced_expr_psym = psym_if_expr;
1092 goto copy_token;
1093
1094 case lsym_while:
1095 ps.spaced_expr_psym = psym_while_expr;
1096 goto copy_token;
1097
1098 case lsym_do:
1099 process_do();
1100 goto copy_token;
1101
1102 case lsym_else:
1103 process_else();
1104 goto copy_token;
1105
1106 case lsym_typedef:
1107 case lsym_storage_class:
1108 goto copy_token;
1109
1110 case lsym_tag:
1111 if (ps.nparen > 0)
1112 goto copy_token;
1113 /* FALLTHROUGH */
1114 case lsym_type_outside_parentheses:
1115 process_type();
1116 goto copy_token;
1117
1118 case lsym_type_in_parentheses:
1119 case lsym_offsetof:
1120 case lsym_sizeof:
1121 case lsym_word:
1122 case lsym_funcname:
1123 case lsym_return:
1124 process_ident(lsym);
1125 copy_token:
1126 if (ps.want_blank)
1127 buf_add_char(&code, ' ');
1128 buf_add_buf(&code, &token);
1129 if (lsym != lsym_funcname)
1130 ps.want_blank = true;
1131 break;
1132
1133 case lsym_period:
1134 process_period();
1135 break;
1136
1137 case lsym_comma:
1138 process_comma();
1139 break;
1140
1141 case lsym_preprocessing:
1142 process_preprocessing();
1143 break;
1144
1145 case lsym_comment:
1146 process_comment();
1147 break;
1148
1149 default:
1150 break;
1151 }
1152
1153 if (lsym != lsym_comment && lsym != lsym_newline &&
1154 lsym != lsym_preprocessing)
1155 ps.prev_token = lsym;
1156 }
1157 }
1158
/*
 * Entry point: initialize the global state, load the profiles, process the
 * command line, read the first input line and then format the input.  The
 * exit status is the one returned by main_loop().
 */
int
main(int argc, char **argv)
{
	int status;

	main_init_globals();
	main_load_profiles(argc, argv);
	main_parse_command_line(argc, argv);
	main_prepare_parsing();

	status = main_loop();
	return status;
}
1168
/*
 * Return 'p' unchanged if it is not null; otherwise terminate the program
 * with EXIT_FAILURE via err().  Intended to wrap allocation calls.
 */
void *
nonnull(void *p)
{
	if (p != NULL)
		return p;
	err(EXIT_FAILURE, NULL);
}
1176