indent.c revision 1.287 1 /* $NetBSD: indent.c,v 1.287 2023/05/16 08:04:03 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
8 * Copyright (c) 1980, 1993
9 * The Regents of the University of California. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: indent.c,v 1.287 2023/05/16 08:04:03 rillig Exp $");
42
43 #include <sys/param.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include <stdarg.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51
52 #include "indent.h"
53
54 struct options opt = {
55 .brace_same_line = true,
56 .comment_delimiter_on_blankline = true,
57 .cuddle_else = true,
58 .comment_column = 33,
59 .decl_indent = 16,
60 .else_if = true,
61 .function_brace_split = true,
62 .format_col1_comments = true,
63 .format_block_comments = true,
64 .indent_parameters = true,
65 .indent_size = 8,
66 .local_decl_indent = -1,
67 .lineup_to_parens = true,
68 .procnames_start_line = true,
69 .star_comment_cont = true,
70 .tabsize = 8,
71 .max_line_length = 78,
72 .use_tabs = true,
73 };
74
75 struct parser_state ps;
76
77 struct buffer token;
78
79 struct buffer lab;
80 struct buffer code;
81 struct buffer com;
82
83 bool found_err;
84 bool break_comma;
85 float case_ind;
86 bool had_eof;
87 int line_no = 1;
88 enum indent_enabled indent_enabled;
89
90 static int ifdef_level;
91 static struct parser_state state_stack[5];
92
93 FILE *input;
94 FILE *output;
95
96 static const char *in_name = "Standard Input";
97 static const char *out_name = "Standard Output";
98 static const char *backup_suffix = ".BAK";
99 static char bakfile[MAXPATHLEN] = "";
100
101
102 static void
103 buf_expand(struct buffer *buf, size_t add_size)
104 {
105 buf->cap = buf->cap + add_size + 400;
106 buf->mem = nonnull(realloc(buf->mem, buf->cap));
107 buf->st = buf->mem;
108 }
109
110 void
111 buf_add_char(struct buffer *buf, char ch)
112 {
113 if (buf->len == buf->cap)
114 buf_expand(buf, 1);
115 buf->mem[buf->len++] = ch;
116 }
117
118 void
119 buf_add_chars(struct buffer *buf, const char *s, size_t len)
120 {
121 if (len == 0)
122 return;
123 if (len > buf->cap - buf->len)
124 buf_expand(buf, len);
125 memcpy(buf->mem + buf->len, s, len);
126 buf->len += len;
127 }
128
129 static void
130 buf_add_buf(struct buffer *buf, const struct buffer *add)
131 {
132 buf_add_chars(buf, add->st, add->len);
133 }
134
135 void
136 diag(int level, const char *msg, ...)
137 {
138 va_list ap;
139
140 if (level != 0)
141 found_err = true;
142
143 va_start(ap, msg);
144 fprintf(stderr, "%s: %s:%d: ",
145 level == 0 ? "warning" : "error", in_name, line_no);
146 vfprintf(stderr, msg, ap);
147 fprintf(stderr, "\n");
148 va_end(ap);
149 }
150
/*
 * Compute the indentation that results from starting at 'ind' and then
 * appending the 'len' characters starting at 's'.  A newline or form feed
 * resets the indentation to 0, a tab advances to the next tab stop, a
 * backspace goes back by one and every other character advances by one.
 */
int
ind_add(int ind, const char *s, size_t len)
{
	for (size_t i = 0; i < len; i++) {
		switch (s[i]) {
		case '\n':
		case '\f':
			ind = 0;
			break;
		case '\t':
			ind = next_tab(ind);
			break;
		case '\b':
			ind--;
			break;
		default:
			ind++;
			break;
		}
	}
	return ind;
}
170
171 static void
172 main_init_globals(void)
173 {
174 ps.s_sym[0] = psym_stmt_list;
175 ps.prev_token = lsym_semicolon;
176 ps.next_col_1 = true;
177
178 const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
179 if (suffix != NULL)
180 backup_suffix = suffix;
181 }
182
183 /*
184 * Copy the input file to the backup file, then make the backup file the input
185 * and the original input file the output.
186 */
187 static void
188 bakcopy(void)
189 {
190 ssize_t n;
191 int bak_fd;
192 char buff[8 * 1024];
193
194 const char *last_slash = strrchr(in_name, '/');
195 snprintf(bakfile, sizeof(bakfile), "%s%s",
196 last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
197
198 /* copy in_name to backup file */
199 bak_fd = creat(bakfile, 0600);
200 if (bak_fd < 0)
201 err(1, "%s", bakfile);
202
203 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
204 if (write(bak_fd, buff, (size_t)n) != n)
205 err(1, "%s", bakfile);
206 if (n < 0)
207 err(1, "%s", in_name);
208
209 close(bak_fd);
210 (void)fclose(input);
211
212 /* re-open backup file as the input file */
213 input = fopen(bakfile, "r");
214 if (input == NULL)
215 err(1, "%s", bakfile);
216 /* now the original input file will be the output */
217 output = fopen(in_name, "w");
218 if (output == NULL) {
219 unlink(bakfile);
220 err(1, "%s", in_name);
221 }
222 }
223
/*
 * Load the profiles, unless the command line contains "-npro".  The last
 * option of the form "-Pname" with a non-empty name selects the profile name
 * that is passed to load_profiles(); without such an option, NULL is passed.
 */
static void
main_load_profiles(int argc, char **argv)
{
	const char *profile_name = NULL;
	int i = 1;

	while (i < argc) {
		const char *arg = argv[i++];

		if (strcmp(arg, "-npro") == 0)
			return;
		if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
			profile_name = &arg[2];
	}
	load_profiles(profile_name);
}
239
240 static void
241 main_parse_command_line(int argc, char **argv)
242 {
243 for (int i = 1; i < argc; ++i) {
244 const char *arg = argv[i];
245
246 if (arg[0] == '-') {
247 set_option(arg, "Command line");
248
249 } else if (input == NULL) {
250 in_name = arg;
251 if ((input = fopen(in_name, "r")) == NULL)
252 err(1, "%s", in_name);
253
254 } else if (output == NULL) {
255 out_name = arg;
256 if (strcmp(in_name, out_name) == 0)
257 errx(1, "input and output files must be different");
258 if ((output = fopen(out_name, "w")) == NULL)
259 err(1, "%s", out_name);
260
261 } else
262 errx(1, "too many arguments: %s", arg);
263 }
264
265 if (input == NULL) {
266 input = stdin;
267 output = stdout;
268 } else if (output == NULL) {
269 out_name = in_name;
270 bakcopy();
271 }
272
273 if (opt.comment_column <= 1)
274 opt.comment_column = 2; /* don't put normal comments in column 1, see
275 * opt.format_col1_comments */
276 if (opt.block_comment_max_line_length <= 0)
277 opt.block_comment_max_line_length = opt.max_line_length;
278 if (opt.local_decl_indent < 0)
279 opt.local_decl_indent = opt.decl_indent;
280 if (opt.decl_comment_column <= 0)
281 opt.decl_comment_column = opt.ljust_decl
282 ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
283 : opt.comment_column;
284 if (opt.continuation_indent == 0)
285 opt.continuation_indent = opt.indent_size;
286 }
287
288 static void
289 main_prepare_parsing(void)
290 {
291 inp_read_line();
292
293 int ind = 0;
294 for (const char *p = inp_p();; p++) {
295 if (*p == ' ')
296 ind++;
297 else if (*p == '\t')
298 ind = next_tab(ind);
299 else
300 break;
301 }
302
303 ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
304 }
305
306 static void
307 code_add_decl_indent(int decl_ind, bool tabs_to_var)
308 {
309 int base_ind = ps.ind_level * opt.indent_size;
310 int ind = base_ind + (int)code.len;
311 int target_ind = base_ind + decl_ind;
312 size_t orig_code_len = code.len;
313
314 if (tabs_to_var)
315 for (int next; (next = next_tab(ind)) <= target_ind; ind = next)
316 buf_add_char(&code, '\t');
317
318 for (; ind < target_ind; ind++)
319 buf_add_char(&code, ' ');
320
321 if (code.len == orig_code_len && ps.want_blank) {
322 buf_add_char(&code, ' ');
323 ps.want_blank = false;
324 }
325 }
326
327 static int
328 process_eof(void)
329 {
330 if (lab.len > 0 || code.len > 0 || com.len > 0)
331 output_line();
332 if (indent_enabled != indent_on) {
333 indent_enabled = indent_last_off_line;
334 output_line();
335 }
336
337 if (ps.tos > 1) /* check for balanced braces */
338 diag(1, "Stuff missing from end of file");
339
340 fflush(output);
341 return found_err ? EXIT_FAILURE : EXIT_SUCCESS;
342 }
343
344 static void
345 maybe_break_line(lexer_symbol lsym)
346 {
347 if (!ps.force_nl)
348 return;
349 if (lsym == lsym_semicolon)
350 return;
351 if (lsym == lsym_lbrace && opt.brace_same_line)
352 return;
353
354 if (opt.verbose)
355 diag(0, "Line broken");
356 output_line();
357 ps.force_nl = false;
358 }
359
360 static void
361 move_com_to_code(void)
362 {
363 if (lab.len > 0 || code.len > 0)
364 buf_add_char(&code, ' ');
365 buf_add_buf(&code, &com);
366 buf_add_char(&code, ' ');
367 com.len = 0;
368 ps.want_blank = false;
369 }
370
371 static void
372 process_form_feed(void)
373 {
374 output_line_ff();
375 ps.want_blank = false;
376 }
377
378 static void
379 process_newline(void)
380 {
381 if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init &&
382 !opt.break_after_comma && break_comma &&
383 com.len == 0)
384 goto stay_in_line;
385
386 output_line();
387
388 stay_in_line:
389 ++line_no;
390 }
391
392 static bool
393 is_function_pointer_declaration(void)
394 {
395 return token.st[0] == '('
396 && ps.in_decl
397 && !ps.block_init
398 && !ps.decl_indent_done
399 && !ps.is_function_definition
400 && ps.line_start_nparen == 0;
401 }
402
403 static bool
404 want_blank_before_lparen(void)
405 {
406 if (!ps.want_blank)
407 return false;
408 if (opt.proc_calls_space)
409 return true;
410 if (ps.prev_token == lsym_rparen_or_rbracket)
411 return false;
412 if (ps.prev_token == lsym_offsetof)
413 return false;
414 if (ps.prev_token == lsym_sizeof)
415 return opt.blank_after_sizeof;
416 if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname)
417 return false;
418 return true;
419 }
420
421 static bool
422 want_blank_before_lbracket(void)
423 {
424 if (code.len == 0)
425 return false;
426 if (ps.prev_token == lsym_comma)
427 return true;
428 if (ps.prev_token == lsym_binary_op)
429 return true;
430 return false;
431 }
432
433 static void
434 process_lparen_or_lbracket(void)
435 {
436 if (++ps.nparen == array_length(ps.paren)) {
437 diag(0, "Reached internal limit of %zu unclosed parentheses",
438 array_length(ps.paren));
439 ps.nparen--;
440 }
441
442 if (is_function_pointer_declaration()) {
443 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
444 ps.decl_indent_done = true;
445 } else if (token.st[0] == '('
446 ? want_blank_before_lparen() : want_blank_before_lbracket())
447 buf_add_char(&code, ' ');
448 ps.want_blank = false;
449 buf_add_char(&code, token.st[0]);
450
451 int indent = ind_add(0, code.st, code.len);
452 enum paren_level_cast cast = cast_unknown;
453
454 if (opt.extra_expr_indent && !opt.lineup_to_parens
455 && ps.spaced_expr_psym != psym_0 && ps.nparen == 1
456 && opt.continuation_indent == opt.indent_size)
457 ps.extra_expr_indent = eei_yes;
458
459 if (opt.extra_expr_indent && ps.spaced_expr_psym != psym_0
460 && ps.nparen == 1 && indent < 2 * opt.indent_size)
461 indent = 2 * opt.indent_size;
462
463 if (ps.init_or_struct && *token.st == '(' && ps.tos <= 2) {
464 /*
465 * this is a kluge to make sure that declarations will be aligned
466 * right if proc decl has an explicit type on it, i.e. "int a(x) {..."
467 */
468 parse(psym_0);
469 ps.init_or_struct = false;
470 }
471
472 if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof
473 || ps.is_function_definition)
474 cast = cast_no;
475
476 ps.paren[ps.nparen - 1].indent = (short)indent;
477 ps.paren[ps.nparen - 1].cast = cast;
478 debug_println("paren_indents[%d] is now %s%d",
479 ps.nparen - 1, paren_level_cast_name[cast], indent);
480 }
481
482 static void
483 process_rparen_or_rbracket(void)
484 {
485 if (ps.nparen == 0) {
486 diag(0, "Extra '%c'", *token.st);
487 goto unbalanced;
488 }
489
490 enum paren_level_cast cast = ps.paren[--ps.nparen].cast;
491 if (ps.decl_on_line && !ps.block_init)
492 cast = cast_no;
493
494 if (cast == cast_maybe) {
495 ps.next_unary = true;
496 ps.want_blank = opt.space_after_cast;
497 } else
498 ps.want_blank = true;
499
500 if (code.len == 0) /* if the paren starts the line */
501 ps.line_start_nparen = ps.nparen; /* then indent it */
502
503 unbalanced:
504 buf_add_char(&code, token.st[0]);
505
506 if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
507 if (ps.extra_expr_indent == eei_yes)
508 ps.extra_expr_indent = eei_last;
509 ps.force_nl = true;
510 ps.next_unary = true;
511 ps.in_stmt_or_decl = false;
512 parse(ps.spaced_expr_psym);
513 ps.spaced_expr_psym = psym_0;
514 ps.want_blank = true;
515 }
516 }
517
518 static bool
519 want_blank_before_unary_op(void)
520 {
521 if (ps.want_blank)
522 return true;
523 if (token.st[0] == '+' || token.st[0] == '-')
524 return code.len > 0 && code.mem[code.len - 1] == token.st[0];
525 return false;
526 }
527
528 static void
529 process_unary_op(void)
530 {
531 if (!ps.decl_indent_done && ps.in_decl && !ps.block_init &&
532 !ps.is_function_definition && ps.line_start_nparen == 0) {
533 /* pointer declarations */
534 code_add_decl_indent(ps.decl_ind - (int)token.len, ps.tabs_to_var);
535 ps.decl_indent_done = true;
536 } else if (want_blank_before_unary_op())
537 buf_add_char(&code, ' ');
538
539 buf_add_buf(&code, &token);
540 ps.want_blank = false;
541 }
542
543 static void
544 process_binary_op(void)
545 {
546 if (code.len > 0 && ps.want_blank)
547 buf_add_char(&code, ' ');
548 buf_add_buf(&code, &token);
549 ps.want_blank = true;
550 }
551
552 static void
553 process_postfix_op(void)
554 {
555 buf_add_buf(&code, &token);
556 ps.want_blank = true;
557 }
558
559 static void
560 process_question(void)
561 {
562 ps.quest_level++;
563 if (code.len == 0) {
564 ps.in_stmt_cont = true;
565 ps.in_stmt_or_decl = true;
566 ps.in_decl = false;
567 }
568 if (ps.want_blank)
569 buf_add_char(&code, ' ');
570 buf_add_char(&code, '?');
571 ps.want_blank = true;
572 }
573
574 static void
575 process_colon(void)
576 {
577 if (ps.quest_level > 0) { /* part of a '?:' operator */
578 ps.quest_level--;
579 if (code.len == 0) {
580 ps.in_stmt_cont = true;
581 ps.in_stmt_or_decl = true;
582 ps.in_decl = false;
583 }
584 if (ps.want_blank)
585 buf_add_char(&code, ' ');
586 buf_add_char(&code, ':');
587 ps.want_blank = true;
588 return;
589 }
590
591 if (ps.init_or_struct) { /* bit-field */
592 buf_add_char(&code, ':');
593 ps.want_blank = false;
594 return;
595 }
596
597 buf_add_buf(&lab, &code); /* 'case' or 'default' or named label */
598 buf_add_char(&lab, ':');
599 code.len = 0;
600
601 ps.in_stmt_or_decl = false;
602 ps.is_case_label = ps.seen_case;
603 ps.force_nl = ps.seen_case;
604 ps.seen_case = false;
605 ps.want_blank = false;
606 }
607
608 static void
609 process_semicolon(void)
610 {
611 if (ps.decl_level == 0)
612 ps.init_or_struct = false;
613 ps.seen_case = false; /* only needs to be reset on error */
614 ps.quest_level = 0; /* only needs to be reset on error */
615 if (ps.prev_token == lsym_rparen_or_rbracket)
616 ps.in_func_def_params = false;
617 ps.block_init = false;
618 ps.block_init_level = 0;
619 ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no;
620
621 if (ps.in_decl && code.len == 0 && !ps.block_init &&
622 !ps.decl_indent_done && ps.line_start_nparen == 0) {
623 /* indent stray semicolons in declarations */
624 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
625 ps.decl_indent_done = true;
626 }
627
628 ps.in_decl = ps.decl_level > 0; /* if we were in a first level
629 * structure declaration before, we
630 * aren't anymore */
631
632 if (ps.nparen > 0 && ps.spaced_expr_psym != psym_for_exprs) {
633 /*
634 * There were unbalanced parentheses in the statement. It is a bit
635 * complicated, because the semicolon might be in a for statement.
636 */
637 diag(1, "Unbalanced parentheses");
638 ps.nparen = 0;
639 if (ps.spaced_expr_psym != psym_0) {
640 parse(ps.spaced_expr_psym);
641 ps.spaced_expr_psym = psym_0;
642 }
643 }
644 buf_add_char(&code, ';');
645 ps.want_blank = true;
646 ps.in_stmt_or_decl = ps.nparen > 0;
647
648 if (ps.spaced_expr_psym == psym_0) {
649 parse(psym_0); /* let parser know about end of stmt */
650 ps.force_nl = true;
651 }
652 }
653
654 static void
655 process_lbrace(void)
656 {
657 ps.in_stmt_or_decl = false; /* don't indent the {} */
658
659 if (!ps.block_init)
660 ps.force_nl = true;
661 else if (ps.block_init_level <= 0)
662 ps.block_init_level = 1;
663 else
664 ps.block_init_level++;
665
666 if (code.len > 0 && !ps.block_init) {
667 if (!opt.brace_same_line)
668 output_line();
669 else if (ps.in_func_def_params && !ps.init_or_struct) {
670 ps.ind_level_follow = 0;
671 if (opt.function_brace_split)
672 output_line();
673 else
674 ps.want_blank = true;
675 }
676 }
677
678 if (ps.nparen > 0) {
679 diag(1, "Unbalanced parentheses");
680 ps.nparen = 0;
681 if (ps.spaced_expr_psym != psym_0) {
682 parse(ps.spaced_expr_psym);
683 ps.spaced_expr_psym = psym_0;
684 ps.ind_level = ps.ind_level_follow;
685 }
686 }
687
688 if (code.len == 0)
689 ps.in_stmt_cont = false; /* don't indent the '{' itself */
690 if (ps.in_decl && ps.init_or_struct) {
691 ps.di_stack[ps.decl_level] = ps.decl_ind;
692 if (++ps.decl_level == (int)array_length(ps.di_stack)) {
693 diag(0, "Reached internal limit of %d struct levels",
694 (int)array_length(ps.di_stack));
695 ps.decl_level--;
696 }
697 } else {
698 ps.decl_on_line = false; /* we can't be in the middle of a
699 * declaration, so don't do special
700 * indentation of comments */
701 ps.in_func_def_params = false;
702 ps.in_decl = false;
703 }
704
705 ps.decl_ind = 0;
706 parse(psym_lbrace);
707 if (ps.want_blank)
708 buf_add_char(&code, ' ');
709 ps.want_blank = false;
710 buf_add_char(&code, '{');
711 ps.declaration = decl_no;
712 }
713
714 static void
715 process_rbrace(void)
716 {
717 if (ps.nparen > 0) { /* check for unclosed if, for, else. */
718 diag(1, "Unbalanced parentheses");
719 ps.nparen = 0;
720 ps.spaced_expr_psym = psym_0;
721 }
722
723 ps.declaration = decl_no;
724 ps.block_init_level--;
725
726 if (code.len > 0 && !ps.block_init) {
727 if (opt.verbose)
728 diag(0, "Line broken");
729 output_line();
730 }
731
732 buf_add_char(&code, '}');
733 ps.want_blank = true;
734 ps.in_stmt_or_decl = false;
735 ps.in_stmt_cont = false;
736
737 if (ps.decl_level > 0) { /* multi-level structure declaration */
738 ps.decl_ind = ps.di_stack[--ps.decl_level];
739 if (ps.decl_level == 0 && !ps.in_func_def_params) {
740 ps.declaration = decl_begin;
741 ps.decl_ind = ps.ind_level == 0
742 ? opt.decl_indent : opt.local_decl_indent;
743 }
744 ps.in_decl = true;
745 }
746
747 parse(psym_rbrace);
748 }
749
750 static void
751 process_do(void)
752 {
753 ps.in_stmt_or_decl = false;
754
755 if (code.len > 0) { /* make sure this starts a line */
756 if (opt.verbose)
757 diag(0, "Line broken");
758 output_line();
759 }
760
761 ps.force_nl = true;
762 parse(psym_do);
763 }
764
765 static void
766 process_else(void)
767 {
768 ps.in_stmt_or_decl = false;
769
770 if (code.len > 0 && !(opt.cuddle_else && code.mem[code.len - 1] == '}')) {
771 if (opt.verbose)
772 diag(0, "Line broken");
773 output_line();
774 }
775
776 ps.force_nl = true;
777 parse(psym_else);
778 }
779
780 static void
781 process_type(void)
782 {
783 parse(psym_decl); /* let the parser worry about indentation */
784
785 if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) {
786 if (code.len > 0)
787 output_line();
788 }
789
790 if (ps.in_func_def_params && opt.indent_parameters &&
791 ps.decl_level == 0) {
792 ps.ind_level = ps.ind_level_follow = 1;
793 ps.in_stmt_cont = false;
794 }
795
796 ps.init_or_struct = /* maybe */ true;
797 ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef;
798 if (ps.decl_level <= 0)
799 ps.declaration = decl_begin;
800
801 int len = (int)token.len + 1;
802 int ind = ps.ind_level == 0 || ps.decl_level > 0
803 ? opt.decl_indent /* global variable or local member */
804 : opt.local_decl_indent; /* local variable */
805 ps.decl_ind = ind > 0 ? ind : len;
806 ps.tabs_to_var = opt.use_tabs && ind > 0;
807 }
808
809 static void
810 process_ident(lexer_symbol lsym)
811 {
812 if (ps.in_decl) {
813 if (lsym == lsym_funcname) {
814 ps.in_decl = false;
815 if (opt.procnames_start_line && code.len > 0)
816 output_line();
817 else if (ps.want_blank)
818 buf_add_char(&code, ' ');
819 ps.want_blank = false;
820
821 } else if (!ps.block_init && !ps.decl_indent_done &&
822 ps.line_start_nparen == 0) {
823 if (opt.decl_indent == 0
824 && code.len > 0 && code.mem[code.len - 1] == '}')
825 ps.decl_ind = ind_add(0, code.st, code.len) + 1;
826 code_add_decl_indent(ps.decl_ind, ps.tabs_to_var);
827 ps.decl_indent_done = true;
828 ps.want_blank = false;
829 }
830
831 } else if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) {
832 ps.force_nl = true;
833 ps.next_unary = true;
834 ps.in_stmt_or_decl = false;
835 parse(ps.spaced_expr_psym);
836 ps.spaced_expr_psym = psym_0;
837 }
838 }
839
840 static void
841 process_period(void)
842 {
843 if (code.len > 0 && code.mem[code.len - 1] == ',')
844 buf_add_char(&code, ' ');
845 buf_add_char(&code, '.');
846 ps.want_blank = false;
847 }
848
849 static void
850 process_comma(void)
851 {
852 ps.want_blank = code.len > 0; /* only put blank after comma if comma
853 * does not start the line */
854
855 if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
856 !ps.decl_indent_done && ps.line_start_nparen == 0) {
857 /* indent leading commas and not the actual identifiers */
858 code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var);
859 ps.decl_indent_done = true;
860 }
861
862 buf_add_char(&code, ',');
863
864 if (ps.nparen == 0) {
865 if (ps.block_init_level <= 0)
866 ps.block_init = false;
867 int typical_varname_length = 8;
868 if (break_comma && (opt.break_after_comma ||
869 ind_add(compute_code_indent(), code.st, code.len)
870 >= opt.max_line_length - typical_varname_length))
871 ps.force_nl = true;
872 }
873 }
874
875 /* move the whole line to the 'label' buffer */
876 static void
877 read_preprocessing_line(void)
878 {
879 enum {
880 PLAIN, STR, CHR, COMM
881 } state = PLAIN;
882
883 buf_add_char(&lab, '#');
884
885 while (ch_isblank(inp_peek()))
886 buf_add_char(&lab, inp_next());
887
888 while (inp_peek() != '\n' || (state == COMM && !had_eof)) {
889 buf_add_char(&lab, inp_next());
890 switch (lab.mem[lab.len - 1]) {
891 case '\\':
892 if (state != COMM)
893 buf_add_char(&lab, inp_next());
894 break;
895 case '/':
896 if (inp_peek() == '*' && state == PLAIN) {
897 state = COMM;
898 buf_add_char(&lab, inp_next());
899 }
900 break;
901 case '"':
902 if (state == STR)
903 state = PLAIN;
904 else if (state == PLAIN)
905 state = STR;
906 break;
907 case '\'':
908 if (state == CHR)
909 state = PLAIN;
910 else if (state == PLAIN)
911 state = CHR;
912 break;
913 case '*':
914 if (inp_peek() == '/' && state == COMM) {
915 state = PLAIN;
916 buf_add_char(&lab, inp_next());
917 }
918 break;
919 }
920 }
921
922 while (lab.len > 0 && ch_isblank(lab.mem[lab.len - 1]))
923 lab.len--;
924 }
925
/* A piece of text from 's' (inclusive) to 'e' (exclusive), not terminated. */
typedef struct {
	const char *s;
	const char *e;
} substring;

/* Tell whether the substring has exactly the same text as 'str'. */
static bool
substring_equals(substring ss, const char *str)
{
	size_t str_len = strlen(str);

	if ((size_t)(ss.e - ss.s) != str_len)
		return false;
	return memcmp(ss.s, str, str_len) == 0;
}

/* Tell whether the substring begins with the text of 'prefix'. */
static bool
substring_starts_with(substring ss, const char *prefix)
{
	for (const char *p = ss.s; *prefix != '\0'; p++, prefix++) {
		if (p >= ss.e || *p != *prefix)
			return false;
	}
	return true;
}
945
946 static void
947 process_preprocessing(void)
948 {
949 if (lab.len > 0 || code.len > 0 || com.len > 0)
950 output_line();
951
952 read_preprocessing_line();
953
954 ps.is_case_label = false;
955
956 const char *end = lab.mem + lab.len;
957 substring dir;
958 dir.s = lab.st + 1;
959 while (dir.s < end && ch_isblank(*dir.s))
960 dir.s++;
961 dir.e = dir.s;
962 while (dir.e < end && ch_isalpha(*dir.e))
963 dir.e++;
964
965 if (substring_starts_with(dir, "if")) { /* also ifdef, ifndef */
966 if ((size_t)ifdef_level < array_length(state_stack))
967 state_stack[ifdef_level++] = ps;
968 else
969 diag(1, "#if stack overflow");
970
971 } else if (substring_starts_with(dir, "el")) { /* else, elif */
972 if (ifdef_level <= 0)
973 diag(1, dir.s[2] == 'i' ? "Unmatched #elif" : "Unmatched #else");
974 else
975 ps = state_stack[ifdef_level - 1];
976
977 } else if (substring_equals(dir, "endif")) {
978 if (ifdef_level <= 0)
979 diag(1, "Unmatched #endif");
980 else
981 ifdef_level--;
982
983 } else {
984 if (!substring_equals(dir, "pragma") &&
985 !substring_equals(dir, "error") &&
986 !substring_equals(dir, "line") &&
987 !substring_equals(dir, "undef") &&
988 !substring_equals(dir, "define") &&
989 !substring_equals(dir, "include")) {
990 diag(1, "Unrecognized cpp directive \"%.*s\"",
991 (int)(dir.e - dir.s), dir.s);
992 return;
993 }
994 }
995
996 /*
997 * subsequent processing of the newline character will cause the line to
998 * be printed
999 */
1000 }
1001
1002 static int
1003 main_loop(void)
1004 {
1005
1006 ps.di_stack[ps.decl_level = 0] = 0;
1007
1008 for (;;) { /* loop until we reach eof */
1009 lexer_symbol lsym = lexi();
1010
1011 if (lsym == lsym_eof)
1012 return process_eof();
1013
1014 if (lsym == lsym_if && ps.prev_token == lsym_else && opt.else_if)
1015 ps.force_nl = false;
1016
1017 if (lsym == lsym_newline || lsym == lsym_form_feed ||
1018 lsym == lsym_preprocessing)
1019 ps.force_nl = false;
1020 else if (lsym != lsym_comment) {
1021 maybe_break_line(lsym);
1022 ps.in_stmt_or_decl = true; /* add an extra level of indentation;
1023 * turned off again by a ';' or '}' */
1024 if (com.len > 0)
1025 move_com_to_code();
1026 }
1027
1028 switch (lsym) {
1029
1030 case lsym_form_feed:
1031 process_form_feed();
1032 break;
1033
1034 case lsym_newline:
1035 process_newline();
1036 break;
1037
1038 case lsym_lparen_or_lbracket:
1039 process_lparen_or_lbracket();
1040 break;
1041
1042 case lsym_rparen_or_rbracket:
1043 process_rparen_or_rbracket();
1044 break;
1045
1046 case lsym_unary_op:
1047 process_unary_op();
1048 break;
1049
1050 case lsym_binary_op:
1051 process_binary_op();
1052 break;
1053
1054 case lsym_postfix_op:
1055 process_postfix_op();
1056 break;
1057
1058 case lsym_question:
1059 process_question();
1060 break;
1061
1062 case lsym_case_label:
1063 ps.seen_case = true;
1064 goto copy_token;
1065
1066 case lsym_colon:
1067 process_colon();
1068 break;
1069
1070 case lsym_semicolon:
1071 process_semicolon();
1072 break;
1073
1074 case lsym_lbrace:
1075 process_lbrace();
1076 break;
1077
1078 case lsym_rbrace:
1079 process_rbrace();
1080 break;
1081
1082 case lsym_switch:
1083 ps.spaced_expr_psym = psym_switch_expr;
1084 goto copy_token;
1085
1086 case lsym_for:
1087 ps.spaced_expr_psym = psym_for_exprs;
1088 goto copy_token;
1089
1090 case lsym_if:
1091 ps.spaced_expr_psym = psym_if_expr;
1092 goto copy_token;
1093
1094 case lsym_while:
1095 ps.spaced_expr_psym = psym_while_expr;
1096 goto copy_token;
1097
1098 case lsym_do:
1099 process_do();
1100 goto copy_token;
1101
1102 case lsym_else:
1103 process_else();
1104 goto copy_token;
1105
1106 case lsym_typedef:
1107 case lsym_storage_class:
1108 goto copy_token;
1109
1110 case lsym_tag:
1111 if (ps.nparen > 0)
1112 goto copy_token;
1113 /* FALLTHROUGH */
1114 case lsym_type_outside_parentheses:
1115 process_type();
1116 goto copy_token;
1117
1118 case lsym_type_in_parentheses:
1119 case lsym_offsetof:
1120 case lsym_sizeof:
1121 case lsym_word:
1122 case lsym_funcname:
1123 case lsym_return:
1124 process_ident(lsym);
1125 copy_token:
1126 if (ps.want_blank)
1127 buf_add_char(&code, ' ');
1128 buf_add_buf(&code, &token);
1129 if (lsym != lsym_funcname)
1130 ps.want_blank = true;
1131 break;
1132
1133 case lsym_period:
1134 process_period();
1135 break;
1136
1137 case lsym_comma:
1138 process_comma();
1139 break;
1140
1141 case lsym_preprocessing:
1142 process_preprocessing();
1143 break;
1144
1145 case lsym_comment:
1146 process_comment();
1147 break;
1148
1149 default:
1150 break;
1151 }
1152
1153 if (lsym != lsym_comment && lsym != lsym_newline &&
1154 lsym != lsym_preprocessing)
1155 ps.prev_token = lsym;
1156 }
1157 }
1158
/*
 * Entry point: initialize the global state, load the profiles, process the
 * command line, read the first input line and then format the input.  The
 * exit status is the result of main_loop().
 */
int
main(int argc, char **argv)
{
	int status;

	main_init_globals();
	main_load_profiles(argc, argv);
	main_parse_command_line(argc, argv);
	main_prepare_parsing();

	status = main_loop();
	return status;
}
1168
/*
 * Return 'p' if it is not a null pointer; otherwise terminate the program
 * with EXIT_FAILURE via err(), which reports the current errno.  Intended
 * for wrapping allocation calls.
 */
void *
nonnull(void *p)
{
	if (p != NULL)
		return p;
	err(EXIT_FAILURE, NULL);
	/* NOTREACHED */
}
1176