Home | History | Annotate | Line # | Download | only in indent
indent.c revision 1.247
      1 /*	$NetBSD: indent.c,v 1.247 2023/05/11 10:39:25 rillig Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-4-Clause
      5  *
      6  * Copyright (c) 1985 Sun Microsystems, Inc.
      7  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
      8  * Copyright (c) 1980, 1993
      9  *	The Regents of the University of California.  All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #if 0
     41 static char sccsid[] = "@(#)indent.c	5.17 (Berkeley) 6/7/93";
     42 #endif
     43 
     44 #include <sys/cdefs.h>
     45 #if defined(__NetBSD__)
     46 __RCSID("$NetBSD: indent.c,v 1.247 2023/05/11 10:39:25 rillig Exp $");
     47 #elif defined(__FreeBSD__)
     48 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
     49 #endif
     50 
     51 #include <sys/param.h>
     52 #include <assert.h>
     53 #include <err.h>
     54 #include <errno.h>
     55 #include <fcntl.h>
     56 #include <stdarg.h>
     57 #include <stdio.h>
     58 #include <stdlib.h>
     59 #include <string.h>
     60 #include <unistd.h>
     61 
     62 #include "indent.h"
     63 
     64 struct options opt = {
     65     .brace_same_line = true,
     66     .comment_delimiter_on_blankline = true,
     67     .cuddle_else = true,
     68     .comment_column = 33,
     69     .decl_indent = 16,
     70     .else_if = true,
     71     .function_brace_split = true,
     72     .format_col1_comments = true,
     73     .format_block_comments = true,
     74     .indent_parameters = true,
     75     .indent_size = 8,
     76     .local_decl_indent = -1,
     77     .lineup_to_parens = true,
     78     .procnames_start_line = true,
     79     .star_comment_cont = true,
     80     .tabsize = 8,
     81     .max_line_length = 78,
     82     .use_tabs = true,
     83 };
     84 
     85 struct parser_state ps;
     86 
     87 struct buffer token;
     88 
     89 struct buffer lab;
     90 struct buffer code;
     91 struct buffer com;
     92 
     93 bool found_err;
     94 bool break_comma;
     95 float case_ind;
     96 bool had_eof;
     97 int line_no = 1;
     98 bool inhibit_formatting;
     99 
    100 static int ifdef_level;
    101 static struct parser_state state_stack[5];
    102 
    103 FILE *input;
    104 FILE *output;
    105 struct output_control out;
    106 
    107 static const char *in_name = "Standard Input";
    108 static const char *out_name = "Standard Output";
    109 static const char *backup_suffix = ".BAK";
    110 static char bakfile[MAXPATHLEN] = "";
    111 
    112 
    113 static void
    114 buf_init(struct buffer *buf)
    115 {
    116     size_t size = 200;
    117     buf->buf = xmalloc(size);
    118     buf->l = buf->buf + size - 5 /* safety margin */;
    119     buf->s = buf->buf + 1;	/* allow accessing buf->e[-1] */
    120     buf->e = buf->s;
    121     buf->buf[0] = ' ';
    122     buf->buf[1] = '\0';
    123 }
    124 
    125 static size_t
    126 buf_len(const struct buffer *buf)
    127 {
    128     return (size_t)(buf->e - buf->s);
    129 }
    130 
    131 void
    132 buf_expand(struct buffer *buf, size_t add_size)
    133 {
    134     size_t new_size = (size_t)(buf->l - buf->s) + 400 + add_size;
    135     size_t len = buf_len(buf);
    136     buf->buf = xrealloc(buf->buf, new_size);
    137     buf->l = buf->buf + new_size - 5;
    138     buf->s = buf->buf + 1;
    139     buf->e = buf->s + len;
    140     /* At this point, the buffer may not be null-terminated anymore. */
    141 }
    142 
    143 static void
    144 buf_reserve(struct buffer *buf, size_t n)
    145 {
    146     if (n >= (size_t)(buf->l - buf->e))
    147 	buf_expand(buf, n);
    148 }
    149 
    150 void
    151 buf_add_char(struct buffer *buf, char ch)
    152 {
    153     buf_reserve(buf, 1);
    154     *buf->e++ = ch;
    155 }
    156 
    157 void
    158 buf_add_range(struct buffer *buf, const char *s, const char *e)
    159 {
    160     size_t len = (size_t)(e - s);
    161     buf_reserve(buf, len);
    162     memcpy(buf->e, s, len);
    163     buf->e += len;
    164 }
    165 
    166 static void
    167 buf_add_buf(struct buffer *buf, const struct buffer *add)
    168 {
    169     buf_add_range(buf, add->s, add->e);
    170 }
    171 
    172 static void
    173 buf_terminate(struct buffer *buf)
    174 {
    175     buf_reserve(buf, 1);
    176     *buf->e = '\0';
    177 }
    178 
    179 static void
    180 buf_reset(struct buffer *buf)
    181 {
    182     buf->e = buf->s;
    183 }
    184 
    185 void
    186 diag(int level, const char *msg, ...)
    187 {
    188     va_list ap;
    189 
    190     if (level != 0)
    191 	found_err = true;
    192 
    193     va_start(ap, msg);
    194     fprintf(stderr, "%s: %s:%d: ",
    195 	level == 0 ? "warning" : "error", in_name, line_no);
    196     vfprintf(stderr, msg, ap);
    197     fprintf(stderr, "\n");
    198     va_end(ap);
    199 }
    200 
/*
 * Return the indentation that results from starting at 'ind' and then
 * appending the text from 'start' up to but excluding 'end'.
 *
 * A newline or form feed resets the indentation to 0, a tab advances to
 * next_tab(ind), a backspace moves one column back, and every other
 * character counts as one column.
 */
int
ind_add(int ind, const char *start, const char *end)
{
    const char *p = start;

    while (p != end) {
	switch (*p++) {
	case '\n':
	case '\f':
	    ind = 0;
	    break;
	case '\t':
	    ind = next_tab(ind);
	    break;
	case '\b':
	    ind--;
	    break;
	default:
	    ind++;
	    break;
	}
    }
    return ind;
}
    220 
    221 static void
    222 main_init_globals(void)
    223 {
    224     inp_init();
    225 
    226     buf_init(&token);
    227 
    228     buf_init(&lab);
    229     buf_init(&code);
    230     buf_init(&com);
    231 
    232     ps.s_sym[0] = psym_stmt_list;
    233     ps.prev_token = lsym_semicolon;
    234     ps.next_col_1 = true;
    235 
    236     const char *suffix = getenv("SIMPLE_BACKUP_SUFFIX");
    237     if (suffix != NULL)
    238 	backup_suffix = suffix;
    239 }
    240 
    241 /*
    242  * Copy the input file to the backup file, then make the backup file the input
    243  * and the original input file the output.
    244  */
    245 static void
    246 bakcopy(void)
    247 {
    248     ssize_t n;
    249     int bak_fd;
    250     char buff[8 * 1024];
    251 
    252     const char *last_slash = strrchr(in_name, '/');
    253     snprintf(bakfile, sizeof(bakfile), "%s%s",
    254 	last_slash != NULL ? last_slash + 1 : in_name, backup_suffix);
    255 
    256     /* copy in_name to backup file */
    257     bak_fd = creat(bakfile, 0600);
    258     if (bak_fd < 0)
    259 	err(1, "%s", bakfile);
    260 
    261     while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
    262 	if (write(bak_fd, buff, (size_t)n) != n)
    263 	    err(1, "%s", bakfile);
    264     if (n < 0)
    265 	err(1, "%s", in_name);
    266 
    267     close(bak_fd);
    268     (void)fclose(input);
    269 
    270     /* re-open backup file as the input file */
    271     input = fopen(bakfile, "r");
    272     if (input == NULL)
    273 	err(1, "%s", bakfile);
    274     /* now the original input file will be the output */
    275     output = fopen(in_name, "w");
    276     if (output == NULL) {
    277 	unlink(bakfile);
    278 	err(1, "%s", in_name);
    279     }
    280 }
    281 
/*
 * Scan the command line for the options that affect the profiles, then load
 * the profiles.
 *
 * "-npro" anywhere on the command line prevents any profile from being
 * loaded.  Of several "-Pname" options, the last one is passed to
 * load_profiles(); without any, NULL is passed.
 */
static void
main_load_profiles(int argc, char **argv)
{
    const char *profile_name = NULL;
    int i = 1;

    while (i < argc) {
	const char *arg = argv[i++];

	if (strcmp(arg, "-npro") == 0)
	    return;
	if (strncmp(arg, "-P", 2) == 0 && arg[2] != '\0')
	    profile_name = arg + 2;
    }

    load_profiles(profile_name);
}
    297 
    298 static void
    299 main_parse_command_line(int argc, char **argv)
    300 {
    301     for (int i = 1; i < argc; ++i) {
    302 	const char *arg = argv[i];
    303 
    304 	if (arg[0] == '-') {
    305 	    set_option(arg, "Command line");
    306 
    307 	} else if (input == NULL) {
    308 	    in_name = arg;
    309 	    if ((input = fopen(in_name, "r")) == NULL)
    310 		err(1, "%s", in_name);
    311 
    312 	} else if (output == NULL) {
    313 	    out_name = arg;
    314 	    if (strcmp(in_name, out_name) == 0)
    315 		errx(1, "input and output files must be different");
    316 	    if ((output = fopen(out_name, "w")) == NULL)
    317 		err(1, "%s", out_name);
    318 
    319 	} else
    320 	    errx(1, "too many arguments: %s", arg);
    321     }
    322 
    323     if (input == NULL) {
    324 	input = stdin;
    325 	output = stdout;
    326     } else if (output == NULL) {
    327 	out_name = in_name;
    328 	bakcopy();
    329     }
    330 
    331     if (opt.comment_column <= 1)
    332 	opt.comment_column = 2;	/* don't put normal comments before column 2 */
    333     if (opt.block_comment_max_line_length <= 0)
    334 	opt.block_comment_max_line_length = opt.max_line_length;
    335     if (opt.local_decl_indent < 0)	/* if not specified by user, set this */
    336 	opt.local_decl_indent = opt.decl_indent;
    337     if (opt.decl_comment_column <= 0)	/* if not specified by user, set this */
    338 	opt.decl_comment_column = opt.ljust_decl
    339 	    ? (opt.comment_column <= 10 ? 2 : opt.comment_column - 8)
    340 	    : opt.comment_column;
    341     if (opt.continuation_indent == 0)
    342 	opt.continuation_indent = opt.indent_size;
    343 }
    344 
    345 static void
    346 main_prepare_parsing(void)
    347 {
    348     inp_read_line();
    349 
    350     int ind = 0;
    351     for (const char *p = inp_p();; p++) {
    352 	if (*p == ' ')
    353 	    ind++;
    354 	else if (*p == '\t')
    355 	    ind = next_tab(ind);
    356 	else
    357 	    break;
    358     }
    359 
    360     if (ind >= opt.indent_size)
    361 	ps.ind_level = ps.ind_level_follow = ind / opt.indent_size;
    362 }
    363 
    364 static void
    365 code_add_decl_indent(int decl_ind, bool tabs_to_var)
    366 {
    367     int base_ind = ps.ind_level * opt.indent_size;
    368     int ind = base_ind + (int)buf_len(&code);
    369     int target_ind = base_ind + decl_ind;
    370     char *orig_code_e = code.e;
    371 
    372     if (tabs_to_var)
    373 	for (int next; (next = next_tab(ind)) <= target_ind; ind = next)
    374 	    buf_add_char(&code, '\t');
    375 
    376     for (; ind < target_ind; ind++)
    377 	buf_add_char(&code, ' ');
    378 
    379     if (code.e == orig_code_e && ps.want_blank) {
    380 	buf_add_char(&code, ' ');
    381 	ps.want_blank = false;
    382     }
    383 }
    384 
    385 static void __attribute__((__noreturn__))
    386 process_eof(void)
    387 {
    388     if (lab.s != lab.e || code.s != code.e || com.s != com.e)
    389 	output_line();
    390 
    391     if (ps.tos > 1)		/* check for balanced braces */
    392 	diag(1, "Stuff missing from end of file");
    393 
    394     if (opt.verbose) {
    395 	printf("There were %d output lines and %d comments\n",
    396 	    ps.stats.lines, ps.stats.comments);
    397 	printf("(Lines with comments)/(Lines with code): %6.3f\n",
    398 	    (1.0 * ps.stats.comment_lines) / ps.stats.code_lines);
    399     }
    400 
    401     fflush(output);
    402     exit(found_err ? EXIT_FAILURE : EXIT_SUCCESS);
    403 }
    404 
    405 static void
    406 maybe_break_line(lexer_symbol lsym, bool *force_nl)
    407 {
    408     if (!*force_nl)
    409 	return;
    410     if (lsym == lsym_semicolon)
    411 	return;
    412     if (lsym == lsym_lbrace && opt.brace_same_line)
    413 	return;
    414 
    415     if (opt.verbose)
    416 	diag(0, "Line broken");
    417     output_line();
    418     ps.want_blank = false;
    419     *force_nl = false;
    420 }
    421 
    422 static void
    423 move_com_to_code(void)
    424 {
    425     buf_add_char(&code, ' ');
    426     buf_add_buf(&code, &com);
    427     buf_add_char(&code, ' ');
    428     buf_terminate(&code);
    429     buf_reset(&com);
    430     ps.want_blank = false;
    431 }
    432 
/*
 * Handle a form feed in the input by handing the current line over to
 * output_line_ff().
 */
static void
process_form_feed(void)
{
    output_line_ff();
    ps.want_blank = false;	/* no blank before the next token */
}
    439 
    440 static void
    441 process_newline(void)
    442 {
    443     if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init &&
    444 	!opt.break_after_comma && break_comma &&
    445 	com.s == com.e)
    446 	goto stay_in_line;
    447 
    448     output_line();
    449     ps.want_blank = false;
    450 
    451 stay_in_line:
    452     ++line_no;
    453 }
    454 
    455 static bool
    456 want_blank_before_lparen(void)
    457 {
    458     if (!ps.want_blank)
    459 	return false;
    460     if (opt.proc_calls_space)
    461 	return true;
    462     if (ps.prev_token == lsym_rparen_or_rbracket)
    463 	return false;
    464     if (ps.prev_token == lsym_offsetof)
    465 	return false;
    466     if (ps.prev_token == lsym_sizeof)
    467 	return opt.blank_after_sizeof;
    468     if (ps.prev_token == lsym_word || ps.prev_token == lsym_funcname)
    469 	return false;
    470     return true;
    471 }
    472 
    473 static void
    474 process_lparen_or_lbracket(int decl_ind, bool tabs_to_var, bool spaced_expr)
    475 {
    476     if (++ps.nparen == array_length(ps.paren)) {
    477 	diag(0, "Reached internal limit of %zu unclosed parentheses",
    478 	    array_length(ps.paren));
    479 	ps.nparen--;
    480     }
    481 
    482     if (token.s[0] == '(' && ps.in_decl
    483 	&& !ps.block_init && !ps.decl_indent_done &&
    484 	!ps.is_function_definition && ps.line_start_nparen == 0) {
    485 	/* function pointer declarations */
    486 	code_add_decl_indent(decl_ind, tabs_to_var);
    487 	ps.decl_indent_done = true;
    488     } else if (want_blank_before_lparen())
    489 	*code.e++ = ' ';
    490     ps.want_blank = false;
    491     *code.e++ = token.s[0];
    492 
    493     ps.paren[ps.nparen - 1].indent = (short)ind_add(0, code.s, code.e);
    494     debug_println("paren_indents[%d] is now %d",
    495 	ps.nparen - 1, ps.paren[ps.nparen - 1].indent);
    496 
    497     if (spaced_expr && ps.nparen == 1 && opt.extra_expr_indent
    498 	    && ps.paren[0].indent < 2 * opt.indent_size) {
    499 	ps.paren[0].indent = (short)(2 * opt.indent_size);
    500 	debug_println("paren_indents[0] is now %d", ps.paren[0].indent);
    501     }
    502 
    503     if (ps.init_or_struct && *token.s == '(' && ps.tos <= 2) {
    504 	/*
    505 	 * this is a kluge to make sure that declarations will be aligned
    506 	 * right if proc decl has an explicit type on it, i.e. "int a(x) {..."
    507 	 */
    508 	parse(psym_semicolon);	/* I said this was a kluge... */
    509 	ps.init_or_struct = false;
    510     }
    511 
    512     /* parenthesized type following sizeof or offsetof is not a cast */
    513     if (ps.prev_token == lsym_offsetof || ps.prev_token == lsym_sizeof)
    514 	ps.paren[ps.nparen - 1].no_cast = true;
    515 }
    516 
    517 static void
    518 process_rparen_or_rbracket(bool *spaced_expr, bool *force_nl, stmt_head hd)
    519 {
    520     if (ps.paren[ps.nparen - 1].maybe_cast &&
    521 	!ps.paren[ps.nparen - 1].no_cast) {
    522 	ps.next_unary = true;
    523 	ps.paren[ps.nparen - 1].maybe_cast = false;
    524 	ps.want_blank = opt.space_after_cast;
    525     } else
    526 	ps.want_blank = true;
    527     ps.paren[ps.nparen - 1].no_cast = false;
    528 
    529     if (ps.nparen > 0)
    530 	ps.nparen--;
    531     else
    532 	diag(0, "Extra '%c'", *token.s);
    533 
    534     if (code.e == code.s)	/* if the paren starts the line */
    535 	ps.line_start_nparen = ps.nparen;	/* then indent it */
    536 
    537     *code.e++ = token.s[0];
    538 
    539     if (*spaced_expr && ps.nparen == 0) {	/* check for end of 'if
    540 						 * (...)', or some such */
    541 	*spaced_expr = false;
    542 	*force_nl = true;	/* must force newline after if */
    543 	ps.next_unary = true;
    544 	ps.in_stmt_or_decl = false;	/* don't use stmt continuation
    545 					 * indentation */
    546 
    547 	parse_stmt_head(hd);
    548     }
    549 }
    550 
    551 static bool
    552 want_blank_before_unary_op(void)
    553 {
    554     if (ps.want_blank)
    555 	return true;
    556     if (token.s[0] == '+' || token.s[0] == '-')
    557 	return code.e > code.s && code.e[-1] == token.s[0];
    558     return false;
    559 }
    560 
    561 static void
    562 process_unary_op(int decl_ind, bool tabs_to_var)
    563 {
    564     if (!ps.decl_indent_done && ps.in_decl && !ps.block_init &&
    565 	!ps.is_function_definition && ps.line_start_nparen == 0) {
    566 	/* pointer declarations */
    567 	code_add_decl_indent(decl_ind - (int)buf_len(&token), tabs_to_var);
    568 	ps.decl_indent_done = true;
    569     } else if (want_blank_before_unary_op())
    570 	*code.e++ = ' ';
    571 
    572     buf_add_buf(&code, &token);
    573     ps.want_blank = false;
    574 }
    575 
    576 static void
    577 process_binary_op(void)
    578 {
    579     if (buf_len(&code) > 0)
    580 	buf_add_char(&code, ' ');
    581     buf_add_buf(&code, &token);
    582     ps.want_blank = true;
    583 }
    584 
    585 static void
    586 process_postfix_op(void)
    587 {
    588     *code.e++ = token.s[0];
    589     *code.e++ = token.s[1];
    590     ps.want_blank = true;
    591 }
    592 
    593 static void
    594 process_question(int *quest_level)
    595 {
    596     (*quest_level)++;
    597     if (ps.want_blank)
    598 	*code.e++ = ' ';
    599     *code.e++ = '?';
    600     ps.want_blank = true;
    601 }
    602 
    603 static void
    604 process_colon(int *quest_level, bool *force_nl, bool *seen_case)
    605 {
    606     if (*quest_level > 0) {	/* part of a '?:' operator */
    607 	--*quest_level;
    608 	if (ps.want_blank)
    609 	    *code.e++ = ' ';
    610 	*code.e++ = ':';
    611 	ps.want_blank = true;
    612 	return;
    613     }
    614 
    615     if (ps.init_or_struct) {	/* bit-field */
    616 	*code.e++ = ':';
    617 	ps.want_blank = false;
    618 	return;
    619     }
    620 
    621     buf_add_buf(&lab, &code);	/* 'case' or 'default' or named label */
    622     buf_add_char(&lab, ':');
    623     buf_terminate(&lab);
    624     buf_reset(&code);
    625 
    626     ps.in_stmt_or_decl = false;
    627     ps.is_case_label = *seen_case;
    628     *force_nl = *seen_case;
    629     *seen_case = false;
    630     ps.want_blank = false;
    631 }
    632 
    633 static void
    634 process_semicolon(bool *seen_case, int *quest_level, int decl_ind,
    635     bool tabs_to_var, bool *spaced_expr, stmt_head hd, bool *force_nl)
    636 {
    637     if (ps.decl_level == 0)
    638 	ps.init_or_struct = false;
    639     *seen_case = false;		/* these will only need resetting in an error */
    640     *quest_level = 0;
    641     if (ps.prev_token == lsym_rparen_or_rbracket)
    642 	ps.in_func_def_params = false;
    643     ps.block_init = false;
    644     ps.block_init_level = 0;
    645     ps.just_saw_decl--;
    646 
    647     if (ps.in_decl && code.s == code.e && !ps.block_init &&
    648 	!ps.decl_indent_done && ps.line_start_nparen == 0) {
    649 	/* indent stray semicolons in declarations */
    650 	code_add_decl_indent(decl_ind - 1, tabs_to_var);
    651 	ps.decl_indent_done = true;
    652     }
    653 
    654     ps.in_decl = ps.decl_level > 0;	/* if we were in a first level
    655 					 * structure declaration before, we
    656 					 * aren't anymore */
    657 
    658     if ((!*spaced_expr || hd != hd_for) && ps.nparen > 0) {
    659 
    660 	/*
    661 	 * There were unbalanced parentheses in the statement. It is a bit
    662 	 * complicated, because the semicolon might be in a for statement.
    663 	 */
    664 	diag(1, "Unbalanced parentheses");
    665 	ps.nparen = 0;
    666 	if (*spaced_expr) {	/* 'if', 'while', etc. */
    667 	    *spaced_expr = false;
    668 	    parse_stmt_head(hd);
    669 	}
    670     }
    671     *code.e++ = ';';
    672     ps.want_blank = true;
    673     ps.in_stmt_or_decl = ps.nparen > 0;
    674 
    675     if (!*spaced_expr) {	/* if not if for (;;) */
    676 	parse(psym_semicolon);	/* let parser know about end of stmt */
    677 	*force_nl = true;	/* force newline after an end of stmt */
    678     }
    679 }
    680 
    681 static void
    682 process_lbrace(bool *force_nl, bool *spaced_expr, stmt_head hd,
    683     int *di_stack, int di_stack_cap, int *decl_ind)
    684 {
    685     ps.in_stmt_or_decl = false;	/* don't indent the {} */
    686 
    687     if (!ps.block_init)
    688 	*force_nl = true;	/* force other stuff on same line as '{' onto
    689 				 * new line */
    690     else if (ps.block_init_level <= 0)
    691 	ps.block_init_level = 1;
    692     else
    693 	ps.block_init_level++;
    694 
    695     if (code.s != code.e && !ps.block_init) {
    696 	if (!opt.brace_same_line) {
    697 	    output_line();
    698 	    ps.want_blank = false;
    699 	} else if (ps.in_func_def_params && !ps.init_or_struct) {
    700 	    ps.ind_level_follow = 0;
    701 	    if (opt.function_brace_split) {	/* dump the line prior to the
    702 						 * brace ... */
    703 		output_line();
    704 		ps.want_blank = false;
    705 	    } else		/* add a space between the decl and brace */
    706 		ps.want_blank = true;
    707 	}
    708     }
    709 
    710     if (ps.in_func_def_params)
    711 	out.blank_line_before = false;
    712 
    713     if (ps.nparen > 0) {
    714 	diag(1, "Unbalanced parentheses");
    715 	ps.nparen = 0;
    716 	if (*spaced_expr) {	/* check for unclosed 'if', 'for', etc. */
    717 	    *spaced_expr = false;
    718 	    parse_stmt_head(hd);
    719 	    ps.ind_level = ps.ind_level_follow;
    720 	}
    721     }
    722 
    723     if (code.s == code.e)
    724 	ps.in_stmt_cont = false;	/* don't indent the '{' itself */
    725     if (ps.in_decl && ps.init_or_struct) {
    726 	di_stack[ps.decl_level] = *decl_ind;
    727 	if (++ps.decl_level == di_stack_cap) {
    728 	    diag(0, "Reached internal limit of %d struct levels",
    729 		di_stack_cap);
    730 	    ps.decl_level--;
    731 	}
    732     } else {
    733 	ps.decl_on_line = false;	/* we can't be in the middle of a
    734 					 * declaration, so don't do special
    735 					 * indentation of comments */
    736 	if (opt.blanklines_after_decl_at_top && ps.in_func_def_params)
    737 	    out.blank_line_after = true;
    738 	ps.in_func_def_params = false;
    739 	ps.in_decl = false;
    740     }
    741 
    742     *decl_ind = 0;
    743     parse(psym_lbrace);
    744     if (ps.want_blank)
    745 	*code.e++ = ' ';
    746     ps.want_blank = false;
    747     *code.e++ = '{';
    748     ps.just_saw_decl = 0;
    749 }
    750 
    751 static void
    752 process_rbrace(bool *spaced_expr, int *decl_ind, const int *di_stack)
    753 {
    754     if (ps.s_sym[ps.tos] == psym_decl && !ps.block_init) {
    755 	/* semicolons can be omitted in declarations */
    756 	parse(psym_semicolon);
    757     }
    758 
    759     if (ps.nparen > 0) {	/* check for unclosed if, for, else. */
    760 	diag(1, "Unbalanced parentheses");
    761 	ps.nparen = 0;
    762 	*spaced_expr = false;
    763     }
    764 
    765     ps.just_saw_decl = 0;
    766     ps.block_init_level--;
    767 
    768     if (code.s != code.e && !ps.block_init) {	/* '}' must be first on line */
    769 	if (opt.verbose)
    770 	    diag(0, "Line broken");
    771 	output_line();
    772     }
    773 
    774     *code.e++ = '}';
    775     ps.want_blank = true;
    776     ps.in_stmt_or_decl = false;
    777     ps.in_stmt_cont = false;
    778 
    779     if (ps.decl_level > 0) {	/* multi-level structure declaration */
    780 	*decl_ind = di_stack[--ps.decl_level];
    781 	if (ps.decl_level == 0 && !ps.in_func_def_params) {
    782 	    ps.just_saw_decl = 2;
    783 	    *decl_ind = ps.ind_level == 0
    784 		? opt.decl_indent : opt.local_decl_indent;
    785 	}
    786 	ps.in_decl = true;
    787     }
    788 
    789     out.blank_line_before = false;
    790     parse(psym_rbrace);
    791 
    792     if (ps.tos <= 1 && opt.blanklines_after_procs && ps.decl_level <= 0)
    793 	out.blank_line_after = true;
    794 }
    795 
    796 static void
    797 process_do(bool *force_nl)
    798 {
    799     ps.in_stmt_or_decl = false;
    800 
    801     if (code.e != code.s) {	/* make sure this starts a line */
    802 	if (opt.verbose)
    803 	    diag(0, "Line broken");
    804 	output_line();
    805 	ps.want_blank = false;
    806     }
    807 
    808     *force_nl = true;		/* following stuff must go onto new line */
    809     parse(psym_do);
    810 }
    811 
    812 static void
    813 process_else(bool *force_nl)
    814 {
    815     ps.in_stmt_or_decl = false;
    816 
    817     if (code.e > code.s && !(opt.cuddle_else && code.e[-1] == '}')) {
    818 	if (opt.verbose)
    819 	    diag(0, "Line broken");
    820 	output_line();		/* make sure this starts a line */
    821 	ps.want_blank = false;
    822     }
    823 
    824     *force_nl = true;		/* following stuff must go onto new line */
    825     parse(psym_else);
    826 }
    827 
    828 static void
    829 process_type(int *decl_ind, bool *tabs_to_var)
    830 {
    831     parse(psym_decl);		/* let the parser worry about indentation */
    832 
    833     if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) {
    834 	if (code.s != code.e) {
    835 	    output_line();
    836 	    ps.want_blank = false;
    837 	}
    838     }
    839 
    840     if (ps.in_func_def_params && opt.indent_parameters &&
    841 	    ps.decl_level == 0) {
    842 	ps.ind_level = ps.ind_level_follow = 1;
    843 	ps.in_stmt_cont = false;
    844     }
    845 
    846     ps.init_or_struct = /* maybe */ true;
    847     ps.in_decl = ps.decl_on_line = ps.prev_token != lsym_typedef;
    848     if (ps.decl_level <= 0)
    849 	ps.just_saw_decl = 2;
    850 
    851     out.blank_line_before = false;
    852 
    853     int len = (int)buf_len(&token) + 1;
    854     int ind = ps.ind_level == 0 || ps.decl_level > 0
    855 	? opt.decl_indent	/* global variable or local member */
    856 	: opt.local_decl_indent;	/* local variable */
    857     *decl_ind = ind > 0 ? ind : len;
    858     *tabs_to_var = opt.use_tabs && ind > 0;
    859 }
    860 
    861 static void
    862 process_ident(lexer_symbol lsym, int decl_ind, bool tabs_to_var,
    863     bool *spaced_expr, bool *force_nl, stmt_head hd)
    864 {
    865     if (ps.in_decl) {
    866 	if (lsym == lsym_funcname) {
    867 	    ps.in_decl = false;
    868 	    if (opt.procnames_start_line && code.s != code.e) {
    869 		*code.e = '\0';
    870 		output_line();
    871 	    } else if (ps.want_blank) {
    872 		*code.e++ = ' ';
    873 	    }
    874 	    ps.want_blank = false;
    875 
    876 	} else if (!ps.block_init && !ps.decl_indent_done &&
    877 		ps.line_start_nparen == 0) {
    878 	    code_add_decl_indent(decl_ind, tabs_to_var);
    879 	    ps.decl_indent_done = true;
    880 	    ps.want_blank = false;
    881 	}
    882 
    883     } else if (*spaced_expr && ps.nparen == 0) {
    884 	*spaced_expr = false;
    885 	*force_nl = true;
    886 	ps.next_unary = true;
    887 	ps.in_stmt_or_decl = false;
    888 	parse_stmt_head(hd);
    889     }
    890 }
    891 
    892 static void
    893 copy_token(void)
    894 {
    895     if (ps.want_blank)
    896 	buf_add_char(&code, ' ');
    897     buf_add_buf(&code, &token);
    898 }
    899 
    900 static void
    901 process_period(void)
    902 {
    903     if (code.e > code.s && code.e[-1] == ',')
    904 	*code.e++ = ' ';
    905     *code.e++ = '.';
    906     ps.want_blank = false;
    907 }
    908 
    909 static void
    910 process_comma(int decl_ind, bool tabs_to_var, bool *force_nl)
    911 {
    912     ps.want_blank = code.s != code.e;	/* only put blank after comma if comma
    913 					 * does not start the line */
    914 
    915     if (ps.in_decl && !ps.is_function_definition && !ps.block_init &&
    916 	    !ps.decl_indent_done && ps.line_start_nparen == 0) {
    917 	/* indent leading commas and not the actual identifiers */
    918 	code_add_decl_indent(decl_ind - 1, tabs_to_var);
    919 	ps.decl_indent_done = true;
    920     }
    921 
    922     *code.e++ = ',';
    923 
    924     if (ps.nparen == 0) {
    925 	if (ps.block_init_level <= 0)
    926 	    ps.block_init = false;
    927 	int varname_len = 8;	/* rough estimate for the length of a typical
    928 				 * variable name */
    929 	if (break_comma && (opt.break_after_comma ||
    930 		ind_add(compute_code_indent(), code.s, code.e)
    931 		>= opt.max_line_length - varname_len))
    932 	    *force_nl = true;
    933     }
    934 }
    935 
    936 /* move the whole line to the 'label' buffer */
    937 static void
    938 read_preprocessing_line(void)
    939 {
    940     enum {
    941 	PLAIN, STR, CHR, COMM
    942     } state;
    943 
    944     buf_add_char(&lab, '#');
    945 
    946     state = PLAIN;
    947     int com_start = 0, com_end = 0;
    948 
    949     while (ch_isblank(inp_peek()))
    950 	inp_skip();
    951 
    952     while (inp_peek() != '\n' || (state == COMM && !had_eof)) {
    953 	buf_reserve(&lab, 2);
    954 	*lab.e++ = inp_next();
    955 	switch (lab.e[-1]) {
    956 	case '\\':
    957 	    if (state != COMM)
    958 		*lab.e++ = inp_next();
    959 	    break;
    960 	case '/':
    961 	    if (inp_peek() == '*' && state == PLAIN) {
    962 		state = COMM;
    963 		*lab.e++ = inp_next();
    964 		com_start = (int)buf_len(&lab) - 2;
    965 	    }
    966 	    break;
    967 	case '"':
    968 	    if (state == STR)
    969 		state = PLAIN;
    970 	    else if (state == PLAIN)
    971 		state = STR;
    972 	    break;
    973 	case '\'':
    974 	    if (state == CHR)
    975 		state = PLAIN;
    976 	    else if (state == PLAIN)
    977 		state = CHR;
    978 	    break;
    979 	case '*':
    980 	    if (inp_peek() == '/' && state == COMM) {
    981 		state = PLAIN;
    982 		*lab.e++ = inp_next();
    983 		com_end = (int)buf_len(&lab);
    984 	    }
    985 	    break;
    986 	}
    987     }
    988 
    989     while (lab.e > lab.s && ch_isblank(lab.e[-1]))
    990 	lab.e--;
    991     if (lab.e - lab.s == com_end && !inp_comment_seen()) {
    992 	/* comment on preprocessor line */
    993 	inp_comment_init_preproc();
    994 	inp_comment_add_range(lab.s + com_start, lab.s + com_end);
    995 	lab.e = lab.s + com_start;
    996 	while (lab.e > lab.s && ch_isblank(lab.e[-1]))
    997 	    lab.e--;
    998 	inp_comment_add_char(' ');	/* add trailing blank, just in case */
    999 	inp_from_comment();
   1000     }
   1001     buf_terminate(&lab);
   1002 }
   1003 
   1004 static void
   1005 process_preprocessing(void)
   1006 {
   1007     if (com.s != com.e || lab.s != lab.e || code.s != code.e)
   1008 	output_line();
   1009 
   1010     read_preprocessing_line();
   1011 
   1012     ps.is_case_label = false;
   1013 
   1014     if (strncmp(lab.s, "#if", 3) == 0) {	/* also ifdef, ifndef */
   1015 	if ((size_t)ifdef_level < array_length(state_stack))
   1016 	    state_stack[ifdef_level++] = ps;
   1017 	else
   1018 	    diag(1, "#if stack overflow");
   1019 
   1020     } else if (strncmp(lab.s, "#el", 3) == 0) {	/* else, elif */
   1021 	if (ifdef_level <= 0)
   1022 	    diag(1, lab.s[3] == 'i' ? "Unmatched #elif" : "Unmatched #else");
   1023 	else
   1024 	    ps = state_stack[ifdef_level - 1];
   1025 
   1026     } else if (strncmp(lab.s, "#endif", 6) == 0) {
   1027 	if (ifdef_level <= 0)
   1028 	    diag(1, "Unmatched #endif");
   1029 	else
   1030 	    ifdef_level--;
   1031 
   1032     } else {
   1033 	if (strncmp(lab.s + 1, "pragma", 6) != 0 &&
   1034 	    strncmp(lab.s + 1, "error", 5) != 0 &&
   1035 	    strncmp(lab.s + 1, "line", 4) != 0 &&
   1036 	    strncmp(lab.s + 1, "undef", 5) != 0 &&
   1037 	    strncmp(lab.s + 1, "define", 6) != 0 &&
   1038 	    strncmp(lab.s + 1, "include", 7) != 0) {
   1039 	    diag(1, "Unrecognized cpp directive");
   1040 	    return;
   1041 	}
   1042     }
   1043 
   1044     if (opt.blanklines_around_conditional_compilation) {
   1045 	out.blank_line_after = true;
   1046 	out.blank_lines_to_output = 0;
   1047     } else {
   1048 	out.blank_line_after = false;
   1049 	out.blank_line_before = false;
   1050     }
   1051 
   1052     /*
   1053      * subsequent processing of the newline character will cause the line to
   1054      * be printed
   1055      */
   1056 }
   1057 
   1058 __dead static void
   1059 main_loop(void)
   1060 {
   1061     bool force_nl = false;	/* when true, code must be broken */
   1062     bool last_else = false;	/* true iff last keyword was an else */
   1063     int decl_ind = 0;		/* current indentation for declarations */
   1064     int di_stack[20];		/* a stack of structure indentation levels */
   1065     bool tabs_to_var = false;	/* true if using tabs to indent to var name */
   1066     bool spaced_expr = false;	/* whether we are in the expression of
   1067 				 * if(...), while(...), etc. */
   1068     stmt_head hd = hd_0;	/* the type of statement for 'if (...)', 'for
   1069 				 * (...)', etc */
   1070     int quest_level = 0;	/* when this is positive, we have seen a '?'
   1071 				 * without the matching ':' in a '?:'
   1072 				 * expression */
   1073     bool seen_case = false;	/* set to true when we see a 'case', so we
   1074 				 * know what to do with the following colon */
   1075 
   1076     di_stack[ps.decl_level = 0] = 0;
   1077 
   1078     for (;;) {			/* loop until we reach eof */
   1079 	lexer_symbol lsym = lexi();
   1080 
   1081 	if (lsym == lsym_if && last_else && opt.else_if)
   1082 	    force_nl = false;
   1083 	last_else = false;
   1084 
   1085 	if (lsym == lsym_eof) {
   1086 	    process_eof();
   1087 	    /* NOTREACHED */
   1088 	}
   1089 
   1090 	if (lsym == lsym_newline || lsym == lsym_form_feed ||
   1091 		lsym == lsym_preprocessing)
   1092 	    force_nl = false;
   1093 	else if (lsym != lsym_comment) {
   1094 	    maybe_break_line(lsym, &force_nl);
   1095 	    ps.in_stmt_or_decl = true;	/* add an extra level of indentation;
   1096 					 * turned off again by a ';' or '}' */
   1097 	    if (com.s != com.e)
   1098 		move_com_to_code();
   1099 	}
   1100 
   1101 	buf_reserve(&code, 3);	/* space for 2 characters plus '\0' */
   1102 
   1103 	switch (lsym) {
   1104 
   1105 	case lsym_form_feed:
   1106 	    process_form_feed();
   1107 	    break;
   1108 
   1109 	case lsym_newline:
   1110 	    process_newline();
   1111 	    break;
   1112 
   1113 	case lsym_lparen_or_lbracket:
   1114 	    process_lparen_or_lbracket(decl_ind, tabs_to_var, spaced_expr);
   1115 	    break;
   1116 
   1117 	case lsym_rparen_or_rbracket:
   1118 	    process_rparen_or_rbracket(&spaced_expr, &force_nl, hd);
   1119 	    break;
   1120 
   1121 	case lsym_unary_op:
   1122 	    process_unary_op(decl_ind, tabs_to_var);
   1123 	    break;
   1124 
   1125 	case lsym_binary_op:
   1126 	    process_binary_op();
   1127 	    break;
   1128 
   1129 	case lsym_postfix_op:
   1130 	    process_postfix_op();
   1131 	    break;
   1132 
   1133 	case lsym_question:
   1134 	    process_question(&quest_level);
   1135 	    break;
   1136 
   1137 	case lsym_case_label:
   1138 	    seen_case = true;
   1139 	    goto copy_token;
   1140 
   1141 	case lsym_colon:
   1142 	    process_colon(&quest_level, &force_nl, &seen_case);
   1143 	    break;
   1144 
   1145 	case lsym_semicolon:
   1146 	    process_semicolon(&seen_case, &quest_level, decl_ind, tabs_to_var,
   1147 		&spaced_expr, hd, &force_nl);
   1148 	    break;
   1149 
   1150 	case lsym_lbrace:
   1151 	    process_lbrace(&force_nl, &spaced_expr, hd, di_stack,
   1152 		(int)array_length(di_stack), &decl_ind);
   1153 	    break;
   1154 
   1155 	case lsym_rbrace:
   1156 	    process_rbrace(&spaced_expr, &decl_ind, di_stack);
   1157 	    break;
   1158 
   1159 	case lsym_switch:
   1160 	    spaced_expr = true;	/* the interesting stuff is done after the
   1161 				 * expressions are scanned */
   1162 	    hd = hd_switch;	/* remember the type of header for later use
   1163 				 * by the parser */
   1164 	    goto copy_token;
   1165 
   1166 	case lsym_for:
   1167 	    spaced_expr = true;
   1168 	    hd = hd_for;
   1169 	    goto copy_token;
   1170 
   1171 	case lsym_if:
   1172 	    spaced_expr = true;
   1173 	    hd = hd_if;
   1174 	    goto copy_token;
   1175 
   1176 	case lsym_while:
   1177 	    spaced_expr = true;
   1178 	    hd = hd_while;
   1179 	    goto copy_token;
   1180 
   1181 	case lsym_do:
   1182 	    process_do(&force_nl);
   1183 	    goto copy_token;
   1184 
   1185 	case lsym_else:
   1186 	    process_else(&force_nl);
   1187 	    last_else = true;
   1188 	    goto copy_token;
   1189 
   1190 	case lsym_typedef:
   1191 	case lsym_storage_class:
   1192 	    out.blank_line_before = false;
   1193 	    goto copy_token;
   1194 
   1195 	case lsym_tag:
   1196 	    if (ps.nparen > 0)
   1197 		goto copy_token;
   1198 	    /* FALLTHROUGH */
   1199 	case lsym_type_outside_parentheses:
   1200 	    process_type(&decl_ind, &tabs_to_var);
   1201 	    goto copy_token;
   1202 
   1203 	case lsym_type_in_parentheses:
   1204 	case lsym_offsetof:
   1205 	case lsym_sizeof:
   1206 	case lsym_word:
   1207 	case lsym_funcname:
   1208 	case lsym_return:
   1209 	    process_ident(lsym, decl_ind, tabs_to_var, &spaced_expr,
   1210 		&force_nl, hd);
   1211     copy_token:
   1212 	    copy_token();
   1213 	    if (lsym != lsym_funcname)
   1214 		ps.want_blank = true;
   1215 	    break;
   1216 
   1217 	case lsym_period:
   1218 	    process_period();
   1219 	    break;
   1220 
   1221 	case lsym_comma:
   1222 	    process_comma(decl_ind, tabs_to_var, &force_nl);
   1223 	    break;
   1224 
   1225 	case lsym_preprocessing:
   1226 	    process_preprocessing();
   1227 	    break;
   1228 
   1229 	case lsym_comment:
   1230 	    process_comment();
   1231 	    break;
   1232 
   1233 	default:
   1234 	    break;
   1235 	}
   1236 
   1237 	*code.e = '\0';
   1238 	if (lsym != lsym_comment && lsym != lsym_newline &&
   1239 		lsym != lsym_preprocessing)
   1240 	    ps.prev_token = lsym;
   1241     }
   1242 }
   1243 
/*
 * Program entry point: run the setup steps in order, each of which gets
 * the command line if it takes arguments, then enter main_loop.
 */
int
main(int argc, char **argv)
{
    /* setup; the order of these calls is kept as is */
    main_init_globals();
    main_load_profiles(argc, argv);
    main_parse_command_line(argc, argv);
    main_prepare_parsing();

    /* main_loop is declared __dead, so there is nothing to return here */
    main_loop();
}
   1253 
   1254 #ifdef debug
   1255 void
   1256 debug_printf(const char *fmt, ...)
   1257 {
   1258     FILE *f = output == stdout ? stderr : stdout;
   1259     va_list ap;
   1260 
   1261     va_start(ap, fmt);
   1262     vfprintf(f, fmt, ap);
   1263     va_end(ap);
   1264 }
   1265 
   1266 void
   1267 debug_println(const char *fmt, ...)
   1268 {
   1269     FILE *f = output == stdout ? stderr : stdout;
   1270     va_list ap;
   1271 
   1272     va_start(ap, fmt);
   1273     vfprintf(f, fmt, ap);
   1274     va_end(ap);
   1275     fprintf(f, "\n");
   1276 }
   1277 
/*
 * Write the characters from s up to, but not including, e via
 * debug_printf, framed by prefix and suffix.
 *
 * Backslash and double quote are preceded by a backslash, other printable
 * characters are written as they are, newline and tab become "\n" and
 * "\t", and everything else becomes a 2-digit "\x" hex escape.
 */
void
debug_vis_range(const char *prefix, const char *s, const char *e,
    const char *suffix)
{
    const char *p = s;

    debug_printf("%s", prefix);

    while (p < e) {
	/* <ctype.h> functions need the value of an unsigned char */
	unsigned char ch = (unsigned char)*p;

	if (ch == '\\' || ch == '"')
	    debug_printf("\\%c", ch);
	else if (isprint(ch))
	    debug_printf("%c", ch);
	else if (ch == '\n')
	    debug_printf("\\n");
	else if (ch == '\t')
	    debug_printf("\\t");
	else
	    debug_printf("\\x%02x", ch);

	p++;
    }

    debug_printf("%s", suffix);
}
   1297 #endif
   1298 
/*
 * Return p if it is not NULL; otherwise terminate the program via err
 * with the exit status EXIT_FAILURE.
 */
static void *
nonnull(void *p)
{
    if (p != NULL)
	return p;

    err(EXIT_FAILURE, NULL);
    /* NOTREACHED */
}
   1306 
/*
 * Like malloc, but a NULL result goes through nonnull, which terminates
 * the program instead of returning.
 */
void *
xmalloc(size_t size)
{
    void *mem = malloc(size);

    return nonnull(mem);
}
   1312 
/*
 * Like realloc, but a NULL result goes through nonnull, which terminates
 * the program instead of returning.
 */
void *
xrealloc(void *p, size_t new_size)
{
    void *resized = realloc(p, new_size);

    return nonnull(resized);
}
   1318 
/*
 * Like strdup, but a NULL result goes through nonnull, which terminates
 * the program instead of returning.
 */
char *
xstrdup(const char *s)
{
    char *copy = strdup(s);

    return nonnull(copy);
}
   1324