Home | History | Annotate | Line # | Download | only in indent
io.c revision 1.139
      1 /*	$NetBSD: io.c,v 1.139 2021/11/26 15:21:38 rillig Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-4-Clause
      5  *
      6  * Copyright (c) 1985 Sun Microsystems, Inc.
      7  * Copyright (c) 1980, 1993
      8  *	The Regents of the University of California.  All rights reserved.
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #if 0
     41 static char sccsid[] = "@(#)io.c	8.1 (Berkeley) 6/6/93";
     42 #endif
     43 
     44 #include <sys/cdefs.h>
     45 #if defined(__NetBSD__)
     46 __RCSID("$NetBSD: io.c,v 1.139 2021/11/26 15:21:38 rillig Exp $");
     47 #elif defined(__FreeBSD__)
     48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
     49 #endif
     50 
     51 #include <assert.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 
     56 #include "indent.h"
     57 
/*
 * All state of the input reader.  The lexer normally reads from 'inp'; while
 * a comment is being replayed (see inp_from_comment), inp.s and inp.e point
 * into save_com_buf instead and the interrupted position in the input line
 * is kept in saved_inp_s and saved_inp_e.
 */
static struct {
    struct buffer inp;		/* one line of input, ready to be split into
				 * tokens; occasionally this buffer switches
				 * to save_com_buf */
    char save_com_buf[5000];	/* input text is saved here when looking for
				 * the brace after an if, while, etc */
    char *save_com_s;		/* start of the comment in save_com_buf; NULL
				 * after inp_from_comment has handed the
				 * comment over */
    char *save_com_e;		/* end of the comment in save_com_buf; NULL
				 * means that no comment is being saved */

    char *saved_inp_s;		/* saved value of inp.s when taking input from
				 * save_com; NULL when reading directly from
				 * the input line */
    char *saved_inp_e;		/* saved value of inp.e */
} inbuf;
     71 
/*
 * Indentation derived from the innermost open parenthesis; updated by
 * output_line and used by compute_code_indent when lining up to parentheses.
 */
static int paren_indent;
/*
 * Set when an 'INDENT ON' comment re-enables formatting; makes output_line
 * skip counting the next empty line as a blank line.
 */
static bool suppress_blanklines;
     74 
     75 
     76 void
     77 inp_init(void)
     78 {
     79     inbuf.inp.buf = xmalloc(10);
     80     inbuf.inp.l = inbuf.inp.buf + 8;
     81     inbuf.inp.s = inbuf.inp.buf;
     82     inbuf.inp.e = inbuf.inp.buf;
     83 }
     84 
     85 const char *
     86 inp_p(void)
     87 {
     88     return inbuf.inp.s;
     89 }
     90 
     91 const char *
     92 inp_line_start(void)
     93 {
     94     /*
     95      * The comment we're about to read usually comes from inp.buf, unless
     96      * it has been copied into save_com.
     97      */
     98     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
     99 }
    100 
    101 const char *
    102 inp_line_end(void)
    103 {
    104     return inbuf.inp.e;
    105 }
    106 
    107 char
    108 inp_peek(void)
    109 {
    110     return *inbuf.inp.s;
    111 }
    112 
    113 char
    114 inp_lookahead(size_t i)
    115 {
    116     return inbuf.inp.s[i];
    117 }
    118 
    119 void
    120 inp_skip(void)
    121 {
    122     inbuf.inp.s++;
    123     if (inbuf.inp.s >= inbuf.inp.e)
    124 	inp_read_line();
    125 }
    126 
    127 char
    128 inp_next(void)
    129 {
    130     char ch = inp_peek();
    131     inp_skip();
    132     return ch;
    133 }
    134 
    135 #ifdef debug
/*
 * Print the named range [s, e) in visible form; ranges with a null
 * boundary are silently skipped.
 */
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
    if (s == NULL || e == NULL)
	return;

    debug_printf("    %-12s ", name);
    debug_vis_range("\"", s, e, "\"\n");
}
    144 
    145 void
    146 debug_inp(const char *prefix)
    147 {
    148     debug_println("%s %s:", __func__, prefix);
    149     if (inbuf.saved_inp_s == NULL)
    150 	debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
    151     debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e);	/* never null */
    152     debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
    153     debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
    154     debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
    155 }
    156 #endif
    157 
    158 static void
    159 inp_comment_check_size(size_t n)
    160 {
    161     if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
    162 	array_length(inbuf.save_com_buf))
    163 	return;
    164 
    165     diag(1, "Internal buffer overflow - "
    166 	    "Move big comment from right after if, while, or whatever");
    167     fflush(output);
    168     exit(1);
    169 }
    170 
    171 void
    172 inp_comment_init_newline(void)
    173 {
    174     if (inbuf.save_com_e != NULL)
    175 	return;
    176 
    177     inbuf.save_com_s = inbuf.save_com_buf;
    178     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
    179     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
    180     inbuf.save_com_e = &inbuf.save_com_s[2];
    181     debug_inp(__func__);
    182 }
    183 
    184 void
    185 inp_comment_init_comment(void)
    186 {
    187     if (inbuf.save_com_e != NULL)
    188 	return;
    189 
    190     /*
    191      * Copy everything from the start of the line, because
    192      * process_comment() will use that to calculate the original
    193      * indentation of a boxed comment.
    194      */
    195     /*
    196      * TODO: Don't store anything in the memory range [input.inp.buf,
    197      * input.inp.s), as that data can easily get lost.
    198      */
    199     /*
    200      * FIXME: The '4' below is completely wrong. For example, in the snippet
    201      * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there
    202      * is an additional line break before the ')', memcpy tries to copy
    203      * (size_t)-1 bytes.
    204      *
    205      * The original author of this magic number doesn't remember its purpose
    206      * anymore, so there is no point in keeping it. The existing tests must
    207      * still pass though.
    208      */
    209     assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
    210     size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
    211     assert(line_len < array_length(inbuf.save_com_buf));
    212 
    213     memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
    214     inbuf.save_com_s = inbuf.save_com_buf + line_len;
    215 
    216     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
    217     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
    218     inbuf.save_com_e = &inbuf.save_com_s[2];
    219 
    220     debug_vis_range("search_stmt_comment: before save_com is \"",
    221 	inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
    222     debug_vis_range("search_stmt_comment: save_com is \"",
    223 	inbuf.save_com_s, inbuf.save_com_e, "\"\n");
    224 }
    225 
    226 void
    227 inp_comment_init_preproc(void)
    228 {
    229     if (inbuf.save_com_e == NULL) {	/* if this is the first comment, we
    230 					 * must set up the buffer */
    231 	inbuf.save_com_s = inbuf.save_com_buf;
    232 	inbuf.save_com_e = inbuf.save_com_s;
    233     } else {
    234 	inp_comment_add_char('\n');	/* add newline between comments */
    235 	inp_comment_add_char(' ');
    236 	--line_no;
    237     }
    238 }
    239 
    240 void
    241 inp_comment_add_char(char ch)
    242 {
    243     inp_comment_check_size(1);
    244     *inbuf.save_com_e++ = ch;
    245 }
    246 
    247 void
    248 inp_comment_add_range(const char *s, const char *e)
    249 {
    250     size_t len = (size_t)(e - s);
    251     inp_comment_check_size(len);
    252     memcpy(inbuf.save_com_e, s, len);
    253     inbuf.save_com_e += len;
    254 }
    255 
    256 bool
    257 inp_comment_complete_block(void)
    258 {
    259     return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
    260 }
    261 
    262 bool
    263 inp_comment_seen(void)
    264 {
    265     return inbuf.save_com_e != NULL;
    266 }
    267 
    268 void
    269 inp_comment_rtrim(void)
    270 {
    271     while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
    272 	inbuf.save_com_e--;
    273 }
    274 
    275 void
    276 inp_comment_rtrim_newline(void)
    277 {
    278     while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
    279 	inbuf.save_com_e--;
    280 }
    281 
    282 void
    283 inp_from_comment(void)
    284 {
    285     debug_inp("before inp_from_comment");
    286     inbuf.saved_inp_s = inbuf.inp.s;
    287     inbuf.saved_inp_e = inbuf.inp.e;
    288 
    289     inbuf.inp.s = inbuf.save_com_s;	/* redirect lexi input to save_com_s */
    290     inbuf.inp.e = inbuf.save_com_e;
    291     inbuf.save_com_s = NULL;
    292     inbuf.save_com_e = NULL;
    293     debug_inp("after inp_from_comment");
    294 }
    295 
    296 /*
    297  * After having read from save_com, continue with the rest of the input line
    298  * before reading the next line from the input file.
    299  */
    300 static bool
    301 inp_from_file(void)
    302 {
    303     if (inbuf.saved_inp_s == NULL)
    304 	return false;
    305 
    306     inbuf.inp.s = inbuf.saved_inp_s;
    307     inbuf.inp.e = inbuf.saved_inp_e;
    308     inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
    309     debug_println("switched inp.s back to saved_inp_s");
    310     return inbuf.inp.s < inbuf.inp.e;
    311 }
    312 
    313 void
    314 inp_comment_insert_lbrace(void)
    315 {
    316     assert(inbuf.save_com_s[0] == ' ');	/* see inp_comment_init_newline */
    317     inbuf.save_com_s[0] = '{';
    318 }
    319 
    320 static void
    321 inp_add(char ch)
    322 {
    323     if (inbuf.inp.e >= inbuf.inp.l) {
    324 	size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
    325 	size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
    326 	inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
    327 	inbuf.inp.s = inbuf.inp.buf;
    328 	inbuf.inp.e = inbuf.inp.buf + offset;
    329 	inbuf.inp.l = inbuf.inp.buf + new_size - 2;
    330     }
    331     *inbuf.inp.e++ = ch;
    332 }
    333 
    334 static void
    335 inp_read_next_line(FILE *f)
    336 {
    337     inbuf.inp.s = inbuf.inp.buf;
    338     inbuf.inp.e = inbuf.inp.buf;
    339 
    340     for (;;) {
    341 	int ch = getc(f);
    342 	if (ch == EOF) {
    343 	    if (!inhibit_formatting) {
    344 		inp_add(' ');
    345 		inp_add('\n');
    346 	    }
    347 	    had_eof = true;
    348 	    break;
    349 	}
    350 
    351 	if (ch != '\0')
    352 	    inp_add((char)ch);
    353 	if (ch == '\n')
    354 	    break;
    355     }
    356 }
    357 
    358 static void
    359 output_char(char ch)
    360 {
    361     fputc(ch, output);
    362     debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
    363 }
    364 
    365 static void
    366 output_range(const char *s, const char *e)
    367 {
    368     fwrite(s, 1, (size_t)(e - s), output);
    369     debug_vis_range("output_range \"", s, e, "\"\n");
    370 }
    371 
    372 static int
    373 output_indent(int old_ind, int new_ind)
    374 {
    375     int ind = old_ind;
    376 
    377     if (opt.use_tabs) {
    378 	int tabsize = opt.tabsize;
    379 	int n = new_ind / tabsize - ind / tabsize;
    380 	if (n > 0)
    381 	    ind -= ind % tabsize;
    382 	for (int i = 0; i < n; i++) {
    383 	    fputc('\t', output);
    384 	    ind += tabsize;
    385 	}
    386     }
    387 
    388     for (; ind < new_ind; ind++)
    389 	fputc(' ', output);
    390 
    391     debug_println("output_indent %d", ind);
    392     return ind;
    393 }
    394 
    395 static int
    396 dump_line_label(void)
    397 {
    398     int ind;
    399 
    400     while (lab.e > lab.s && ch_isblank(lab.e[-1]))
    401 	lab.e--;
    402     *lab.e = '\0';
    403 
    404     ind = output_indent(0, compute_label_indent());
    405     output_range(lab.s, lab.e);
    406     ind = ind_add(ind, lab.s, lab.e);
    407 
    408     ps.is_case_label = false;
    409     return ind;
    410 }
    411 
    412 static int
    413 dump_line_code(int ind)
    414 {
    415 
    416     int target_ind = compute_code_indent();
    417     for (int i = 0; i < ps.p_l_follow; i++) {
    418 	if (ps.paren_indents[i] >= 0) {
    419 	    int paren_ind = ps.paren_indents[i];
    420 	    ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind));
    421 	    debug_println(
    422 		"setting paren_indents[%d] from %d to %d for column %d",
    423 		i, paren_ind, ps.paren_indents[i], target_ind + 1);
    424 	}
    425     }
    426 
    427     ind = output_indent(ind, target_ind);
    428     output_range(code.s, code.e);
    429     return ind_add(ind, code.s, code.e);
    430 }
    431 
    432 static void
    433 dump_line_comment(int ind)
    434 {
    435     int target_ind = ps.com_ind;
    436     const char *p = com.s;
    437 
    438     target_ind += ps.comment_delta;
    439 
    440     /* consider original indentation in case this is a box comment */
    441     for (; *p == '\t'; p++)
    442 	target_ind += opt.tabsize;
    443 
    444     for (; target_ind < 0; p++) {
    445 	if (*p == ' ')
    446 	    target_ind++;
    447 	else if (*p == '\t')
    448 	    target_ind = next_tab(target_ind);
    449 	else {
    450 	    target_ind = 0;
    451 	    break;
    452 	}
    453     }
    454 
    455     /* if comment can't fit on this line, put it on the next line */
    456     if (ind > target_ind) {
    457 	output_char('\n');
    458 	ind = 0;
    459 	ps.stats.lines++;
    460     }
    461 
    462     while (com.e > p && ch_isspace(com.e[-1]))
    463 	com.e--;
    464 
    465     (void)output_indent(ind, target_ind);
    466     output_range(p, com.e);
    467 
    468     ps.comment_delta = ps.n_comment_delta;
    469     ps.stats.comment_lines++;
    470 }
    471 
/*
 * Write a line of formatted source to the output file. The line consists of
 * the label, the code and the comment.
 *
 * A completely empty line is not written but only counted in
 * blank_lines_to_output.  While formatting is inhibited, nothing is written
 * here.  In all cases, the label, code and comment buffers are reset and the
 * parser state is prepared for the next line.
 *
 * line_terminator is written after the line, see dump_line and
 * dump_line_ff.
 */
static void
output_line(char line_terminator)
{
    /* stays true until the first call of this function has finished */
    static bool first_line = true;

    ps.is_function_definition = false;

    if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
	/* an empty line; it is either swallowed once or counted */
	if (suppress_blanklines)
	    suppress_blanklines = false;
	else
	    blank_lines_to_output++;

    } else if (!inhibit_formatting) {
	suppress_blanklines = false;
	/*
	 * A requested blank line before this line either removes a single
	 * pending blank line or forces at least one, depending on
	 * opt.swallow_optional_blanklines.
	 */
	if (blank_line_before && !first_line) {
	    if (opt.swallow_optional_blanklines) {
		if (blank_lines_to_output == 1)
		    blank_lines_to_output = 0;
	    } else {
		if (blank_lines_to_output == 0)
		    blank_lines_to_output = 1;
	    }
	}

	/* write the pending blank lines */
	for (; blank_lines_to_output > 0; blank_lines_to_output--)
	    output_char('\n');

	if (ps.ind_level == 0)
	    ps.in_stmt_cont = false;	/* this is a class A kludge */

	if (lab.e != lab.s || code.e != code.s)
	    ps.stats.code_lines++;

	/* write the non-empty parts, tracking the current indentation */
	int ind = 0;
	if (lab.e != lab.s)
	    ind = dump_line_label();
	if (code.e != code.s)
	    ind = dump_line_code(ind);
	if (com.e != com.s)
	    dump_line_comment(ind);

	output_char(line_terminator);
	ps.stats.lines++;

	/* decide whether the next line is preceded by a blank line */
	if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
	    blank_line_before = true;
	    ps.just_saw_decl = 0;
	} else
	    blank_line_before = blank_line_after;
	blank_line_after = false;
    }

    ps.decl_on_line = ps.in_decl;	/* for proper comment indentation */
    ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
    ps.decl_indent_done = false;

    *(lab.e = lab.s) = '\0';	/* reset buffers */
    *(code.e = code.s) = '\0';
    /* the comment buffer restarts at its second byte */
    *(com.e = com.s = com.buf + 1) = '\0';

    /* the levels that were pending for the following line now take effect */
    ps.ind_level = ps.ind_level_follow;
    ps.paren_level = ps.p_l_follow;

    if (ps.paren_level > 0) {
	/* TODO: explain what negative indentation means */
	/* this reverses the '-1 - x' encoding used in dump_line_code */
	paren_indent = -1 - ps.paren_indents[ps.paren_level - 1];
	debug_println("paren_indent is now %d", paren_indent);
    }

    first_line = false;
}
    548 
/* Write the current line to the output, terminated by a newline. */
void
dump_line(void)
{
    const char newline = '\n';

    output_line(newline);
}
    554 
/* Write the current line to the output, terminated by a form feed. */
void
dump_line_ff(void)
{
    const char form_feed = '\f';

    output_line(form_feed);
}
    560 
    561 static int
    562 compute_code_indent_lineup(int base_ind)
    563 {
    564     int ti = paren_indent;
    565     int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
    566     if (overflow < 0)
    567 	return ti;
    568 
    569     if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
    570 	ti -= overflow + 2;
    571 	if (ti > base_ind)
    572 	    return ti;
    573 	return base_ind;
    574     }
    575 
    576     return ti;
    577 }
    578 
    579 int
    580 compute_code_indent(void)
    581 {
    582     int base_ind = ps.ind_level * opt.indent_size;
    583 
    584     if (ps.paren_level == 0) {
    585 	if (ps.in_stmt_cont)
    586 	    return base_ind + opt.continuation_indent;
    587 	return base_ind;
    588     }
    589 
    590     if (opt.lineup_to_parens) {
    591 	if (opt.lineup_to_parens_always)
    592 	    return paren_indent;
    593 	return compute_code_indent_lineup(base_ind);
    594     }
    595 
    596     if (2 * opt.continuation_indent == opt.indent_size)
    597 	return base_ind + opt.continuation_indent;
    598     else
    599 	return base_ind + opt.continuation_indent * ps.paren_level;
    600 }
    601 
    602 int
    603 compute_label_indent(void)
    604 {
    605     if (ps.is_case_label)
    606 	return (int)(case_ind * (float)opt.indent_size);
    607     if (lab.s[0] == '#')
    608 	return 0;
    609     return opt.indent_size * (ps.ind_level - 2);
    610 }
    611 
/* Advance *pp beyond any blank characters. */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
    618 
/*
 * If the text at *pp starts with the string s, advance *pp beyond it and
 * return true; otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t n = strlen(s);

    if (strncmp(*pp, s, n) != 0)
	return false;

    *pp += n;
    return true;
}
    629 
    630 static void
    631 parse_indent_comment(void)
    632 {
    633     bool on;
    634 
    635     const char *p = inbuf.inp.buf;
    636 
    637     skip_blank(&p);
    638     if (!skip_string(&p, "/*"))
    639 	return;
    640     skip_blank(&p);
    641     if (!skip_string(&p, "INDENT"))
    642 	return;
    643     skip_blank(&p);
    644 
    645     if (*p == '*' || skip_string(&p, "ON"))
    646 	on = true;
    647     else if (skip_string(&p, "OFF"))
    648 	on = false;
    649     else
    650 	return;
    651 
    652     skip_blank(&p);
    653     if (!skip_string(&p, "*/\n"))
    654 	return;
    655 
    656     if (com.s != com.e || lab.s != lab.e || code.s != code.e)
    657 	dump_line();
    658 
    659     inhibit_formatting = !on;
    660     if (on) {
    661 	blank_lines_to_output = 0;
    662 	blank_line_after = false;
    663 	blank_line_before = false;
    664 	suppress_blanklines = true;
    665     }
    666 }
    667 
    668 void
    669 inp_read_line(void)
    670 {
    671     if (inp_from_file())
    672 	return;
    673 
    674     inp_read_next_line(input);
    675 
    676     parse_indent_comment();
    677 
    678     if (inhibit_formatting)
    679 	output_range(inbuf.inp.s, inbuf.inp.e);
    680 }
    681