Home | History | Annotate | Line # | Download | only in indent
io.c revision 1.150
      1 /*	$NetBSD: io.c,v 1.150 2023/05/11 18:13:55 rillig Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-4-Clause
      5  *
      6  * Copyright (c) 1985 Sun Microsystems, Inc.
      7  * Copyright (c) 1980, 1993
      8  *	The Regents of the University of California.  All rights reserved.
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #if 0
     41 static char sccsid[] = "@(#)io.c	8.1 (Berkeley) 6/6/93";
     42 #endif
     43 
     44 #include <sys/cdefs.h>
     45 #if defined(__NetBSD__)
     46 __RCSID("$NetBSD: io.c,v 1.150 2023/05/11 18:13:55 rillig Exp $");
     47 #elif defined(__FreeBSD__)
     48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
     49 #endif
     50 
     51 #include <assert.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 
     56 #include "indent.h"
     57 
     58 /*
     59  * There are 3 modes for reading the input.
     60  *
     61  * default: In this mode, the input comes from the input file. The buffer
     62  * 'inp' contains the current line, terminated with '\n'. The current read
     63  * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
     64  * pointers are null.
     65  *
     66  * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
     67  * from 'inp', but instead of processing it, it is copied to 'save_com'. The
     68  * goal of this mode is to move the comments after the '{', that is to
     69  * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
     70  * token cannot be part of this transformation, switch to copy-out.
     71  *
     72  * copy-out: In this mode, the input comes from 'save_com', which contains the
     73  * tokens to be placed after the '{'. The input still comes from the range
     74  * [inp.s, inp.e), but these two members have been overwritten with pointers
     75  * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
     76  * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
     77  * all tokens from save_com, switch to default mode again.
     78  */
static struct {
    struct buffer inp;		/* one line of input, ready to be split into
				 * tokens; while replaying saved text
				 * (copy-out mode), 's' and 'e' point into
				 * save_com_buf instead of 'buf' */
    char save_com_buf[5000];	/* input text is saved here when looking for
				 * the brace after an if, while, etc */
    char *save_com_s;		/* start of the comment in save_com_buf, or
				 * null */
    char *save_com_e;		/* end of the comment in save_com_buf, or
				 * null; non-null means that text is currently
				 * being saved, see inp_comment_seen */

    char *saved_inp_s;		/* saved value of inp.s when taking input from
				 * save_com, or null; set by inp_from_comment
				 * and cleared again by inp_from_file */
    char *saved_inp_e;		/* saved value of inp.e, or null */
} inbuf;
     94 
/*
 * Indentation derived from the innermost open parenthesis. It is updated by
 * output_complete_line whenever a line ends with parentheses still open, and
 * it is read by compute_code_indent when lining up continuation lines.
 */
static int paren_indent;
     96 
     97 
     98 void
     99 inp_init(void)
    100 {
    101     inbuf.inp.buf = xmalloc(10);
    102     inbuf.inp.l = inbuf.inp.buf + 8;
    103     inbuf.inp.s = inbuf.inp.buf;
    104     inbuf.inp.e = inbuf.inp.buf;
    105 }
    106 
    107 const char *
    108 inp_p(void)
    109 {
    110     assert(inbuf.inp.s < inbuf.inp.e);
    111     return inbuf.inp.s;
    112 }
    113 
    114 const char *
    115 inp_line_start(void)
    116 {
    117     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
    118 }
    119 
    120 const char *
    121 inp_line_end(void)
    122 {
    123     return inbuf.inp.e;
    124 }
    125 
    126 char
    127 inp_peek(void)
    128 {
    129     assert(inbuf.inp.s < inbuf.inp.e);
    130     return *inbuf.inp.s;
    131 }
    132 
    133 char
    134 inp_lookahead(size_t i)
    135 {
    136     assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s));
    137     return inbuf.inp.s[i];
    138 }
    139 
    140 void
    141 inp_skip(void)
    142 {
    143     assert(inbuf.inp.s < inbuf.inp.e);
    144     inbuf.inp.s++;
    145     if (inbuf.inp.s >= inbuf.inp.e)
    146 	inp_read_line();
    147 }
    148 
/* Consume one character and return it. */
char
inp_next(void)
{
    const char cur = inp_peek();

    inp_skip();
    return cur;
}
    156 
    157 #ifdef debug
/* Print the named range [s, e) in visible form, unless either end is null. */
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
    if (s == NULL || e == NULL)
	return;

    debug_printf("    %-12s ", name);
    debug_vis_range("\"", s, e, "\"\n");
}
    166 
    167 void
    168 debug_inp(const char *prefix)
    169 {
    170     assert(inp_line_start() <= inbuf.inp.s);
    171     assert(inbuf.inp.s <= inbuf.inp.e);
    172 
    173     debug_println("%s %s:", __func__, prefix);
    174     if (inbuf.saved_inp_s == NULL)
    175 	debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
    176     debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e);	/* never null */
    177     debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
    178     debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
    179     debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
    180 }
    181 #endif
    182 
    183 static void
    184 inp_comment_check_size(size_t n)
    185 {
    186     if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
    187 	array_length(inbuf.save_com_buf))
    188 	return;
    189 
    190     diag(1, "Internal buffer overflow - "
    191 	"Move big comment from right after if, while, or whatever");
    192     fflush(output);
    193     exit(1);
    194 }
    195 
    196 void
    197 inp_comment_init_newline(void)
    198 {
    199     if (inbuf.save_com_e != NULL)
    200 	return;
    201 
    202     inbuf.save_com_s = inbuf.save_com_buf;
    203     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
    204     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
    205     inbuf.save_com_e = &inbuf.save_com_s[2];
    206     debug_inp(__func__);
    207 }
    208 
    209 void
    210 inp_comment_init_comment(void)
    211 {
    212     if (inbuf.save_com_e != NULL)
    213 	return;
    214 
    215     /*
    216      * Copy everything from the start of the line, because process_comment()
    217      * will use that to calculate the original indentation of a boxed comment.
    218      */
    219     /*
    220      * TODO: Don't store anything in the memory range [input.inp.buf,
    221      * input.inp.s), as that data can easily get lost.
    222      */
    223     /*
    224      * FIXME: The '4' below is completely wrong. For example, in the snippet
    225      * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there is
    226      * an additional line break before the ')', memcpy tries to copy
    227      * (size_t)-1 bytes.
    228      *
    229      * The original author of this magic number doesn't remember its purpose
    230      * anymore, so there is no point in keeping it. The existing tests must
    231      * still pass though.
    232      */
    233     assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
    234     size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
    235     assert(line_len < array_length(inbuf.save_com_buf));
    236 
    237     memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
    238     inbuf.save_com_s = inbuf.save_com_buf + line_len;
    239 
    240     inbuf.save_com_s[0] = ' ';	/* see inp_comment_insert_lbrace */
    241     inbuf.save_com_s[1] = ' ';	/* see inp_comment_insert_lbrace */
    242     inbuf.save_com_e = &inbuf.save_com_s[2];
    243 
    244     debug_vis_range("search_stmt_comment: before save_com is \"",
    245 	inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
    246     debug_vis_range("search_stmt_comment: save_com is \"",
    247 	inbuf.save_com_s, inbuf.save_com_e, "\"\n");
    248 }
    249 
    250 void
    251 inp_comment_init_preproc(void)
    252 {
    253     if (inbuf.save_com_e == NULL) {	/* if this is the first comment, we
    254 					 * must set up the buffer */
    255 	/*
    256 	 * XXX: No space is reserved for a potential '{' here, unlike in
    257 	 * inp_comment_init_comment.
    258 	 */
    259 	inbuf.save_com_s = inbuf.save_com_buf;
    260 	inbuf.save_com_e = inbuf.save_com_s;
    261     } else {
    262 	inp_comment_add_char('\n');	/* add newline between comments */
    263 	inp_comment_add_char(' ');
    264 	--line_no;
    265     }
    266 }
    267 
    268 void
    269 inp_comment_add_char(char ch)
    270 {
    271     inp_comment_check_size(1);
    272     *inbuf.save_com_e++ = ch;
    273 }
    274 
    275 void
    276 inp_comment_add_range(const char *s, const char *e)
    277 {
    278     size_t len = (size_t)(e - s);
    279     inp_comment_check_size(len);
    280     memcpy(inbuf.save_com_e, s, len);
    281     inbuf.save_com_e += len;
    282 }
    283 
    284 bool
    285 inp_comment_complete_block(void)
    286 {
    287     return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
    288 }
    289 
    290 bool
    291 inp_comment_seen(void)
    292 {
    293     return inbuf.save_com_e != NULL;
    294 }
    295 
    296 void
    297 inp_comment_rtrim_blank(void)
    298 {
    299     while (inbuf.save_com_e > inbuf.save_com_s &&
    300 	    ch_isblank(inbuf.save_com_e[-1]))
    301 	inbuf.save_com_e--;
    302 }
    303 
    304 void
    305 inp_comment_rtrim_newline(void)
    306 {
    307     while (inbuf.save_com_e > inbuf.save_com_s &&
    308 	    inbuf.save_com_e[-1] == '\n')
    309 	inbuf.save_com_e--;
    310 }
    311 
    312 /*
    313  * Switch the input to come from save_com, replaying the copied tokens while
    314  * looking for the next '{'.
    315  */
    316 void
    317 inp_from_comment(void)
    318 {
    319     debug_inp("before inp_from_comment");
    320     inbuf.saved_inp_s = inbuf.inp.s;
    321     inbuf.saved_inp_e = inbuf.inp.e;
    322 
    323     inbuf.inp.s = inbuf.save_com_s;
    324     inbuf.inp.e = inbuf.save_com_e;
    325     inbuf.save_com_s = NULL;
    326     inbuf.save_com_e = NULL;
    327     debug_inp("after inp_from_comment");
    328 }
    329 
    330 /*
    331  * After having read from save_com, continue with the rest of the input line
    332  * before reading the next line from the input file.
    333  */
    334 static bool
    335 inp_from_file(void)
    336 {
    337     if (inbuf.saved_inp_s == NULL)
    338 	return false;
    339 
    340     inbuf.inp.s = inbuf.saved_inp_s;
    341     inbuf.inp.e = inbuf.saved_inp_e;
    342     inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
    343     debug_println("switched inp.s back to saved_inp_s");
    344     return inbuf.inp.s < inbuf.inp.e;
    345 }
    346 
    347 void
    348 inp_comment_insert_lbrace(void)
    349 {
    350     assert(inbuf.save_com_s[0] == ' ');	/* see inp_comment_init_newline */
    351     inbuf.save_com_s[0] = '{';
    352 }
    353 
    354 static void
    355 inp_add(char ch)
    356 {
    357     if (inbuf.inp.e >= inbuf.inp.l) {
    358 	size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
    359 	size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
    360 	inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
    361 	inbuf.inp.s = inbuf.inp.buf;
    362 	inbuf.inp.e = inbuf.inp.buf + offset;
    363 	inbuf.inp.l = inbuf.inp.buf + new_size - 2;
    364     }
    365     *inbuf.inp.e++ = ch;
    366 }
    367 
    368 static void
    369 inp_read_next_line(FILE *f)
    370 {
    371     inbuf.inp.s = inbuf.inp.buf;
    372     inbuf.inp.e = inbuf.inp.buf;
    373 
    374     for (;;) {
    375 	int ch = getc(f);
    376 	if (ch == EOF) {
    377 	    if (!inhibit_formatting) {
    378 		inp_add(' ');
    379 		inp_add('\n');
    380 	    }
    381 	    had_eof = true;
    382 	    break;
    383 	}
    384 
    385 	if (ch != '\0')
    386 	    inp_add((char)ch);
    387 	if (ch == '\n')
    388 	    break;
    389     }
    390 }
    391 
    392 static void
    393 output_char(char ch)
    394 {
    395     fputc(ch, output);
    396     debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
    397 }
    398 
    399 static void
    400 output_range(const char *s, const char *e)
    401 {
    402     fwrite(s, 1, (size_t)(e - s), output);
    403     debug_vis_range("output_range \"", s, e, "\"\n");
    404 }
    405 
    406 static int
    407 output_indent(int old_ind, int new_ind)
    408 {
    409     int ind = old_ind;
    410 
    411     if (opt.use_tabs) {
    412 	int tabsize = opt.tabsize;
    413 	int n = new_ind / tabsize - ind / tabsize;
    414 	if (n > 0)
    415 	    ind -= ind % tabsize;
    416 	for (int i = 0; i < n; i++) {
    417 	    fputc('\t', output);
    418 	    ind += tabsize;
    419 	}
    420     }
    421 
    422     for (; ind < new_ind; ind++)
    423 	fputc(' ', output);
    424 
    425     debug_println("output_indent %d", ind);
    426     return ind;
    427 }
    428 
    429 static int
    430 output_line_label(void)
    431 {
    432     int ind;
    433 
    434     while (lab.e > lab.s && ch_isblank(lab.e[-1]))
    435 	lab.e--;
    436     *lab.e = '\0';
    437 
    438     ind = output_indent(0, compute_label_indent());
    439     output_range(lab.s, lab.e);
    440     ind = ind_add(ind, lab.s, lab.e);
    441 
    442     ps.is_case_label = false;
    443     return ind;
    444 }
    445 
    446 static int
    447 output_line_code(int ind)
    448 {
    449 
    450     int target_ind = compute_code_indent();
    451     for (int i = 0; i < ps.nparen; i++) {
    452 	if (ps.paren[i].indent >= 0) {
    453 	    int paren_ind = ps.paren[i].indent;
    454 	    ps.paren[i].indent = (short)(-1 - (paren_ind + target_ind));
    455 	    debug_println(
    456 		"setting paren_indents[%d] from %d to %d for column %d",
    457 		i, paren_ind, ps.paren[i].indent, target_ind + 1);
    458 	}
    459     }
    460 
    461     ind = output_indent(ind, target_ind);
    462     output_range(code.s, code.e);
    463     return ind_add(ind, code.s, code.e);
    464 }
    465 
    466 static void
    467 output_line_comment(int ind)
    468 {
    469     int target_ind = ps.com_ind;
    470     const char *p = com.s;
    471 
    472     target_ind += ps.comment_delta;
    473 
    474     /* consider original indentation in case this is a box comment */
    475     for (; *p == '\t'; p++)
    476 	target_ind += opt.tabsize;
    477 
    478     for (; target_ind < 0; p++) {
    479 	if (*p == ' ')
    480 	    target_ind++;
    481 	else if (*p == '\t')
    482 	    target_ind = next_tab(target_ind);
    483 	else {
    484 	    target_ind = 0;
    485 	    break;
    486 	}
    487     }
    488 
    489     /* if comment can't fit on this line, put it on the next line */
    490     if (ind > target_ind) {
    491 	output_char('\n');
    492 	ind = 0;
    493 	ps.stats.lines++;
    494     }
    495 
    496     while (com.e > p && ch_isspace(com.e[-1]))
    497 	com.e--;
    498 
    499     (void)output_indent(ind, target_ind);
    500     output_range(p, com.e);
    501 
    502     ps.comment_delta = ps.n_comment_delta;
    503     ps.stats.comment_lines++;
    504 }
    505 
    506 /*
    507  * Write a line of formatted source to the output file. The line consists of
    508  * the label, the code and the comment.
    509  */
    510 static void
    511 output_complete_line(char line_terminator)
    512 {
    513     ps.is_function_definition = false;
    514 
    515     if (!inhibit_formatting) {
    516 	if (ps.ind_level == 0)
    517 	    ps.in_stmt_cont = false;	/* this is a class A kludge */
    518 
    519 	if (lab.e != lab.s || code.e != code.s)
    520 	    ps.stats.code_lines++;
    521 
    522 	int ind = 0;
    523 	if (lab.e != lab.s)
    524 	    ind = output_line_label();
    525 	if (code.e != code.s)
    526 	    ind = output_line_code(ind);
    527 	if (com.e != com.s)
    528 	    output_line_comment(ind);
    529 
    530 	output_char(line_terminator);
    531 	ps.stats.lines++;
    532 
    533 	/* TODO: rename to blank_line_after_decl */
    534 	if (ps.just_saw_decl == 1 && opt.blanklines_after_decl)
    535 	    ps.just_saw_decl = 0;
    536     }
    537 
    538     ps.decl_on_line = ps.in_decl;	/* for proper comment indentation */
    539     ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
    540     ps.decl_indent_done = false;
    541 
    542     *(lab.e = lab.s) = '\0';	/* reset buffers */
    543     *(code.e = code.s) = '\0';
    544     *(com.e = com.s = com.buf + 1) = '\0';
    545 
    546     ps.ind_level = ps.ind_level_follow;
    547     ps.line_start_nparen = ps.nparen;
    548 
    549     if (ps.nparen > 0) {
    550 	/* TODO: explain what negative indentation means */
    551 	paren_indent = -1 - ps.paren[ps.nparen - 1].indent;
    552 	debug_println("paren_indent is now %d", paren_indent);
    553     }
    554 }
    555 
/* Write the current line to the output, terminated with a newline. */
void
output_line(void)
{
    const char newline = '\n';

    output_complete_line(newline);
}
    561 
/* Write the current line to the output, terminated with a form feed. */
void
output_line_ff(void)
{
    const char form_feed = '\f';

    output_complete_line(form_feed);
}
    567 
    568 static int
    569 compute_code_indent_lineup(int base_ind)
    570 {
    571     int ti = paren_indent;
    572     int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
    573     if (overflow < 0)
    574 	return ti;
    575 
    576     if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
    577 	ti -= overflow + 2;
    578 	if (ti > base_ind)
    579 	    return ti;
    580 	return base_ind;
    581     }
    582 
    583     return ti;
    584 }
    585 
    586 int
    587 compute_code_indent(void)
    588 {
    589     int base_ind = ps.ind_level * opt.indent_size;
    590 
    591     if (ps.line_start_nparen == 0) {
    592 	if (ps.in_stmt_cont && ps.in_enum != in_enum_brace)
    593 	    return base_ind + opt.continuation_indent;
    594 	return base_ind;
    595     }
    596 
    597     if (opt.lineup_to_parens) {
    598 	if (opt.lineup_to_parens_always)
    599 	    return paren_indent;
    600 	return compute_code_indent_lineup(base_ind);
    601     }
    602 
    603     if (2 * opt.continuation_indent == opt.indent_size)
    604 	return base_ind + opt.continuation_indent;
    605     else
    606 	return base_ind + opt.continuation_indent * ps.line_start_nparen;
    607 }
    608 
    609 int
    610 compute_label_indent(void)
    611 {
    612     if (ps.is_case_label)
    613 	return (int)(case_ind * (float)opt.indent_size);
    614     if (lab.s[0] == '#')
    615 	return 0;
    616     return opt.indent_size * (ps.ind_level - 2);
    617 }
    618 
/* Advance *pp past any blank characters. */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
    625 
/*
 * If the text at *pp starts with the string 's', advance *pp past it and
 * return true. Otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t len = strlen(s);

    if (strncmp(*pp, s, len) != 0)
	return false;

    *pp += len;
    return true;
}
    636 
    637 static void
    638 parse_indent_comment(void)
    639 {
    640     bool on;
    641 
    642     const char *p = inbuf.inp.buf;
    643 
    644     skip_blank(&p);
    645     if (!skip_string(&p, "/*"))
    646 	return;
    647     skip_blank(&p);
    648     if (!skip_string(&p, "INDENT"))
    649 	return;
    650 
    651     skip_blank(&p);
    652     if (*p == '*' || skip_string(&p, "ON"))
    653 	on = true;
    654     else if (skip_string(&p, "OFF"))
    655 	on = false;
    656     else
    657 	return;
    658 
    659     skip_blank(&p);
    660     if (!skip_string(&p, "*/\n"))
    661 	return;
    662 
    663     if (com.s != com.e || lab.s != lab.e || code.s != code.e)
    664 	output_line();
    665 
    666     inhibit_formatting = !on;
    667 }
    668 
    669 void
    670 inp_read_line(void)
    671 {
    672     if (inp_from_file())
    673 	return;
    674 
    675     inp_read_next_line(input);
    676 
    677     parse_indent_comment();
    678 
    679     if (inhibit_formatting)
    680 	output_range(inbuf.inp.s, inbuf.inp.e);
    681 }
    682