Home | History | Annotate | Line # | Download | only in indent
io.c revision 1.151
      1 /*	$NetBSD: io.c,v 1.151 2023/05/11 18:26:56 rillig Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-4-Clause
      5  *
      6  * Copyright (c) 1985 Sun Microsystems, Inc.
      7  * Copyright (c) 1980, 1993
      8  *	The Regents of the University of California.  All rights reserved.
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #if 0
     41 static char sccsid[] = "@(#)io.c	8.1 (Berkeley) 6/6/93";
     42 #endif
     43 
     44 #include <sys/cdefs.h>
     45 #if defined(__NetBSD__)
     46 __RCSID("$NetBSD: io.c,v 1.151 2023/05/11 18:26:56 rillig Exp $");
     47 #elif defined(__FreeBSD__)
     48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
     49 #endif
     50 
     51 #include <assert.h>
     52 #include <stdio.h>
     53 #include <stdlib.h>
     54 #include <string.h>
     55 
     56 #include "indent.h"
     57 
     58 /*
     59  * There are 3 modes for reading the input.
     60  *
     61  * default: In this mode, the input comes from the input file. The buffer
     62  * 'inp' contains the current line, terminated with '\n'. The current read
     63  * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
     64  * pointers are null.
     65  *
     66  * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
     67  * from 'inp', but instead of processing it, it is copied to 'save_com'. The
     68  * goal of this mode is to move the comments after the '{', that is to
     69  * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
     70  * token cannot be part of this transformation, switch to copy-out.
     71  *
     72  * copy-out: In this mode, the input comes from 'save_com', which contains the
     73  * tokens to be placed after the '{'. The input still comes from the range
     74  * [inp.s, inp.e), but these two members have been overwritten with pointers
     75  * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
     76  * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
     77  * all tokens from save_com, switch to default mode again.
     78  */
/*
 * The complete state of the input reader: the current input line plus the
 * bookkeeping needed to temporarily read from save_com_buf instead; see the
 * description of the 3 reading modes above.
 */
static struct {
    struct buffer inp;		/* one line of input, ready to be split into
				 * tokens; occasionally 's' and 'e' switch
				 * to save_com_buf */
    char save_com_buf[5000];	/* input text is saved here when looking for
				 * the brace after an if, while, etc */
    char *save_com_s;		/* start of the comment in save_com_buf, or
				 * null */
    char *save_com_e;		/* end of the comment in save_com_buf, or
				 * null */

    char *saved_inp_s;		/* saved value of inp.s when taking input from
				 * save_com, or null */
    char *saved_inp_e;		/* saved value of inp.e, or null */
} inbuf;
     94 
/*
 * Indentation derived from the innermost open parenthesis; updated by
 * output_complete_line whenever ps.nparen > 0 and consulted by
 * compute_code_indent and compute_code_indent_lineup.
 */
static int paren_indent;
     96 
     97 
     98 void
     99 inp_init(void)
    100 {
    101     inbuf.inp.buf = xmalloc(10);
    102     inbuf.inp.l = inbuf.inp.buf + 8;
    103     inbuf.inp.s = inbuf.inp.buf;
    104     inbuf.inp.e = inbuf.inp.buf;
    105 }
    106 
    107 const char *
    108 inp_p(void)
    109 {
    110     assert(inbuf.inp.s < inbuf.inp.e);
    111     return inbuf.inp.s;
    112 }
    113 
    114 const char *
    115 inp_line_start(void)
    116 {
    117     return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
    118 }
    119 
    120 const char *
    121 inp_line_end(void)
    122 {
    123     return inbuf.inp.e;
    124 }
    125 
    126 char
    127 inp_peek(void)
    128 {
    129     assert(inbuf.inp.s < inbuf.inp.e);
    130     return *inbuf.inp.s;
    131 }
    132 
    133 char
    134 inp_lookahead(size_t i)
    135 {
    136     assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s));
    137     return inbuf.inp.s[i];
    138 }
    139 
    140 void
    141 inp_skip(void)
    142 {
    143     assert(inbuf.inp.s < inbuf.inp.e);
    144     inbuf.inp.s++;
    145     if (inbuf.inp.s >= inbuf.inp.e)
    146 	inp_read_line();
    147 }
    148 
    149 char
    150 inp_next(void)
    151 {
    152     char ch = inp_peek();
    153     inp_skip();
    154     return ch;
    155 }
    156 
    157 #ifdef debug
/*
 * Print the named range [s, e) in visible form; ranges with a null start or
 * end are omitted from the output.
 */
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
    if (s == NULL || e == NULL)
	return;

    debug_printf("    %-12s ", name);
    debug_vis_range("\"", s, e, "\"\n");
}
    166 
    167 void
    168 debug_inp(const char *prefix)
    169 {
    170     assert(inp_line_start() <= inbuf.inp.s);
    171     assert(inbuf.inp.s <= inbuf.inp.e);
    172 
    173     debug_println("%s %s:", __func__, prefix);
    174     if (inbuf.saved_inp_s == NULL)
    175 	debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
    176     debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e);	/* never null */
    177     debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
    178     debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
    179     debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
    180 }
    181 #endif
    182 
    183 static void
    184 inp_comment_check_size(size_t n)
    185 {
    186     if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
    187 	array_length(inbuf.save_com_buf))
    188 	return;
    189 
    190     diag(1, "Internal buffer overflow - "
    191 	"Move big comment from right after if, while, or whatever");
    192     fflush(output);
    193     exit(1);
    194 }
    195 
    196 void
    197 inp_comment_init_preproc(void)
    198 {
    199     if (inbuf.save_com_e == NULL) {	/* if this is the first comment, we
    200 					 * must set up the buffer */
    201 	/*
    202 	 * XXX: No space is reserved for a potential '{' here, unlike in
    203 	 * inp_comment_init_comment.
    204 	 */
    205 	inbuf.save_com_s = inbuf.save_com_buf;
    206 	inbuf.save_com_e = inbuf.save_com_s;
    207     } else {
    208 	inp_comment_add_char('\n');	/* add newline between comments */
    209 	inp_comment_add_char(' ');
    210 	--line_no;
    211     }
    212 }
    213 
    214 void
    215 inp_comment_add_char(char ch)
    216 {
    217     inp_comment_check_size(1);
    218     *inbuf.save_com_e++ = ch;
    219 }
    220 
    221 void
    222 inp_comment_add_range(const char *s, const char *e)
    223 {
    224     size_t len = (size_t)(e - s);
    225     inp_comment_check_size(len);
    226     memcpy(inbuf.save_com_e, s, len);
    227     inbuf.save_com_e += len;
    228 }
    229 
    230 bool
    231 inp_comment_seen(void)
    232 {
    233     return inbuf.save_com_e != NULL;
    234 }
    235 
    236 /*
    237  * Switch the input to come from save_com, replaying the copied tokens while
    238  * looking for the next '{'.
    239  */
    240 void
    241 inp_from_comment(void)
    242 {
    243     debug_inp("before inp_from_comment");
    244     inbuf.saved_inp_s = inbuf.inp.s;
    245     inbuf.saved_inp_e = inbuf.inp.e;
    246 
    247     inbuf.inp.s = inbuf.save_com_s;
    248     inbuf.inp.e = inbuf.save_com_e;
    249     inbuf.save_com_s = NULL;
    250     inbuf.save_com_e = NULL;
    251     debug_inp("after inp_from_comment");
    252 }
    253 
    254 /*
    255  * After having read from save_com, continue with the rest of the input line
    256  * before reading the next line from the input file.
    257  */
    258 static bool
    259 inp_from_file(void)
    260 {
    261     if (inbuf.saved_inp_s == NULL)
    262 	return false;
    263 
    264     inbuf.inp.s = inbuf.saved_inp_s;
    265     inbuf.inp.e = inbuf.saved_inp_e;
    266     inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
    267     debug_println("switched inp.s back to saved_inp_s");
    268     return inbuf.inp.s < inbuf.inp.e;
    269 }
    270 
    271 static void
    272 inp_add(char ch)
    273 {
    274     if (inbuf.inp.e >= inbuf.inp.l) {
    275 	size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
    276 	size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
    277 	inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
    278 	inbuf.inp.s = inbuf.inp.buf;
    279 	inbuf.inp.e = inbuf.inp.buf + offset;
    280 	inbuf.inp.l = inbuf.inp.buf + new_size - 2;
    281     }
    282     *inbuf.inp.e++ = ch;
    283 }
    284 
    285 static void
    286 inp_read_next_line(FILE *f)
    287 {
    288     inbuf.inp.s = inbuf.inp.buf;
    289     inbuf.inp.e = inbuf.inp.buf;
    290 
    291     for (;;) {
    292 	int ch = getc(f);
    293 	if (ch == EOF) {
    294 	    if (!inhibit_formatting) {
    295 		inp_add(' ');
    296 		inp_add('\n');
    297 	    }
    298 	    had_eof = true;
    299 	    break;
    300 	}
    301 
    302 	if (ch != '\0')
    303 	    inp_add((char)ch);
    304 	if (ch == '\n')
    305 	    break;
    306     }
    307 }
    308 
    309 static void
    310 output_char(char ch)
    311 {
    312     fputc(ch, output);
    313     debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
    314 }
    315 
    316 static void
    317 output_range(const char *s, const char *e)
    318 {
    319     fwrite(s, 1, (size_t)(e - s), output);
    320     debug_vis_range("output_range \"", s, e, "\"\n");
    321 }
    322 
    323 static int
    324 output_indent(int old_ind, int new_ind)
    325 {
    326     int ind = old_ind;
    327 
    328     if (opt.use_tabs) {
    329 	int tabsize = opt.tabsize;
    330 	int n = new_ind / tabsize - ind / tabsize;
    331 	if (n > 0)
    332 	    ind -= ind % tabsize;
    333 	for (int i = 0; i < n; i++) {
    334 	    fputc('\t', output);
    335 	    ind += tabsize;
    336 	}
    337     }
    338 
    339     for (; ind < new_ind; ind++)
    340 	fputc(' ', output);
    341 
    342     debug_println("output_indent %d", ind);
    343     return ind;
    344 }
    345 
    346 static int
    347 output_line_label(void)
    348 {
    349     int ind;
    350 
    351     while (lab.e > lab.s && ch_isblank(lab.e[-1]))
    352 	lab.e--;
    353     *lab.e = '\0';
    354 
    355     ind = output_indent(0, compute_label_indent());
    356     output_range(lab.s, lab.e);
    357     ind = ind_add(ind, lab.s, lab.e);
    358 
    359     ps.is_case_label = false;
    360     return ind;
    361 }
    362 
    363 static int
    364 output_line_code(int ind)
    365 {
    366 
    367     int target_ind = compute_code_indent();
    368     for (int i = 0; i < ps.nparen; i++) {
    369 	if (ps.paren[i].indent >= 0) {
    370 	    int paren_ind = ps.paren[i].indent;
    371 	    ps.paren[i].indent = (short)(-1 - (paren_ind + target_ind));
    372 	    debug_println(
    373 		"setting paren_indents[%d] from %d to %d for column %d",
    374 		i, paren_ind, ps.paren[i].indent, target_ind + 1);
    375 	}
    376     }
    377 
    378     ind = output_indent(ind, target_ind);
    379     output_range(code.s, code.e);
    380     return ind_add(ind, code.s, code.e);
    381 }
    382 
    383 static void
    384 output_line_comment(int ind)
    385 {
    386     int target_ind = ps.com_ind;
    387     const char *p = com.s;
    388 
    389     target_ind += ps.comment_delta;
    390 
    391     /* consider original indentation in case this is a box comment */
    392     for (; *p == '\t'; p++)
    393 	target_ind += opt.tabsize;
    394 
    395     for (; target_ind < 0; p++) {
    396 	if (*p == ' ')
    397 	    target_ind++;
    398 	else if (*p == '\t')
    399 	    target_ind = next_tab(target_ind);
    400 	else {
    401 	    target_ind = 0;
    402 	    break;
    403 	}
    404     }
    405 
    406     /* if comment can't fit on this line, put it on the next line */
    407     if (ind > target_ind) {
    408 	output_char('\n');
    409 	ind = 0;
    410 	ps.stats.lines++;
    411     }
    412 
    413     while (com.e > p && ch_isspace(com.e[-1]))
    414 	com.e--;
    415 
    416     (void)output_indent(ind, target_ind);
    417     output_range(p, com.e);
    418 
    419     ps.comment_delta = ps.n_comment_delta;
    420     ps.stats.comment_lines++;
    421 }
    422 
    423 /*
    424  * Write a line of formatted source to the output file. The line consists of
    425  * the label, the code and the comment.
    426  */
    427 static void
    428 output_complete_line(char line_terminator)
    429 {
    430     ps.is_function_definition = false;
    431 
    432     if (!inhibit_formatting) {
    433 	if (ps.ind_level == 0)
    434 	    ps.in_stmt_cont = false;	/* this is a class A kludge */
    435 
    436 	if (lab.e != lab.s || code.e != code.s)
    437 	    ps.stats.code_lines++;
    438 
    439 	int ind = 0;
    440 	if (lab.e != lab.s)
    441 	    ind = output_line_label();
    442 	if (code.e != code.s)
    443 	    ind = output_line_code(ind);
    444 	if (com.e != com.s)
    445 	    output_line_comment(ind);
    446 
    447 	output_char(line_terminator);
    448 	ps.stats.lines++;
    449 
    450 	/* TODO: rename to blank_line_after_decl */
    451 	if (ps.just_saw_decl == 1 && opt.blanklines_after_decl)
    452 	    ps.just_saw_decl = 0;
    453     }
    454 
    455     ps.decl_on_line = ps.in_decl;	/* for proper comment indentation */
    456     ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
    457     ps.decl_indent_done = false;
    458 
    459     *(lab.e = lab.s) = '\0';	/* reset buffers */
    460     *(code.e = code.s) = '\0';
    461     *(com.e = com.s = com.buf + 1) = '\0';
    462 
    463     ps.ind_level = ps.ind_level_follow;
    464     ps.line_start_nparen = ps.nparen;
    465 
    466     if (ps.nparen > 0) {
    467 	/* TODO: explain what negative indentation means */
    468 	paren_indent = -1 - ps.paren[ps.nparen - 1].indent;
    469 	debug_println("paren_indent is now %d", paren_indent);
    470     }
    471 }
    472 
/* Write the pending line to the output, terminated by a newline. */
void
output_line(void)
{
    const char terminator = '\n';

    output_complete_line(terminator);
}
    478 
/* Write the pending line to the output, terminated by a form feed. */
void
output_line_ff(void)
{
    const char terminator = '\f';

    output_complete_line(terminator);
}
    484 
    485 static int
    486 compute_code_indent_lineup(int base_ind)
    487 {
    488     int ti = paren_indent;
    489     int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
    490     if (overflow < 0)
    491 	return ti;
    492 
    493     if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
    494 	ti -= overflow + 2;
    495 	if (ti > base_ind)
    496 	    return ti;
    497 	return base_ind;
    498     }
    499 
    500     return ti;
    501 }
    502 
    503 int
    504 compute_code_indent(void)
    505 {
    506     int base_ind = ps.ind_level * opt.indent_size;
    507 
    508     if (ps.line_start_nparen == 0) {
    509 	if (ps.in_stmt_cont && ps.in_enum != in_enum_brace)
    510 	    return base_ind + opt.continuation_indent;
    511 	return base_ind;
    512     }
    513 
    514     if (opt.lineup_to_parens) {
    515 	if (opt.lineup_to_parens_always)
    516 	    return paren_indent;
    517 	return compute_code_indent_lineup(base_ind);
    518     }
    519 
    520     if (2 * opt.continuation_indent == opt.indent_size)
    521 	return base_ind + opt.continuation_indent;
    522     else
    523 	return base_ind + opt.continuation_indent * ps.line_start_nparen;
    524 }
    525 
    526 int
    527 compute_label_indent(void)
    528 {
    529     if (ps.is_case_label)
    530 	return (int)(case_ind * (float)opt.indent_size);
    531     if (lab.s[0] == '#')
    532 	return 0;
    533     return opt.indent_size * (ps.ind_level - 2);
    534 }
    535 
/* Advance *pp past all characters for which ch_isblank is true. */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
    542 
/*
 * If the text at *pp starts with the string s, advance *pp past it and return
 * true; otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t n = strlen(s);

    if (strncmp(*pp, s, n) != 0)
	return false;

    *pp += n;
    return true;
}
    553 
    554 static void
    555 parse_indent_comment(void)
    556 {
    557     bool on;
    558 
    559     const char *p = inbuf.inp.buf;
    560 
    561     skip_blank(&p);
    562     if (!skip_string(&p, "/*"))
    563 	return;
    564     skip_blank(&p);
    565     if (!skip_string(&p, "INDENT"))
    566 	return;
    567 
    568     skip_blank(&p);
    569     if (*p == '*' || skip_string(&p, "ON"))
    570 	on = true;
    571     else if (skip_string(&p, "OFF"))
    572 	on = false;
    573     else
    574 	return;
    575 
    576     skip_blank(&p);
    577     if (!skip_string(&p, "*/\n"))
    578 	return;
    579 
    580     if (com.s != com.e || lab.s != lab.e || code.s != code.e)
    581 	output_line();
    582 
    583     inhibit_formatting = !on;
    584 }
    585 
    586 void
    587 inp_read_line(void)
    588 {
    589     if (inp_from_file())
    590 	return;
    591 
    592     inp_read_next_line(input);
    593 
    594     parse_indent_comment();
    595 
    596     if (inhibit_formatting)
    597 	output_range(inbuf.inp.s, inbuf.inp.e);
    598 }
    599