io.c revision 1.138 1 /* $NetBSD: io.c,v 1.138 2021/11/26 15:08:48 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.138 2021/11/26 15:08:48 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50
51 #include <assert.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55
56 #include "indent.h"
57
58 static struct {
59 struct buffer inp; /* one line of input, ready to be split into
60 * tokens; occasionally this buffer switches
61 * to save_com_buf */
62 char save_com_buf[5000]; /* input text is saved here when looking for
63 * the brace after an if, while, etc */
64 char *save_com_s; /* start of the comment in save_com_buf */
65 char *save_com_e; /* end of the comment in save_com_buf */
66
67 char *saved_inp_s; /* saved value of inp.s when taking input from
68 * save_com */
69 char *saved_inp_e; /* saved value of inp.e */
70 } inbuf;
71
/*
 * Indentation derived from the innermost open parenthesis; updated at the
 * end of output_line and consumed by compute_code_indent.
 */
static int paren_indent;

/*
 * Set when an 'INDENT ON' comment is seen; makes output_line skip counting
 * the next empty line as a blank line.
 */
static bool suppress_blanklines;
74
75
76 void
77 inp_init(void)
78 {
79 inbuf.inp.buf = xmalloc(10);
80 inbuf.inp.l = inbuf.inp.buf + 8;
81 inbuf.inp.s = inbuf.inp.buf;
82 inbuf.inp.e = inbuf.inp.buf;
83 }
84
85 const char *
86 inp_p(void)
87 {
88 return inbuf.inp.s;
89 }
90
91 const char *
92 inp_line_start(void)
93 {
94 /*
95 * The comment we're about to read usually comes from inp.buf, unless
96 * it has been copied into save_com.
97 */
98 return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
99 }
100
101 const char *
102 inp_line_end(void)
103 {
104 return inbuf.inp.e;
105 }
106
107 char
108 inp_peek(void)
109 {
110 return *inbuf.inp.s;
111 }
112
113 char
114 inp_lookahead(size_t i)
115 {
116 return inbuf.inp.s[i];
117 }
118
119 void
120 inp_skip(void)
121 {
122 inbuf.inp.s++;
123 if (inbuf.inp.s >= inbuf.inp.e)
124 inp_read_line();
125 }
126
127 char
128 inp_next(void)
129 {
130 char ch = inp_peek();
131 inp_skip();
132 return ch;
133 }
134
#ifdef debug
/*
 * Print the unread part of the input, followed by the save_com range and the
 * saved_inp range if these are set, all on a single line starting with the
 * given prefix.
 */
void
debug_inp(const char *prefix)
{
    debug_printf("%s:", prefix);

    debug_vis_range(" inp \"", inbuf.inp.s, inbuf.inp.e, "\"");

    if (inbuf.save_com_s != NULL) {
	debug_vis_range(" save_com \"",
	    inbuf.save_com_s, inbuf.save_com_e, "\"");
    }

    if (inbuf.saved_inp_s != NULL) {
	debug_vis_range(" saved_inp \"",
	    inbuf.saved_inp_s, inbuf.saved_inp_e, "\"");
    }

    debug_printf("\n");
}
#endif
150
151 static void
152 inp_comment_check_size(size_t n)
153 {
154 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
155 array_length(inbuf.save_com_buf))
156 return;
157
158 diag(1, "Internal buffer overflow - "
159 "Move big comment from right after if, while, or whatever");
160 fflush(output);
161 exit(1);
162 }
163
164 void
165 inp_comment_init_newline(void)
166 {
167 if (inbuf.save_com_e != NULL)
168 return;
169
170 inbuf.save_com_s = inbuf.save_com_buf;
171 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */
172 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */
173 inbuf.save_com_e = &inbuf.save_com_s[2];
174 debug_inp(__func__);
175 }
176
177 void
178 inp_comment_init_comment(void)
179 {
180 if (inbuf.save_com_e != NULL)
181 return;
182
183 /*
184 * Copy everything from the start of the line, because
185 * process_comment() will use that to calculate the original
186 * indentation of a boxed comment.
187 */
188 /*
189 * TODO: Don't store anything in the memory range [input.inp.buf,
190 * input.inp.s), as that data can easily get lost.
191 */
192 /*
193 * FIXME: The '4' below is completely wrong. For example, in the snippet
194 * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there
195 * is an additional line break before the ')', memcpy tries to copy
196 * (size_t)-1 bytes.
197 *
198 * The original author of this magic number doesn't remember its purpose
199 * anymore, so there is no point in keeping it. The existing tests must
200 * still pass though.
201 */
202 assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
203 size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
204 assert(line_len < array_length(inbuf.save_com_buf));
205 memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
206 inbuf.save_com_s = inbuf.save_com_buf + line_len;
207 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */
208 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */
209 inbuf.save_com_e = &inbuf.save_com_s[2];
210 debug_vis_range("search_stmt_comment: before save_com is \"",
211 inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
212 debug_vis_range("search_stmt_comment: save_com is \"",
213 inbuf.save_com_s, inbuf.save_com_e, "\"\n");
214 }
215
216 void
217 inp_comment_init_preproc(void)
218 {
219 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
220 * must set up the buffer */
221 inbuf.save_com_s = inbuf.save_com_buf;
222 inbuf.save_com_e = inbuf.save_com_s;
223 } else {
224 inp_comment_add_char('\n'); /* add newline between comments */
225 inp_comment_add_char(' ');
226 --line_no;
227 }
228 }
229
230 void
231 inp_comment_add_char(char ch)
232 {
233 inp_comment_check_size(1);
234 *inbuf.save_com_e++ = ch;
235 }
236
237 void
238 inp_comment_add_range(const char *s, const char *e)
239 {
240 size_t len = (size_t)(e - s);
241 inp_comment_check_size(len);
242 memcpy(inbuf.save_com_e, s, len);
243 inbuf.save_com_e += len;
244 }
245
246 bool
247 inp_comment_complete_block(void)
248 {
249 return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
250 }
251
252 bool
253 inp_comment_seen(void)
254 {
255 return inbuf.save_com_e != NULL;
256 }
257
258 void
259 inp_comment_rtrim(void)
260 {
261 while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
262 inbuf.save_com_e--;
263 }
264
265 void
266 inp_comment_rtrim_newline(void)
267 {
268 while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
269 inbuf.save_com_e--;
270 }
271
272 void
273 inp_from_comment(void)
274 {
275 inbuf.saved_inp_s = inbuf.inp.s;
276 inbuf.saved_inp_e = inbuf.inp.e;
277
278 inbuf.inp.s = inbuf.save_com_s; /* redirect lexi input to save_com_s */
279 inbuf.inp.e = inbuf.save_com_e;
280 inbuf.save_com_s = NULL;
281 inbuf.save_com_e = NULL;
282 debug_inp(__func__);
283 }
284
285 /*
286 * After having read from save_com, continue with the rest of the input line
287 * before reading the next line from the input file.
288 */
289 static bool
290 inp_from_file(void)
291 {
292 if (inbuf.saved_inp_s == NULL)
293 return false;
294
295 inbuf.inp.s = inbuf.saved_inp_s;
296 inbuf.inp.e = inbuf.saved_inp_e;
297 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
298 debug_println("switched inp.s back to saved_inp_s");
299 return inbuf.inp.s < inbuf.inp.e;
300 }
301
302 void
303 inp_comment_insert_lbrace(void)
304 {
305 assert(inbuf.save_com_s[0] == ' '); /* see inp_comment_init_newline */
306 inbuf.save_com_s[0] = '{';
307 }
308
309 static void
310 inp_add(char ch)
311 {
312 if (inbuf.inp.e >= inbuf.inp.l) {
313 size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
314 size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
315 inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
316 inbuf.inp.s = inbuf.inp.buf;
317 inbuf.inp.e = inbuf.inp.buf + offset;
318 inbuf.inp.l = inbuf.inp.buf + new_size - 2;
319 }
320 *inbuf.inp.e++ = ch;
321 }
322
323 static void
324 inp_read_next_line(FILE *f)
325 {
326 inbuf.inp.s = inbuf.inp.buf;
327 inbuf.inp.e = inbuf.inp.buf;
328
329 for (;;) {
330 int ch = getc(f);
331 if (ch == EOF) {
332 if (!inhibit_formatting) {
333 inp_add(' ');
334 inp_add('\n');
335 }
336 had_eof = true;
337 break;
338 }
339
340 if (ch != '\0')
341 inp_add((char)ch);
342 if (ch == '\n')
343 break;
344 }
345 }
346
347 static void
348 output_char(char ch)
349 {
350 fputc(ch, output);
351 debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
352 }
353
354 static void
355 output_range(const char *s, const char *e)
356 {
357 fwrite(s, 1, (size_t)(e - s), output);
358 debug_vis_range("output_range \"", s, e, "\"\n");
359 }
360
361 static int
362 output_indent(int old_ind, int new_ind)
363 {
364 int ind = old_ind;
365
366 if (opt.use_tabs) {
367 int tabsize = opt.tabsize;
368 int n = new_ind / tabsize - ind / tabsize;
369 if (n > 0)
370 ind -= ind % tabsize;
371 for (int i = 0; i < n; i++) {
372 fputc('\t', output);
373 ind += tabsize;
374 }
375 }
376
377 for (; ind < new_ind; ind++)
378 fputc(' ', output);
379
380 debug_println("output_indent %d", ind);
381 return ind;
382 }
383
384 static int
385 dump_line_label(void)
386 {
387 int ind;
388
389 while (lab.e > lab.s && ch_isblank(lab.e[-1]))
390 lab.e--;
391 *lab.e = '\0';
392
393 ind = output_indent(0, compute_label_indent());
394 output_range(lab.s, lab.e);
395 ind = ind_add(ind, lab.s, lab.e);
396
397 ps.is_case_label = false;
398 return ind;
399 }
400
401 static int
402 dump_line_code(int ind)
403 {
404
405 int target_ind = compute_code_indent();
406 for (int i = 0; i < ps.p_l_follow; i++) {
407 if (ps.paren_indents[i] >= 0) {
408 int paren_ind = ps.paren_indents[i];
409 ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind));
410 debug_println(
411 "setting paren_indents[%d] from %d to %d for column %d",
412 i, paren_ind, ps.paren_indents[i], target_ind + 1);
413 }
414 }
415
416 ind = output_indent(ind, target_ind);
417 output_range(code.s, code.e);
418 return ind_add(ind, code.s, code.e);
419 }
420
421 static void
422 dump_line_comment(int ind)
423 {
424 int target_ind = ps.com_ind;
425 const char *p = com.s;
426
427 target_ind += ps.comment_delta;
428
429 /* consider original indentation in case this is a box comment */
430 for (; *p == '\t'; p++)
431 target_ind += opt.tabsize;
432
433 for (; target_ind < 0; p++) {
434 if (*p == ' ')
435 target_ind++;
436 else if (*p == '\t')
437 target_ind = next_tab(target_ind);
438 else {
439 target_ind = 0;
440 break;
441 }
442 }
443
444 /* if comment can't fit on this line, put it on the next line */
445 if (ind > target_ind) {
446 output_char('\n');
447 ind = 0;
448 ps.stats.lines++;
449 }
450
451 while (com.e > p && ch_isspace(com.e[-1]))
452 com.e--;
453
454 (void)output_indent(ind, target_ind);
455 output_range(p, com.e);
456
457 ps.comment_delta = ps.n_comment_delta;
458 ps.stats.comment_lines++;
459 }
460
461 /*
462 * Write a line of formatted source to the output file. The line consists of
463 * the label, the code and the comment.
464 */
465 static void
466 output_line(char line_terminator)
467 {
468 static bool first_line = true;
469
470 ps.is_function_definition = false;
471
472 if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
473 if (suppress_blanklines)
474 suppress_blanklines = false;
475 else
476 blank_lines_to_output++;
477
478 } else if (!inhibit_formatting) {
479 suppress_blanklines = false;
480 if (blank_line_before && !first_line) {
481 if (opt.swallow_optional_blanklines) {
482 if (blank_lines_to_output == 1)
483 blank_lines_to_output = 0;
484 } else {
485 if (blank_lines_to_output == 0)
486 blank_lines_to_output = 1;
487 }
488 }
489
490 for (; blank_lines_to_output > 0; blank_lines_to_output--)
491 output_char('\n');
492
493 if (ps.ind_level == 0)
494 ps.in_stmt_cont = false; /* this is a class A kludge */
495
496 if (lab.e != lab.s || code.e != code.s)
497 ps.stats.code_lines++;
498
499 int ind = 0;
500 if (lab.e != lab.s)
501 ind = dump_line_label();
502 if (code.e != code.s)
503 ind = dump_line_code(ind);
504 if (com.e != com.s)
505 dump_line_comment(ind);
506
507 output_char(line_terminator);
508 ps.stats.lines++;
509
510 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
511 blank_line_before = true;
512 ps.just_saw_decl = 0;
513 } else
514 blank_line_before = blank_line_after;
515 blank_line_after = false;
516 }
517
518 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */
519 ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
520 ps.decl_indent_done = false;
521
522 *(lab.e = lab.s) = '\0'; /* reset buffers */
523 *(code.e = code.s) = '\0';
524 *(com.e = com.s = com.buf + 1) = '\0';
525
526 ps.ind_level = ps.ind_level_follow;
527 ps.paren_level = ps.p_l_follow;
528
529 if (ps.paren_level > 0) {
530 /* TODO: explain what negative indentation means */
531 paren_indent = -1 - ps.paren_indents[ps.paren_level - 1];
532 debug_println("paren_indent is now %d", paren_indent);
533 }
534
535 first_line = false;
536 }
537
/* Write the pending line to the output, terminated by a newline. */
void
dump_line(void)
{
    const char terminator = '\n';

    output_line(terminator);
}
543
/* Write the pending line to the output, terminated by a form feed. */
void
dump_line_ff(void)
{
    const char terminator = '\f';

    output_line(terminator);
}
549
550 static int
551 compute_code_indent_lineup(int base_ind)
552 {
553 int ti = paren_indent;
554 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
555 if (overflow < 0)
556 return ti;
557
558 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
559 ti -= overflow + 2;
560 if (ti > base_ind)
561 return ti;
562 return base_ind;
563 }
564
565 return ti;
566 }
567
568 int
569 compute_code_indent(void)
570 {
571 int base_ind = ps.ind_level * opt.indent_size;
572
573 if (ps.paren_level == 0) {
574 if (ps.in_stmt_cont)
575 return base_ind + opt.continuation_indent;
576 return base_ind;
577 }
578
579 if (opt.lineup_to_parens) {
580 if (opt.lineup_to_parens_always)
581 return paren_indent;
582 return compute_code_indent_lineup(base_ind);
583 }
584
585 if (2 * opt.continuation_indent == opt.indent_size)
586 return base_ind + opt.continuation_indent;
587 else
588 return base_ind + opt.continuation_indent * ps.paren_level;
589 }
590
591 int
592 compute_label_indent(void)
593 {
594 if (ps.is_case_label)
595 return (int)(case_ind * (float)opt.indent_size);
596 if (lab.s[0] == '#')
597 return 0;
598 return opt.indent_size * (ps.ind_level - 2);
599 }
600
/* Advance *pp over any characters for which ch_isblank is true. */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
607
/*
 * If the text at *pp starts with the string s, advance *pp past it and
 * return true.  Otherwise leave *pp unchanged and return false.
 *
 * The text is read at most up to the first mismatch, so a NUL in the text
 * ends the comparison.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const char *cursor = *pp;

    for (; *s != '\0'; s++, cursor++) {
	if (*cursor != *s)
	    return false;
    }

    *pp = cursor;
    return true;
}
618
619 static void
620 parse_indent_comment(void)
621 {
622 bool on;
623
624 const char *p = inbuf.inp.buf;
625
626 skip_blank(&p);
627 if (!skip_string(&p, "/*"))
628 return;
629 skip_blank(&p);
630 if (!skip_string(&p, "INDENT"))
631 return;
632 skip_blank(&p);
633
634 if (*p == '*' || skip_string(&p, "ON"))
635 on = true;
636 else if (skip_string(&p, "OFF"))
637 on = false;
638 else
639 return;
640
641 skip_blank(&p);
642 if (!skip_string(&p, "*/\n"))
643 return;
644
645 if (com.s != com.e || lab.s != lab.e || code.s != code.e)
646 dump_line();
647
648 inhibit_formatting = !on;
649 if (on) {
650 blank_lines_to_output = 0;
651 blank_line_after = false;
652 blank_line_before = false;
653 suppress_blanklines = true;
654 }
655 }
656
657 void
658 inp_read_line(void)
659 {
660 if (inp_from_file())
661 return;
662
663 inp_read_next_line(input);
664
665 parse_indent_comment();
666
667 if (inhibit_formatting)
668 output_range(inbuf.inp.s, inbuf.inp.e);
669 }
670