io.c revision 1.131 1 /* $NetBSD: io.c,v 1.131 2021/11/25 07:45:32 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.131 2021/11/25 07:45:32 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
55
56 #include "indent.h"
57
58 static struct {
59 struct buffer inp; /* one line of input, ready to be split into
60 * tokens; occasionally this buffer switches
61 * to save_com_buf */
62 char save_com_buf[5000]; /* input text is saved here when looking for
63 * the brace after an if, while, etc */
64 char *save_com_s; /* start of the comment in save_com_buf */
65 char *save_com_e; /* end of the comment in save_com_buf */
66
67 char *saved_inp_s; /* saved value of inp.s when taking input from
68 * save_com */
69 char *saved_inp_e; /* saved value of inp.e */
70 } inbuf;
71
/*
 * Indentation derived from the innermost open parenthesis; updated by
 * output_line and used by compute_code_indent when lining up to parentheses.
 */
static int paren_indent;

/*
 * When true, the next empty line seen by output_line is not counted as a
 * blank line to output; set after an 'INDENT ON' comment.
 */
static bool suppress_blanklines;
74
75
76 void
77 inp_init(void)
78 {
79 inbuf.inp.buf = xmalloc(10);
80 inbuf.inp.l = inbuf.inp.buf + 8;
81 inbuf.inp.s = inbuf.inp.buf;
82 inbuf.inp.e = inbuf.inp.buf;
83 }
84
85 const char *
86 inp_p(void)
87 {
88 return inbuf.inp.s;
89 }
90
91 const char *
92 inp_line_start(void)
93 {
94 /*
95 * The comment we're about to read usually comes from inp.buf, unless
96 * it has been copied into save_com.
97 *
98 * XXX: ordered comparison between pointers from different objects
99 * invokes undefined behavior (C99 6.5.8).
100 */
101 return inbuf.inp.s >= inbuf.save_com_buf &&
102 inbuf.inp.s < inbuf.save_com_buf + array_length(inbuf.save_com_buf)
103 ? inbuf.save_com_buf : inbuf.inp.buf;
104 }
105
106 const char *
107 inp_line_end(void)
108 {
109 return inbuf.inp.e;
110 }
111
112 char
113 inp_peek(void)
114 {
115 return *inbuf.inp.s;
116 }
117
118 char
119 inp_lookahead(size_t i)
120 {
121 return inbuf.inp.s[i];
122 }
123
124 void
125 inp_skip(void)
126 {
127 inbuf.inp.s++;
128 if (inbuf.inp.s >= inbuf.inp.e)
129 inp_read_line();
130 }
131
132 char
133 inp_next(void)
134 {
135 char ch = inp_peek();
136 inp_skip();
137 return ch;
138 }
139
#ifdef debug
/*
 * Print the state of the input buffers to the debug log, prefixed by the
 * given text: always the pending input, plus the saved comment and the saved
 * input range if they are set.
 */
void
debug_inp(const char *prefix)
{
    const char *com_s = inbuf.save_com_s;
    const char *saved_s = inbuf.saved_inp_s;

    debug_printf("%s:", prefix);
    debug_vis_range(" inp \"", inbuf.inp.s, inbuf.inp.e, "\"");
    if (com_s != NULL) {
	debug_vis_range(" save_com \"",
	    com_s, inbuf.save_com_e, "\"");
    }
    if (saved_s != NULL) {
	debug_vis_range(" saved_inp \"",
	    saved_s, inbuf.saved_inp_e, "\"");
    }
    debug_printf("\n");
}
#endif
155
156 static void
157 inp_comment_check_size(size_t n)
158 {
159 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
160 array_length(inbuf.save_com_buf))
161 return;
162
163 diag(1, "Internal buffer overflow - "
164 "Move big comment from right after if, while, or whatever");
165 fflush(output);
166 exit(1);
167 }
168
169 void
170 inp_comment_init_newline(void)
171 {
172 if (inbuf.save_com_e != NULL)
173 return;
174
175 inbuf.save_com_s = inbuf.save_com_buf;
176 inbuf.save_com_s[0] = ' '; /* see search_stmt_lbrace */
177 inbuf.save_com_s[1] = ' '; /* see search_stmt_lbrace */
178 inbuf.save_com_e = &inbuf.save_com_s[2];
179 debug_inp(__func__);
180 }
181
182 void
183 inp_comment_init_comment(void)
184 {
185 if (inbuf.save_com_e != NULL)
186 return;
187
188 /*
189 * Copy everything from the start of the line, because
190 * process_comment() will use that to calculate the original
191 * indentation of a boxed comment.
192 */
193 /*
194 * TODO: Don't store anything in the memory range [input.inp.buf,
195 * input.inp.s), as that data can easily get lost.
196 */
197 /*
198 * FIXME: The '4' below is completely wrong. For example, in the snippet
199 * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there
200 * is an additional line break before the ')', memcpy tries to copy
201 * (size_t)-1 bytes.
202 *
203 * The original author of this magic number doesn't remember its purpose
204 * anymore, so there is no point in keeping it. The existing tests must
205 * still pass though.
206 */
207 assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
208 size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
209 assert(line_len < array_length(inbuf.save_com_buf));
210 memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
211 inbuf.save_com_s = inbuf.save_com_buf + line_len;
212 inbuf.save_com_s[0] = ' '; /* see search_stmt_lbrace */
213 inbuf.save_com_s[1] = ' '; /* see search_stmt_lbrace */
214 inbuf.save_com_e = &inbuf.save_com_s[2];
215 debug_vis_range("search_stmt_comment: before save_com is \"",
216 inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
217 debug_vis_range("search_stmt_comment: save_com is \"",
218 inbuf.save_com_s, inbuf.save_com_e, "\"\n");
219 }
220
221 void
222 inp_comment_init_preproc(void)
223 {
224 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
225 * must set up the buffer */
226 inbuf.save_com_s = inbuf.save_com_buf;
227 inbuf.save_com_e = inbuf.save_com_s;
228 } else {
229 inp_comment_add_char('\n'); /* add newline between comments */
230 inp_comment_add_char(' ');
231 --line_no;
232 }
233 }
234
235 void
236 inp_comment_add_char(char ch)
237 {
238 inp_comment_check_size(1);
239 *inbuf.save_com_e++ = ch;
240 }
241
242 void
243 inp_comment_add_range(const char *s, const char *e)
244 {
245 size_t len = (size_t)(e - s);
246 inp_comment_check_size(len);
247 memcpy(inbuf.save_com_e, s, len);
248 inbuf.save_com_e += len;
249 }
250
251 bool
252 inp_comment_complete_block(void)
253 {
254 return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
255 }
256
257 bool
258 inp_comment_seen(void)
259 {
260 return inbuf.save_com_e != NULL;
261 }
262
263 void
264 inp_comment_rtrim(void)
265 {
266 while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
267 inbuf.save_com_e--;
268 }
269
270 void
271 inp_comment_rtrim_newline(void)
272 {
273 while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
274 inbuf.save_com_e--;
275 }
276
277 void
278 inp_from_comment(void)
279 {
280 inbuf.saved_inp_s = inbuf.inp.s;
281 inbuf.saved_inp_e = inbuf.inp.e;
282
283 inbuf.inp.s = inbuf.save_com_s; /* redirect lexi input to save_com_s */
284 inbuf.inp.e = inbuf.save_com_e;
285 inbuf.save_com_s = NULL;
286 inbuf.save_com_e = NULL;
287 debug_inp(__func__);
288 }
289
290 void
291 inp_comment_insert_lbrace(void)
292 {
293 assert(inbuf.save_com_s[0] == ' '); /* see inp_comment_init_newline */
294 inbuf.save_com_s[0] = '{';
295 }
296
297 static void
298 output_char(char ch)
299 {
300 fputc(ch, output);
301 debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
302 }
303
304 static void
305 output_range(const char *s, const char *e)
306 {
307 fwrite(s, 1, (size_t)(e - s), output);
308 debug_vis_range("output_range \"", s, e, "\"\n");
309 }
310
/* Write a null-terminated string to the output file. */
static inline void
output_string(const char *s)
{
    const char *end = s + strlen(s);

    output_range(s, end);
}
316
317 static int
318 output_indent(int old_ind, int new_ind)
319 {
320 int ind = old_ind;
321
322 if (opt.use_tabs) {
323 int tabsize = opt.tabsize;
324 int n = new_ind / tabsize - ind / tabsize;
325 if (n > 0)
326 ind -= ind % tabsize;
327 for (int i = 0; i < n; i++) {
328 fputc('\t', output);
329 ind += tabsize;
330 }
331 }
332
333 for (; ind < new_ind; ind++)
334 fputc(' ', output);
335
336 debug_println("output_indent %d", ind);
337 return ind;
338 }
339
340 static int
341 dump_line_label(void)
342 {
343 int ind;
344
345 while (lab.e > lab.s && ch_isblank(lab.e[-1]))
346 lab.e--;
347 *lab.e = '\0';
348
349 ind = output_indent(0, compute_label_indent());
350
351 if (lab.s[0] == '#' && (strncmp(lab.s, "#else", 5) == 0
352 || strncmp(lab.s, "#endif", 6) == 0)) {
353 const char *s = lab.s;
354 if (lab.e[-1] == '\n')
355 lab.e--;
356 do {
357 output_char(*s++);
358 } while (s < lab.e && 'a' <= *s && *s <= 'z');
359
360 while (s < lab.e && ch_isblank(*s))
361 s++;
362
363 if (s < lab.e) {
364 if (s[0] == '/' && s[1] == '*') {
365 output_char('\t');
366 output_range(s, lab.e);
367 } else {
368 output_string("\t/* ");
369 output_range(s, lab.e);
370 output_string(" */");
371 }
372 }
373 } else
374 output_range(lab.s, lab.e);
375 ind = ind_add(ind, lab.s, lab.e);
376
377 ps.is_case_label = false;
378 return ind;
379 }
380
381 static int
382 dump_line_code(int ind)
383 {
384
385 int target_ind = compute_code_indent();
386 for (int i = 0; i < ps.p_l_follow; i++) {
387 if (ps.paren_indents[i] >= 0) {
388 int paren_ind = ps.paren_indents[i];
389 ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind));
390 debug_println(
391 "setting paren_indents[%d] from %d to %d for column %d",
392 i, paren_ind, ps.paren_indents[i], target_ind + 1);
393 }
394 }
395
396 ind = output_indent(ind, target_ind);
397 output_range(code.s, code.e);
398 return ind_add(ind, code.s, code.e);
399 }
400
401 static void
402 dump_line_comment(int ind)
403 {
404 int target_ind = ps.com_ind;
405 const char *p = com.s;
406
407 target_ind += ps.comment_delta;
408
409 /* consider original indentation in case this is a box comment */
410 for (; *p == '\t'; p++)
411 target_ind += opt.tabsize;
412
413 for (; target_ind < 0; p++) {
414 if (*p == ' ')
415 target_ind++;
416 else if (*p == '\t')
417 target_ind = next_tab(target_ind);
418 else {
419 target_ind = 0;
420 break;
421 }
422 }
423
424 /* if comment can't fit on this line, put it on the next line */
425 if (ind > target_ind) {
426 output_char('\n');
427 ind = 0;
428 ps.stats.lines++;
429 }
430
431 while (com.e > p && ch_isspace(com.e[-1]))
432 com.e--;
433
434 (void)output_indent(ind, target_ind);
435 output_range(p, com.e);
436
437 ps.comment_delta = ps.n_comment_delta;
438 ps.stats.comment_lines++;
439 }
440
441 /*
442 * Write a line of formatted source to the output file. The line consists of
443 * the label, the code and the comment.
444 */
445 static void
446 output_line(char line_terminator)
447 {
448 static bool first_line = true;
449
450 ps.is_function_definition = false;
451
452 if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
453 if (suppress_blanklines)
454 suppress_blanklines = false;
455 else
456 blank_lines_to_output++;
457
458 } else if (!inhibit_formatting) {
459 suppress_blanklines = false;
460 if (blank_line_before && !first_line) {
461 if (opt.swallow_optional_blanklines) {
462 if (blank_lines_to_output == 1)
463 blank_lines_to_output = 0;
464 } else {
465 if (blank_lines_to_output == 0)
466 blank_lines_to_output = 1;
467 }
468 }
469
470 for (; blank_lines_to_output > 0; blank_lines_to_output--)
471 output_char('\n');
472
473 if (ps.ind_level == 0)
474 ps.in_stmt_cont = false; /* this is a class A kludge */
475
476 if (lab.e != lab.s || code.e != code.s)
477 ps.stats.code_lines++;
478
479 int ind = 0;
480 if (lab.e != lab.s)
481 ind = dump_line_label();
482 if (code.e != code.s)
483 ind = dump_line_code(ind);
484 if (com.e != com.s)
485 dump_line_comment(ind);
486
487 output_char(line_terminator);
488 ps.stats.lines++;
489
490 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
491 blank_line_before = true;
492 ps.just_saw_decl = 0;
493 } else
494 blank_line_before = blank_line_after;
495 blank_line_after = false;
496 }
497
498 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */
499 ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
500 ps.decl_indent_done = false;
501
502 *(lab.e = lab.s) = '\0'; /* reset buffers */
503 *(code.e = code.s) = '\0';
504 *(com.e = com.s = com.buf + 1) = '\0';
505
506 ps.ind_level = ps.ind_level_follow;
507 ps.paren_level = ps.p_l_follow;
508
509 if (ps.paren_level > 0) {
510 /* TODO: explain what negative indentation means */
511 paren_indent = -1 - ps.paren_indents[ps.paren_level - 1];
512 debug_println("paren_indent is now %d", paren_indent);
513 }
514
515 first_line = false;
516 }
517
/* Write the current line to the output file, terminated by a newline. */
void
dump_line(void)
{
    const char terminator = '\n';

    output_line(terminator);
}
523
/* Write the current line to the output file, terminated by a form feed. */
void
dump_line_ff(void)
{
    const char terminator = '\f';

    output_line(terminator);
}
529
530 static int
531 compute_code_indent_lineup(int base_ind)
532 {
533 int ti = paren_indent;
534 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
535 if (overflow < 0)
536 return ti;
537
538 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
539 ti -= overflow + 2;
540 if (ti > base_ind)
541 return ti;
542 return base_ind;
543 }
544
545 return ti;
546 }
547
548 int
549 compute_code_indent(void)
550 {
551 int base_ind = ps.ind_level * opt.indent_size;
552
553 if (ps.paren_level == 0) {
554 if (ps.in_stmt_cont)
555 return base_ind + opt.continuation_indent;
556 return base_ind;
557 }
558
559 if (opt.lineup_to_parens) {
560 if (opt.lineup_to_parens_always)
561 return paren_indent;
562 return compute_code_indent_lineup(base_ind);
563 }
564
565 if (2 * opt.continuation_indent == opt.indent_size)
566 return base_ind + opt.continuation_indent;
567 else
568 return base_ind + opt.continuation_indent * ps.paren_level;
569 }
570
571 int
572 compute_label_indent(void)
573 {
574 if (ps.is_case_label)
575 return (int)(case_ind * (float)opt.indent_size);
576 if (lab.s[0] == '#')
577 return 0;
578 return opt.indent_size * (ps.ind_level - 2);
579 }
580
/* Advance *pp past any blanks it points to. */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
587
/*
 * If the text at *pp starts with the string s, advance *pp past it and
 * return true. Otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t n = strlen(s);

    if (strncmp(*pp, s, n) != 0)
	return false;

    *pp += n;
    return true;
}
598
599 static void
600 parse_indent_comment(void)
601 {
602 bool on;
603
604 const char *p = inbuf.inp.buf;
605
606 skip_blank(&p);
607 if (!skip_string(&p, "/*"))
608 return;
609 skip_blank(&p);
610 if (!skip_string(&p, "INDENT"))
611 return;
612 skip_blank(&p);
613
614 if (*p == '*' || skip_string(&p, "ON"))
615 on = true;
616 else if (skip_string(&p, "OFF"))
617 on = false;
618 else
619 return;
620
621 skip_blank(&p);
622 if (!skip_string(&p, "*/\n"))
623 return;
624
625 if (com.s != com.e || lab.s != lab.e || code.s != code.e)
626 dump_line();
627
628 inhibit_formatting = !on;
629 if (on) {
630 blank_lines_to_output = 0;
631 blank_line_after = false;
632 blank_line_before = false;
633 suppress_blanklines = true;
634 }
635 }
636
637 /*
638 * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
639 *
640 * All rights reserved
641 */
642 void
643 inp_read_line(void)
644 {
645 char *p;
646 int ch;
647 FILE *f = input;
648
649 if (inbuf.saved_inp_s != NULL) { /* there is a partly filled input buffer left */
650 inbuf.inp.s = inbuf.saved_inp_s; /* do not read anything, just switch buffers */
651 inbuf.inp.e = inbuf.saved_inp_e;
652 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
653 debug_println("switched inp.s back to saved_inp_s");
654 if (inbuf.inp.s < inbuf.inp.e)
655 return; /* only return if there is really something in
656 * this buffer */
657 }
658
659 for (p = inbuf.inp.buf;;) {
660 if (p >= inbuf.inp.l) {
661 size_t size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
662 size_t offset = (size_t)(p - inbuf.inp.buf);
663 inbuf.inp.buf = xrealloc(inbuf.inp.buf, size);
664 p = inbuf.inp.buf + offset;
665 inbuf.inp.l = inbuf.inp.buf + size - 2;
666 }
667
668 if ((ch = getc(f)) == EOF) {
669 if (!inhibit_formatting) {
670 *p++ = ' ';
671 *p++ = '\n';
672 }
673 had_eof = true;
674 break;
675 }
676
677 if (ch != '\0')
678 *p++ = (char)ch;
679 if (ch == '\n')
680 break;
681 }
682
683 inbuf.inp.s = inbuf.inp.buf;
684 inbuf.inp.e = p;
685
686 if (p - inbuf.inp.s >= 3 && p[-3] == '*' && p[-2] == '/')
687 parse_indent_comment();
688
689 if (inhibit_formatting)
690 output_range(inbuf.inp.s, inbuf.inp.e);
691 }
692
/*
 * Return the indentation that results from writing the characters from
 * [start, end) when starting at indentation ind. A newline or form feed
 * resets the indentation to 0, a tab advances to the next tab stop, a
 * backspace goes back by one and every other character advances by one.
 */
int
ind_add(int ind, const char *start, const char *end)
{
    for (const char *p = start; p != end; ++p) {
	switch (*p) {
	case '\n':
	case '\f':
	    ind = 0;
	    break;
	case '\t':
	    ind = next_tab(ind);
	    break;
	case '\b':
	    --ind;
	    break;
	default:
	    ++ind;
	    break;
	}
    }
    return ind;
}
708