io.c revision 1.124 1 /* $NetBSD: io.c,v 1.124 2021/11/19 18:25:50 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.124 2021/11/19 18:25:50 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50
51 #include <assert.h>
52 #include <ctype.h>
53 #include <stdarg.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57
58 #include "indent.h"
59
60 static struct input_buffer {
61 struct buffer inp; /* one line of input, ready to be split into
62 * tokens; occasionally this buffer switches
63 * to save_com_buf */
64 char save_com_buf[5000]; /* input text is saved here when looking for
65 * the brace after an if, while, etc */
66 char *save_com_s; /* start of the comment in save_com_buf */
67 char *save_com_e; /* end of the comment in save_com_buf */
68
69 char *saved_inp_s; /* saved value of inp.s when taking input from
70 * save_com */
71 char *saved_inp_e; /* similarly saved value of inp.e */
72 } inbuf;
73
/*
 * Indentation used by compute_code_indent when lining up to parentheses;
 * recomputed by output_line from ps.paren_indents.
 */
static int paren_indent;
/*
 * Set when an 'INDENT ON' comment is seen; makes output_line skip counting
 * the next empty line and is cleared again there.
 */
static bool suppress_blanklines;
76
77
78 void
79 inp_init(void)
80 {
81 inbuf.inp.buf = xmalloc(10);
82 inbuf.inp.l = inbuf.inp.buf + 8;
83 inbuf.inp.s = inbuf.inp.buf;
84 inbuf.inp.e = inbuf.inp.buf;
85 }
86
87 const char *
88 inp_p(void)
89 {
90 return inbuf.inp.s;
91 }
92
93 const char *
94 inp_line_start(void)
95 {
96 /*
97 * The comment we're about to read usually comes from inp.buf, unless
98 * it has been copied into save_com.
99 *
100 * XXX: ordered comparison between pointers from different objects
101 * invokes undefined behavior (C99 6.5.8).
102 */
103 return inbuf.inp.s >= inbuf.save_com_buf &&
104 inbuf.inp.s < inbuf.save_com_buf + array_length(inbuf.save_com_buf)
105 ? inbuf.save_com_buf : inbuf.inp.buf;
106 }
107
108 const char *
109 inp_line_end(void)
110 {
111 return inbuf.inp.e;
112 }
113
114 char
115 inp_peek(void)
116 {
117 return *inbuf.inp.s;
118 }
119
120 char
121 inp_lookahead(size_t i)
122 {
123 return inbuf.inp.s[i];
124 }
125
126 void
127 inp_skip(void)
128 {
129 inbuf.inp.s++;
130 if (inbuf.inp.s >= inbuf.inp.e)
131 inp_read_line();
132 }
133
134 char
135 inp_next(void)
136 {
137 char ch = inp_peek();
138 inp_skip();
139 return ch;
140 }
141
#ifndef debug
/* Without debugging support, debug_inp expands to an empty statement. */
#define debug_inp(prefix) do { } while (false)
#else
/*
 * Print the unread part of the input and, when they are set, the saved
 * comment and the saved input range, on a single debug line that starts
 * with the given prefix.
 */
void
debug_inp(const char *prefix)
{
    debug_printf("%s:", prefix);
    debug_vis_range(" inp \"", inbuf.inp.s, inbuf.inp.e, "\"");

    if (inbuf.save_com_s != NULL) {
	debug_vis_range(" save_com \"",
	    inbuf.save_com_s, inbuf.save_com_e, "\"");
    }

    if (inbuf.saved_inp_s != NULL) {
	debug_vis_range(" saved_inp \"",
	    inbuf.saved_inp_s, inbuf.saved_inp_e, "\"");
    }

    debug_printf("\n");
}
#endif
159
160
161 static void
162 inp_comment_check_size(size_t n)
163 {
164 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
165 array_length(inbuf.save_com_buf))
166 return;
167
168 diag(1, "Internal buffer overflow - "
169 "Move big comment from right after if, while, or whatever");
170 fflush(output);
171 exit(1);
172 }
173
174 void
175 inp_comment_init_newline(void)
176 {
177 if (inbuf.save_com_e != NULL)
178 return;
179
180 inbuf.save_com_s = inbuf.save_com_buf;
181 inbuf.save_com_s[0] = ' '; /* see search_stmt_lbrace */
182 inbuf.save_com_s[1] = ' '; /* see search_stmt_lbrace */
183 inbuf.save_com_e = &inbuf.save_com_s[2];
184 debug_inp(__func__);
185 }
186
187 void
188 inp_comment_init_comment(void)
189 {
190 if (inbuf.save_com_e != NULL)
191 return;
192
193 /*
194 * Copy everything from the start of the line, because
195 * process_comment() will use that to calculate the original
196 * indentation of a boxed comment.
197 */
198 /*
199 * TODO: Don't store anything in the memory range [input.inp.buf,
200 * input.inp.s), as that data can easily get lost.
201 */
202 /*
203 * FIXME: This '4' needs an explanation. For example, in the snippet
204 * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there
205 * is an additional line break before the ')', memcpy tries to copy
206 * (size_t)-1 bytes.
207 */
208 assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
209 size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
210 assert(line_len < array_length(inbuf.save_com_buf));
211 memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
212 inbuf.save_com_s = inbuf.save_com_buf + line_len;
213 inbuf.save_com_s[0] = ' '; /* see search_stmt_lbrace */
214 inbuf.save_com_s[1] = ' '; /* see search_stmt_lbrace */
215 inbuf.save_com_e = &inbuf.save_com_s[2];
216 debug_vis_range("search_stmt_comment: before save_com is \"",
217 inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
218 debug_vis_range("search_stmt_comment: save_com is \"",
219 inbuf.save_com_s, inbuf.save_com_e, "\"\n");
220 }
221
222 void
223 inp_comment_init_preproc(void)
224 {
225 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
226 * must set up the buffer */
227 inbuf.save_com_s = inbuf.save_com_buf;
228 inbuf.save_com_e = inbuf.save_com_s;
229 } else {
230 inp_comment_add_char('\n'); /* add newline between comments */
231 inp_comment_add_char(' ');
232 --line_no;
233 }
234 }
235
236 void
237 inp_comment_add_char(char ch)
238 {
239 inp_comment_check_size(1);
240 *inbuf.save_com_e++ = ch;
241 }
242
243 void
244 inp_comment_add_range(const char *s, const char *e)
245 {
246 size_t len = (size_t)(e - s);
247 inp_comment_check_size(len);
248 memcpy(inbuf.save_com_e, s, len);
249 inbuf.save_com_e += len;
250 }
251
252 bool
253 inp_comment_complete_block(void)
254 {
255 return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
256 }
257
258 bool
259 inp_comment_seen(void)
260 {
261 /* TODO: assert((inbuf.save_com_s != NULL) == (inbuf.save_com_e != NULL)); */
262 return inbuf.save_com_e != NULL;
263 }
264
265 void
266 inp_comment_rtrim(void)
267 {
268 while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
269 inbuf.save_com_e--;
270 }
271
272 void
273 inp_comment_rtrim_newline(void)
274 {
275 while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
276 inbuf.save_com_e--;
277 }
278
279 void
280 inp_from_comment(void)
281 {
282 inbuf.saved_inp_s = inbuf.inp.s;
283 inbuf.saved_inp_e = inbuf.inp.e;
284
285 inbuf.inp.s = inbuf.save_com_s; /* redirect lexi input to save_com_s */
286 inbuf.inp.e = inbuf.save_com_e;
287 /* XXX: what about save_com_s? */
288 inbuf.save_com_e = NULL;
289 debug_inp(__func__);
290 }
291
292 void
293 inp_comment_insert_lbrace(void)
294 {
295 assert(inbuf.save_com_s[0] == ' '); /* see inp_comment_init_newline */
296 inbuf.save_com_s[0] = '{';
297 }
298
299 static void
300 output_char(char ch)
301 {
302 fputc(ch, output);
303 debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
304 }
305
306 static void
307 output_range(const char *s, const char *e)
308 {
309 fwrite(s, 1, (size_t)(e - s), output);
310 debug_vis_range("output_range \"", s, e, "\"\n");
311 }
312
/* Write a NUL-terminated string to the output file. */
static inline void
output_string(const char *s)
{
    const char *end = s + strlen(s);

    output_range(s, end);
}
318
319 static int
320 output_indent(int old_ind, int new_ind)
321 {
322 int ind = old_ind;
323
324 if (opt.use_tabs) {
325 int tabsize = opt.tabsize;
326 int n = new_ind / tabsize - ind / tabsize;
327 if (n > 0)
328 ind -= ind % tabsize;
329 for (int i = 0; i < n; i++) {
330 fputc('\t', output);
331 ind += tabsize;
332 }
333 }
334
335 for (; ind < new_ind; ind++)
336 fputc(' ', output);
337
338 debug_println("output_indent %d", ind);
339 return ind;
340 }
341
342 static int
343 dump_line_label(void)
344 {
345 int ind;
346
347 while (lab.e > lab.s && ch_isblank(lab.e[-1]))
348 lab.e--;
349 *lab.e = '\0';
350
351 ind = output_indent(0, compute_label_indent());
352
353 if (lab.s[0] == '#' && (strncmp(lab.s, "#else", 5) == 0
354 || strncmp(lab.s, "#endif", 6) == 0)) {
355 const char *s = lab.s;
356 if (lab.e[-1] == '\n')
357 lab.e--;
358 do {
359 output_char(*s++);
360 } while (s < lab.e && 'a' <= *s && *s <= 'z');
361
362 while (s < lab.e && ch_isblank(*s))
363 s++;
364
365 if (s < lab.e) {
366 if (s[0] == '/' && s[1] == '*') {
367 output_char('\t');
368 output_range(s, lab.e);
369 } else {
370 output_string("\t/* ");
371 output_range(s, lab.e);
372 output_string(" */");
373 }
374 }
375 } else
376 output_range(lab.s, lab.e);
377 ind = ind_add(ind, lab.s, lab.e);
378
379 ps.is_case_label = false;
380 return ind;
381 }
382
383 static int
384 dump_line_code(int ind)
385 {
386
387 int target_ind = compute_code_indent();
388 for (int i = 0; i < ps.p_l_follow; i++) {
389 if (ps.paren_indents[i] >= 0) {
390 int paren_ind = ps.paren_indents[i];
391 ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind));
392 debug_println(
393 "setting paren_indents[%d] from %d to %d for column %d",
394 i, paren_ind, ps.paren_indents[i], target_ind + 1);
395 }
396 }
397
398 ind = output_indent(ind, target_ind);
399 output_range(code.s, code.e);
400 return ind_add(ind, code.s, code.e);
401 }
402
403 static void
404 dump_line_comment(int ind)
405 {
406 int target_ind = ps.com_ind;
407 const char *p = com.s;
408
409 target_ind += ps.comment_delta;
410
411 /* consider original indentation in case this is a box comment */
412 for (; *p == '\t'; p++)
413 target_ind += opt.tabsize;
414
415 for (; target_ind < 0; p++) {
416 if (*p == ' ')
417 target_ind++;
418 else if (*p == '\t')
419 target_ind = next_tab(target_ind);
420 else {
421 target_ind = 0;
422 break;
423 }
424 }
425
426 /* if comment can't fit on this line, put it on the next line */
427 if (ind > target_ind) {
428 output_char('\n');
429 ind = 0;
430 ps.stats.lines++;
431 }
432
433 while (com.e > p && isspace((unsigned char)com.e[-1]))
434 com.e--;
435
436 (void)output_indent(ind, target_ind);
437 output_range(p, com.e);
438
439 ps.comment_delta = ps.n_comment_delta;
440 ps.stats.comment_lines++;
441 }
442
443 /*
444 * Write a line of formatted source to the output file. The line consists of
445 * the label, the code and the comment.
446 */
447 static void
448 output_line(char line_terminator)
449 {
450 static bool first_line = true;
451
452 ps.procname[0] = '\0';
453
454 if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
455 if (suppress_blanklines)
456 suppress_blanklines = false;
457 else
458 blank_lines_to_output++;
459
460 } else if (!inhibit_formatting) {
461 suppress_blanklines = false;
462 if (blank_line_before && !first_line) {
463 if (opt.swallow_optional_blanklines) {
464 if (blank_lines_to_output == 1)
465 blank_lines_to_output = 0;
466 } else {
467 if (blank_lines_to_output == 0)
468 blank_lines_to_output = 1;
469 }
470 }
471
472 for (; blank_lines_to_output > 0; blank_lines_to_output--)
473 output_char('\n');
474
475 if (ps.ind_level == 0)
476 ps.ind_stmt = false; /* this is a class A kludge. don't do
477 * additional statement indentation if
478 * we are at bracket level 0 */
479
480 if (lab.e != lab.s || code.e != code.s)
481 ps.stats.code_lines++;
482
483 int ind = 0;
484 if (lab.e != lab.s)
485 ind = dump_line_label();
486 if (code.e != code.s)
487 ind = dump_line_code(ind);
488 if (com.e != com.s)
489 dump_line_comment(ind);
490
491 output_char(line_terminator);
492 ps.stats.lines++;
493
494 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
495 blank_line_before = true;
496 ps.just_saw_decl = 0;
497 } else
498 blank_line_before = blank_line_after;
499 blank_line_after = false;
500 }
501
502 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */
503 ps.ind_stmt = ps.in_stmt && !ps.in_decl;
504 ps.decl_indent_done = false;
505
506 *(lab.e = lab.s) = '\0'; /* reset buffers */
507 *(code.e = code.s) = '\0';
508 *(com.e = com.s = com.buf + 1) = '\0';
509
510 ps.ind_level = ps.ind_level_follow;
511 ps.paren_level = ps.p_l_follow;
512
513 if (ps.paren_level > 0) {
514 /* TODO: explain what negative indentation means */
515 paren_indent = -1 - ps.paren_indents[ps.paren_level - 1];
516 debug_println("paren_indent is now %d", paren_indent);
517 }
518
519 first_line = false;
520 }
521
/* Write the current line, terminated by a newline. */
void
dump_line(void)
{
    const char terminator = '\n';

    output_line(terminator);
}
527
/* Write the current line, terminated by a form feed. */
void
dump_line_ff(void)
{
    const char terminator = '\f';

    output_line(terminator);
}
533
534 static int
535 compute_code_indent_lineup(int base_ind)
536 {
537 int ti = paren_indent;
538 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
539 if (overflow < 0)
540 return ti;
541
542 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
543 ti -= overflow + 2;
544 if (ti > base_ind)
545 return ti;
546 return base_ind;
547 }
548
549 return ti;
550 }
551
552 int
553 compute_code_indent(void)
554 {
555 int base_ind = ps.ind_level * opt.indent_size;
556
557 if (ps.paren_level == 0) {
558 if (ps.ind_stmt)
559 return base_ind + opt.continuation_indent;
560 return base_ind;
561 }
562
563 if (opt.lineup_to_parens) {
564 if (opt.lineup_to_parens_always)
565 return paren_indent;
566 return compute_code_indent_lineup(base_ind);
567 }
568
569 if (2 * opt.continuation_indent == opt.indent_size)
570 return base_ind + opt.continuation_indent;
571 else
572 return base_ind + opt.continuation_indent * ps.paren_level;
573 }
574
575 int
576 compute_label_indent(void)
577 {
578 if (ps.is_case_label)
579 return (int)(case_ind * (float)opt.indent_size);
580 if (lab.s[0] == '#')
581 return 0;
582 return opt.indent_size * (ps.ind_level - 2);
583 }
584
/* Advance *pp past any blanks. */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
591
/*
 * If the text at *pp starts with the string s, advance *pp past it and
 * return true; otherwise leave *pp alone and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    size_t prefix_len = strlen(s);

    if (strncmp(*pp, s, prefix_len) != 0)
	return false;

    *pp += prefix_len;
    return true;
}
602
603 static void
604 parse_indent_comment(void)
605 {
606 bool on;
607
608 const char *p = inbuf.inp.buf;
609
610 skip_blank(&p);
611 if (!skip_string(&p, "/*"))
612 return;
613 skip_blank(&p);
614 if (!skip_string(&p, "INDENT"))
615 return;
616 skip_blank(&p);
617
618 if (*p == '*' || skip_string(&p, "ON"))
619 on = true;
620 else if (skip_string(&p, "OFF"))
621 on = false;
622 else
623 return;
624
625 skip_blank(&p);
626 if (!skip_string(&p, "*/\n"))
627 return;
628
629 if (com.s != com.e || lab.s != lab.e || code.s != code.e)
630 dump_line();
631
632 inhibit_formatting = !on;
633 if (on) {
634 blank_lines_to_output = 0;
635 blank_line_after = false;
636 blank_line_before = false;
637 suppress_blanklines = true;
638 }
639 }
640
641 /*
642 * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
643 *
644 * All rights reserved
645 */
646 void
647 inp_read_line(void)
648 {
649 char *p;
650 int ch;
651 FILE *f = input;
652
653 if (inbuf.saved_inp_s != NULL) { /* there is a partly filled input buffer left */
654 inbuf.inp.s = inbuf.saved_inp_s; /* do not read anything, just switch buffers */
655 inbuf.inp.e = inbuf.saved_inp_e;
656 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
657 debug_println("switched inp.s back to saved_inp_s");
658 if (inbuf.inp.s < inbuf.inp.e)
659 return; /* only return if there is really something in
660 * this buffer */
661 }
662
663 for (p = inbuf.inp.buf;;) {
664 if (p >= inbuf.inp.l) {
665 size_t size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
666 size_t offset = (size_t)(p - inbuf.inp.buf);
667 inbuf.inp.buf = xrealloc(inbuf.inp.buf, size);
668 p = inbuf.inp.buf + offset;
669 inbuf.inp.l = inbuf.inp.buf + size - 2;
670 }
671
672 if ((ch = getc(f)) == EOF) {
673 if (!inhibit_formatting) {
674 *p++ = ' ';
675 *p++ = '\n';
676 }
677 had_eof = true;
678 break;
679 }
680
681 if (ch != '\0')
682 *p++ = (char)ch;
683 if (ch == '\n')
684 break;
685 }
686
687 inbuf.inp.s = inbuf.inp.buf;
688 inbuf.inp.e = p;
689
690 if (p - inbuf.inp.s >= 3 && p[-3] == '*' && p[-2] == '/')
691 parse_indent_comment();
692
693 if (inhibit_formatting)
694 output_range(inbuf.inp.s, inbuf.inp.e);
695 }
696
/*
 * Compute the indentation that results from writing the characters from
 * [start, end) at indentation ind.  Newline and form feed reset the
 * indentation to 0, a tab advances to the next tab stop, a backspace goes
 * back by one and every other character advances by one.
 */
int
ind_add(int ind, const char *start, const char *end)
{
    const char *p = start;

    while (p != end) {
	switch (*p) {
	case '\n':
	case '\f':
	    ind = 0;
	    break;
	case '\t':
	    ind = next_tab(ind);
	    break;
	case '\b':
	    --ind;
	    break;
	default:
	    ++ind;
	    break;
	}
	++p;
    }
    return ind;
}
712