io.c revision 1.133 1 /* $NetBSD: io.c,v 1.133 2021/11/25 21:59:40 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.133 2021/11/25 21:59:40 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50
51 #include <assert.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55
56 #include "indent.h"
57
/*
 * State of the input side: the current input line plus a side buffer
 * (save_com_buf) into which input text can be copied and later re-read
 * through 'inp'.
 */
static struct {
    struct buffer inp;		/* one line of input, ready to be split into
				 * tokens; occasionally this buffer switches
				 * to save_com_buf */
    char save_com_buf[5000];	/* input text is saved here when looking for
				 * the brace after an if, while, etc */
    char *save_com_s;		/* start of the comment in save_com_buf */
    char *save_com_e;		/* end of the comment in save_com_buf; NULL
				 * while nothing is being saved, see
				 * inp_comment_seen */

    char *saved_inp_s;		/* saved value of inp.s when taking input from
				 * save_com; NULL otherwise */
    char *saved_inp_e;		/* saved value of inp.e */
} inbuf;
71
/*
 * Indentation for code that continues inside parentheses.  It is recomputed
 * at the end of output_line() from ps.paren_indents and consulted by
 * compute_code_indent() when opt.lineup_to_parens is set.
 */
static int paren_indent;
/*
 * Set by parse_indent_comment() when formatting is switched back on; makes
 * output_line() drop the next empty line instead of counting it as a blank
 * line to output.
 */
static bool suppress_blanklines;
74
75
76 void
77 inp_init(void)
78 {
79 inbuf.inp.buf = xmalloc(10);
80 inbuf.inp.l = inbuf.inp.buf + 8;
81 inbuf.inp.s = inbuf.inp.buf;
82 inbuf.inp.e = inbuf.inp.buf;
83 }
84
85 const char *
86 inp_p(void)
87 {
88 return inbuf.inp.s;
89 }
90
91 const char *
92 inp_line_start(void)
93 {
94 /*
95 * The comment we're about to read usually comes from inp.buf, unless
96 * it has been copied into save_com.
97 */
98 return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
99 }
100
101 const char *
102 inp_line_end(void)
103 {
104 return inbuf.inp.e;
105 }
106
107 char
108 inp_peek(void)
109 {
110 return *inbuf.inp.s;
111 }
112
113 char
114 inp_lookahead(size_t i)
115 {
116 return inbuf.inp.s[i];
117 }
118
119 void
120 inp_skip(void)
121 {
122 inbuf.inp.s++;
123 if (inbuf.inp.s >= inbuf.inp.e)
124 inp_read_line();
125 }
126
/* Consume one character of input and return it. */
char
inp_next(void)
{
    const char consumed = inp_peek();

    inp_skip();
    return consumed;
}
134
#ifdef debug
/*
 * Print the state of the input buffers to the debug log, labelled with the
 * given prefix: always the remaining input, and the saved comment and the
 * saved input position only when they are set.
 */
void
debug_inp(const char *prefix)
{
    const bool has_save_com = inbuf.save_com_s != NULL;
    const bool has_saved_inp = inbuf.saved_inp_s != NULL;

    debug_printf("%s:", prefix);
    debug_vis_range(" inp \"", inbuf.inp.s, inbuf.inp.e, "\"");
    if (has_save_com) {
	debug_vis_range(" save_com \"",
	    inbuf.save_com_s, inbuf.save_com_e, "\"");
    }
    if (has_saved_inp) {
	debug_vis_range(" saved_inp \"",
	    inbuf.saved_inp_s, inbuf.saved_inp_e, "\"");
    }
    debug_printf("\n");
}
#endif
150
151 static void
152 inp_comment_check_size(size_t n)
153 {
154 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
155 array_length(inbuf.save_com_buf))
156 return;
157
158 diag(1, "Internal buffer overflow - "
159 "Move big comment from right after if, while, or whatever");
160 fflush(output);
161 exit(1);
162 }
163
164 void
165 inp_comment_init_newline(void)
166 {
167 if (inbuf.save_com_e != NULL)
168 return;
169
170 inbuf.save_com_s = inbuf.save_com_buf;
171 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */
172 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */
173 inbuf.save_com_e = &inbuf.save_com_s[2];
174 debug_inp(__func__);
175 }
176
177 void
178 inp_comment_init_comment(void)
179 {
180 if (inbuf.save_com_e != NULL)
181 return;
182
183 /*
184 * Copy everything from the start of the line, because
185 * process_comment() will use that to calculate the original
186 * indentation of a boxed comment.
187 */
188 /*
189 * TODO: Don't store anything in the memory range [input.inp.buf,
190 * input.inp.s), as that data can easily get lost.
191 */
192 /*
193 * FIXME: The '4' below is completely wrong. For example, in the snippet
194 * 'if(expr)/''*comment', the 'r)' of the code is not copied. If there
195 * is an additional line break before the ')', memcpy tries to copy
196 * (size_t)-1 bytes.
197 *
198 * The original author of this magic number doesn't remember its purpose
199 * anymore, so there is no point in keeping it. The existing tests must
200 * still pass though.
201 */
202 assert((size_t)(inbuf.inp.s - inbuf.inp.buf) >= 4);
203 size_t line_len = (size_t)(inbuf.inp.s - inbuf.inp.buf) - 4;
204 assert(line_len < array_length(inbuf.save_com_buf));
205 memcpy(inbuf.save_com_buf, inbuf.inp.buf, line_len);
206 inbuf.save_com_s = inbuf.save_com_buf + line_len;
207 inbuf.save_com_s[0] = ' '; /* see inp_comment_insert_lbrace */
208 inbuf.save_com_s[1] = ' '; /* see inp_comment_insert_lbrace */
209 inbuf.save_com_e = &inbuf.save_com_s[2];
210 debug_vis_range("search_stmt_comment: before save_com is \"",
211 inbuf.save_com_buf, inbuf.save_com_s, "\"\n");
212 debug_vis_range("search_stmt_comment: save_com is \"",
213 inbuf.save_com_s, inbuf.save_com_e, "\"\n");
214 }
215
216 void
217 inp_comment_init_preproc(void)
218 {
219 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
220 * must set up the buffer */
221 inbuf.save_com_s = inbuf.save_com_buf;
222 inbuf.save_com_e = inbuf.save_com_s;
223 } else {
224 inp_comment_add_char('\n'); /* add newline between comments */
225 inp_comment_add_char(' ');
226 --line_no;
227 }
228 }
229
230 void
231 inp_comment_add_char(char ch)
232 {
233 inp_comment_check_size(1);
234 *inbuf.save_com_e++ = ch;
235 }
236
237 void
238 inp_comment_add_range(const char *s, const char *e)
239 {
240 size_t len = (size_t)(e - s);
241 inp_comment_check_size(len);
242 memcpy(inbuf.save_com_e, s, len);
243 inbuf.save_com_e += len;
244 }
245
246 bool
247 inp_comment_complete_block(void)
248 {
249 return inbuf.save_com_e[-2] == '*' && inbuf.save_com_e[-1] == '/';
250 }
251
252 bool
253 inp_comment_seen(void)
254 {
255 return inbuf.save_com_e != NULL;
256 }
257
258 void
259 inp_comment_rtrim(void)
260 {
261 while (inbuf.save_com_e > inbuf.save_com_s && ch_isblank(inbuf.save_com_e[-1]))
262 inbuf.save_com_e--;
263 }
264
265 void
266 inp_comment_rtrim_newline(void)
267 {
268 while (inbuf.save_com_e > inbuf.save_com_s && inbuf.save_com_e[-1] == '\n')
269 inbuf.save_com_e--;
270 }
271
272 void
273 inp_from_comment(void)
274 {
275 inbuf.saved_inp_s = inbuf.inp.s;
276 inbuf.saved_inp_e = inbuf.inp.e;
277
278 inbuf.inp.s = inbuf.save_com_s; /* redirect lexi input to save_com_s */
279 inbuf.inp.e = inbuf.save_com_e;
280 inbuf.save_com_s = NULL;
281 inbuf.save_com_e = NULL;
282 debug_inp(__func__);
283 }
284
285 void
286 inp_comment_insert_lbrace(void)
287 {
288 assert(inbuf.save_com_s[0] == ' '); /* see inp_comment_init_newline */
289 inbuf.save_com_s[0] = '{';
290 }
291
292 static void
293 output_char(char ch)
294 {
295 fputc(ch, output);
296 debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
297 }
298
299 static void
300 output_range(const char *s, const char *e)
301 {
302 fwrite(s, 1, (size_t)(e - s), output);
303 debug_vis_range("output_range \"", s, e, "\"\n");
304 }
305
/* Write a NUL-terminated string to the output file. */
static inline void
output_string(const char *s)
{
    const char *end = s + strlen(s);

    output_range(s, end);
}
311
312 static int
313 output_indent(int old_ind, int new_ind)
314 {
315 int ind = old_ind;
316
317 if (opt.use_tabs) {
318 int tabsize = opt.tabsize;
319 int n = new_ind / tabsize - ind / tabsize;
320 if (n > 0)
321 ind -= ind % tabsize;
322 for (int i = 0; i < n; i++) {
323 fputc('\t', output);
324 ind += tabsize;
325 }
326 }
327
328 for (; ind < new_ind; ind++)
329 fputc(' ', output);
330
331 debug_println("output_indent %d", ind);
332 return ind;
333 }
334
335 static int
336 dump_line_label(void)
337 {
338 int ind;
339
340 while (lab.e > lab.s && ch_isblank(lab.e[-1]))
341 lab.e--;
342 *lab.e = '\0';
343
344 ind = output_indent(0, compute_label_indent());
345
346 if (lab.s[0] == '#' && (strncmp(lab.s, "#else", 5) == 0
347 || strncmp(lab.s, "#endif", 6) == 0)) {
348 const char *s = lab.s;
349 if (lab.e[-1] == '\n')
350 lab.e--;
351 do {
352 output_char(*s++);
353 } while (s < lab.e && 'a' <= *s && *s <= 'z');
354
355 while (s < lab.e && ch_isblank(*s))
356 s++;
357
358 if (s < lab.e) {
359 if (s[0] == '/' && s[1] == '*') {
360 output_char('\t');
361 output_range(s, lab.e);
362 } else {
363 output_string("\t/* ");
364 output_range(s, lab.e);
365 output_string(" */");
366 }
367 }
368 } else
369 output_range(lab.s, lab.e);
370 ind = ind_add(ind, lab.s, lab.e);
371
372 ps.is_case_label = false;
373 return ind;
374 }
375
376 static int
377 dump_line_code(int ind)
378 {
379
380 int target_ind = compute_code_indent();
381 for (int i = 0; i < ps.p_l_follow; i++) {
382 if (ps.paren_indents[i] >= 0) {
383 int paren_ind = ps.paren_indents[i];
384 ps.paren_indents[i] = (short)(-1 - (paren_ind + target_ind));
385 debug_println(
386 "setting paren_indents[%d] from %d to %d for column %d",
387 i, paren_ind, ps.paren_indents[i], target_ind + 1);
388 }
389 }
390
391 ind = output_indent(ind, target_ind);
392 output_range(code.s, code.e);
393 return ind_add(ind, code.s, code.e);
394 }
395
396 static void
397 dump_line_comment(int ind)
398 {
399 int target_ind = ps.com_ind;
400 const char *p = com.s;
401
402 target_ind += ps.comment_delta;
403
404 /* consider original indentation in case this is a box comment */
405 for (; *p == '\t'; p++)
406 target_ind += opt.tabsize;
407
408 for (; target_ind < 0; p++) {
409 if (*p == ' ')
410 target_ind++;
411 else if (*p == '\t')
412 target_ind = next_tab(target_ind);
413 else {
414 target_ind = 0;
415 break;
416 }
417 }
418
419 /* if comment can't fit on this line, put it on the next line */
420 if (ind > target_ind) {
421 output_char('\n');
422 ind = 0;
423 ps.stats.lines++;
424 }
425
426 while (com.e > p && ch_isspace(com.e[-1]))
427 com.e--;
428
429 (void)output_indent(ind, target_ind);
430 output_range(p, com.e);
431
432 ps.comment_delta = ps.n_comment_delta;
433 ps.stats.comment_lines++;
434 }
435
436 /*
437 * Write a line of formatted source to the output file. The line consists of
438 * the label, the code and the comment.
439 */
440 static void
441 output_line(char line_terminator)
442 {
443 static bool first_line = true;
444
445 ps.is_function_definition = false;
446
447 if (code.s == code.e && lab.s == lab.e && com.s == com.e) {
448 if (suppress_blanklines)
449 suppress_blanklines = false;
450 else
451 blank_lines_to_output++;
452
453 } else if (!inhibit_formatting) {
454 suppress_blanklines = false;
455 if (blank_line_before && !first_line) {
456 if (opt.swallow_optional_blanklines) {
457 if (blank_lines_to_output == 1)
458 blank_lines_to_output = 0;
459 } else {
460 if (blank_lines_to_output == 0)
461 blank_lines_to_output = 1;
462 }
463 }
464
465 for (; blank_lines_to_output > 0; blank_lines_to_output--)
466 output_char('\n');
467
468 if (ps.ind_level == 0)
469 ps.in_stmt_cont = false; /* this is a class A kludge */
470
471 if (lab.e != lab.s || code.e != code.s)
472 ps.stats.code_lines++;
473
474 int ind = 0;
475 if (lab.e != lab.s)
476 ind = dump_line_label();
477 if (code.e != code.s)
478 ind = dump_line_code(ind);
479 if (com.e != com.s)
480 dump_line_comment(ind);
481
482 output_char(line_terminator);
483 ps.stats.lines++;
484
485 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl) {
486 blank_line_before = true;
487 ps.just_saw_decl = 0;
488 } else
489 blank_line_before = blank_line_after;
490 blank_line_after = false;
491 }
492
493 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */
494 ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
495 ps.decl_indent_done = false;
496
497 *(lab.e = lab.s) = '\0'; /* reset buffers */
498 *(code.e = code.s) = '\0';
499 *(com.e = com.s = com.buf + 1) = '\0';
500
501 ps.ind_level = ps.ind_level_follow;
502 ps.paren_level = ps.p_l_follow;
503
504 if (ps.paren_level > 0) {
505 /* TODO: explain what negative indentation means */
506 paren_indent = -1 - ps.paren_indents[ps.paren_level - 1];
507 debug_println("paren_indent is now %d", paren_indent);
508 }
509
510 first_line = false;
511 }
512
/* Write the current line to the output, terminated by a newline. */
void
dump_line(void)
{
    const char terminator = '\n';

    output_line(terminator);
}
518
/* Write the current line to the output, terminated by a form feed. */
void
dump_line_ff(void)
{
    const char terminator = '\f';

    output_line(terminator);
}
524
525 static int
526 compute_code_indent_lineup(int base_ind)
527 {
528 int ti = paren_indent;
529 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
530 if (overflow < 0)
531 return ti;
532
533 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
534 ti -= overflow + 2;
535 if (ti > base_ind)
536 return ti;
537 return base_ind;
538 }
539
540 return ti;
541 }
542
543 int
544 compute_code_indent(void)
545 {
546 int base_ind = ps.ind_level * opt.indent_size;
547
548 if (ps.paren_level == 0) {
549 if (ps.in_stmt_cont)
550 return base_ind + opt.continuation_indent;
551 return base_ind;
552 }
553
554 if (opt.lineup_to_parens) {
555 if (opt.lineup_to_parens_always)
556 return paren_indent;
557 return compute_code_indent_lineup(base_ind);
558 }
559
560 if (2 * opt.continuation_indent == opt.indent_size)
561 return base_ind + opt.continuation_indent;
562 else
563 return base_ind + opt.continuation_indent * ps.paren_level;
564 }
565
566 int
567 compute_label_indent(void)
568 {
569 if (ps.is_case_label)
570 return (int)(case_ind * (float)opt.indent_size);
571 if (lab.s[0] == '#')
572 return 0;
573 return opt.indent_size * (ps.ind_level - 2);
574 }
575
/* Advance *pp past any blanks (as defined by ch_isblank). */
static void
skip_blank(const char **pp)
{
    const char *p = *pp;

    while (ch_isblank(*p))
	p++;
    *pp = p;
}
582
/*
 * If the text at *pp starts with the string s, advance *pp past it and
 * return true; otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
    const size_t len = strlen(s);

    if (strncmp(*pp, s, len) != 0)
	return false;
    *pp += len;
    return true;
}
593
594 static void
595 parse_indent_comment(void)
596 {
597 bool on;
598
599 const char *p = inbuf.inp.buf;
600
601 skip_blank(&p);
602 if (!skip_string(&p, "/*"))
603 return;
604 skip_blank(&p);
605 if (!skip_string(&p, "INDENT"))
606 return;
607 skip_blank(&p);
608
609 if (*p == '*' || skip_string(&p, "ON"))
610 on = true;
611 else if (skip_string(&p, "OFF"))
612 on = false;
613 else
614 return;
615
616 skip_blank(&p);
617 if (!skip_string(&p, "*/\n"))
618 return;
619
620 if (com.s != com.e || lab.s != lab.e || code.s != code.e)
621 dump_line();
622
623 inhibit_formatting = !on;
624 if (on) {
625 blank_lines_to_output = 0;
626 blank_line_after = false;
627 blank_line_before = false;
628 suppress_blanklines = true;
629 }
630 }
631
632 /*
633 * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
634 *
635 * All rights reserved
636 */
637 void
638 inp_read_line(void)
639 {
640 char *p;
641 int ch;
642 FILE *f = input;
643
644 if (inbuf.saved_inp_s != NULL) { /* there is a partly filled input buffer left */
645 inbuf.inp.s = inbuf.saved_inp_s; /* do not read anything, just switch buffers */
646 inbuf.inp.e = inbuf.saved_inp_e;
647 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
648 debug_println("switched inp.s back to saved_inp_s");
649 if (inbuf.inp.s < inbuf.inp.e)
650 return; /* only return if there is really something in
651 * this buffer */
652 }
653
654 for (p = inbuf.inp.buf;;) {
655 if (p >= inbuf.inp.l) {
656 size_t size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
657 size_t offset = (size_t)(p - inbuf.inp.buf);
658 inbuf.inp.buf = xrealloc(inbuf.inp.buf, size);
659 p = inbuf.inp.buf + offset;
660 inbuf.inp.l = inbuf.inp.buf + size - 2;
661 }
662
663 if ((ch = getc(f)) == EOF) {
664 if (!inhibit_formatting) {
665 *p++ = ' ';
666 *p++ = '\n';
667 }
668 had_eof = true;
669 break;
670 }
671
672 if (ch != '\0')
673 *p++ = (char)ch;
674 if (ch == '\n')
675 break;
676 }
677
678 inbuf.inp.s = inbuf.inp.buf;
679 inbuf.inp.e = p;
680
681 if (p - inbuf.inp.s >= 3 && p[-3] == '*' && p[-2] == '/')
682 parse_indent_comment();
683
684 if (inhibit_formatting)
685 output_range(inbuf.inp.s, inbuf.inp.e);
686 }
687
/*
 * Compute the indentation that results from writing the characters in
 * [start, end) when beginning at indentation ind.
 *
 * A newline or form feed resets the indentation to 0, a tab advances to
 * the next tab stop, a backspace goes back by 1, and any other character
 * advances by 1.
 */
int
ind_add(int ind, const char *start, const char *end)
{
    for (const char *p = start; p != end; ++p) {
	switch (*p) {
	case '\n':
	case '\f':
	    ind = 0;
	    break;
	case '\t':
	    ind = next_tab(ind);
	    break;
	case '\b':
	    --ind;
	    break;
	default:
	    ++ind;
	    break;
	}
    }
    return ind;
}
703