io.c revision 1.151 1 /* $NetBSD: io.c,v 1.151 2023/05/11 18:26:56 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)io.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: io.c,v 1.151 2023/05/11 18:26:56 rillig Exp $");
47 #elif defined(__FreeBSD__)
48 __FBSDID("$FreeBSD: head/usr.bin/indent/io.c 334927 2018-06-10 16:44:18Z pstef $");
49 #endif
50
51 #include <assert.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55
56 #include "indent.h"
57
58 /*
59 * There are 3 modes for reading the input.
60 *
61 * default: In this mode, the input comes from the input file. The buffer
62 * 'inp' contains the current line, terminated with '\n'. The current read
63 * position is inp.s, and there is always inp.buf <= inp.s < inp.e. All other
64 * pointers are null.
65 *
66 * copy-in: After reading 'if (expr)' or similar tokens, the input still comes
67 * from 'inp', but instead of processing it, it is copied to 'save_com'. The
68 * goal of this mode is to move the comments after the '{', that is to
69 * transform 'if (expr) comment {' to 'if (expr) { comment'. When the next
70 * token cannot be part of this transformation, switch to copy-out.
71 *
72 * copy-out: In this mode, the input comes from 'save_com', which contains the
73 * tokens to be placed after the '{'. The input still comes from the range
74 * [inp.s, inp.e), but these two members have been overwritten with pointers
75 * into save_com_buf, so inp.buf and inp.s are unrelated, which is unusual.
76 * In this mode, inp.e[-1] is usually not terminated with '\n'. After reading
77 * all tokens from save_com, switch to default mode again.
78 */
79 static struct {
80 struct buffer inp; /* one line of input, ready to be split into
81 * tokens; occasionally 's' and 'e' switch
82 * to save_com_buf */
83 char save_com_buf[5000]; /* input text is saved here when looking for
84 * the brace after an if, while, etc */
85 char *save_com_s; /* start of the comment in save_com_buf, or
86 * null */
87 char *save_com_e; /* end of the comment in save_com_buf, or
88 * null */
89
90 char *saved_inp_s; /* saved value of inp.s when taking input from
91 * save_com, or null */
92 char *saved_inp_e; /* saved value of inp.e, or null */
93 } inbuf;
94
95 static int paren_indent;
96
97
98 void
99 inp_init(void)
100 {
101 inbuf.inp.buf = xmalloc(10);
102 inbuf.inp.l = inbuf.inp.buf + 8;
103 inbuf.inp.s = inbuf.inp.buf;
104 inbuf.inp.e = inbuf.inp.buf;
105 }
106
107 const char *
108 inp_p(void)
109 {
110 assert(inbuf.inp.s < inbuf.inp.e);
111 return inbuf.inp.s;
112 }
113
114 const char *
115 inp_line_start(void)
116 {
117 return inbuf.saved_inp_s != NULL ? inbuf.save_com_buf : inbuf.inp.buf;
118 }
119
120 const char *
121 inp_line_end(void)
122 {
123 return inbuf.inp.e;
124 }
125
126 char
127 inp_peek(void)
128 {
129 assert(inbuf.inp.s < inbuf.inp.e);
130 return *inbuf.inp.s;
131 }
132
133 char
134 inp_lookahead(size_t i)
135 {
136 assert(i < (size_t)(inbuf.inp.e - inbuf.inp.s));
137 return inbuf.inp.s[i];
138 }
139
140 void
141 inp_skip(void)
142 {
143 assert(inbuf.inp.s < inbuf.inp.e);
144 inbuf.inp.s++;
145 if (inbuf.inp.s >= inbuf.inp.e)
146 inp_read_line();
147 }
148
149 char
150 inp_next(void)
151 {
152 char ch = inp_peek();
153 inp_skip();
154 return ch;
155 }
156
157 #ifdef debug
/*
 * Print the named range [s, e) in visible form; ranges with a null endpoint
 * are silently skipped.
 */
static void
debug_inp_buf(const char *name, const char *s, const char *e)
{
	if (s == NULL || e == NULL)
		return;

	debug_printf(" %-12s ", name);
	debug_vis_range("\"", s, e, "\"\n");
}
166
167 void
168 debug_inp(const char *prefix)
169 {
170 assert(inp_line_start() <= inbuf.inp.s);
171 assert(inbuf.inp.s <= inbuf.inp.e);
172
173 debug_println("%s %s:", __func__, prefix);
174 if (inbuf.saved_inp_s == NULL)
175 debug_inp_buf("inp.buf", inbuf.inp.buf, inbuf.inp.s);
176 debug_inp_buf("inp", inbuf.inp.s, inbuf.inp.e); /* never null */
177 debug_inp_buf("save_com.buf", inbuf.save_com_buf, inbuf.save_com_s);
178 debug_inp_buf("save_com", inbuf.save_com_s, inbuf.save_com_e);
179 debug_inp_buf("saved_inp", inbuf.saved_inp_s, inbuf.saved_inp_e);
180 }
181 #endif
182
183 static void
184 inp_comment_check_size(size_t n)
185 {
186 if ((size_t)(inbuf.save_com_e - inbuf.save_com_buf) + n <=
187 array_length(inbuf.save_com_buf))
188 return;
189
190 diag(1, "Internal buffer overflow - "
191 "Move big comment from right after if, while, or whatever");
192 fflush(output);
193 exit(1);
194 }
195
196 void
197 inp_comment_init_preproc(void)
198 {
199 if (inbuf.save_com_e == NULL) { /* if this is the first comment, we
200 * must set up the buffer */
201 /*
202 * XXX: No space is reserved for a potential '{' here, unlike in
203 * inp_comment_init_comment.
204 */
205 inbuf.save_com_s = inbuf.save_com_buf;
206 inbuf.save_com_e = inbuf.save_com_s;
207 } else {
208 inp_comment_add_char('\n'); /* add newline between comments */
209 inp_comment_add_char(' ');
210 --line_no;
211 }
212 }
213
214 void
215 inp_comment_add_char(char ch)
216 {
217 inp_comment_check_size(1);
218 *inbuf.save_com_e++ = ch;
219 }
220
221 void
222 inp_comment_add_range(const char *s, const char *e)
223 {
224 size_t len = (size_t)(e - s);
225 inp_comment_check_size(len);
226 memcpy(inbuf.save_com_e, s, len);
227 inbuf.save_com_e += len;
228 }
229
230 bool
231 inp_comment_seen(void)
232 {
233 return inbuf.save_com_e != NULL;
234 }
235
236 /*
237 * Switch the input to come from save_com, replaying the copied tokens while
238 * looking for the next '{'.
239 */
240 void
241 inp_from_comment(void)
242 {
243 debug_inp("before inp_from_comment");
244 inbuf.saved_inp_s = inbuf.inp.s;
245 inbuf.saved_inp_e = inbuf.inp.e;
246
247 inbuf.inp.s = inbuf.save_com_s;
248 inbuf.inp.e = inbuf.save_com_e;
249 inbuf.save_com_s = NULL;
250 inbuf.save_com_e = NULL;
251 debug_inp("after inp_from_comment");
252 }
253
254 /*
255 * After having read from save_com, continue with the rest of the input line
256 * before reading the next line from the input file.
257 */
258 static bool
259 inp_from_file(void)
260 {
261 if (inbuf.saved_inp_s == NULL)
262 return false;
263
264 inbuf.inp.s = inbuf.saved_inp_s;
265 inbuf.inp.e = inbuf.saved_inp_e;
266 inbuf.saved_inp_s = inbuf.saved_inp_e = NULL;
267 debug_println("switched inp.s back to saved_inp_s");
268 return inbuf.inp.s < inbuf.inp.e;
269 }
270
271 static void
272 inp_add(char ch)
273 {
274 if (inbuf.inp.e >= inbuf.inp.l) {
275 size_t new_size = (size_t)(inbuf.inp.l - inbuf.inp.buf) * 2 + 10;
276 size_t offset = (size_t)(inbuf.inp.e - inbuf.inp.buf);
277 inbuf.inp.buf = xrealloc(inbuf.inp.buf, new_size);
278 inbuf.inp.s = inbuf.inp.buf;
279 inbuf.inp.e = inbuf.inp.buf + offset;
280 inbuf.inp.l = inbuf.inp.buf + new_size - 2;
281 }
282 *inbuf.inp.e++ = ch;
283 }
284
285 static void
286 inp_read_next_line(FILE *f)
287 {
288 inbuf.inp.s = inbuf.inp.buf;
289 inbuf.inp.e = inbuf.inp.buf;
290
291 for (;;) {
292 int ch = getc(f);
293 if (ch == EOF) {
294 if (!inhibit_formatting) {
295 inp_add(' ');
296 inp_add('\n');
297 }
298 had_eof = true;
299 break;
300 }
301
302 if (ch != '\0')
303 inp_add((char)ch);
304 if (ch == '\n')
305 break;
306 }
307 }
308
309 static void
310 output_char(char ch)
311 {
312 fputc(ch, output);
313 debug_vis_range("output_char '", &ch, &ch + 1, "'\n");
314 }
315
316 static void
317 output_range(const char *s, const char *e)
318 {
319 fwrite(s, 1, (size_t)(e - s), output);
320 debug_vis_range("output_range \"", s, e, "\"\n");
321 }
322
323 static int
324 output_indent(int old_ind, int new_ind)
325 {
326 int ind = old_ind;
327
328 if (opt.use_tabs) {
329 int tabsize = opt.tabsize;
330 int n = new_ind / tabsize - ind / tabsize;
331 if (n > 0)
332 ind -= ind % tabsize;
333 for (int i = 0; i < n; i++) {
334 fputc('\t', output);
335 ind += tabsize;
336 }
337 }
338
339 for (; ind < new_ind; ind++)
340 fputc(' ', output);
341
342 debug_println("output_indent %d", ind);
343 return ind;
344 }
345
346 static int
347 output_line_label(void)
348 {
349 int ind;
350
351 while (lab.e > lab.s && ch_isblank(lab.e[-1]))
352 lab.e--;
353 *lab.e = '\0';
354
355 ind = output_indent(0, compute_label_indent());
356 output_range(lab.s, lab.e);
357 ind = ind_add(ind, lab.s, lab.e);
358
359 ps.is_case_label = false;
360 return ind;
361 }
362
363 static int
364 output_line_code(int ind)
365 {
366
367 int target_ind = compute_code_indent();
368 for (int i = 0; i < ps.nparen; i++) {
369 if (ps.paren[i].indent >= 0) {
370 int paren_ind = ps.paren[i].indent;
371 ps.paren[i].indent = (short)(-1 - (paren_ind + target_ind));
372 debug_println(
373 "setting paren_indents[%d] from %d to %d for column %d",
374 i, paren_ind, ps.paren[i].indent, target_ind + 1);
375 }
376 }
377
378 ind = output_indent(ind, target_ind);
379 output_range(code.s, code.e);
380 return ind_add(ind, code.s, code.e);
381 }
382
383 static void
384 output_line_comment(int ind)
385 {
386 int target_ind = ps.com_ind;
387 const char *p = com.s;
388
389 target_ind += ps.comment_delta;
390
391 /* consider original indentation in case this is a box comment */
392 for (; *p == '\t'; p++)
393 target_ind += opt.tabsize;
394
395 for (; target_ind < 0; p++) {
396 if (*p == ' ')
397 target_ind++;
398 else if (*p == '\t')
399 target_ind = next_tab(target_ind);
400 else {
401 target_ind = 0;
402 break;
403 }
404 }
405
406 /* if comment can't fit on this line, put it on the next line */
407 if (ind > target_ind) {
408 output_char('\n');
409 ind = 0;
410 ps.stats.lines++;
411 }
412
413 while (com.e > p && ch_isspace(com.e[-1]))
414 com.e--;
415
416 (void)output_indent(ind, target_ind);
417 output_range(p, com.e);
418
419 ps.comment_delta = ps.n_comment_delta;
420 ps.stats.comment_lines++;
421 }
422
423 /*
424 * Write a line of formatted source to the output file. The line consists of
425 * the label, the code and the comment.
426 */
427 static void
428 output_complete_line(char line_terminator)
429 {
430 ps.is_function_definition = false;
431
432 if (!inhibit_formatting) {
433 if (ps.ind_level == 0)
434 ps.in_stmt_cont = false; /* this is a class A kludge */
435
436 if (lab.e != lab.s || code.e != code.s)
437 ps.stats.code_lines++;
438
439 int ind = 0;
440 if (lab.e != lab.s)
441 ind = output_line_label();
442 if (code.e != code.s)
443 ind = output_line_code(ind);
444 if (com.e != com.s)
445 output_line_comment(ind);
446
447 output_char(line_terminator);
448 ps.stats.lines++;
449
450 /* TODO: rename to blank_line_after_decl */
451 if (ps.just_saw_decl == 1 && opt.blanklines_after_decl)
452 ps.just_saw_decl = 0;
453 }
454
455 ps.decl_on_line = ps.in_decl; /* for proper comment indentation */
456 ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl;
457 ps.decl_indent_done = false;
458
459 *(lab.e = lab.s) = '\0'; /* reset buffers */
460 *(code.e = code.s) = '\0';
461 *(com.e = com.s = com.buf + 1) = '\0';
462
463 ps.ind_level = ps.ind_level_follow;
464 ps.line_start_nparen = ps.nparen;
465
466 if (ps.nparen > 0) {
467 /* TODO: explain what negative indentation means */
468 paren_indent = -1 - ps.paren[ps.nparen - 1].indent;
469 debug_println("paren_indent is now %d", paren_indent);
470 }
471 }
472
/* Write the current line to the output, terminated by a newline. */
void
output_line(void)
{
	const char terminator = '\n';

	output_complete_line(terminator);
}
478
/* Write the current line to the output, terminated by a form feed. */
void
output_line_ff(void)
{
	const char terminator = '\f';

	output_complete_line(terminator);
}
484
485 static int
486 compute_code_indent_lineup(int base_ind)
487 {
488 int ti = paren_indent;
489 int overflow = ind_add(ti, code.s, code.e) - opt.max_line_length;
490 if (overflow < 0)
491 return ti;
492
493 if (ind_add(base_ind, code.s, code.e) < opt.max_line_length) {
494 ti -= overflow + 2;
495 if (ti > base_ind)
496 return ti;
497 return base_ind;
498 }
499
500 return ti;
501 }
502
503 int
504 compute_code_indent(void)
505 {
506 int base_ind = ps.ind_level * opt.indent_size;
507
508 if (ps.line_start_nparen == 0) {
509 if (ps.in_stmt_cont && ps.in_enum != in_enum_brace)
510 return base_ind + opt.continuation_indent;
511 return base_ind;
512 }
513
514 if (opt.lineup_to_parens) {
515 if (opt.lineup_to_parens_always)
516 return paren_indent;
517 return compute_code_indent_lineup(base_ind);
518 }
519
520 if (2 * opt.continuation_indent == opt.indent_size)
521 return base_ind + opt.continuation_indent;
522 else
523 return base_ind + opt.continuation_indent * ps.line_start_nparen;
524 }
525
526 int
527 compute_label_indent(void)
528 {
529 if (ps.is_case_label)
530 return (int)(case_ind * (float)opt.indent_size);
531 if (lab.s[0] == '#')
532 return 0;
533 return opt.indent_size * (ps.ind_level - 2);
534 }
535
/* Advance *pp beyond any blank characters. */
static void
skip_blank(const char **pp)
{
	const char *p = *pp;

	while (ch_isblank(*p))
		p++;
	*pp = p;
}
542
/*
 * If the text at *pp starts with the string s, advance *pp beyond it and
 * return true.  Otherwise leave *pp unchanged and return false.
 */
static bool
skip_string(const char **pp, const char *s)
{
	const char *p = *pp;

	for (; *s != '\0'; s++, p++) {
		/* also stops at the end of the text, as '\0' differs from *s */
		if (*p != *s)
			return false;
	}

	*pp = p;
	return true;
}
553
554 static void
555 parse_indent_comment(void)
556 {
557 bool on;
558
559 const char *p = inbuf.inp.buf;
560
561 skip_blank(&p);
562 if (!skip_string(&p, "/*"))
563 return;
564 skip_blank(&p);
565 if (!skip_string(&p, "INDENT"))
566 return;
567
568 skip_blank(&p);
569 if (*p == '*' || skip_string(&p, "ON"))
570 on = true;
571 else if (skip_string(&p, "OFF"))
572 on = false;
573 else
574 return;
575
576 skip_blank(&p);
577 if (!skip_string(&p, "*/\n"))
578 return;
579
580 if (com.s != com.e || lab.s != lab.e || code.s != code.e)
581 output_line();
582
583 inhibit_formatting = !on;
584 }
585
586 void
587 inp_read_line(void)
588 {
589 if (inp_from_file())
590 return;
591
592 inp_read_next_line(input);
593
594 parse_indent_comment();
595
596 if (inhibit_formatting)
597 output_range(inbuf.inp.s, inbuf.inp.e);
598 }
599