indent.c revision 1.1.1.2 1 /*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
6 * Copyright (c) 1980, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #if 0
39 #ifndef lint
40 static char sccsid[] = "@(#)indent.c 5.17 (Berkeley) 6/7/93";
41 #endif /* not lint */
42 #endif
43
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $");
46
47 #include <sys/param.h>
48 #include <sys/capsicum.h>
49 #include <capsicum_helpers.h>
50 #include <err.h>
51 #include <errno.h>
52 #include <fcntl.h>
53 #include <unistd.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <ctype.h>
58 #include "indent_globs.h"
59 #include "indent_codes.h"
60 #include "indent.h"
61
/*
 * Forward declarations of helpers private to this file.
 *
 * bakcopy() is called from main() when an input file was named but no
 * output file was, immediately after out_name has been set to in_name.
 * Presumably it saves a backup of the input before it is rewritten; its
 * definition is outside the part of the file reviewed here, confirm there.
 *
 * indent_declaration() is called from main() with a declaration
 * indentation derived from dec_ind as the first argument and tabs_to_var
 * as the second; main() sets ps.dumped_decl_indent right after each call.
 */
62 static void bakcopy(void);
63 static void indent_declaration(int, int);
64
65 const char *in_name = "Standard Input"; /* name of the input file; this default
66 * stays unless main() is given a file argument */
67 const char *out_name = "Standard Output"; /* name of the output file; main() sets
68 * it to the second file argument, or to in_name when only an input file is given */
69 const char *simple_backup_suffix = ".BAK"; /* suffix to use for backup files;
70 * main() overrides it from $SIMPLE_BACKUP_SUFFIX when that is set */
71 char bakfile[MAXPATHLEN] = ""; /* starts empty; presumably the backup file path filled in by bakcopy() - TODO confirm */
72
73 int
74 main(int argc, char **argv)
75 {
76 cap_rights_t rights;
77
78 int dec_ind; /* current indentation for declarations */
79 int di_stack[20]; /* a stack of structure indentation levels */
80 int force_nl; /* when true, code must be broken */
81 int hd_type = 0; /* used to store type of stmt for if (...),
82 * for (...), etc */
83 int i; /* local loop counter */
84 int scase; /* set to true when we see a case, so we will
85 * know what to do with the following colon */
86 int sp_sw; /* when true, we are in the expression of
87 * if(...), while(...), etc. */
88 int squest; /* when this is positive, we have seen a ?
89 * without the matching : in a <c>?<s>:<s>
90 * construct */
91 const char *t_ptr; /* used for copying tokens */
92 int tabs_to_var; /* true if using tabs to indent to var name */
93 int type_code; /* the type of token, returned by lexi */
94
95 int last_else = 0; /* true iff last keyword was an else */
96 const char *profile_name = NULL;
97 const char *envval = NULL;
98 struct parser_state transient_state; /* a copy for lookup */
99
100 /*-----------------------------------------------*\
101 | INITIALIZATION |
102 \*-----------------------------------------------*/
103
104 found_err = 0;
105
106 ps.p_stack[0] = stmt; /* this is the parser's stack */
107 ps.last_nl = true; /* this is true if the last thing scanned was
108 * a newline */
109 ps.last_token = semicolon;
110 combuf = (char *) malloc(bufsize);
111 if (combuf == NULL)
112 err(1, NULL);
113 labbuf = (char *) malloc(bufsize);
114 if (labbuf == NULL)
115 err(1, NULL);
116 codebuf = (char *) malloc(bufsize);
117 if (codebuf == NULL)
118 err(1, NULL);
119 tokenbuf = (char *) malloc(bufsize);
120 if (tokenbuf == NULL)
121 err(1, NULL);
122 alloc_typenames();
123 init_constant_tt();
124 l_com = combuf + bufsize - 5;
125 l_lab = labbuf + bufsize - 5;
126 l_code = codebuf + bufsize - 5;
127 l_token = tokenbuf + bufsize - 5;
128 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and
129 * comment buffers */
130 combuf[1] = codebuf[1] = labbuf[1] = '\0';
131 opt.else_if = 1; /* Default else-if special processing to on */
132 s_lab = e_lab = labbuf + 1;
133 s_code = e_code = codebuf + 1;
134 s_com = e_com = combuf + 1;
135 s_token = e_token = tokenbuf + 1;
136
137 in_buffer = (char *) malloc(10);
138 if (in_buffer == NULL)
139 err(1, NULL);
140 in_buffer_limit = in_buffer + 8;
141 buf_ptr = buf_end = in_buffer;
142 line_no = 1;
143 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
144 sp_sw = force_nl = false;
145 ps.in_or_st = false;
146 ps.bl_line = true;
147 dec_ind = 0;
148 di_stack[ps.dec_nest = 0] = 0;
149 ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
150
151 scase = ps.pcase = false;
152 squest = 0;
153 sc_end = NULL;
154 bp_save = NULL;
155 be_save = NULL;
156
157 output = NULL;
158 tabs_to_var = 0;
159
160 envval = getenv("SIMPLE_BACKUP_SUFFIX");
161 if (envval)
162 simple_backup_suffix = envval;
163
164 /*--------------------------------------------------*\
165 | COMMAND LINE SCAN |
166 \*--------------------------------------------------*/
167
168 #ifdef undef
169 max_col = 78; /* -l78 */
170 lineup_to_parens = 1; /* -lp */
171 lineup_to_parens_always = 0; /* -nlpl */
172 ps.ljust_decl = 0; /* -ndj */
173 ps.com_ind = 33; /* -c33 */
174 star_comment_cont = 1; /* -sc */
175 ps.ind_size = 8; /* -i8 */
176 verbose = 0;
177 ps.decl_indent = 16; /* -di16 */
178 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value
179 * by an arg, we will set this equal to
180 * ps.decl_ind */
181 ps.indent_parameters = 1; /* -ip */
182 ps.decl_com_ind = 0; /* if this is not set to some positive value
183 * by an arg, we will set this equal to
184 * ps.com_ind */
185 btype_2 = 1; /* -br */
186 cuddle_else = 1; /* -ce */
187 ps.unindent_displace = 0; /* -d0 */
188 ps.case_indent = 0; /* -cli0 */
189 format_block_comments = 1; /* -fcb */
190 format_col1_comments = 1; /* -fc1 */
191 procnames_start_line = 1; /* -psl */
192 proc_calls_space = 0; /* -npcs */
193 comment_delimiter_on_blankline = 1; /* -cdb */
194 ps.leave_comma = 1; /* -nbc */
195 #endif
196
197 for (i = 1; i < argc; ++i)
198 if (strcmp(argv[i], "-npro") == 0)
199 break;
200 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0')
201 profile_name = argv[i]; /* non-empty -P (set profile) */
202 set_defaults();
203 if (i >= argc)
204 set_profile(profile_name);
205
206 for (i = 1; i < argc; ++i) {
207
208 /*
209 * look thru args (if any) for changes to defaults
210 */
211 if (argv[i][0] != '-') {/* no flag on parameter */
212 if (input == NULL) { /* we must have the input file */
213 in_name = argv[i]; /* remember name of input file */
214 input = fopen(in_name, "r");
215 if (input == NULL) /* check for open error */
216 err(1, "%s", in_name);
217 continue;
218 }
219 else if (output == NULL) { /* we have the output file */
220 out_name = argv[i]; /* remember name of output file */
221 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite
222 * the file */
223 errx(1, "input and output files must be different");
224 }
225 output = fopen(out_name, "w");
226 if (output == NULL) /* check for create error */
227 err(1, "%s", out_name);
228 continue;
229 }
230 errx(1, "unknown parameter: %s", argv[i]);
231 }
232 else
233 set_option(argv[i]);
234 } /* end of for */
235 if (input == NULL)
236 input = stdin;
237 if (output == NULL) {
238 if (input == stdin)
239 output = stdout;
240 else {
241 out_name = in_name;
242 bakcopy();
243 }
244 }
245
246 /* Restrict input/output descriptors and enter Capsicum sandbox. */
247 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE);
248 if (caph_rights_limit(fileno(output), &rights) < 0)
249 err(EXIT_FAILURE, "unable to limit rights for %s", out_name);
250 cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
251 if (caph_rights_limit(fileno(input), &rights) < 0)
252 err(EXIT_FAILURE, "unable to limit rights for %s", in_name);
253 if (caph_enter() < 0)
254 err(EXIT_FAILURE, "unable to enter capability mode");
255
256 if (opt.com_ind <= 1)
257 opt.com_ind = 2; /* don't put normal comments before column 2 */
258 if (opt.block_comment_max_col <= 0)
259 opt.block_comment_max_col = opt.max_col;
260 if (opt.local_decl_indent < 0) /* if not specified by user, set this */
261 opt.local_decl_indent = opt.decl_indent;
262 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */
263 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind;
264 if (opt.continuation_indent == 0)
265 opt.continuation_indent = opt.ind_size;
266 fill_buffer(); /* get first batch of stuff into input buffer */
267
268 parse(semicolon);
269 {
270 char *p = buf_ptr;
271 int col = 1;
272
273 while (1) {
274 if (*p == ' ')
275 col++;
276 else if (*p == '\t')
277 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1;
278 else
279 break;
280 p++;
281 }
282 if (col > opt.ind_size)
283 ps.ind_level = ps.i_l_follow = col / opt.ind_size;
284 }
285
286 /*
287 * START OF MAIN LOOP
288 */
289
290 while (1) { /* this is the main loop. it will go until we
291 * reach eof */
292 int comment_buffered = false;
293
294 type_code = lexi(&ps); /* lexi reads one token. The actual
295 * characters read are stored in "token". lexi
296 * returns a code indicating the type of token */
297
298 /*
299 * The following code moves newlines and comments following an if (),
300 * while (), else, etc. up to the start of the following stmt to
301 * a buffer. This allows proper handling of both kinds of brace
302 * placement (-br, -bl) and cuddling "else" (-ce).
303 */
304
305 while (ps.search_brace) {
306 switch (type_code) {
307 case newline:
308 if (sc_end == NULL) {
309 save_com = sc_buf;
310 save_com[0] = save_com[1] = ' ';
311 sc_end = &save_com[2];
312 }
313 *sc_end++ = '\n';
314 /*
315 * We may have inherited a force_nl == true from the previous
316 * token (like a semicolon). But once we know that a newline
317 * has been scanned in this loop, force_nl should be false.
318 *
319 * However, the force_nl == true must be preserved if newline
320 * is never scanned in this loop, so this assignment cannot be
321 * done earlier.
322 */
323 force_nl = false;
324 case form_feed:
325 break;
326 case comment:
327 if (sc_end == NULL) {
328 /*
329 * Copy everything from the start of the line, because
330 * pr_comment() will use that to calculate original
331 * indentation of a boxed comment.
332 */
333 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4);
334 save_com = sc_buf + (buf_ptr - in_buffer - 4);
335 save_com[0] = save_com[1] = ' ';
336 sc_end = &save_com[2];
337 }
338 comment_buffered = true;
339 *sc_end++ = '/'; /* copy in start of comment */
340 *sc_end++ = '*';
341 for (;;) { /* loop until we get to the end of the comment */
342 *sc_end = *buf_ptr++;
343 if (buf_ptr >= buf_end)
344 fill_buffer();
345 if (*sc_end++ == '*' && *buf_ptr == '/')
346 break; /* we are at end of comment */
347 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer
348 * overflow */
349 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever");
350 fflush(output);
351 exit(1);
352 }
353 }
354 *sc_end++ = '/'; /* add ending slash */
355 if (++buf_ptr >= buf_end) /* get past / in buffer */
356 fill_buffer();
357 break;
358 case lbrace:
359 /*
360 * Put KNF-style lbraces before the buffered up tokens and
361 * jump out of this loop in order to avoid copying the token
362 * again under the default case of the switch below.
363 */
364 if (sc_end != NULL && opt.btype_2) {
365 save_com[0] = '{';
366 /*
367 * Originally the lbrace may have been alone on its own
368 * line, but it will be moved into "the else's line", so
369 * if there was a newline resulting from the "{" before,
370 * it must be scanned now and ignored.
371 */
372 while (isspace((unsigned char)*buf_ptr)) {
373 if (++buf_ptr >= buf_end)
374 fill_buffer();
375 if (*buf_ptr == '\n')
376 break;
377 }
378 goto sw_buffer;
379 }
380 /* FALLTHROUGH */
381 default: /* it is the start of a normal statement */
382 {
383 int remove_newlines;
384
385 remove_newlines =
386 /* "} else" */
387 (type_code == sp_nparen && *token == 'e' &&
388 e_code != s_code && e_code[-1] == '}')
389 /* "else if" */
390 || (type_code == sp_paren && *token == 'i' &&
391 last_else && opt.else_if);
392 if (remove_newlines)
393 force_nl = false;
394 if (sc_end == NULL) { /* ignore buffering if
395 * comment wasn't saved up */
396 ps.search_brace = false;
397 goto check_type;
398 }
399 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) {
400 sc_end--;
401 }
402 if (opt.swallow_optional_blanklines ||
403 (!comment_buffered && remove_newlines)) {
404 force_nl = !remove_newlines;
405 while (sc_end > save_com && sc_end[-1] == '\n') {
406 sc_end--;
407 }
408 }
409 if (force_nl) { /* if we should insert a nl here, put
410 * it into the buffer */
411 force_nl = false;
412 --line_no; /* this will be re-increased when the
413 * newline is read from the buffer */
414 *sc_end++ = '\n';
415 *sc_end++ = ' ';
416 if (opt.verbose) /* print error msg if the line was
417 * not already broken */
418 diag2(0, "Line broken");
419 }
420 for (t_ptr = token; *t_ptr; ++t_ptr)
421 *sc_end++ = *t_ptr;
422
423 sw_buffer:
424 ps.search_brace = false; /* stop looking for start of
425 * stmt */
426 bp_save = buf_ptr; /* save current input buffer */
427 be_save = buf_end;
428 buf_ptr = save_com; /* fix so that subsequent calls to
429 * lexi will take tokens out of
430 * save_com */
431 *sc_end++ = ' ';/* add trailing blank, just in case */
432 buf_end = sc_end;
433 sc_end = NULL;
434 break;
435 }
436 } /* end of switch */
437 /*
438 * We must make this check, just in case there was an unexpected
439 * EOF.
440 */
441 if (type_code != 0) {
442 /*
443 * The only intended purpose of calling lexi() below is to
444 * categorize the next token in order to decide whether to
445 * continue buffering forthcoming tokens. Once the buffering
446 * is over, lexi() will be called again elsewhere on all of
447 * the tokens - this time for normal processing.
448 *
449 * Calling it for this purpose is a bug, because lexi() also
450 * changes the parser state and discards leading whitespace,
451 * which is needed mostly for comment-related considerations.
452 *
453 * Work around the former problem by giving lexi() a copy of
454 * the current parser state and discard it if the call turned
455 * out to be just a look ahead.
456 *
457 * Work around the latter problem by copying all whitespace
458 * characters into the buffer so that the later lexi() call
459 * will read them.
460 */
461 if (sc_end != NULL) {
462 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
463 *sc_end++ = *buf_ptr++;
464 if (sc_end >= &save_com[sc_size]) {
465 errx(1, "input too long");
466 }
467 }
468 if (buf_ptr >= buf_end) {
469 fill_buffer();
470 }
471 }
472 transient_state = ps;
473 type_code = lexi(&transient_state); /* read another token */
474 if (type_code != newline && type_code != form_feed &&
475 type_code != comment && !transient_state.search_brace) {
476 ps = transient_state;
477 }
478 }
479 } /* end of while (search_brace) */
480 last_else = 0;
481 check_type:
482 if (type_code == 0) { /* we got eof */
483 if (s_lab != e_lab || s_code != e_code
484 || s_com != e_com) /* must dump end of line */
485 dump_line();
486 if (ps.tos > 1) /* check for balanced braces */
487 diag2(1, "Stuff missing from end of file");
488
489 if (opt.verbose) {
490 printf("There were %d output lines and %d comments\n",
491 ps.out_lines, ps.out_coms);
492 printf("(Lines with comments)/(Lines with code): %6.3f\n",
493 (1.0 * ps.com_lines) / code_lines);
494 }
495 fflush(output);
496 exit(found_err);
497 }
498 if (
499 (type_code != comment) &&
500 (type_code != newline) &&
501 (type_code != preesc) &&
502 (type_code != form_feed)) {
503 if (force_nl &&
504 (type_code != semicolon) &&
505 (type_code != lbrace || !opt.btype_2)) {
506 /* we should force a broken line here */
507 if (opt.verbose)
508 diag2(0, "Line broken");
509 dump_line();
510 ps.want_blank = false; /* dont insert blank at line start */
511 force_nl = false;
512 }
513 ps.in_stmt = true; /* turn on flag which causes an extra level of
514 * indentation. this is turned off by a ; or
515 * '}' */
516 if (s_com != e_com) { /* the turkey has embedded a comment
517 * in a line. fix it */
518 int len = e_com - s_com;
519
520 CHECK_SIZE_CODE(len + 3);
521 *e_code++ = ' ';
522 memcpy(e_code, s_com, len);
523 e_code += len;
524 *e_code++ = ' ';
525 *e_code = '\0'; /* null terminate code sect */
526 ps.want_blank = false;
527 e_com = s_com;
528 }
529 }
530 else if (type_code != comment) /* preserve force_nl thru a comment */
531 force_nl = false; /* cancel forced newline after newline, form
532 * feed, etc */
533
534
535
536 /*-----------------------------------------------------*\
537 | do switch on type of token scanned |
538 \*-----------------------------------------------------*/
539 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code
540 * before the next CHECK_SIZE_CODE or
541 * dump_line() is 2. After that there's the
542 * final increment for the null character. */
543 switch (type_code) { /* now, decide what to do with the token */
544
545 case form_feed: /* found a form feed in line */
546 ps.use_ff = true; /* a form feed is treated much like a newline */
547 dump_line();
548 ps.want_blank = false;
549 break;
550
551 case newline:
552 if (ps.last_token != comma || ps.p_l_follow > 0
553 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
554 dump_line();
555 ps.want_blank = false;
556 }
557 ++line_no; /* keep track of input line number */
558 break;
559
560 case lparen: /* got a '(' or '[' */
561 /* count parens to make Healy happy */
562 if (++ps.p_l_follow == nitems(ps.paren_indents)) {
563 diag3(0, "Reached internal limit of %d unclosed parens",
564 nitems(ps.paren_indents));
565 ps.p_l_follow--;
566 }
567 if (*token == '[')
568 /* not a function pointer declaration or a function call */;
569 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent &&
570 ps.procname[0] == '\0' && ps.paren_level == 0) {
571 /* function pointer declarations */
572 indent_declaration(dec_ind, tabs_to_var);
573 ps.dumped_decl_indent = true;
574 }
575 else if (ps.want_blank &&
576 ((ps.last_token != ident && ps.last_token != funcname) ||
577 opt.proc_calls_space ||
578 /* offsetof (1) is never allowed a space; sizeof (2) gets
579 * one iff -bs; all other keywords (>2) always get a space
580 * before lparen */
581 ps.keyword + opt.Bill_Shannon > 2))
582 *e_code++ = ' ';
583 ps.want_blank = false;
584 *e_code++ = token[0];
585 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1;
586 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent
587 && ps.paren_indents[0] < 2 * opt.ind_size)
588 ps.paren_indents[0] = 2 * opt.ind_size;
589 if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
590 /*
591 * this is a kluge to make sure that declarations will be
592 * aligned right if proc decl has an explicit type on it, i.e.
593 * "int a(x) {..."
594 */
595 parse(semicolon); /* I said this was a kluge... */
596 ps.in_or_st = false; /* turn off flag for structure decl or
597 * initialization */
598 }
599 /* parenthesized type following sizeof or offsetof is not a cast */
600 if (ps.keyword == 1 || ps.keyword == 2)
601 ps.not_cast_mask |= 1 << ps.p_l_follow;
602 break;
603
604 case rparen: /* got a ')' or ']' */
605 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) {
606 ps.last_u_d = true;
607 ps.cast_mask &= (1 << ps.p_l_follow) - 1;
608 ps.want_blank = opt.space_after_cast;
609 } else
610 ps.want_blank = true;
611 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1;
612 if (--ps.p_l_follow < 0) {
613 ps.p_l_follow = 0;
614 diag3(0, "Extra %c", *token);
615 }
616 if (e_code == s_code) /* if the paren starts the line */
617 ps.paren_level = ps.p_l_follow; /* then indent it */
618
619 *e_code++ = token[0];
620
621 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
622 * (...), or some such */
623 sp_sw = false;
624 force_nl = true;/* must force newline after if */
625 ps.last_u_d = true; /* inform lexi that a following
626 * operator is unary */
627 ps.in_stmt = false; /* dont use stmt continuation
628 * indentation */
629
630 parse(hd_type); /* let parser worry about if, or whatever */
631 }
632 ps.search_brace = opt.btype_2; /* this should ensure that
633 * constructs such as main(){...}
634 * and int[]{...} have their braces
635 * put in the right place */
636 break;
637
638 case unary_op: /* this could be any unary operation */
639 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init &&
640 ps.procname[0] == '\0' && ps.paren_level == 0) {
641 /* pointer declarations */
642
643 /*
644 * if this is a unary op in a declaration, we should indent
645 * this token
646 */
647 for (i = 0; token[i]; ++i)
648 /* find length of token */;
649 indent_declaration(dec_ind - i, tabs_to_var);
650 ps.dumped_decl_indent = true;
651 }
652 else if (ps.want_blank)
653 *e_code++ = ' ';
654
655 {
656 int len = e_token - s_token;
657
658 CHECK_SIZE_CODE(len);
659 memcpy(e_code, token, len);
660 e_code += len;
661 }
662 ps.want_blank = false;
663 break;
664
665 case binary_op: /* any binary operation */
666 {
667 int len = e_token - s_token;
668
669 CHECK_SIZE_CODE(len + 1);
670 if (ps.want_blank)
671 *e_code++ = ' ';
672 memcpy(e_code, token, len);
673 e_code += len;
674 }
675 ps.want_blank = true;
676 break;
677
678 case postop: /* got a trailing ++ or -- */
679 *e_code++ = token[0];
680 *e_code++ = token[1];
681 ps.want_blank = true;
682 break;
683
684 case question: /* got a ? */
685 squest++; /* this will be used when a later colon
686 * appears so we can distinguish the
687 * <c>?<n>:<n> construct */
688 if (ps.want_blank)
689 *e_code++ = ' ';
690 *e_code++ = '?';
691 ps.want_blank = true;
692 break;
693
694 case casestmt: /* got word 'case' or 'default' */
695 scase = true; /* so we can process the later colon properly */
696 goto copy_id;
697
698 case colon: /* got a ':' */
699 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */
700 --squest;
701 if (ps.want_blank)
702 *e_code++ = ' ';
703 *e_code++ = ':';
704 ps.want_blank = true;
705 break;
706 }
707 if (ps.in_or_st) {
708 *e_code++ = ':';
709 ps.want_blank = false;
710 break;
711 }
712 ps.in_stmt = false; /* seeing a label does not imply we are in a
713 * stmt */
714 /*
715 * turn everything so far into a label
716 */
717 {
718 int len = e_code - s_code;
719
720 CHECK_SIZE_LAB(len + 3);
721 memcpy(e_lab, s_code, len);
722 e_lab += len;
723 *e_lab++ = ':';
724 *e_lab = '\0';
725 e_code = s_code;
726 }
727 force_nl = ps.pcase = scase; /* ps.pcase will be used by
728 * dump_line to decide how to
729 * indent the label. force_nl
730 * will force a case n: to be
731 * on a line by itself */
732 scase = false;
733 ps.want_blank = false;
734 break;
735
736 case semicolon: /* got a ';' */
737 if (ps.dec_nest == 0)
738 ps.in_or_st = false;/* we are not in an initialization or
739 * structure declaration */
740 scase = false; /* these will only need resetting in an error */
741 squest = 0;
742 if (ps.last_token == rparen)
743 ps.in_parameter_declaration = 0;
744 ps.cast_mask = 0;
745 ps.not_cast_mask = 0;
746 ps.block_init = 0;
747 ps.block_init_level = 0;
748 ps.just_saw_decl--;
749
750 if (ps.in_decl && s_code == e_code && !ps.block_init &&
751 !ps.dumped_decl_indent && ps.paren_level == 0) {
752 /* indent stray semicolons in declarations */
753 indent_declaration(dec_ind - 1, tabs_to_var);
754 ps.dumped_decl_indent = true;
755 }
756
757 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level
758 * structure declaration, we
759 * arent any more */
760
761 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
762
763 /*
764 * This should be true iff there were unbalanced parens in the
765 * stmt. It is a bit complicated, because the semicolon might
766 * be in a for stmt
767 */
768 diag2(1, "Unbalanced parens");
769 ps.p_l_follow = 0;
770 if (sp_sw) { /* this is a check for an if, while, etc. with
771 * unbalanced parens */
772 sp_sw = false;
773 parse(hd_type); /* dont lose the if, or whatever */
774 }
775 }
776 *e_code++ = ';';
777 ps.want_blank = true;
778 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the
779 * middle of a stmt */
780
781 if (!sp_sw) { /* if not if for (;;) */
782 parse(semicolon); /* let parser know about end of stmt */
783 force_nl = true;/* force newline after an end of stmt */
784 }
785 break;
786
787 case lbrace: /* got a '{' */
788 ps.in_stmt = false; /* dont indent the {} */
789 if (!ps.block_init)
790 force_nl = true;/* force other stuff on same line as '{' onto
791 * new line */
792 else if (ps.block_init_level <= 0)
793 ps.block_init_level = 1;
794 else
795 ps.block_init_level++;
796
797 if (s_code != e_code && !ps.block_init) {
798 if (!opt.btype_2) {
799 dump_line();
800 ps.want_blank = false;
801 }
802 else if (ps.in_parameter_declaration && !ps.in_or_st) {
803 ps.i_l_follow = 0;
804 if (opt.function_brace_split) { /* dump the line prior
805 * to the brace ... */
806 dump_line();
807 ps.want_blank = false;
808 } else /* add a space between the decl and brace */
809 ps.want_blank = true;
810 }
811 }
812 if (ps.in_parameter_declaration)
813 prefix_blankline_requested = 0;
814
815 if (ps.p_l_follow > 0) { /* check for preceding unbalanced
816 * parens */
817 diag2(1, "Unbalanced parens");
818 ps.p_l_follow = 0;
819 if (sp_sw) { /* check for unclosed if, for, etc. */
820 sp_sw = false;
821 parse(hd_type);
822 ps.ind_level = ps.i_l_follow;
823 }
824 }
825 if (s_code == e_code)
826 ps.ind_stmt = false; /* dont put extra indentation on line
827 * with '{' */
828 if (ps.in_decl && ps.in_or_st) { /* this is either a structure
829 * declaration or an init */
830 di_stack[ps.dec_nest] = dec_ind;
831 if (++ps.dec_nest == nitems(di_stack)) {
832 diag3(0, "Reached internal limit of %d struct levels",
833 nitems(di_stack));
834 ps.dec_nest--;
835 }
836 /* ? dec_ind = 0; */
837 }
838 else {
839 ps.decl_on_line = false; /* we can't be in the middle of
840 * a declaration, so don't do
841 * special indentation of
842 * comments */
843 if (opt.blanklines_after_declarations_at_proctop
844 && ps.in_parameter_declaration)
845 postfix_blankline_requested = 1;
846 ps.in_parameter_declaration = 0;
847 ps.in_decl = false;
848 }
849 dec_ind = 0;
850 parse(lbrace); /* let parser know about this */
851 if (ps.want_blank) /* put a blank before '{' if '{' is not at
852 * start of line */
853 *e_code++ = ' ';
854 ps.want_blank = false;
855 *e_code++ = '{';
856 ps.just_saw_decl = 0;
857 break;
858
859 case rbrace: /* got a '}' */
860 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
861 * omitted in
862 * declarations */
863 parse(semicolon);
864 if (ps.p_l_follow) {/* check for unclosed if, for, else. */
865 diag2(1, "Unbalanced parens");
866 ps.p_l_follow = 0;
867 sp_sw = false;
868 }
869 ps.just_saw_decl = 0;
870 ps.block_init_level--;
871 if (s_code != e_code && !ps.block_init) { /* '}' must be first on
872 * line */
873 if (opt.verbose)
874 diag2(0, "Line broken");
875 dump_line();
876 }
877 *e_code++ = '}';
878 ps.want_blank = true;
879 ps.in_stmt = ps.ind_stmt = false;
880 if (ps.dec_nest > 0) { /* we are in multi-level structure
881 * declaration */
882 dec_ind = di_stack[--ps.dec_nest];
883 if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
884 ps.just_saw_decl = 2;
885 ps.in_decl = true;
886 }
887 prefix_blankline_requested = 0;
888 parse(rbrace); /* let parser know about this */
889 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead
890 && ps.il[ps.tos] >= ps.ind_level;
891 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0)
892 postfix_blankline_requested = 1;
893 break;
894
895 case swstmt: /* got keyword "switch" */
896 sp_sw = true;
897 hd_type = swstmt; /* keep this for when we have seen the
898 * expression */
899 goto copy_id; /* go move the token into buffer */
900
901 case sp_paren: /* token is if, while, for */
902 sp_sw = true; /* the interesting stuff is done after the
903 * expression is scanned */
904 hd_type = (*token == 'i' ? ifstmt :
905 (*token == 'w' ? whilestmt : forstmt));
906
907 /*
908 * remember the type of header for later use by parser
909 */
910 goto copy_id; /* copy the token into line */
911
912 case sp_nparen: /* got else, do */
913 ps.in_stmt = false;
914 if (*token == 'e') {
915 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) {
916 if (opt.verbose)
917 diag2(0, "Line broken");
918 dump_line();/* make sure this starts a line */
919 ps.want_blank = false;
920 }
921 force_nl = true;/* also, following stuff must go onto new line */
922 last_else = 1;
923 parse(elselit);
924 }
925 else {
926 if (e_code != s_code) { /* make sure this starts a line */
927 if (opt.verbose)
928 diag2(0, "Line broken");
929 dump_line();
930 ps.want_blank = false;
931 }
932 force_nl = true;/* also, following stuff must go onto new line */
933 last_else = 0;
934 parse(dolit);
935 }
936 goto copy_id; /* move the token into line */
937
938 case type_def:
939 case storage:
940 prefix_blankline_requested = 0;
941 goto copy_id;
942
943 case structure:
944 if (ps.p_l_follow > 0)
945 goto copy_id;
946 case decl: /* we have a declaration type (int, etc.) */
947 parse(decl); /* let parser worry about indentation */
948 if (ps.last_token == rparen && ps.tos <= 1) {
949 if (s_code != e_code) {
950 dump_line();
951 ps.want_blank = 0;
952 }
953 }
954 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) {
955 ps.ind_level = ps.i_l_follow = 1;
956 ps.ind_stmt = 0;
957 }
958 ps.in_or_st = true; /* this might be a structure or initialization
959 * declaration */
960 ps.in_decl = ps.decl_on_line = ps.last_token != type_def;
961 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
962 ps.just_saw_decl = 2;
963 prefix_blankline_requested = 0;
964 for (i = 0; token[i++];); /* get length of token */
965
966 if (ps.ind_level == 0 || ps.dec_nest > 0) {
967 /* global variable or struct member in local variable */
968 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i;
969 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0);
970 } else {
971 /* local variable */
972 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i;
973 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0);
974 }
975 goto copy_id;
976
977 case funcname:
978 case ident: /* got an identifier or constant */
979 if (ps.in_decl) {
980 if (type_code == funcname) {
981 ps.in_decl = false;
982 if (opt.procnames_start_line && s_code != e_code) {
983 *e_code = '\0';
984 dump_line();
985 }
986 else if (ps.want_blank) {
987 *e_code++ = ' ';
988 }
989 ps.want_blank = false;
990 }
991 else if (!ps.block_init && !ps.dumped_decl_indent &&
992 ps.paren_level == 0) { /* if we are in a declaration, we
993 * must indent identifier */
994 indent_declaration(dec_ind, tabs_to_var);
995 ps.dumped_decl_indent = true;
996 ps.want_blank = false;
997 }
998 }
999 else if (sp_sw && ps.p_l_follow == 0) {
1000 sp_sw = false;
1001 force_nl = true;
1002 ps.last_u_d = true;
1003 ps.in_stmt = false;
1004 parse(hd_type);
1005 }
1006 copy_id:
1007 {
1008 int len = e_token - s_token;
1009
1010 CHECK_SIZE_CODE(len + 1);
1011 if (ps.want_blank)
1012 *e_code++ = ' ';
1013 memcpy(e_code, s_token, len);
1014 e_code += len;
1015 }
1016 if (type_code != funcname)
1017 ps.want_blank = true;
1018 break;
1019
1020 case strpfx:
1021 {
1022 int len = e_token - s_token;
1023
1024 CHECK_SIZE_CODE(len + 1);
1025 if (ps.want_blank)
1026 *e_code++ = ' ';
1027 memcpy(e_code, token, len);
1028 e_code += len;
1029 }
1030 ps.want_blank = false;
1031 break;
1032
1033 case period: /* treat a period kind of like a binary
1034 * operation */
1035 *e_code++ = '.'; /* move the period into line */
1036 ps.want_blank = false; /* dont put a blank after a period */
1037 break;
1038
1039 case comma:
1040 ps.want_blank = (s_code != e_code); /* only put blank after comma
1041 * if comma does not start the
1042 * line */
1043 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init &&
1044 !ps.dumped_decl_indent && ps.paren_level == 0) {
1045 /* indent leading commas and not the actual identifiers */
1046 indent_declaration(dec_ind - 1, tabs_to_var);
1047 ps.dumped_decl_indent = true;
1048 }
1049 *e_code++ = ',';
1050 if (ps.p_l_follow == 0) {
1051 if (ps.block_init_level <= 0)
1052 ps.block_init = 0;
1053 if (break_comma && (!opt.leave_comma ||
1054 count_spaces_until(compute_code_target(), s_code, e_code) >
1055 opt.max_col - opt.tabsize))
1056 force_nl = true;
1057 }
1058 break;
1059
1060 case preesc: /* got the character '#' */
1061 if ((s_com != e_com) ||
1062 (s_lab != e_lab) ||
1063 (s_code != e_code))
1064 dump_line();
1065 CHECK_SIZE_LAB(1);
1066 *e_lab++ = '#'; /* move whole line to 'label' buffer */
1067 {
1068 int in_comment = 0;
1069 int com_start = 0;
1070 char quote = 0;
1071 int com_end = 0;
1072
1073 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1074 buf_ptr++;
1075 if (buf_ptr >= buf_end)
1076 fill_buffer();
1077 }
1078 while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1079 CHECK_SIZE_LAB(2);
1080 *e_lab = *buf_ptr++;
1081 if (buf_ptr >= buf_end)
1082 fill_buffer();
1083 switch (*e_lab++) {
1084 case BACKSLASH:
1085 if (!in_comment) {
1086 *e_lab++ = *buf_ptr++;
1087 if (buf_ptr >= buf_end)
1088 fill_buffer();
1089 }
1090 break;
1091 case '/':
1092 if (*buf_ptr == '*' && !in_comment && !quote) {
1093 in_comment = 1;
1094 *e_lab++ = *buf_ptr++;
1095 com_start = e_lab - s_lab - 2;
1096 }
1097 break;
1098 case '"':
1099 if (quote == '"')
1100 quote = 0;
1101 break;
1102 case '\'':
1103 if (quote == '\'')
1104 quote = 0;
1105 break;
1106 case '*':
1107 if (*buf_ptr == '/' && in_comment) {
1108 in_comment = 0;
1109 *e_lab++ = *buf_ptr++;
1110 com_end = e_lab - s_lab;
1111 }
1112 break;
1113 }
1114 }
1115
1116 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1117 e_lab--;
1118 if (e_lab - s_lab == com_end && bp_save == NULL) {
1119 /* comment on preprocessor line */
1120 if (sc_end == NULL) { /* if this is the first comment,
1121 * we must set up the buffer */
1122 save_com = sc_buf;
1123 sc_end = &save_com[0];
1124 }
1125 else {
1126 *sc_end++ = '\n'; /* add newline between
1127 * comments */
1128 *sc_end++ = ' ';
1129 --line_no;
1130 }
1131 if (sc_end - save_com + com_end - com_start > sc_size)
1132 errx(1, "input too long");
1133 memmove(sc_end, s_lab + com_start, com_end - com_start);
1134 sc_end += com_end - com_start;
1135 e_lab = s_lab + com_start;
1136 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1137 e_lab--;
1138 bp_save = buf_ptr; /* save current input buffer */
1139 be_save = buf_end;
1140 buf_ptr = save_com; /* fix so that subsequent calls to
1141 * lexi will take tokens out of
1142 * save_com */
1143 *sc_end++ = ' '; /* add trailing blank, just in case */
1144 buf_end = sc_end;
1145 sc_end = NULL;
1146 }
1147 CHECK_SIZE_LAB(1);
1148 *e_lab = '\0'; /* null terminate line */
1149 ps.pcase = false;
1150 }
1151
1152 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */
1153 if ((size_t)ifdef_level < nitems(state_stack)) {
1154 match_state[ifdef_level].tos = -1;
1155 state_stack[ifdef_level++] = ps;
1156 }
1157 else
1158 diag2(1, "#if stack overflow");
1159 }
1160 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */
1161 if (ifdef_level <= 0)
1162 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else");
1163 else {
1164 match_state[ifdef_level - 1] = ps;
1165 ps = state_stack[ifdef_level - 1];
1166 }
1167 }
1168 else if (strncmp(s_lab, "#endif", 6) == 0) {
1169 if (ifdef_level <= 0)
1170 diag2(1, "Unmatched #endif");
1171 else
1172 ifdef_level--;
1173 } else {
1174 struct directives {
1175 int size;
1176 const char *string;
1177 }
1178 recognized[] = {
1179 {7, "include"},
1180 {6, "define"},
1181 {5, "undef"},
1182 {4, "line"},
1183 {5, "error"},
1184 {6, "pragma"}
1185 };
1186 int d = nitems(recognized);
1187 while (--d >= 0)
1188 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0)
1189 break;
1190 if (d < 0) {
1191 diag2(1, "Unrecognized cpp directive");
1192 break;
1193 }
1194 }
1195 if (opt.blanklines_around_conditional_compilation) {
1196 postfix_blankline_requested++;
1197 n_real_blanklines = 0;
1198 }
1199 else {
1200 postfix_blankline_requested = 0;
1201 prefix_blankline_requested = 0;
1202 }
1203 break; /* subsequent processing of the newline
1204 * character will cause the line to be printed */
1205
1206 case comment: /* we have gotten a / followed by * this is a biggie */
1207 pr_comment();
1208 break;
1209 } /* end of big switch stmt */
1210
1211 *e_code = '\0'; /* make sure code section is null terminated */
1212 if (type_code != comment && type_code != newline && type_code != preesc)
1213 ps.last_token = type_code;
1214 } /* end of main while (1) loop */
1215 }
1216
1217 /*
1218 * copy input file to backup file if in_name is /blah/blah/blah/file, then
1219 * backup file will be ".Bfile" then make the backup file the input and
1220 * original input file the output
1221 */
1222 static void
1223 bakcopy(void)
1224 {
1225 int n,
1226 bakchn;
1227 char buff[8 * 1024];
1228 const char *p;
1229
1230 /* construct file name .Bfile */
1231 for (p = in_name; *p; p++); /* skip to end of string */
1232 while (p > in_name && *p != '/') /* find last '/' */
1233 p--;
1234 if (*p == '/')
1235 p++;
1236 sprintf(bakfile, "%s%s", p, simple_backup_suffix);
1237
1238 /* copy in_name to backup file */
1239 bakchn = creat(bakfile, 0600);
1240 if (bakchn < 0)
1241 err(1, "%s", bakfile);
1242 while ((n = read(fileno(input), buff, sizeof(buff))) > 0)
1243 if (write(bakchn, buff, n) != n)
1244 err(1, "%s", bakfile);
1245 if (n < 0)
1246 err(1, "%s", in_name);
1247 close(bakchn);
1248 fclose(input);
1249
1250 /* re-open backup file as the input file */
1251 input = fopen(bakfile, "r");
1252 if (input == NULL)
1253 err(1, "%s", bakfile);
1254 /* now the original input file will be the output */
1255 output = fopen(in_name, "w");
1256 if (output == NULL) {
1257 unlink(bakfile);
1258 err(1, "%s", in_name);
1259 }
1260 }
1261
1262 static void
1263 indent_declaration(int cur_dec_ind, int tabs_to_var)
1264 {
1265 int pos = e_code - s_code;
1266 char *startpos = e_code;
1267
1268 /*
1269 * get the tab math right for indentations that are not multiples of tabsize
1270 */
1271 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) {
1272 pos += (ps.ind_level * opt.ind_size) % opt.tabsize;
1273 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize;
1274 }
1275 if (tabs_to_var) {
1276 int tpos;
1277
1278 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize);
1279 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) {
1280 *e_code++ = '\t';
1281 pos = tpos;
1282 }
1283 }
1284 CHECK_SIZE_CODE(cur_dec_ind - pos + 1);
1285 while (pos < cur_dec_ind) {
1286 *e_code++ = ' ';
1287 pos++;
1288 }
1289 if (e_code == startpos && ps.want_blank) {
1290 *e_code++ = ' ';
1291 ps.want_blank = false;
1292 }
1293 }
1294