/*	$NetBSD: indent.h,v 1.160 2023/06/02 11:43:07 rillig Exp $	*/
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
5 *
6 * Copyright (c) 2001 Jens Schweikhardt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30 /*-
31 * SPDX-License-Identifier: BSD-4-Clause
32 *
33 * Copyright (c) 1985 Sun Microsystems, Inc.
34 * Copyright (c) 1980, 1993
35 * The Regents of the University of California. All rights reserved.
36 * All rights reserved.
37 *
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. All advertising materials mentioning features or use of this software
47 * must display the following acknowledgement:
48 * This product includes software developed by the University of
49 * California, Berkeley and its contributors.
50 * 4. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 */
66
67 #include <ctype.h>
68 #include <stdbool.h>
69 #include <stdio.h>
70
/*
 * The kinds of tokens, as returned by lexi().
 *
 * The enumerators keep their original order, so their numeric values are
 * unchanged.
 */
typedef enum lexer_symbol {
    lsym_eof,
    lsym_preprocessing,         /* '#' */
    lsym_newline,
    lsym_comment,               /* the initial '/ *' or '//' of a comment */
    lsym_lparen_or_lbracket,
    lsym_rparen_or_rbracket,
    lsym_lbrace,
    lsym_rbrace,
    lsym_period,
    lsym_unary_op,              /* e.g. '*', '&', '-' or leading '++' */
    lsym_binary_op,             /* e.g. '*', '&', '<<', '&&' or '/=' */
    lsym_postfix_op,            /* trailing '++' or '--' */
    lsym_question,              /* the '?' from a '?:' expression */
    lsym_colon,
    lsym_comma,
    lsym_semicolon,
    lsym_typedef,
    lsym_storage_class,
    lsym_type_outside_parentheses,
    lsym_type_in_parentheses,
    lsym_tag,                   /* 'struct', 'union' or 'enum' */
    lsym_case_label,            /* 'case' or 'default' */
    lsym_sizeof,
    lsym_offsetof,
    lsym_word,                  /* identifier, constant or string */
    lsym_funcname,              /* name of a function being defined */
    lsym_do,
    lsym_else,
    lsym_for,
    lsym_if,
    lsym_switch,
    lsym_while,
    lsym_return
} lexer_symbol;
106
/*
 * The symbols that are passed to parse() and kept on the parser stack
 * (see 's_sym' in the parser state).
 */
typedef enum parser_symbol {
    psym_0,                     /* a placeholder */
    psym_lbrace,
    psym_rbrace,
    psym_decl,
    psym_stmt,
    psym_stmt_list,
    psym_for_exprs,             /* 'for' '(' ... ')' */
    psym_if_expr,               /* 'if' '(' expr ')' */
    psym_if_expr_stmt,          /* 'if' '(' expr ')' stmt */
    psym_if_expr_stmt_else,     /* 'if' '(' expr ')' stmt 'else' */
    psym_else,                  /* 'else'; not stored on the stack */
    psym_switch_expr,           /* 'switch' '(' expr ')' */
    psym_do,                    /* 'do' */
    psym_do_stmt,               /* 'do' stmt */
    psym_while_expr,            /* 'while' '(' expr ')' */
} parser_symbol;
124
/* A range of characters, not null-terminated. */
struct buffer {
    const char *st;             /* start of the usable text */
    char *mem;                  /* NOTE(review): presumably the underlying
                                 * storage; confirm in the buffer code */
    size_t len;                 /* length of the usable text, from 'mem' */
    size_t cap;                 /* NOTE(review): presumably the capacity of
                                 * 'mem'; confirm in the buffer code */
};
132
extern FILE *input;
extern FILE *output;

/*
 * The current line from the input file, used by the lexer to generate tokens.
 * To read from the line, start at inp.st and continue up to and including the
 * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will
 * make the next line available, invalidating any pointers into the previous
 * line.
 */
extern struct buffer inp;

/*
 * The current token to be processed; it is typically copied to the buffer
 * 'code', or in some cases to 'lab'.
 */
extern struct buffer token;

/* The label or preprocessor directive. */
extern struct buffer lab;

/*
 * The main part of the current line of code, containing declarations or
 * statements.
 */
extern struct buffer code;

/*
 * The trailing comment of the line, or the start or end of a multi-line
 * comment, or while in process_comment, a single line of a multi-line
 * comment.
 */
extern struct buffer com;
156
/* The formatting options; the single instance is the global 'opt'. */
extern struct options {
    bool blanklines_around_conditional_compilation;

    /*
     * This is vaguely similar to blank_line_after_decl except that it only
     * applies to the first set of declarations in a procedure (just after
     * the first '{') and it causes a blank line to be generated even if
     * there are no declarations.
     */
    bool blank_line_after_decl_at_top;
    bool blank_line_after_decl;
    bool blanklines_after_procs;
    bool blanklines_before_block_comments;

    /* Whether to add a line break after each declarator. */
    bool break_after_comma;

    /* Whether brace should be on same line as if, while, etc. */
    bool brace_same_line;

    /* Whether a blank should always be inserted after sizeof. */
    bool blank_after_sizeof;
    bool comment_delimiter_on_blankline;

    /* The column in which comments after declarations should be put. */
    int decl_comment_column;

    /* Whether 'else' should cuddle up to '}'. */
    bool cuddle_else;

    /* The indentation between the edge of code and continuation lines. */
    int continuation_indent;

    /*
     * The distance (measured in indentation levels) to indent case labels
     * from the switch statement.
     */
    float case_indent;

    /* The column in which comments to the right of code should start. */
    int comment_column;

    /* Indentation of identifier in declaration. */
    int decl_indent;

    /* True if declarations should be left justified. */
    bool ljust_decl;

    /*
     * Comments not to the right of code will be placed this many
     * indentation levels to the left of code.
     */
    int unindent_displace;

    /*
     * Whether continuation lines from the expression part of "if (e)",
     * "while (e)", "for (e; e; e)" should be indented an extra tab stop so
     * that they don't conflict with the code that follows.
     */
    bool extra_expr_indent;

    /* Whether else-if pairs use the same line. */
    bool else_if;

    /* Split function declaration and brace onto separate lines. */
    bool function_brace_split;

    /*
     * If comments which start in column 1 are to be reformatted (just like
     * comments that begin in later columns).
     */
    bool format_col1_comments;

    /* Whether comments beginning with '/ * \n' are to be reformatted. */
    bool format_block_comments;
    bool indent_parameters;

    /* The size of one indentation level. */
    int indent_size;
    int block_comment_max_line_length;

    /* Like decl_indent but for locals. */
    int local_decl_indent;

    /*
     * NOTE(review): the inherited comment reads "whether to not(?) attempt
     * to keep lined-up code within the margin"; the exact polarity cannot
     * be determined from this header.
     */
    bool lineup_to_parens_always;

    /*
     * Whether continued code within parens will be lined up to the open
     * paren.
     */
    bool lineup_to_parens;

    /* Whether function calls look like: foo (bar) rather than foo(bar). */
    bool proc_calls_space;

    /*
     * Whether the names of procedures being defined get placed in column 1
     * (i.e. a newline is placed between the type of the procedure and its
     * name).
     */
    bool procnames_start_line;

    /* "b = (int) a" vs "b = (int)a" */
    bool space_after_cast;

    /*
     * Whether comment continuation lines should have stars at the beginning
     * of each line.
     */
    bool star_comment_cont;
    bool swallow_optional_blanklines;

    /* Whether to recognize identifiers ending in "_t" like typedefs. */
    bool auto_typedefs;

    /* The size of a tab. */
    int tabsize;
    int max_line_length;

    /* Set true to use tabs for spacing, false uses all spaces. */
    bool use_tabs;

    /* Whether non-essential error messages are printed. */
    bool verbose;
} opt;
236
extern bool found_err;

/* Indentation level to be used for a "case n:". */
extern float case_ind;

/* Whether input is exhausted. */
extern bool had_eof;

/* The current line number. */
extern int line_no;

extern enum indent_enabled {
    indent_on,
    indent_off,
    indent_last_off_line,
} indent_enabled;
247
/* Number of entries in each of the parser stack arrays (s_sym etc.). */
#define STACKSIZE 256

/* Properties of each level of parentheses or brackets. */
typedef struct paren_level_props {
    /*
     * Indentation of the operand/argument, relative to the enclosing
     * statement; if negative, reflected at -1.
     */
    int indent;

    /* Whether the parentheses form a type cast. */
    enum paren_level_cast {
        cast_unknown,
        cast_maybe,
        cast_no,
    } cast;
} paren_level_props;
261
262 /*
263 * The parser state determines the layout of the formatted text.
264 *
265 * At each '#if', the parser state is copied so that the corresponding '#else'
266 * lines start in the same state.
267 *
268 * In a function body, the number of block braces determines the indentation
269 * of statements and declarations.
270 *
271 * In a statement, the number of parentheses or brackets determines the
272 * indentation of follow-up lines.
273 *
274 * In an expression, the token type determine whether to put spaces around.
275 *
276 * In a source file, the types of line determine the vertical spacing, such as
277 * around preprocessing directives or function bodies, or above block
278 * comments.
279 */
280 extern struct parser_state {
281 lexer_symbol prev_token; /* the previous token, but never
282 * comment, newline or preprocessing
283 * line */
284
285 /* Token classification */
286
287 int quest_level; /* when this is positive, we have seen a '?'
288 * without the matching ':' in a '?:'
289 * expression */
290 bool is_function_definition; /* starts either at the 'name(' from a
291 * function definition if it occurs at
292 * the beginning of a line, or at the
293 * first '*' from inside a declaration
294 * when the line starts with words
295 * followed by a '('; ends at the end
296 * of that line */
297 bool block_init; /* whether inside a block initialization */
298 int block_init_level; /* the level of brace nesting in an
299 * initialization */
300 bool init_or_struct; /* whether there has been a type name and no
301 * left parenthesis since the last semicolon.
302 * When true, a '{' starts a structure
303 * definition or an initialization list */
304 bool decl_on_line; /* whether this line of code has part of a
305 * declaration on it */
306 bool in_stmt_or_decl; /* whether in a statement or a struct
307 * declaration or a plain declaration */
308 bool in_decl; /* whether we are in a declaration. The
309 * processing of braces is then slightly
310 * different */
311 bool in_func_def_params;
312 bool seen_case; /* set to true when we see a 'case', so we know
313 * what to do with the following colon */
314 bool is_case_label; /* 'case' and 'default' labels are indented
315 * differently from regular labels */
316 parser_symbol spaced_expr_psym; /* the parser symbol to be shifted
317 * after the parenthesized expression
318 * from a 'for', 'if', 'switch' or
319 * 'while'; or psym_0 */
320
321 /* Indentation of statements and declarations */
322
323 int ind_level; /* the indentation level for the line that is
324 * currently prepared for output */
325 int ind_level_follow; /* the level to which ind_level should be set
326 * after the current line is printed */
327 bool in_stmt_cont; /* whether the current line should have an
328 * extra indentation level because we are in
329 * the middle of a statement */
330 int decl_level; /* current nesting level for a structure
331 * declaration or an initializer */
332 int di_stack[20]; /* a stack of structure indentation levels */
333 bool decl_indent_done; /* whether the indentation for a declaration
334 * has been added to the code buffer. */
335 int decl_ind; /* current indentation for declarations */
336 bool tabs_to_var; /* true if using tabs to indent to var name */
337
338 enum {
339 eei_no,
340 eei_yes,
341 eei_last
342 } extra_expr_indent;
343
344 enum {
345 in_enum_no, /* outside any 'enum { ... }' */
346 in_enum_enum, /* after keyword 'enum' */
347 in_enum_type, /* after 'enum' or 'enum tag' */
348 in_enum_brace /* between '{' and '}' */
349 } in_enum; /* enum { . } */
350
351 int tos; /* pointer to top of stack */
352 parser_symbol s_sym[STACKSIZE];
353 int s_ind_level[STACKSIZE];
354 float s_case_ind_level[STACKSIZE];
355
356 /* Spacing inside a statement or declaration */
357
358 bool next_unary; /* whether the following operator should be
359 * unary; is used in declarations for '*', as
360 * well as in expressions */
361 bool want_blank; /* whether the following token should be
362 * prefixed by a blank. (Said prefixing is
363 * ignored in some cases.) */
364 int line_start_nparen; /* the number of parentheses or brackets that
365 * were open at the beginning of the current
366 * line; used to indent within statements,
367 * initializers and declarations */
368 int nparen; /* the number of parentheses or brackets that
369 * are currently open; used to indent the
370 * remaining lines of the statement,
371 * initializer or declaration */
372 enum {
373 dp_start,
374 dp_word,
375 dp_word_asterisk,
376 dp_other,
377 } decl_ptr; /* detects declarations like 'typename *x',
378 * to prevent the '*' from being interpreted as
379 * a binary operator */
380 paren_level_props paren[20];
381
382 /* Horizontal spacing for comments */
383
384 int comment_delta; /* used to set up indentation for all lines of
385 * a boxed comment after the first one */
386 int n_comment_delta; /* remembers how many columns there were before
387 * the start of a box comment so that
388 * forthcoming lines of the comment are
389 * indented properly */
390 int com_ind; /* indentation of the current comment */
391
392 /* Vertical spacing */
393
394 bool break_after_comma; /* whether to add a newline after the next
395 * comma; used in declarations but not in
396 * initializer lists */
397 bool force_nl; /* whether the next token is forced to go to a
398 * new line; used after 'if (expr)' and in
399 * similar situations; tokens like '{' may
400 * ignore this */
401
402 enum declaration {
403 decl_no, /* no declaration anywhere nearby */
404 decl_begin, /* collecting tokens of a declaration */
405 decl_end, /* finished a declaration */
406 } declaration;
407 bool blank_line_after_decl;
408
409 /* Comments */
410
411 bool curr_col_1; /* whether the current token started in column
412 * 1 of the original input */
413 bool next_col_1;
414 } ps;
415
416 extern struct output_state {
417 enum line_kind {
418 lk_other,
419 lk_blank,
420 lk_if, /* #if, #ifdef, #ifndef */
421 lk_endif, /* #endif */
422 lk_stmt_head, /* the ')' of an incomplete statement such as
423 * 'if (expr)' or 'for (expr; expr; expr)' */
424 lk_func_end, /* the last '}' of a function body */
425 lk_block_comment,
426 } line_kind; /* kind of the line that is being prepared for
427 * output; is reset to lk_other each time after
428 * trying to send a line to the output, even if
429 * that line was a suppressed blank line; used
430 * for inserting or removing blank lines */
431 enum line_kind prev_line_kind; /* the kind of line that was actually
432 * sent to the output */
433
434 struct buffer indent_off_text; /* text from between 'INDENT OFF' and
435 * 'INDENT ON', both inclusive */
436 } out;
437
438
/*
 * Number of elements of an array. Only valid for actual array objects; for
 * a pointer the sizeof expression does not yield an element count.
 */
#define array_length(array) (sizeof(array) / sizeof((array)[0]))
440
/*
 * Debugging aids. When 'debug' is defined, these are real functions and
 * name tables; otherwise each call expands to an empty statement.
 */
#ifdef debug
void debug_printf(const char *, ...) __printflike(1, 2);
void debug_println(const char *, ...) __printflike(1, 2);
void debug_blank_line(void);
void debug_vis_range(const char *, const char *, size_t, const char *);
void debug_parser_state(void);
void debug_parse_stack(const char *);
void debug_buffers(void);
extern const char *const lsym_name[];
extern const char *const psym_name[];
extern const char *const paren_level_cast_name[];
extern const char *const line_kind_name[];
#else
/* The do-while form makes each no-op usable wherever a statement is. */
#define debug_noop() do { } while (false)
#define debug_printf(fmt, ...) debug_noop()
#define debug_println(fmt, ...) debug_noop()
#define debug_blank_line() debug_noop()
#define debug_vis_range(prefix, s, e, suffix) debug_noop()
#define debug_parser_state() debug_noop()
#define debug_parse_stack(situation) debug_noop()
#define debug_buffers() debug_noop()
#endif
463
464 void register_typename(const char *);
465 int compute_code_indent(void);
466 int compute_label_indent(void);
467 int ind_add(int, const char *, size_t);
468
469 void inp_skip(void);
470 char inp_next(void);
471
472 lexer_symbol lexi(void);
473 void diag(int, const char *, ...) __printflike(2, 3);
474 void output_line(void);
475 void inp_read_line(void);
476 void parse(parser_symbol);
477 void process_comment(void);
478 void set_option(const char *, const char *);
479 void load_profile_files(const char *);
480
481 void *nonnull(void *);
482
483 void buf_add_char(struct buffer *, char);
484 void buf_add_chars(struct buffer *, const char *, size_t);
485
/*
 * Whether 'ch' is alphanumeric according to isalnum. The argument is
 * converted to unsigned char first, as the <ctype.h> functions require a
 * value representable as unsigned char (or EOF).
 */
static inline bool
ch_isalnum(char ch)
{
    return isalnum((unsigned char)ch) != 0;
}
491
/*
 * Whether 'ch' is alphabetic according to isalpha. The argument is
 * converted to unsigned char first, as the <ctype.h> functions require a
 * value representable as unsigned char (or EOF).
 */
static inline bool
ch_isalpha(char ch)
{
    return isalpha((unsigned char)ch) != 0;
}
497
/* Whether 'ch' is a space or a horizontal tab; no other character counts. */
static inline bool
ch_isblank(char ch)
{
    return ch == ' ' || ch == '\t';
}
503
/*
 * Whether 'ch' is one of the decimal digits '0' to '9'. This is a plain
 * range comparison and does not call into <ctype.h>.
 */
static inline bool
ch_isdigit(char ch)
{
    return '0' <= ch && ch <= '9';
}
509
/*
 * Whether 'ch' is white space according to isspace. The argument is
 * converted to unsigned char first, as the <ctype.h> functions require a
 * value representable as unsigned char (or EOF).
 */
static inline bool
ch_isspace(char ch)
{
    return isspace((unsigned char)ch) != 0;
}
515
516 static inline int
517 next_tab(int ind)
518 {
519 return ind - ind % opt.tabsize + opt.tabsize;
520 }
521