parse.c revision 1.46 1 /* $NetBSD: parse.c,v 1.46 2021/10/29 23:03:53 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$FreeBSD$");
47 #else
48 __FBSDID("$FreeBSD: head/usr.bin/indent/parse.c 337651 2018-08-11 19:20:06Z pstef $");
49 #endif
50
51 #include <assert.h>
52 #include <err.h>
53 #include <stdio.h>
54
55 #include "indent.h"
56
57 static void reduce(void);
58
59 #ifdef debug
60 static const char *
61 psym_name(parser_symbol psym)
62 {
63 static const char *const name[] = {
64 "semicolon",
65 "lbrace",
66 "rbrace",
67 "decl",
68 "stmt",
69 "stmt_list",
70 "for_exprs",
71 "if_expr",
72 "if_expr_stmt",
73 "if_expr_stmt_else",
74 "else",
75 "switch_expr",
76 "do",
77 "do_stmt",
78 "while_expr",
79 };
80
81 assert(array_length(name) == (int)psym_while_expr + 1);
82
83 return name[psym];
84 }
85 #endif
86
87 static int
88 decl_level(void)
89 {
90 int level = 0;
91 for (int i = ps.tos - 1; i > 0; i--)
92 if (ps.s_sym[i] == psym_decl)
93 level++;
94 return level;
95 }
96
97 /*
98 * Shift the token onto the parser stack, or reduce it by combining it with
99 * previous tokens.
100 */
101 void
102 parse(parser_symbol psym)
103 {
104 debug_println("parse token: '%s'", psym_name(psym));
105
106 if (psym != psym_else) {
107 while (ps.s_sym[ps.tos] == psym_if_expr_stmt) {
108 ps.s_sym[ps.tos] = psym_stmt;
109 reduce();
110 }
111 }
112
113 switch (psym) {
114
115 case psym_decl:
116 ps.search_stmt = opt.brace_same_line;
117 /* indicate that following brace should be on same line */
118
119 if (ps.s_sym[ps.tos] == psym_decl)
120 break; /* only put one declaration onto stack */
121
122 break_comma = true; /* while in a declaration, force a newline
123 * after comma */
124 ps.s_sym[++ps.tos] = psym_decl;
125 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
126
127 if (opt.ljust_decl)
128 ps.ind_level_follow = ps.ind_level = decl_level();
129 break;
130
131 case psym_if_expr:
132 if (ps.s_sym[ps.tos] == psym_if_expr_stmt_else && opt.else_if) {
133 /*
134 * Reduce "else if" to "if". This saves a lot of stack space in
135 * case of a long "if-else-if ... else-if" sequence.
136 */
137 ps.ind_level_follow = ps.s_ind_level[ps.tos--];
138 }
139 /* FALLTHROUGH */
140 case psym_do:
141 case psym_for_exprs:
142 ps.s_sym[++ps.tos] = psym;
143 ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
144 ++ps.ind_level_follow; /* subsequent statements should be indented 1 */
145 ps.search_stmt = opt.brace_same_line;
146 break;
147
148 case psym_lbrace:
149 break_comma = false; /* don't break comma in an initializer list */
150 if (ps.s_sym[ps.tos] == psym_stmt || ps.s_sym[ps.tos] == psym_decl
151 || ps.s_sym[ps.tos] == psym_stmt_list)
152 ++ps.ind_level_follow; /* it is a random, isolated stmt group
153 * or a declaration */
154 else {
155 if (code.s == code.e) {
156 /* it is a group as part of a while, for, etc. */
157 --ps.ind_level;
158
159 /*
160 * for a switch, brace should be two levels out from the code
161 */
162 if (ps.s_sym[ps.tos] == psym_switch_expr &&
163 opt.case_indent >= 1)
164 --ps.ind_level;
165 }
166 }
167
168 ps.s_sym[++ps.tos] = psym_lbrace;
169 ps.s_ind_level[ps.tos] = ps.ind_level;
170 ps.s_sym[++ps.tos] = psym_stmt;
171 /* allow null stmt between braces */
172 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
173 break;
174
175 case psym_while_expr:
176 if (ps.s_sym[ps.tos] == psym_do_stmt) {
177 /* it is matched with do stmt */
178 ps.ind_level = ps.ind_level_follow = ps.s_ind_level[ps.tos];
179 ps.s_sym[++ps.tos] = psym_while_expr;
180 ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
181
182 } else { /* it is a while loop */
183 ps.s_sym[++ps.tos] = psym_while_expr;
184 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
185 ++ps.ind_level_follow;
186 ps.search_stmt = opt.brace_same_line;
187 }
188
189 break;
190
191 case psym_else:
192 if (ps.s_sym[ps.tos] != psym_if_expr_stmt)
193 diag(1, "Unmatched 'else'");
194 else {
195 /* The indentation for 'else' should be the same as for 'if'. */
196 ps.ind_level = ps.s_ind_level[ps.tos];
197 ps.ind_level_follow = ps.ind_level + 1;
198 ps.s_sym[ps.tos] = psym_if_expr_stmt_else;
199 /* remember if with else */
200 ps.search_stmt = opt.brace_same_line || opt.else_if;
201 }
202 break;
203
204 case psym_rbrace:
205 /* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
206 if (ps.tos > 0 && ps.s_sym[ps.tos - 1] == psym_lbrace) {
207 ps.ind_level = ps.ind_level_follow = ps.s_ind_level[--ps.tos];
208 ps.s_sym[ps.tos] = psym_stmt;
209 } else
210 diag(1, "Statement nesting error");
211 break;
212
213 case psym_switch_expr:
214 ps.s_sym[++ps.tos] = psym_switch_expr;
215 ps.s_case_ind_level[ps.tos] = case_ind;
216 /* save current case indent level */
217 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
218 /* cases should be one level deeper than the switch */
219 case_ind = (float)ps.ind_level_follow + opt.case_indent;
220 /* statements should be two levels deeper */
221 ps.ind_level_follow += (int)opt.case_indent + 1;
222 ps.search_stmt = opt.brace_same_line;
223 break;
224
225 case psym_semicolon: /* a simple statement */
226 break_comma = false; /* don't break after comma in a declaration */
227 ps.s_sym[++ps.tos] = psym_stmt;
228 ps.s_ind_level[ps.tos] = ps.ind_level;
229 break;
230
231 default:
232 diag(1, "Unknown code to parser");
233 return;
234 }
235
236 if (ps.tos >= STACKSIZE - 1)
237 errx(1, "Parser stack overflow");
238
239 reduce(); /* see if any reduction can be done */
240
241 #ifdef debug
242 printf("parse stack:");
243 for (int i = 1; i <= ps.tos; ++i)
244 printf(" ('%s' at %d)", psym_name(ps.s_sym[i]), ps.s_ind_level[i]);
245 if (ps.tos == 0)
246 printf(" empty");
247 printf("\n");
248 #endif
249 }
250
251 void
252 parse_stmt_head(stmt_head hd)
253 {
254 static const parser_symbol psym[] = {
255 [hd_for] = psym_for_exprs,
256 [hd_if] = psym_if_expr,
257 [hd_switch] = psym_switch_expr,
258 [hd_while] = psym_while_expr
259 };
260 parse(psym[hd]);
261 }
262
263 /*
264 * Try to combine the statement on the top of the parse stack with the symbol
265 * directly below it, replacing these two symbols with a single symbol.
266 */
267 static bool
268 reduce_stmt(void)
269 {
270 switch (ps.s_sym[ps.tos - 1]) {
271
272 case psym_stmt:
273 case psym_stmt_list:
274 ps.s_sym[--ps.tos] = psym_stmt_list;
275 return true;
276
277 case psym_do:
278 ps.s_sym[--ps.tos] = psym_do_stmt;
279 ps.ind_level_follow = ps.s_ind_level[ps.tos];
280 return true;
281
282 case psym_if_expr:
283 ps.s_sym[--ps.tos] = psym_if_expr_stmt;
284 int i = ps.tos - 1;
285 while (ps.s_sym[i] != psym_stmt &&
286 ps.s_sym[i] != psym_stmt_list &&
287 ps.s_sym[i] != psym_lbrace)
288 --i;
289 ps.ind_level_follow = ps.s_ind_level[i];
290 /*
291 * For the time being, assume that there is no 'else' on this 'if',
292 * and set the indentation level accordingly. If an 'else' is scanned,
293 * it will be fixed up later.
294 */
295 return true;
296
297 case psym_switch_expr:
298 case_ind = ps.s_case_ind_level[ps.tos - 1];
299 /* FALLTHROUGH */
300 case psym_decl: /* finish of a declaration */
301 case psym_if_expr_stmt_else:
302 case psym_for_exprs:
303 case psym_while_expr:
304 ps.s_sym[--ps.tos] = psym_stmt;
305 ps.ind_level_follow = ps.s_ind_level[ps.tos];
306 return true;
307
308 default:
309 return false;
310 }
311 }
312
313 /*
314 * Repeatedly try to reduce the top two symbols on the parse stack to a
315 * single symbol, until no more reductions are possible.
316 */
317 static void
318 reduce(void)
319 {
320 again:
321 if (ps.s_sym[ps.tos] == psym_stmt && reduce_stmt())
322 goto again;
323 if (ps.s_sym[ps.tos] == psym_while_expr &&
324 ps.s_sym[ps.tos - 1] == psym_do_stmt) {
325 ps.tos -= 2; /* XXX: why not reduce to stmt? */
326 goto again;
327 }
328 }
329