parse.c revision 1.50 1 /* $NetBSD: parse.c,v 1.50 2023/05/11 09:28:53 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #if 0
41 static char sccsid[] = "@(#)parse.c 8.1 (Berkeley) 6/6/93";
42 #endif
43
44 #include <sys/cdefs.h>
45 #if defined(__NetBSD__)
46 __RCSID("$NetBSD: parse.c,v 1.50 2023/05/11 09:28:53 rillig Exp $");
47 #else
48 __FBSDID("$FreeBSD: head/usr.bin/indent/parse.c 337651 2018-08-11 19:20:06Z pstef $");
49 #endif
50
51 #include <assert.h>
52 #include <err.h>
53 #include <stdio.h>
54
55 #include "indent.h"
56
57 static void reduce(void);
58
59 #ifdef debug
60 static const char *
61 psym_name(parser_symbol psym)
62 {
63 static const char *const name[] = {
64 "semicolon",
65 "lbrace",
66 "rbrace",
67 "decl",
68 "stmt",
69 "stmt_list",
70 "for_exprs",
71 "if_expr",
72 "if_expr_stmt",
73 "if_expr_stmt_else",
74 "else",
75 "switch_expr",
76 "do",
77 "do_stmt",
78 "while_expr",
79 };
80
81 assert(array_length(name) == (int)psym_while_expr + 1);
82
83 return name[psym];
84 }
85 #endif
86
87 static int
88 decl_level(void)
89 {
90 int level = 0;
91 for (int i = ps.tos - 1; i > 0; i--)
92 if (ps.s_sym[i] == psym_decl)
93 level++;
94 return level;
95 }
96
97 /*
98 * Shift the token onto the parser stack, or reduce it by combining it with
99 * previous tokens.
100 */
101 void
102 parse(parser_symbol psym)
103 {
104 debug_println("parse token: %s", psym_name(psym));
105
106 if (psym != psym_else) {
107 while (ps.s_sym[ps.tos] == psym_if_expr_stmt) {
108 ps.s_sym[ps.tos] = psym_stmt;
109 reduce();
110 }
111 }
112
113 switch (psym) {
114
115 case psym_decl:
116 if (ps.s_sym[ps.tos] == psym_decl)
117 break; /* only put one declaration onto stack */
118
119 break_comma = true; /* while in a declaration, force a newline
120 * after comma */
121 ps.s_sym[++ps.tos] = psym_decl;
122 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
123
124 if (opt.ljust_decl)
125 ps.ind_level_follow = ps.ind_level = decl_level();
126 break;
127
128 case psym_if_expr:
129 if (ps.s_sym[ps.tos] == psym_if_expr_stmt_else && opt.else_if) {
130 /*
131 * Reduce "else if" to "if". This saves a lot of stack space in
132 * case of a long "if-else-if ... else-if" sequence.
133 */
134 ps.ind_level_follow = ps.s_ind_level[ps.tos--];
135 }
136 /* FALLTHROUGH */
137 case psym_do:
138 case psym_for_exprs:
139 ps.s_sym[++ps.tos] = psym;
140 ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
141 ++ps.ind_level_follow; /* subsequent statements should be indented 1 */
142 break;
143
144 case psym_lbrace:
145 break_comma = false; /* don't break comma in an initializer list */
146 if (ps.s_sym[ps.tos] == psym_stmt || ps.s_sym[ps.tos] == psym_decl
147 || ps.s_sym[ps.tos] == psym_stmt_list)
148 ++ps.ind_level_follow; /* it is a random, isolated stmt group
149 * or a declaration */
150 else {
151 if (code.s == code.e) {
152 /* it is a group as part of a while, for, etc. */
153 --ps.ind_level;
154
155 /*
156 * for a switch, brace should be two levels out from the code
157 */
158 if (ps.s_sym[ps.tos] == psym_switch_expr &&
159 opt.case_indent >= 1.0F)
160 --ps.ind_level;
161 }
162 }
163
164 ps.s_sym[++ps.tos] = psym_lbrace;
165 ps.s_ind_level[ps.tos] = ps.ind_level;
166 ps.s_sym[++ps.tos] = psym_stmt;
167 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
168 break;
169
170 case psym_while_expr:
171 if (ps.s_sym[ps.tos] == psym_do_stmt) {
172 /* it is matched with do stmt */
173 ps.ind_level = ps.ind_level_follow = ps.s_ind_level[ps.tos];
174 ps.s_sym[++ps.tos] = psym_while_expr;
175 ps.s_ind_level[ps.tos] = ps.ind_level = ps.ind_level_follow;
176
177 } else { /* it is a while loop */
178 ps.s_sym[++ps.tos] = psym_while_expr;
179 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
180 ++ps.ind_level_follow;
181 }
182
183 break;
184
185 case psym_else:
186 if (ps.s_sym[ps.tos] != psym_if_expr_stmt)
187 diag(1, "Unmatched 'else'");
188 else {
189 ps.ind_level = ps.s_ind_level[ps.tos];
190 ps.ind_level_follow = ps.ind_level + 1;
191 ps.s_sym[ps.tos] = psym_if_expr_stmt_else;
192 }
193 break;
194
195 case psym_rbrace:
196 /* stack should have <lbrace> <stmt> or <lbrace> <stmt_list> */
197 if (ps.tos > 0 && ps.s_sym[ps.tos - 1] == psym_lbrace) {
198 ps.ind_level = ps.ind_level_follow = ps.s_ind_level[--ps.tos];
199 ps.s_sym[ps.tos] = psym_stmt;
200 } else
201 diag(1, "Statement nesting error");
202 break;
203
204 case psym_switch_expr:
205 ps.s_sym[++ps.tos] = psym_switch_expr;
206 ps.s_case_ind_level[ps.tos] = case_ind;
207 ps.s_ind_level[ps.tos] = ps.ind_level_follow;
208 case_ind = (float)ps.ind_level_follow + opt.case_indent;
209 ps.ind_level_follow += (int)opt.case_indent + 1;
210 break;
211
212 case psym_semicolon: /* a simple statement */
213 break_comma = false; /* don't break after comma in a declaration */
214 ps.s_sym[++ps.tos] = psym_stmt;
215 ps.s_ind_level[ps.tos] = ps.ind_level;
216 break;
217
218 default:
219 diag(1, "Unknown code to parser");
220 return;
221 }
222
223 if (ps.tos >= STACKSIZE - 1)
224 errx(1, "Parser stack overflow");
225
226 reduce(); /* see if any reduction can be done */
227
228 #ifdef debug
229 printf("parse stack:");
230 for (int i = 1; i <= ps.tos; ++i)
231 printf(" %s %d", psym_name(ps.s_sym[i]), ps.s_ind_level[i]);
232 if (ps.tos == 0)
233 printf(" empty");
234 printf("\n");
235 #endif
236 }
237
238 void
239 parse_stmt_head(stmt_head hd)
240 {
241 static const parser_symbol psym[] = {
242 [hd_for] = psym_for_exprs,
243 [hd_if] = psym_if_expr,
244 [hd_switch] = psym_switch_expr,
245 [hd_while] = psym_while_expr
246 };
247 parse(psym[hd]);
248 }
249
250 /*
251 * Try to combine the statement on the top of the parse stack with the symbol
252 * directly below it, replacing these two symbols with a single symbol.
253 */
254 static bool
255 reduce_stmt(void)
256 {
257 switch (ps.s_sym[ps.tos - 1]) {
258
259 case psym_stmt:
260 case psym_stmt_list:
261 ps.s_sym[--ps.tos] = psym_stmt_list;
262 return true;
263
264 case psym_do:
265 ps.s_sym[--ps.tos] = psym_do_stmt;
266 ps.ind_level_follow = ps.s_ind_level[ps.tos];
267 return true;
268
269 case psym_if_expr:
270 ps.s_sym[--ps.tos] = psym_if_expr_stmt;
271 int i = ps.tos - 1;
272 while (ps.s_sym[i] != psym_stmt &&
273 ps.s_sym[i] != psym_stmt_list &&
274 ps.s_sym[i] != psym_lbrace)
275 --i;
276 ps.ind_level_follow = ps.s_ind_level[i];
277 /*
278 * For the time being, assume that there is no 'else' on this 'if',
279 * and set the indentation level accordingly. If an 'else' is scanned,
280 * it will be fixed up later.
281 */
282 return true;
283
284 case psym_switch_expr:
285 case_ind = ps.s_case_ind_level[ps.tos - 1];
286 /* FALLTHROUGH */
287 case psym_decl: /* finish of a declaration */
288 case psym_if_expr_stmt_else:
289 case psym_for_exprs:
290 case psym_while_expr:
291 ps.s_sym[--ps.tos] = psym_stmt;
292 ps.ind_level_follow = ps.s_ind_level[ps.tos];
293 return true;
294
295 default:
296 return false;
297 }
298 }
299
300 /*
301 * Repeatedly try to reduce the top two symbols on the parse stack to a
302 * single symbol, until no more reductions are possible.
303 */
304 static void
305 reduce(void)
306 {
307 again:
308 if (ps.s_sym[ps.tos] == psym_stmt && reduce_stmt())
309 goto again;
310 if (ps.s_sym[ps.tos] == psym_while_expr &&
311 ps.s_sym[ps.tos - 1] == psym_do_stmt) {
312 ps.tos -= 2;
313 goto again;
314 }
315 }
316