parse.c revision 1.82 1 /* $NetBSD: parse.c,v 1.82 2025/01/04 21:54:26 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: parse.c,v 1.82 2025/01/04 21:54:26 rillig Exp $");
42
43 #include <stdlib.h>
44
45 #include "indent.h"
46
47 /* Replace the top 2 symbols with the given symbol. */
48 static void
49 psyms_replace2(parser_symbol psym)
50 {
51 ps.psyms.len--;
52 ps.psyms.sym[ps.psyms.len - 1] = psym;
53 }
54
55 /*
56 * Try to combine the statement on the top of the parse stack with the symbol
57 * directly below it, replacing these two symbols with a single symbol.
58 */
59 static bool
60 psyms_reduce_stmt(void)
61 {
62 switch (ps.psyms.sym[ps.psyms.len - 2]) {
63
64 case psym_stmt:
65 psyms_replace2(psym_stmt);
66 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
67 return true;
68
69 case psym_do:
70 psyms_replace2(psym_do_stmt);
71 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
72 return true;
73
74 case psym_if_expr:
75 psyms_replace2(psym_if_expr_stmt);
76 /* For the time being, assume that there is no 'else' on this
77 * 'if', and set the indentation level accordingly. If an
78 * 'else' is scanned, it will be fixed up later. */
79 size_t i = ps.psyms.len - 2;
80 while (ps.psyms.sym[i] != psym_stmt
81 && ps.psyms.sym[i] != psym_lbrace_block)
82 i--;
83 ps.ind_level_follow = ps.psyms.ind_level[i];
84 return true;
85
86 case psym_switch_expr:
87 case psym_decl:
88 case psym_if_expr_stmt_else:
89 case psym_for_exprs:
90 case psym_while_expr:
91 psyms_replace2(psym_stmt);
92 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
93 return true;
94
95 default:
96 return false;
97 }
98 }
99
100 static int
101 left_justify_decl_level(void)
102 {
103 int level = 0;
104 for (size_t i = ps.psyms.len - 2; i > 0; i--)
105 if (ps.psyms.sym[i] == psym_decl)
106 level++;
107 return level;
108 }
109
110 void
111 ps_push(parser_symbol psym, bool follow)
112 {
113 if (ps.psyms.len == ps.psyms.cap) {
114 ps.psyms.cap += 16;
115 ps.psyms.sym = nonnull(realloc(ps.psyms.sym,
116 sizeof(ps.psyms.sym[0]) * ps.psyms.cap));
117 ps.psyms.ind_level = nonnull(realloc(ps.psyms.ind_level,
118 sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap));
119 }
120 ps.psyms.len++;
121 ps.psyms.sym[ps.psyms.len - 1] = psym;
122 ps.psyms.ind_level[ps.psyms.len - 1] =
123 follow ? ps.ind_level_follow : ps.ind_level;
124 }
125
126 /*
127 * Repeatedly try to reduce the top two symbols on the parse stack to a single
128 * symbol, until no more reductions are possible.
129 */
130 static void
131 psyms_reduce(void)
132 {
133 again:
134 if (ps.psyms.len >= 2 && ps.psyms.sym[ps.psyms.len - 1] == psym_stmt
135 && psyms_reduce_stmt())
136 goto again;
137 if (ps.psyms.sym[ps.psyms.len - 1] == psym_while_expr &&
138 ps.psyms.sym[ps.psyms.len - 2] == psym_do_stmt) {
139 ps.psyms.len -= 2;
140 goto again;
141 }
142 }
143
144 static bool
145 is_lbrace(parser_symbol psym)
146 {
147 return psym == psym_lbrace_block
148 || psym == psym_lbrace_struct
149 || psym == psym_lbrace_union
150 || psym == psym_lbrace_enum;
151 }
152
153 /*
154 * Shift the token onto the parser stack, then try to reduce it by combining it with
155 * previous tokens.
156 */
157 void
158 parse(parser_symbol psym)
159 {
160 debug_blank_line();
161 debug_println("parse: %s", psym_name[psym]);
162
163 if (psym != psym_else) {
164 while (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt) {
165 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
166 psyms_reduce();
167 }
168 }
169
170 switch (psym) {
171
172 case psym_lbrace_block:
173 case psym_lbrace_struct:
174 case psym_lbrace_union:
175 case psym_lbrace_enum:
176 ps.break_after_comma = false;
177 if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl
178 || ps.psyms.sym[ps.psyms.len - 1] == psym_stmt)
179 ps.ind_level_follow++;
180 else if (code.len == 0) {
181 /* It is part of a while, for, etc. */
182 ps.ind_level--;
183
184 /* for a switch, brace should be two levels out from
185 * the code */
186 if (ps.psyms.sym[ps.psyms.len - 1] == psym_switch_expr
187 && opt.case_indent >= 1.0F)
188 ps.ind_level--;
189 }
190
191 ps_push(psym, false);
192 ps_push(psym_stmt, true);
193 break;
194
195 case psym_rbrace:
196 /* stack should have <lbrace> <stmt> or <lbrace> <decl> */
197 if (!(ps.psyms.len >= 2
198 && is_lbrace(ps.psyms.sym[ps.psyms.len - 2]))) {
199 diag(1, "Statement nesting error");
200 break;
201 }
202 ps.psyms.len--;
203 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
204 ps.ind_level_follow = ps.ind_level;
205 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
206 break;
207
208 case psym_decl:
209 if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl)
210 break; /* only put one declaration onto stack */
211
212 ps.break_after_comma = true;
213 ps_push(psym_decl, true);
214
215 if (opt.left_justify_decl) {
216 ps.ind_level = left_justify_decl_level();
217 ps.ind_level_follow = ps.ind_level;
218 }
219 break;
220
221 case psym_stmt:
222 ps.break_after_comma = false;
223 ps_push(psym_stmt, false);
224 break;
225
226 case psym_if_expr:
227 if (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt_else
228 && opt.else_if_in_same_line) {
229 ps.ind_level_follow =
230 ps.psyms.ind_level[ps.psyms.len - 1];
231 ps.psyms.len--;
232 }
233 /* FALLTHROUGH */
234 case psym_do:
235 case psym_for_exprs:
236 ps.ind_level = ps.ind_level_follow;
237 ps.ind_level_follow = ps.ind_level + 1;
238 ps_push(psym, false);
239 break;
240
241 case psym_else:
242 if (ps.psyms.sym[ps.psyms.len - 1] != psym_if_expr_stmt) {
243 diag(1, "Unmatched 'else'");
244 break;
245 }
246 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
247 ps.ind_level_follow = ps.ind_level + 1;
248 ps.psyms.sym[ps.psyms.len - 1] = psym_if_expr_stmt_else;
249 break;
250
251 case psym_switch_expr:
252 ps_push(psym_switch_expr, true);
253 ps.ind_level_follow += (int)opt.case_indent + 1;
254 break;
255
256 case psym_while_expr:
257 if (ps.psyms.sym[ps.psyms.len - 1] == psym_do_stmt) {
258 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
259 ps.ind_level_follow = ps.ind_level;
260 ps_push(psym_while_expr, false);
261 } else {
262 ps_push(psym_while_expr, true);
263 ps.ind_level_follow++;
264 }
265 break;
266
267 default:
268 diag(1, "Unknown code to parser");
269 return;
270 }
271
272 #if debug
273 static struct buffer before, after;
274 buf_clear(&before);
275 ps_psyms_to_string(&before, &ps);
276 #endif
277 psyms_reduce();
278 #if debug
279 buf_clear(&after);
280 ps_psyms_to_string(&after, &ps);
281 if (strcmp(before.s, after.s) != 0) {
282 debug_println("psyms before: %s", before.s);
283 debug_println("psyms after: %s", after.s);
284 } else
285 debug_println("psyms: %s", after.s);
286 #endif
287 }
288