parse.c revision 1.83 1 /* $NetBSD: parse.c,v 1.83 2025/01/07 02:55:30 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: parse.c,v 1.83 2025/01/07 02:55:30 rillig Exp $");
42
43 #include <stdlib.h>
44
45 #include "indent.h"
46
47 /* Replace the top 2 symbols with the given symbol. */
48 static void
49 psyms_replace2(parser_symbol psym)
50 {
51 ps.psyms.len--;
52 ps.psyms.sym[ps.psyms.len - 1] = psym;
53 }
54
55 /*
56 * Try to combine the statement on the top of the parse stack with the symbol
57 * directly below it, replacing these two symbols with a single symbol.
58 */
59 static bool
60 psyms_reduce_stmt(void)
61 {
62 switch (ps.psyms.sym[ps.psyms.len - 2]) {
63
64 case psym_stmt:
65 psyms_replace2(psym_stmt);
66 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
67 return true;
68
69 case psym_do:
70 psyms_replace2(psym_do_stmt);
71 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
72 return true;
73
74 case psym_if_expr:
75 psyms_replace2(psym_if_expr_stmt);
76 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
77 return true;
78
79 case psym_switch_expr:
80 case psym_decl:
81 case psym_if_expr_stmt_else:
82 case psym_for_exprs:
83 case psym_while_expr:
84 psyms_replace2(psym_stmt);
85 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
86 return true;
87
88 default:
89 return false;
90 }
91 }
92
93 static int
94 left_justify_decl_level(void)
95 {
96 int level = 0;
97 for (size_t i = ps.psyms.len - 2; i > 0; i--)
98 if (ps.psyms.sym[i] == psym_decl)
99 level++;
100 return level;
101 }
102
103 void
104 ps_push(parser_symbol psym, bool follow)
105 {
106 if (ps.psyms.len == ps.psyms.cap) {
107 ps.psyms.cap += 16;
108 ps.psyms.sym = nonnull(realloc(ps.psyms.sym,
109 sizeof(ps.psyms.sym[0]) * ps.psyms.cap));
110 ps.psyms.ind_level = nonnull(realloc(ps.psyms.ind_level,
111 sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap));
112 }
113 ps.psyms.len++;
114 ps.psyms.sym[ps.psyms.len - 1] = psym;
115 ps.psyms.ind_level[ps.psyms.len - 1] =
116 follow ? ps.ind_level_follow : ps.ind_level;
117 }
118
119 /*
120 * Repeatedly try to reduce the top two symbols on the parse stack to a single
121 * symbol, until no more reductions are possible.
122 */
123 static void
124 psyms_reduce(void)
125 {
126 again:
127 if (ps.psyms.len >= 2 && ps.psyms.sym[ps.psyms.len - 1] == psym_stmt
128 && psyms_reduce_stmt())
129 goto again;
130 if (ps.psyms.sym[ps.psyms.len - 1] == psym_while_expr &&
131 ps.psyms.sym[ps.psyms.len - 2] == psym_do_stmt) {
132 ps.psyms.len -= 2;
133 goto again;
134 }
135 }
136
137 static bool
138 is_lbrace(parser_symbol psym)
139 {
140 return psym == psym_lbrace_block
141 || psym == psym_lbrace_struct
142 || psym == psym_lbrace_union
143 || psym == psym_lbrace_enum;
144 }
145
146 /*
147 * Shift the token onto the parser stack, then try to reduce it by combining it with
148 * previous tokens.
149 */
150 void
151 parse(parser_symbol psym)
152 {
153 debug_blank_line();
154 debug_println("parse: %s", psym_name[psym]);
155
156 if (psym != psym_else) {
157 while (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt) {
158 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
159 ps.ind_level = ps.ind_level_follow
160 = ps.psyms.ind_level[ps.psyms.len - 2];
161 psyms_reduce();
162 }
163 }
164
165 switch (psym) {
166
167 case psym_lbrace_block:
168 case psym_lbrace_struct:
169 case psym_lbrace_union:
170 case psym_lbrace_enum:
171 ps.break_after_comma = false;
172 if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl
173 || ps.psyms.sym[ps.psyms.len - 1] == psym_stmt)
174 ps.ind_level_follow++;
175 else if (code.len == 0) {
176 /* It is part of a while, for, etc. */
177 ps.ind_level--;
178
179 /* for a switch, brace should be two levels out from
180 * the code */
181 if (ps.psyms.sym[ps.psyms.len - 1] == psym_switch_expr
182 && opt.case_indent >= 1.0F)
183 ps.ind_level--;
184 }
185
186 ps_push(psym, false);
187 ps_push(psym_stmt, true);
188 break;
189
190 case psym_rbrace:
191 /* stack should have <lbrace> <stmt> or <lbrace> <decl> */
192 if (!(ps.psyms.len >= 2
193 && is_lbrace(ps.psyms.sym[ps.psyms.len - 2]))) {
194 diag(1, "Statement nesting error");
195 break;
196 }
197 ps.psyms.len--;
198 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
199 ps.ind_level_follow = ps.ind_level;
200 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
201 break;
202
203 case psym_decl:
204 if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl)
205 break; /* only put one declaration onto stack */
206
207 ps.break_after_comma = true;
208 ps_push(psym_decl, true);
209
210 if (opt.left_justify_decl) {
211 ps.ind_level = left_justify_decl_level();
212 ps.ind_level_follow = ps.ind_level;
213 }
214 break;
215
216 case psym_stmt:
217 ps.break_after_comma = false;
218 ps_push(psym_stmt, false);
219 break;
220
221 case psym_if_expr:
222 if (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt_else
223 && opt.else_if_in_same_line) {
224 ps.ind_level_follow =
225 ps.psyms.ind_level[ps.psyms.len - 1];
226 ps.psyms.len--;
227 }
228 /* FALLTHROUGH */
229 case psym_do:
230 case psym_for_exprs:
231 ps.ind_level = ps.ind_level_follow;
232 ps.ind_level_follow = ps.ind_level + 1;
233 ps_push(psym, false);
234 break;
235
236 case psym_else:
237 if (ps.psyms.sym[ps.psyms.len - 1] != psym_if_expr_stmt) {
238 diag(1, "Unmatched 'else'");
239 break;
240 }
241 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
242 ps.ind_level_follow = ps.ind_level + 1;
243 ps.psyms.sym[ps.psyms.len - 1] = psym_if_expr_stmt_else;
244 break;
245
246 case psym_switch_expr:
247 ps_push(psym_switch_expr, true);
248 ps.ind_level_follow += (int)opt.case_indent + 1;
249 break;
250
251 case psym_while_expr:
252 if (ps.psyms.sym[ps.psyms.len - 1] == psym_do_stmt) {
253 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
254 ps.ind_level_follow = ps.ind_level;
255 ps_push(psym_while_expr, false);
256 } else {
257 ps_push(psym_while_expr, true);
258 ps.ind_level_follow++;
259 }
260 break;
261
262 default:
263 diag(1, "Unknown code to parser");
264 return;
265 }
266
267 #if debug
268 static struct buffer before, after;
269 buf_clear(&before);
270 ps_psyms_to_string(&before, &ps);
271 #endif
272 psyms_reduce();
273 #if debug
274 buf_clear(&after);
275 ps_psyms_to_string(&after, &ps);
276 if (strcmp(before.s, after.s) != 0) {
277 debug_println("psyms before: %s", before.s);
278 debug_println("psyms after: %s", after.s);
279 } else
280 debug_println("psyms: %s", after.s);
281 #endif
282 }
283