parse.c revision 1.80 1 /* $NetBSD: parse.c,v 1.80 2025/01/04 10:28:08 rillig Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-4-Clause
5 *
6 * Copyright (c) 1985 Sun Microsystems, Inc.
7 * Copyright (c) 1980, 1993
8 * The Regents of the University of California. All rights reserved.
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: parse.c,v 1.80 2025/01/04 10:28:08 rillig Exp $");
42
43 #include <stdlib.h>
44
45 #include "indent.h"
46
47 /*
48 * Try to combine the statement on the top of the parse stack with the symbol
49 * directly below it, replacing these two symbols with a single symbol.
50 */
51 static bool
52 psyms_reduce_stmt(void)
53 {
54 switch (ps.psyms.sym[ps.psyms.len - 2]) {
55
56 case psym_stmt:
57 ps.psyms.len--;
58 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
59 return true;
60
61 case psym_do:
62 ps.psyms.len--;
63 ps.psyms.sym[ps.psyms.len - 1] = psym_do_stmt;
64 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
65 return true;
66
67 case psym_if_expr:
68 ps.psyms.len--;
69 ps.psyms.sym[ps.psyms.len - 1] = psym_if_expr_stmt;
70 /* For the time being, assume that there is no 'else' on this
71 * 'if', and set the indentation level accordingly. If an
72 * 'else' is scanned, it will be fixed up later. */
73 size_t i = ps.psyms.len - 2;
74 while (ps.psyms.sym[i] != psym_stmt
75 && ps.psyms.sym[i] != psym_lbrace_block)
76 i--;
77 ps.ind_level_follow = ps.psyms.ind_level[i];
78 return true;
79
80 case psym_switch_expr:
81 case psym_decl:
82 case psym_if_expr_stmt_else:
83 case psym_for_exprs:
84 case psym_while_expr:
85 ps.psyms.len--;
86 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
87 ps.ind_level_follow = ps.psyms.ind_level[ps.psyms.len - 1];
88 return true;
89
90 default:
91 return false;
92 }
93 }
94
95 static int
96 left_justify_decl_level(void)
97 {
98 int level = 0;
99 for (size_t i = ps.psyms.len - 2; i > 0; i--)
100 if (ps.psyms.sym[i] == psym_decl)
101 level++;
102 return level;
103 }
104
105 void
106 ps_push(parser_symbol psym, bool follow)
107 {
108 if (ps.psyms.len == ps.psyms.cap) {
109 ps.psyms.cap += 16;
110 ps.psyms.sym = nonnull(realloc(ps.psyms.sym,
111 sizeof(ps.psyms.sym[0]) * ps.psyms.cap));
112 ps.psyms.ind_level = nonnull(realloc(ps.psyms.ind_level,
113 sizeof(ps.psyms.ind_level[0]) * ps.psyms.cap));
114 }
115 ps.psyms.len++;
116 ps.psyms.sym[ps.psyms.len - 1] = psym;
117 ps.psyms.ind_level[ps.psyms.len - 1] =
118 follow ? ps.ind_level_follow : ps.ind_level;
119 }
120
121 /*
122 * Repeatedly try to reduce the top two symbols on the parse stack to a single
123 * symbol, until no more reductions are possible.
124 */
125 static void
126 psyms_reduce(void)
127 {
128 again:
129 if (ps.psyms.len >= 2 && ps.psyms.sym[ps.psyms.len - 1] == psym_stmt
130 && psyms_reduce_stmt())
131 goto again;
132 if (ps.psyms.sym[ps.psyms.len - 1] == psym_while_expr &&
133 ps.psyms.sym[ps.psyms.len - 2] == psym_do_stmt) {
134 ps.psyms.len -= 2;
135 goto again;
136 }
137 }
138
139 static bool
140 is_lbrace(parser_symbol psym)
141 {
142 return psym == psym_lbrace_block
143 || psym == psym_lbrace_struct
144 || psym == psym_lbrace_union
145 || psym == psym_lbrace_enum;
146 }
147
148 /*
149 * Shift the token onto the parser stack, then try to reduce it by combining it with
150 * previous tokens.
151 */
152 void
153 parse(parser_symbol psym)
154 {
155 debug_blank_line();
156 debug_println("parse token: %s", psym_name[psym]);
157
158 if (psym != psym_else) {
159 while (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt) {
160 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
161 psyms_reduce();
162 }
163 }
164
165 switch (psym) {
166
167 case psym_lbrace_block:
168 case psym_lbrace_struct:
169 case psym_lbrace_union:
170 case psym_lbrace_enum:
171 ps.break_after_comma = false;
172 if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl
173 || ps.psyms.sym[ps.psyms.len - 1] == psym_stmt)
174 ps.ind_level_follow++;
175 else if (code.len == 0) {
176 /* It is part of a while, for, etc. */
177 ps.ind_level--;
178
179 /* for a switch, brace should be two levels out from
180 * the code */
181 if (ps.psyms.sym[ps.psyms.len - 1] == psym_switch_expr
182 && opt.case_indent >= 1.0F)
183 ps.ind_level--;
184 }
185
186 ps_push(psym, false);
187 ps_push(psym_stmt, true);
188 break;
189
190 case psym_rbrace:
191 /* stack should have <lbrace> <stmt> or <lbrace> <decl> */
192 if (!(ps.psyms.len >= 2
193 && is_lbrace(ps.psyms.sym[ps.psyms.len - 2]))) {
194 diag(1, "Statement nesting error");
195 break;
196 }
197 ps.psyms.len--;
198 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
199 ps.ind_level_follow = ps.ind_level;
200 ps.psyms.sym[ps.psyms.len - 1] = psym_stmt;
201 break;
202
203 case psym_decl:
204 if (ps.psyms.sym[ps.psyms.len - 1] == psym_decl)
205 break; /* only put one declaration onto stack */
206
207 ps.break_after_comma = true;
208 ps_push(psym_decl, true);
209
210 if (opt.left_justify_decl) {
211 ps.ind_level = left_justify_decl_level();
212 ps.ind_level_follow = ps.ind_level;
213 }
214 break;
215
216 case psym_stmt:
217 ps.break_after_comma = false;
218 ps_push(psym_stmt, false);
219 break;
220
221 case psym_if_expr:
222 if (ps.psyms.sym[ps.psyms.len - 1] == psym_if_expr_stmt_else
223 && opt.else_if_in_same_line) {
224 ps.ind_level_follow =
225 ps.psyms.ind_level[ps.psyms.len - 1];
226 ps.psyms.len--;
227 }
228 /* FALLTHROUGH */
229 case psym_do:
230 case psym_for_exprs:
231 ps.ind_level = ps.ind_level_follow;
232 ps.ind_level_follow = ps.ind_level + 1;
233 ps_push(psym, false);
234 break;
235
236 case psym_else:
237 if (ps.psyms.sym[ps.psyms.len - 1] != psym_if_expr_stmt) {
238 diag(1, "Unmatched 'else'");
239 break;
240 }
241 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
242 ps.ind_level_follow = ps.ind_level + 1;
243 ps.psyms.sym[ps.psyms.len - 1] = psym_if_expr_stmt_else;
244 break;
245
246 case psym_switch_expr:
247 ps_push(psym_switch_expr, true);
248 ps.ind_level_follow += (int)opt.case_indent + 1;
249 break;
250
251 case psym_while_expr:
252 if (ps.psyms.sym[ps.psyms.len - 1] == psym_do_stmt) {
253 ps.ind_level = ps.psyms.ind_level[ps.psyms.len - 1];
254 ps.ind_level_follow = ps.ind_level;
255 ps_push(psym_while_expr, false);
256 } else {
257 ps_push(psym_while_expr, true);
258 ps.ind_level_follow++;
259 }
260 break;
261
262 default:
263 diag(1, "Unknown code to parser");
264 return;
265 }
266
267 #if debug
268 static struct buffer before, after;
269 debug_fmt_psyms_stack(&before);
270 #endif
271 psyms_reduce();
272 #if debug
273 debug_fmt_psyms_stack(&after);
274 if (before.len != after.len
275 || memcmp(before.s, after.s, before.len) != 0) {
276 debug_println("psyms before:%s", before.s);
277 debug_println("psyms after: %s", after.s);
278 }
279 #endif
280 }
281