for.c revision 1.102 1 /* $NetBSD: for.c,v 1.102 2020/10/25 15:26:18 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1992, The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Handling of .for/.endfor loops in a makefile.
34 *
35 * For loops are of the form:
36 *
37 * .for <varname...> in <value...>
38 * ...
39 * .endfor
40 *
41 * When a .for line is parsed, all following lines are accumulated into a
42 * buffer, up to but excluding the corresponding .endfor line. To find the
43 * corresponding .endfor, the number of nested .for and .endfor directives
44 * are counted.
45 *
46 * During parsing, any nested .for loops are just passed through; they get
47 * handled recursively in For_Eval when the enclosing .for loop is evaluated
48 * in For_Run.
49 *
50 * When the .for loop has been parsed completely, the variable expressions
51 * for the iteration variables are replaced with expressions of the form
52 * ${:Uvalue}, and then this modified body is "included" as a special file.
53 *
54 * Interface:
55 * For_Eval Evaluate the loop in the passed line.
56 *
57 * For_Run Run accumulated loop
58 */
59
60 #include "make.h"
61
62 /* "@(#)for.c 8.1 (Berkeley) 6/6/93" */
63 MAKE_RCSID("$NetBSD: for.c,v 1.102 2020/10/25 15:26:18 rillig Exp $");
64
/*
 * Bit flags describing which kinds of characters occur in an item, as
 * collected by GetEscapes.
 */
typedef enum ForEscapes {
    FOR_SUB_ESCAPE_CHAR = 1 << 0,	/* item contains ':', '$' or '\\' */
    FOR_SUB_ESCAPE_BRACE = 1 << 1,	/* item contains '}' */
    FOR_SUB_ESCAPE_PAREN = 1 << 2	/* item contains ')' */
} ForEscapes;
70
/*
 * Nesting depth of .for directives while a loop body is being accumulated;
 * set to 1 when a loop starts and counted up/down by For_Accum.
 */
static int forLevel = 0;
72
/* An iteration variable: one of the names before the "in" of a .for line. */
typedef struct ForVar {
    char *name;		/* allocated copy of the name, released by ForVarDone */
    size_t len;		/* number of bytes in name */
} ForVar;
78
79 /*
80 * State of a for loop.
81 */
82 typedef struct For {
83 Buffer buf; /* Body of loop */
84 Vector /* of ForVar */ vars; /* Iteration variables */
85 Words items; /* Substitution items */
86 char *parse_buf;
87 /* Is any of the names 1 character long? If so, when the variable values
88 * are substituted, the parser must handle $V expressions as well, not
89 * only ${V} and $(V). */
90 Boolean short_var;
91 unsigned int sub_next;
92 } For;
93
94 static For *accumFor; /* Loop being accumulated */
95
96 static void
97 ForAddVar(For *f, const char *name, size_t len)
98 {
99 ForVar *var = Vector_Push(&f->vars);
100 var->name = bmake_strldup(name, len);
101 var->len = len;
102 }
103
104 static void
105 ForVarDone(ForVar *var)
106 {
107 free(var->name);
108 }
109
110 static void
111 For_Free(For *arg)
112 {
113 Buf_Destroy(&arg->buf, TRUE);
114
115 while (arg->vars.len > 0)
116 ForVarDone(Vector_Pop(&arg->vars));
117 Vector_Done(&arg->vars);
118
119 Words_Free(arg->items);
120 free(arg->parse_buf);
121
122 free(arg);
123 }
124
125 static ForEscapes
126 GetEscapes(const char *word)
127 {
128 const char *p;
129 ForEscapes escapes = 0;
130
131 for (p = word; *p != '\0'; p++) {
132 switch (*p) {
133 case ':':
134 case '$':
135 case '\\':
136 escapes |= FOR_SUB_ESCAPE_CHAR;
137 break;
138 case ')':
139 escapes |= FOR_SUB_ESCAPE_PAREN;
140 break;
141 case '}':
142 escapes |= FOR_SUB_ESCAPE_BRACE;
143 break;
144 }
145 }
146 return escapes;
147 }
148
149 /* Evaluate the for loop in the passed line. The line looks like this:
150 * .for <varname...> in <value...>
151 *
152 * Input:
153 * line Line to parse
154 *
155 * Results:
156 * 0: Not a .for statement, parse the line
157 * 1: We found a for loop
158 * -1: A .for statement with a bad syntax error, discard.
159 */
160 int
161 For_Eval(const char *line)
162 {
163 For *new_for;
164 const char *ptr;
165
166 /* Skip the '.' and any following whitespace */
167 ptr = line + 1;
168 cpp_skip_whitespace(&ptr);
169
170 /*
171 * If we are not in a for loop quickly determine if the statement is
172 * a for.
173 */
174 if (ptr[0] != 'f' || ptr[1] != 'o' || ptr[2] != 'r' ||
175 !ch_isspace(ptr[3])) {
176 if (ptr[0] == 'e' && strncmp(ptr + 1, "ndfor", 5) == 0) {
177 Parse_Error(PARSE_FATAL, "for-less endfor");
178 return -1;
179 }
180 return 0;
181 }
182 ptr += 3;
183
184 /*
185 * we found a for loop, and now we are going to parse it.
186 */
187
188 new_for = bmake_malloc(sizeof *new_for);
189 Buf_Init(&new_for->buf, 0);
190 Vector_Init(&new_for->vars, sizeof(ForVar));
191 new_for->items.words = NULL;
192 new_for->items.freeIt = NULL;
193 new_for->parse_buf = NULL;
194 new_for->short_var = FALSE;
195 new_for->sub_next = 0;
196
197 /* Grab the variables. Terminate on "in". */
198 for (;;) {
199 size_t len;
200
201 cpp_skip_whitespace(&ptr);
202 if (*ptr == '\0') {
203 Parse_Error(PARSE_FATAL, "missing `in' in for");
204 For_Free(new_for);
205 return -1;
206 }
207
208 for (len = 1; ptr[len] && !ch_isspace(ptr[len]); len++)
209 continue;
210 if (len == 2 && ptr[0] == 'i' && ptr[1] == 'n') {
211 ptr += 2;
212 break;
213 }
214 if (len == 1)
215 new_for->short_var = TRUE;
216
217 ForAddVar(new_for, ptr, len);
218 ptr += len;
219 }
220
221 if (new_for->vars.len == 0) {
222 Parse_Error(PARSE_FATAL, "no iteration variables in for");
223 For_Free(new_for);
224 return -1;
225 }
226
227 cpp_skip_whitespace(&ptr);
228
229 /*
230 * Make a list with the remaining words.
231 * The values are later substituted as ${:U<value>...} so we must
232 * backslash-escape characters that break that syntax.
233 * Variables are fully expanded - so it is safe for escape $.
234 * We can't do the escapes here - because we don't know whether
235 * we will be substituting into ${...} or $(...).
236 */
237 {
238 char *items;
239 (void)Var_Subst(ptr, VAR_GLOBAL, VARE_WANTRES, &items);
240 /* TODO: handle errors */
241 new_for->items = Str_Words(items, FALSE);
242 free(items);
243
244 if (new_for->items.len == 1 && new_for->items.words[0][0] == '\0')
245 new_for->items.len = 0; /* .for var in ${:U} */
246 }
247
248 {
249 size_t nitems, nvars;
250
251 if ((nitems = new_for->items.len) > 0 &&
252 nitems % (nvars = new_for->vars.len)) {
253 Parse_Error(PARSE_FATAL,
254 "Wrong number of words (%zu) in .for substitution list"
255 " with %zu vars", nitems, nvars);
256 /*
257 * Return 'success' so that the body of the .for loop is
258 * accumulated.
259 * Remove all items so that the loop doesn't iterate.
260 */
261 new_for->items.len = 0;
262 }
263 }
264
265 accumFor = new_for;
266 forLevel = 1;
267 return 1;
268 }
269
270 /*
271 * Add another line to a .for loop.
272 * Returns FALSE when the matching .endfor is reached.
273 */
274 Boolean
275 For_Accum(const char *line)
276 {
277 const char *ptr = line;
278
279 if (*ptr == '.') {
280 ptr++;
281 cpp_skip_whitespace(&ptr);
282
283 if (strncmp(ptr, "endfor", 6) == 0 && (ch_isspace(ptr[6]) || !ptr[6])) {
284 DEBUG1(FOR, "For: end for %d\n", forLevel);
285 if (--forLevel <= 0)
286 return FALSE;
287 } else if (strncmp(ptr, "for", 3) == 0 && ch_isspace(ptr[3])) {
288 forLevel++;
289 DEBUG1(FOR, "For: new loop %d\n", forLevel);
290 }
291 }
292
293 Buf_AddStr(&accumFor->buf, line);
294 Buf_AddByte(&accumFor->buf, '\n');
295 return TRUE;
296 }
297
298
/*
 * Determine the length of the variable expression whose text follows a '$'.
 *
 * Input:
 *	var		points to the character right after the '$'
 *
 * Results:
 *	The number of bytes, starting at var, that belong to the expression:
 *	1 for a single-character name, or the length of a balanced (...) or
 *	{...} group including both delimiters.  0 if var is empty or the
 *	closing delimiter is missing, in which case the caller is expected
 *	to escape the '$' instead.
 */
static size_t
for_var_len(const char *var)
{
    char opening = var[0];
    char closing;
    int nesting = 1;
    const char *p;

    switch (opening) {
    case '\0':
        /* Nothing follows the '$'. */
        return 0;
    case '(':
        closing = ')';
        break;
    case '{':
        closing = '}';
        break;
    default:
        /* Single-character variable name. */
        return 1;
    }

    /* Only delimiters of the same kind as the opening one are counted. */
    for (p = var + 1; *p != '\0'; p++) {
        if (*p == opening) {
            nesting++;
        } else if (*p == closing) {
            if (--nesting == 0)
                return (size_t)(p - var) + 1;
        }
    }

    /* Ran off the end of the string without finding the closing delimiter. */
    return 0;
}
330
331 static void
332 for_substitute(Buffer *cmds, const char *item, char ech)
333 {
334 ForEscapes escapes = GetEscapes(item);
335 char ch;
336
337 /* If there were no escapes, or the only escape is the other variable
338 * terminator, then just substitute the full string */
339 if (!(escapes & (ech == ')' ? ~(unsigned)FOR_SUB_ESCAPE_BRACE
340 : ~(unsigned)FOR_SUB_ESCAPE_PAREN))) {
341 Buf_AddStr(cmds, item);
342 return;
343 }
344
345 /* Escape ':', '$', '\\' and 'ech' - these will be removed later by
346 * :U processing, see ApplyModifier_Defined. */
347 while ((ch = *item++) != '\0') {
348 if (ch == '$') {
349 size_t len = for_var_len(item);
350 if (len != 0) {
351 Buf_AddBytes(cmds, item - 1, len + 1);
352 item += len;
353 continue;
354 }
355 Buf_AddByte(cmds, '\\');
356 } else if (ch == ':' || ch == '\\' || ch == ech)
357 Buf_AddByte(cmds, '\\');
358 Buf_AddByte(cmds, ch);
359 }
360 }
361
362 static void
363 SubstVarLong(For *arg, const char **inout_cp, const char **inout_cmd_cp,
364 Buffer *cmds, char ech)
365 {
366 size_t i;
367 const char *cp = *inout_cp;
368 const char *cmd_cp = *inout_cmd_cp;
369
370 for (i = 0; i < arg->vars.len; i++) {
371 ForVar *forVar = Vector_Get(&arg->vars, i);
372 char *var = forVar->name;
373 size_t vlen = forVar->len;
374
375 /* XXX: undefined behavior for cp if vlen is longer than cp? */
376 if (memcmp(cp, var, vlen) != 0)
377 continue;
378 /* XXX: why test for backslash here? */
379 if (cp[vlen] != ':' && cp[vlen] != ech && cp[vlen] != '\\')
380 continue;
381
382 /* Found a variable match. Replace with :U<value> */
383 Buf_AddBytesBetween(cmds, cmd_cp, cp);
384 Buf_AddStr(cmds, ":U");
385 cp += vlen;
386 cmd_cp = cp;
387 for_substitute(cmds, arg->items.words[arg->sub_next + i], ech);
388 break;
389 }
390
391 *inout_cp = cp;
392 *inout_cmd_cp = cmd_cp;
393 }
394
395 static void
396 SubstVarShort(For *arg, char const ch,
397 const char **inout_cp, const char **input_cmd_cp, Buffer *cmds)
398 {
399 const char *cp = *inout_cp;
400 const char *cmd_cp = *input_cmd_cp;
401 size_t i;
402
403 /* Probably a single character name, ignore $$ and stupid ones. {*/
404 if (!arg->short_var || strchr("}):$", ch) != NULL) {
405 cp++;
406 *inout_cp = cp;
407 return;
408 }
409
410 for (i = 0; i < arg->vars.len; i++) {
411 ForVar *forVar = Vector_Get(&arg->vars, i);
412 char *var = forVar->name;
413 if (var[0] != ch || var[1] != 0)
414 continue;
415
416 /* Found a variable match. Replace with ${:U<value>} */
417 Buf_AddBytesBetween(cmds, cmd_cp, cp);
418 Buf_AddStr(cmds, "{:U");
419 cmd_cp = ++cp;
420 for_substitute(cmds, arg->items.words[arg->sub_next + i], '}');
421 Buf_AddByte(cmds, '}');
422 break;
423 }
424
425 *inout_cp = cp;
426 *input_cmd_cp = cmd_cp;
427 }
428
429 /*
430 * Scan the for loop body and replace references to the loop variables
431 * with variable references that expand to the required text.
432 *
433 * Using variable expansions ensures that the .for loop can't generate
434 * syntax, and that the later parsing will still see a variable.
435 * We assume that the null variable will never be defined.
436 *
437 * The detection of substitutions of the loop control variable is naive.
438 * Many of the modifiers use \ to escape $ (not $) so it is possible
439 * to contrive a makefile where an unwanted substitution happens.
440 */
441 static char *
442 ForIterate(void *v_arg, size_t *ret_len)
443 {
444 For *arg = v_arg;
445 const char *cp;
446 const char *cmd_cp;
447 const char *body_end;
448 Buffer cmds;
449 char *cmds_str;
450 size_t cmd_len;
451
452 if (arg->sub_next + arg->vars.len > arg->items.len) {
453 /* No more iterations */
454 For_Free(arg);
455 return NULL;
456 }
457
458 free(arg->parse_buf);
459 arg->parse_buf = NULL;
460
461 cmd_cp = Buf_GetAll(&arg->buf, &cmd_len);
462 body_end = cmd_cp + cmd_len;
463 Buf_Init(&cmds, cmd_len + 256);
464 for (cp = cmd_cp; (cp = strchr(cp, '$')) != NULL;) {
465 char ch, ech;
466 ch = *++cp;
467 if ((ch == '(' && (ech = ')', 1)) || (ch == '{' && (ech = '}', 1))) {
468 cp++;
469 /* Check variable name against the .for loop variables */
470 SubstVarLong(arg, &cp, &cmd_cp, &cmds, ech);
471 continue;
472 }
473 if (ch == '\0')
474 break;
475
476 SubstVarShort(arg, ch, &cp, &cmd_cp, &cmds);
477 }
478 Buf_AddBytesBetween(&cmds, cmd_cp, body_end);
479
480 *ret_len = Buf_Len(&cmds);
481 cmds_str = Buf_Destroy(&cmds, FALSE);
482 DEBUG1(FOR, "For: loop body:\n%s", cmds_str);
483
484 arg->sub_next += arg->vars.len;
485
486 arg->parse_buf = cmds_str;
487 return cmds_str;
488 }
489
490 /* Run the for loop, imitating the actions of an include file. */
491 void
492 For_Run(int lineno)
493 {
494 For *arg;
495
496 arg = accumFor;
497 accumFor = NULL;
498
499 if (arg->items.len == 0) {
500 /* Nothing to expand - possibly due to an earlier syntax error. */
501 For_Free(arg);
502 return;
503 }
504
505 Parse_SetInput(NULL, lineno, -1, ForIterate, arg);
506 }
507