for.c revision 1.105 1 /* $NetBSD: for.c,v 1.105 2020/10/25 15:58:04 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1992, The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Handling of .for/.endfor loops in a makefile.
34 *
35 * For loops are of the form:
36 *
37 * .for <varname...> in <value...>
38 * ...
39 * .endfor
40 *
41 * When a .for line is parsed, all following lines are accumulated into a
42 * buffer, up to but excluding the corresponding .endfor line. To find the
43 * corresponding .endfor, the number of nested .for and .endfor directives
44 * are counted.
45 *
46 * During parsing, any nested .for loops are just passed through; they get
47 * handled recursively in For_Eval when the enclosing .for loop is evaluated
48 * in For_Run.
49 *
50 * When the .for loop has been parsed completely, the variable expressions
51 * for the iteration variables are replaced with expressions of the form
52 * ${:Uvalue}, and then this modified body is "included" as a special file.
53 *
 * Interface:
 *	For_Eval	Evaluate the loop in the passed line.
 *
 *	For_Accum	Add another line to the loop that is being accumulated.
 *
 *	For_Run		Run the accumulated loop.
58 */
59
60 #include "make.h"
61
62 /* "@(#)for.c 8.1 (Berkeley) 6/6/93" */
63 MAKE_RCSID("$NetBSD: for.c,v 1.105 2020/10/25 15:58:04 rillig Exp $");
64
/*
 * Bit flags that record which characters of a .for item need attention
 * when the item is spliced into a ${:U...} or $(:U...) expression.
 */
typedef enum ForEscapes {
    FOR_SUB_ESCAPE_CHAR  = 1 << 0,  /* item contains ':', '$' or '\\' */
    FOR_SUB_ESCAPE_BRACE = 1 << 1,  /* item contains '}' */
    FOR_SUB_ESCAPE_PAREN = 1 << 2   /* item contains ')' */
} ForEscapes;
70
/*
 * Nesting depth of .for directives while the lines of a loop body are
 * being accumulated; starts out as 0.
 */
static int forLevel;
72
/* A single iteration variable, i.e. one of the names to the left of "in". */
struct ForVar {
    char *name;     /* allocated in ForAddVar, released in ForVarDone */
    size_t len;     /* length of the name as passed to ForAddVar */
};
typedef struct ForVar ForVar;
78
79 /*
80 * State of a for loop.
81 */
82 typedef struct For {
83 Buffer body; /* Unexpanded body of the loop */
84 Vector /* of ForVar */ vars; /* Iteration variables */
85 Words items; /* Substitution items */
86 Buffer curBody; /* Expanded body of the current iteration */
87 /* Is any of the names 1 character long? If so, when the variable values
88 * are substituted, the parser must handle $V expressions as well, not
89 * only ${V} and $(V). */
90 Boolean short_var;
91 unsigned int sub_next; /* Where to continue iterating */
92 } For;
93
94 static For *accumFor; /* Loop being accumulated */
95
96 static void
97 ForAddVar(For *f, const char *name, size_t len)
98 {
99 ForVar *var = Vector_Push(&f->vars);
100 var->name = bmake_strldup(name, len);
101 var->len = len;
102 }
103
104 static void
105 ForVarDone(ForVar *var)
106 {
107 free(var->name);
108 }
109
110 static void
111 For_Free(For *arg)
112 {
113 Buf_Destroy(&arg->body, TRUE);
114
115 while (arg->vars.len > 0)
116 ForVarDone(Vector_Pop(&arg->vars));
117 Vector_Done(&arg->vars);
118
119 Words_Free(arg->items);
120 Buf_Destroy(&arg->curBody, TRUE);
121
122 free(arg);
123 }
124
125 static ForEscapes
126 GetEscapes(const char *word)
127 {
128 const char *p;
129 ForEscapes escapes = 0;
130
131 for (p = word; *p != '\0'; p++) {
132 switch (*p) {
133 case ':':
134 case '$':
135 case '\\':
136 escapes |= FOR_SUB_ESCAPE_CHAR;
137 break;
138 case ')':
139 escapes |= FOR_SUB_ESCAPE_PAREN;
140 break;
141 case '}':
142 escapes |= FOR_SUB_ESCAPE_BRACE;
143 break;
144 }
145 }
146 return escapes;
147 }
148
149 /* Evaluate the for loop in the passed line. The line looks like this:
150 * .for <varname...> in <value...>
151 *
152 * Input:
153 * line Line to parse
154 *
155 * Results:
156 * 0: Not a .for statement, parse the line
157 * 1: We found a for loop
158 * -1: A .for statement with a bad syntax error, discard.
159 */
160 int
161 For_Eval(const char *line)
162 {
163 For *new_for;
164 const char *p;
165
166 /* Skip the '.' and any following whitespace */
167 p = line + 1;
168 cpp_skip_whitespace(&p);
169
170 /*
171 * If we are not in a for loop quickly determine if the statement is
172 * a for.
173 */
174 if (p[0] != 'f' || p[1] != 'o' || p[2] != 'r' || !ch_isspace(p[3])) {
175 if (p[0] == 'e' && strncmp(p + 1, "ndfor", 5) == 0) {
176 Parse_Error(PARSE_FATAL, "for-less endfor");
177 return -1;
178 }
179 return 0;
180 }
181 p += 3;
182
183 /*
184 * we found a for loop, and now we are going to parse it.
185 */
186
187 new_for = bmake_malloc(sizeof *new_for);
188 Buf_Init(&new_for->body, 0);
189 Vector_Init(&new_for->vars, sizeof(ForVar));
190 new_for->items.words = NULL;
191 new_for->items.freeIt = NULL;
192 Buf_Init(&new_for->curBody, 0);
193 new_for->short_var = FALSE;
194 new_for->sub_next = 0;
195
196 /* Grab the variables. Terminate on "in". */
197 for (;;) {
198 size_t len;
199
200 cpp_skip_whitespace(&p);
201 if (*p == '\0') {
202 Parse_Error(PARSE_FATAL, "missing `in' in for");
203 For_Free(new_for);
204 return -1;
205 }
206
207 /* XXX: This allows arbitrary variable names; see directive-for.mk. */
208 for (len = 1; p[len] && !ch_isspace(p[len]); len++)
209 continue;
210
211 if (len == 2 && p[0] == 'i' && p[1] == 'n') {
212 p += 2;
213 break;
214 }
215 if (len == 1)
216 new_for->short_var = TRUE;
217
218 ForAddVar(new_for, p, len);
219 p += len;
220 }
221
222 if (new_for->vars.len == 0) {
223 Parse_Error(PARSE_FATAL, "no iteration variables in for");
224 For_Free(new_for);
225 return -1;
226 }
227
228 cpp_skip_whitespace(&p);
229
230 /*
231 * Make a list with the remaining words.
232 * The values are later substituted as ${:U<value>...} so we must
233 * backslash-escape characters that break that syntax.
234 * Variables are fully expanded - so it is safe for escape $.
235 * We can't do the escapes here - because we don't know whether
236 * we will be substituting into ${...} or $(...).
237 */
238 {
239 char *items;
240 (void)Var_Subst(p, VAR_GLOBAL, VARE_WANTRES, &items);
241 /* TODO: handle errors */
242 new_for->items = Str_Words(items, FALSE);
243 free(items);
244
245 if (new_for->items.len == 1 && new_for->items.words[0][0] == '\0')
246 new_for->items.len = 0; /* .for var in ${:U} */
247 }
248
249 {
250 size_t nitems, nvars;
251
252 if ((nitems = new_for->items.len) > 0 &&
253 nitems % (nvars = new_for->vars.len)) {
254 Parse_Error(PARSE_FATAL,
255 "Wrong number of words (%zu) in .for substitution list"
256 " with %zu vars", nitems, nvars);
257 /*
258 * Return 'success' so that the body of the .for loop is
259 * accumulated.
260 * Remove all items so that the loop doesn't iterate.
261 */
262 new_for->items.len = 0;
263 }
264 }
265
266 accumFor = new_for;
267 forLevel = 1;
268 return 1;
269 }
270
271 /*
272 * Add another line to a .for loop.
273 * Returns FALSE when the matching .endfor is reached.
274 */
275 Boolean
276 For_Accum(const char *line)
277 {
278 const char *ptr = line;
279
280 if (*ptr == '.') {
281 ptr++;
282 cpp_skip_whitespace(&ptr);
283
284 if (strncmp(ptr, "endfor", 6) == 0 && (ch_isspace(ptr[6]) || !ptr[6])) {
285 DEBUG1(FOR, "For: end for %d\n", forLevel);
286 if (--forLevel <= 0)
287 return FALSE;
288 } else if (strncmp(ptr, "for", 3) == 0 && ch_isspace(ptr[3])) {
289 forLevel++;
290 DEBUG1(FOR, "For: new loop %d\n", forLevel);
291 }
292 }
293
294 Buf_AddStr(&accumFor->body, line);
295 Buf_AddByte(&accumFor->body, '\n');
296 return TRUE;
297 }
298
299
/*
 * Determine the length of the variable expression that follows a '$'.
 * The parameter var points to the character right after the '$'.
 *
 * Returns 0 if var is empty, or if it starts with '(' or '{' and the
 * matching closing character is missing; in both cases the '$' is meant
 * to be escaped by the caller.  Returns 1 if var starts with any other
 * character, which is then a single-character variable name.  Otherwise
 * returns the number of characters from the opening '(' or '{' up to and
 * including the matching ')' or '}'; only brackets of the same kind as
 * the opening one are counted for nesting.
 */
static size_t
for_var_len(const char *var)
{
    const char opener = var[0];
    char closer;
    const char *p;
    int depth = 1;

    switch (opener) {
    case '\0':
        /* Nothing follows, just escape the $. */
        return 0;
    case '(':
        closer = ')';
        break;
    case '{':
        closer = '}';
        break;
    default:
        /* A variable with a single-character name. */
        return 1;
    }

    for (p = var + 1; *p != '\0'; p++) {
        if (*p == opener)
            depth++;
        else if (*p == closer && --depth == 0)
            return (size_t)(p - var) + 1;
    }

    /* The end of the expression is missing, escape the $. */
    return 0;
}
331
332 static void
333 for_substitute(Buffer *cmds, const char *item, char ech)
334 {
335 ForEscapes escapes = GetEscapes(item);
336 char ch;
337
338 /* If there were no escapes, or the only escape is the other variable
339 * terminator, then just substitute the full string */
340 if (!(escapes & (ech == ')' ? ~(unsigned)FOR_SUB_ESCAPE_BRACE
341 : ~(unsigned)FOR_SUB_ESCAPE_PAREN))) {
342 Buf_AddStr(cmds, item);
343 return;
344 }
345
346 /* Escape ':', '$', '\\' and 'ech' - these will be removed later by
347 * :U processing, see ApplyModifier_Defined. */
348 while ((ch = *item++) != '\0') {
349 if (ch == '$') {
350 size_t len = for_var_len(item);
351 if (len != 0) {
352 Buf_AddBytes(cmds, item - 1, len + 1);
353 item += len;
354 continue;
355 }
356 Buf_AddByte(cmds, '\\');
357 } else if (ch == ':' || ch == '\\' || ch == ech)
358 Buf_AddByte(cmds, '\\');
359 Buf_AddByte(cmds, ch);
360 }
361 }
362
363 static void
364 SubstVarLong(For *arg, const char **inout_cp, const char **inout_cmd_cp,
365 char ech)
366 {
367 size_t i;
368 const char *cp = *inout_cp;
369 const char *cmd_cp = *inout_cmd_cp;
370
371 for (i = 0; i < arg->vars.len; i++) {
372 ForVar *forVar = Vector_Get(&arg->vars, i);
373 char *var = forVar->name;
374 size_t vlen = forVar->len;
375
376 /* XXX: undefined behavior for cp if vlen is longer than cp? */
377 if (memcmp(cp, var, vlen) != 0)
378 continue;
379 /* XXX: why test for backslash here? */
380 if (cp[vlen] != ':' && cp[vlen] != ech && cp[vlen] != '\\')
381 continue;
382
383 /* Found a variable match. Replace with :U<value> */
384 Buf_AddBytesBetween(&arg->curBody, cmd_cp, cp);
385 Buf_AddStr(&arg->curBody, ":U");
386 cp += vlen;
387 cmd_cp = cp;
388 for_substitute(&arg->curBody, arg->items.words[arg->sub_next + i], ech);
389 break;
390 }
391
392 *inout_cp = cp;
393 *inout_cmd_cp = cmd_cp;
394 }
395
396 static void
397 SubstVarShort(For *arg, char const ch,
398 const char **inout_cp, const char **input_cmd_cp)
399 {
400 const char *cp = *inout_cp;
401 const char *cmd_cp = *input_cmd_cp;
402 size_t i;
403
404 /* Probably a single character name, ignore $$ and stupid ones. {*/
405 if (!arg->short_var || strchr("}):$", ch) != NULL) {
406 cp++;
407 *inout_cp = cp;
408 return;
409 }
410
411 for (i = 0; i < arg->vars.len; i++) {
412 ForVar *forVar = Vector_Get(&arg->vars, i);
413 char *var = forVar->name;
414 if (var[0] != ch || var[1] != 0)
415 continue;
416
417 /* Found a variable match. Replace with ${:U<value>} */
418 Buf_AddBytesBetween(&arg->curBody, cmd_cp, cp);
419 Buf_AddStr(&arg->curBody, "{:U");
420 cmd_cp = ++cp;
421 for_substitute(&arg->curBody, arg->items.words[arg->sub_next + i], '}');
422 Buf_AddByte(&arg->curBody, '}');
423 break;
424 }
425
426 *inout_cp = cp;
427 *input_cmd_cp = cmd_cp;
428 }
429
430 /*
431 * Scan the for loop body and replace references to the loop variables
432 * with variable references that expand to the required text.
433 *
434 * Using variable expansions ensures that the .for loop can't generate
435 * syntax, and that the later parsing will still see a variable.
436 * We assume that the null variable will never be defined.
437 *
438 * The detection of substitutions of the loop control variable is naive.
439 * Many of the modifiers use \ to escape $ (not $) so it is possible
440 * to contrive a makefile where an unwanted substitution happens.
441 */
442 static char *
443 ForIterate(void *v_arg, size_t *ret_len)
444 {
445 For *arg = v_arg;
446 const char *cp;
447 const char *cmd_cp;
448 const char *body_end;
449 char *cmds_str;
450 size_t cmd_len;
451
452 if (arg->sub_next + arg->vars.len > arg->items.len) {
453 /* No more iterations */
454 For_Free(arg);
455 return NULL;
456 }
457
458 Buf_Empty(&arg->curBody);
459
460 cmd_cp = Buf_GetAll(&arg->body, &cmd_len);
461 body_end = cmd_cp + cmd_len;
462 for (cp = cmd_cp; (cp = strchr(cp, '$')) != NULL;) {
463 char ch, ech;
464 ch = *++cp;
465 if ((ch == '(' && (ech = ')', 1)) || (ch == '{' && (ech = '}', 1))) {
466 cp++;
467 /* Check variable name against the .for loop variables */
468 SubstVarLong(arg, &cp, &cmd_cp, ech);
469 continue;
470 }
471 if (ch == '\0')
472 break;
473
474 SubstVarShort(arg, ch, &cp, &cmd_cp);
475 }
476 Buf_AddBytesBetween(&arg->curBody, cmd_cp, body_end);
477
478 *ret_len = Buf_Len(&arg->curBody);
479 cmds_str = Buf_GetAll(&arg->curBody, NULL);
480 DEBUG1(FOR, "For: loop body:\n%s", cmds_str);
481
482 arg->sub_next += arg->vars.len;
483
484 return cmds_str;
485 }
486
487 /* Run the for loop, imitating the actions of an include file. */
488 void
489 For_Run(int lineno)
490 {
491 For *arg;
492
493 arg = accumFor;
494 accumFor = NULL;
495
496 if (arg->items.len == 0) {
497 /* Nothing to expand - possibly due to an earlier syntax error. */
498 For_Free(arg);
499 return;
500 }
501
502 Parse_SetInput(NULL, lineno, -1, ForIterate, arg);
503 }
504