for.c revision 1.107 1 /* $NetBSD: for.c,v 1.107 2020/10/25 16:15:48 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1992, The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Handling of .for/.endfor loops in a makefile.
34 *
35 * For loops are of the form:
36 *
37 * .for <varname...> in <value...>
38 * ...
39 * .endfor
40 *
41 * When a .for line is parsed, all following lines are accumulated into a
42 * buffer, up to but excluding the corresponding .endfor line. To find the
43 * corresponding .endfor, the number of nested .for and .endfor directives
44 * are counted.
45 *
46 * During parsing, any nested .for loops are just passed through; they get
47 * handled recursively in For_Eval when the enclosing .for loop is evaluated
48 * in For_Run.
49 *
50 * When the .for loop has been parsed completely, the variable expressions
51 * for the iteration variables are replaced with expressions of the form
52 * ${:Uvalue}, and then this modified body is "included" as a special file.
53 *
54 * Interface:
 *	For_Eval	Parse the head of a .for loop from the passed line.
 *
 *	For_Accum	Add a line to the body of the loop being accumulated.
 *
 *	For_Run		Run the accumulated loop.
58 */
59
60 #include "make.h"
61
62 /* "@(#)for.c 8.1 (Berkeley) 6/6/93" */
63 MAKE_RCSID("$NetBSD: for.c,v 1.107 2020/10/25 16:15:48 rillig Exp $");
64
/*
 * Flags describing which kinds of characters occur in a .for item.
 *
 * Items are substituted into the loop body as ${:U<value>...}, so any
 * character that would break this syntax has to be backslash-escaped.
 */
typedef enum ForEscapes {
    FOR_SUB_ESCAPE_CHAR  = 1 << 0,	/* item contains ':', '$' or '\\' */
    FOR_SUB_ESCAPE_BRACE = 1 << 1,	/* item contains '}' */
    FOR_SUB_ESCAPE_PAREN = 1 << 2	/* item contains ')' */
} ForEscapes;
72
/*
 * Nesting depth of .for directives seen while accumulating a loop body:
 * set to 1 when a loop head is accepted, incremented for each nested .for
 * and decremented for each .endfor.
 */
static int forLevel = 0;
74
/*
 * A single iteration variable, i.e. one of the names listed to the left
 * of the "in" in a .for loop.
 */
typedef struct ForVar {
    char *name;		/* heap-allocated copy of the variable name */
    size_t len;		/* length of the name in bytes */
} ForVar;
80
81 /*
82 * State of a for loop.
83 */
84 typedef struct For {
85 Buffer body; /* Unexpanded body of the loop */
86 Vector /* of ForVar */ vars; /* Iteration variables */
87 Words items; /* Substitution items */
88 Buffer curBody; /* Expanded body of the current iteration */
89 /* Is any of the names 1 character long? If so, when the variable values
90 * are substituted, the parser must handle $V expressions as well, not
91 * only ${V} and $(V). */
92 Boolean short_var;
93 unsigned int sub_next; /* Where to continue iterating */
94 } For;
95
96 static For *accumFor; /* Loop being accumulated */
97
98 static void
99 ForAddVar(For *f, const char *name, size_t len)
100 {
101 ForVar *var = Vector_Push(&f->vars);
102 var->name = bmake_strldup(name, len);
103 var->len = len;
104 }
105
106 static void
107 ForVarDone(ForVar *var)
108 {
109 free(var->name);
110 }
111
112 static void
113 For_Free(For *arg)
114 {
115 Buf_Destroy(&arg->body, TRUE);
116
117 while (arg->vars.len > 0)
118 ForVarDone(Vector_Pop(&arg->vars));
119 Vector_Done(&arg->vars);
120
121 Words_Free(arg->items);
122 Buf_Destroy(&arg->curBody, TRUE);
123
124 free(arg);
125 }
126
127 static ForEscapes
128 GetEscapes(const char *word)
129 {
130 const char *p;
131 ForEscapes escapes = 0;
132
133 for (p = word; *p != '\0'; p++) {
134 switch (*p) {
135 case ':':
136 case '$':
137 case '\\':
138 escapes |= FOR_SUB_ESCAPE_CHAR;
139 break;
140 case ')':
141 escapes |= FOR_SUB_ESCAPE_PAREN;
142 break;
143 case '}':
144 escapes |= FOR_SUB_ESCAPE_BRACE;
145 break;
146 }
147 }
148 return escapes;
149 }
150
151 static Boolean
152 IsFor(const char *p)
153 {
154 return p[0] == 'f' && p[1] == 'o' && p[2] == 'r' && ch_isspace(p[3]);
155 }
156
157 static Boolean
158 IsEndfor(const char *p)
159 {
160 return p[0] == 'e' && strncmp(p, "endfor", 6) == 0 &&
161 (p[6] == '\0' || ch_isspace(p[6]));
162 }
163
164 /* Evaluate the for loop in the passed line. The line looks like this:
165 * .for <varname...> in <value...>
166 *
167 * Input:
168 * line Line to parse
169 *
170 * Results:
171 * 0: Not a .for statement, parse the line
172 * 1: We found a for loop
173 * -1: A .for statement with a bad syntax error, discard.
174 */
175 int
176 For_Eval(const char *line)
177 {
178 For *new_for;
179 const char *p;
180
181 /* Skip the '.' and any following whitespace */
182 p = line + 1;
183 cpp_skip_whitespace(&p);
184
185 /*
186 * If we are not in a for loop quickly determine if the statement is
187 * a for.
188 */
189 if (!IsFor(p)) {
190 if (IsEndfor(p)) {
191 Parse_Error(PARSE_FATAL, "for-less endfor");
192 return -1;
193 }
194 return 0;
195 }
196 p += 3;
197
198 /*
199 * we found a for loop, and now we are going to parse it.
200 */
201
202 new_for = bmake_malloc(sizeof *new_for);
203 Buf_Init(&new_for->body, 0);
204 Vector_Init(&new_for->vars, sizeof(ForVar));
205 new_for->items.words = NULL;
206 new_for->items.freeIt = NULL;
207 Buf_Init(&new_for->curBody, 0);
208 new_for->short_var = FALSE;
209 new_for->sub_next = 0;
210
211 /* Grab the variables. Terminate on "in". */
212 for (;;) {
213 size_t len;
214
215 cpp_skip_whitespace(&p);
216 if (*p == '\0') {
217 Parse_Error(PARSE_FATAL, "missing `in' in for");
218 For_Free(new_for);
219 return -1;
220 }
221
222 /* XXX: This allows arbitrary variable names; see directive-for.mk. */
223 for (len = 1; p[len] != '\0' && !ch_isspace(p[len]); len++)
224 continue;
225
226 if (len == 2 && p[0] == 'i' && p[1] == 'n') {
227 p += 2;
228 break;
229 }
230 if (len == 1)
231 new_for->short_var = TRUE;
232
233 ForAddVar(new_for, p, len);
234 p += len;
235 }
236
237 if (new_for->vars.len == 0) {
238 Parse_Error(PARSE_FATAL, "no iteration variables in for");
239 For_Free(new_for);
240 return -1;
241 }
242
243 cpp_skip_whitespace(&p);
244
245 {
246 char *items;
247 (void)Var_Subst(p, VAR_GLOBAL, VARE_WANTRES, &items);
248 /* TODO: handle errors */
249 new_for->items = Str_Words(items, FALSE);
250 free(items);
251
252 if (new_for->items.len == 1 && new_for->items.words[0][0] == '\0')
253 new_for->items.len = 0; /* .for var in ${:U} */
254 }
255
256 {
257 size_t nitems, nvars;
258
259 if ((nitems = new_for->items.len) > 0 &&
260 nitems % (nvars = new_for->vars.len)) {
261 Parse_Error(PARSE_FATAL,
262 "Wrong number of words (%zu) in .for substitution list"
263 " with %zu variables", nitems, nvars);
264 /*
265 * Return 'success' so that the body of the .for loop is
266 * accumulated.
267 * Remove all items so that the loop doesn't iterate.
268 */
269 new_for->items.len = 0;
270 }
271 }
272
273 accumFor = new_for;
274 forLevel = 1;
275 return 1;
276 }
277
278 /*
279 * Add another line to a .for loop.
280 * Returns FALSE when the matching .endfor is reached.
281 */
282 Boolean
283 For_Accum(const char *line)
284 {
285 const char *ptr = line;
286
287 if (*ptr == '.') {
288 ptr++;
289 cpp_skip_whitespace(&ptr);
290
291 if (IsEndfor(ptr)) {
292 DEBUG1(FOR, "For: end for %d\n", forLevel);
293 if (--forLevel <= 0)
294 return FALSE;
295 } else if (IsFor(ptr)) {
296 forLevel++;
297 DEBUG1(FOR, "For: new loop %d\n", forLevel);
298 }
299 }
300
301 Buf_AddStr(&accumFor->body, line);
302 Buf_AddByte(&accumFor->body, '\n');
303 return TRUE;
304 }
305
306
/*
 * Measure the variable expression that follows a '$'.
 *
 * var points to the character right after the '$'.  Returns the number
 * of bytes that belong to the expression:
 *	0	the string ends here, or a '(' or '{' is never closed;
 *		the caller has to escape the '$'
 *	1	a single-character variable name
 *	n	the length of a complete (...) or {...} expression,
 *		including both delimiters; nested delimiters of the same
 *		kind are balanced
 */
static size_t
for_var_len(const char *var)
{
    const char opener = var[0];
    char closer;
    int nesting = 1;
    size_t pos;

    switch (opener) {
    case '\0':
        return 0;
    case '(':
        closer = ')';
        break;
    case '{':
        closer = '}';
        break;
    default:
        return 1;
    }

    for (pos = 1; var[pos] != '\0'; pos++) {
        if (var[pos] == opener)
            nesting++;
        else if (var[pos] == closer && --nesting == 0)
            return pos + 1;
    }

    /* The closing delimiter is missing. */
    return 0;
}
338
339 static void
340 for_substitute(Buffer *cmds, const char *item, char ech)
341 {
342 ForEscapes escapes = GetEscapes(item);
343 char ch;
344
345 /* If there were no escapes, or the only escape is the other variable
346 * terminator, then just substitute the full string */
347 if (!(escapes & (ech == ')' ? ~(unsigned)FOR_SUB_ESCAPE_BRACE
348 : ~(unsigned)FOR_SUB_ESCAPE_PAREN))) {
349 Buf_AddStr(cmds, item);
350 return;
351 }
352
353 /* Escape ':', '$', '\\' and 'ech' - these will be removed later by
354 * :U processing, see ApplyModifier_Defined. */
355 while ((ch = *item++) != '\0') {
356 if (ch == '$') {
357 size_t len = for_var_len(item);
358 if (len != 0) {
359 Buf_AddBytes(cmds, item - 1, len + 1);
360 item += len;
361 continue;
362 }
363 Buf_AddByte(cmds, '\\');
364 } else if (ch == ':' || ch == '\\' || ch == ech)
365 Buf_AddByte(cmds, '\\');
366 Buf_AddByte(cmds, ch);
367 }
368 }
369
370 static void
371 SubstVarLong(For *arg, const char **inout_cp, const char **inout_cmd_cp,
372 char ech)
373 {
374 size_t i;
375 const char *cp = *inout_cp;
376 const char *cmd_cp = *inout_cmd_cp;
377
378 for (i = 0; i < arg->vars.len; i++) {
379 ForVar *forVar = Vector_Get(&arg->vars, i);
380 char *var = forVar->name;
381 size_t vlen = forVar->len;
382
383 /* XXX: undefined behavior for cp if vlen is longer than cp? */
384 if (memcmp(cp, var, vlen) != 0)
385 continue;
386 /* XXX: why test for backslash here? */
387 if (cp[vlen] != ':' && cp[vlen] != ech && cp[vlen] != '\\')
388 continue;
389
390 /* Found a variable match. Replace with :U<value> */
391 Buf_AddBytesBetween(&arg->curBody, cmd_cp, cp);
392 Buf_AddStr(&arg->curBody, ":U");
393 cp += vlen;
394 cmd_cp = cp;
395 for_substitute(&arg->curBody, arg->items.words[arg->sub_next + i], ech);
396 break;
397 }
398
399 *inout_cp = cp;
400 *inout_cmd_cp = cmd_cp;
401 }
402
403 static void
404 SubstVarShort(For *arg, char const ch,
405 const char **inout_cp, const char **input_cmd_cp)
406 {
407 const char *cp = *inout_cp;
408 const char *cmd_cp = *input_cmd_cp;
409 size_t i;
410
411 /* Probably a single character name, ignore $$ and stupid ones. {*/
412 if (!arg->short_var || strchr("}):$", ch) != NULL) {
413 cp++;
414 *inout_cp = cp;
415 return;
416 }
417
418 for (i = 0; i < arg->vars.len; i++) {
419 ForVar *forVar = Vector_Get(&arg->vars, i);
420 char *var = forVar->name;
421 if (var[0] != ch || var[1] != 0)
422 continue;
423
424 /* Found a variable match. Replace with ${:U<value>} */
425 Buf_AddBytesBetween(&arg->curBody, cmd_cp, cp);
426 Buf_AddStr(&arg->curBody, "{:U");
427 cmd_cp = ++cp;
428 for_substitute(&arg->curBody, arg->items.words[arg->sub_next + i], '}');
429 Buf_AddByte(&arg->curBody, '}');
430 break;
431 }
432
433 *inout_cp = cp;
434 *input_cmd_cp = cmd_cp;
435 }
436
437 /*
438 * Scan the for loop body and replace references to the loop variables
439 * with variable references that expand to the required text.
440 *
441 * Using variable expansions ensures that the .for loop can't generate
442 * syntax, and that the later parsing will still see a variable.
443 * We assume that the null variable will never be defined.
444 *
445 * The detection of substitutions of the loop control variable is naive.
446 * Many of the modifiers use \ to escape $ (not $) so it is possible
447 * to contrive a makefile where an unwanted substitution happens.
448 */
449 static char *
450 ForIterate(void *v_arg, size_t *ret_len)
451 {
452 For *arg = v_arg;
453 const char *cp;
454 const char *cmd_cp;
455 const char *body_end;
456 char *cmds_str;
457 size_t cmd_len;
458
459 if (arg->sub_next + arg->vars.len > arg->items.len) {
460 /* No more iterations */
461 For_Free(arg);
462 return NULL;
463 }
464
465 Buf_Empty(&arg->curBody);
466
467 cmd_cp = Buf_GetAll(&arg->body, &cmd_len);
468 body_end = cmd_cp + cmd_len;
469 for (cp = cmd_cp; (cp = strchr(cp, '$')) != NULL;) {
470 char ch, ech;
471 ch = *++cp;
472 if ((ch == '(' && (ech = ')', 1)) || (ch == '{' && (ech = '}', 1))) {
473 cp++;
474 /* Check variable name against the .for loop variables */
475 SubstVarLong(arg, &cp, &cmd_cp, ech);
476 continue;
477 }
478 if (ch == '\0')
479 break;
480
481 SubstVarShort(arg, ch, &cp, &cmd_cp);
482 }
483 Buf_AddBytesBetween(&arg->curBody, cmd_cp, body_end);
484
485 *ret_len = Buf_Len(&arg->curBody);
486 cmds_str = Buf_GetAll(&arg->curBody, NULL);
487 DEBUG1(FOR, "For: loop body:\n%s", cmds_str);
488
489 arg->sub_next += arg->vars.len;
490
491 return cmds_str;
492 }
493
494 /* Run the for loop, imitating the actions of an include file. */
495 void
496 For_Run(int lineno)
497 {
498 For *arg;
499
500 arg = accumFor;
501 accumFor = NULL;
502
503 if (arg->items.len == 0) {
504 /* Nothing to expand - possibly due to an earlier syntax error. */
505 For_Free(arg);
506 return;
507 }
508
509 Parse_SetInput(NULL, lineno, -1, ForIterate, arg);
510 }
511