for.c revision 1.147 1 /* $NetBSD: for.c,v 1.147 2021/09/02 07:02:07 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1992, The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Handling of .for/.endfor loops in a makefile.
34 *
35 * For loops have the form:
36 *
37 * .for <varname...> in <value...>
38 * # the body
39 * .endfor
40 *
41 * When a .for line is parsed, the following lines are copied to the body of
42 * the .for loop, until the corresponding .endfor line is reached. In this
43 * phase, the body is not yet evaluated. This also applies to any nested
44 * .for loops.
45 *
46 * After reaching the .endfor, the values from the .for line are grouped
47 * according to the number of variables. For each such group, the unexpanded
48 * body is scanned for variable expressions, and those that match the variable
49 * names are replaced with expressions of the form ${:U...} or $(:U...).
50 * After that, the body is treated like a file from an .include directive.
51 *
 * Interface:
 *	For_Eval	Evaluate the loop in the passed line.
 *
 *	For_Accum	Add another line to the body of the loop that is
 *			being accumulated.
 *
 *	For_Run		Run the accumulated loop.
56 */
57
58 #include "make.h"
59
60 /* "@(#)for.c 8.1 (Berkeley) 6/6/93" */
61 MAKE_RCSID("$NetBSD: for.c,v 1.147 2021/09/02 07:02:07 rillig Exp $");
62
63
/*
 * A single iteration variable, that is, one of the names to the left of
 * the "in" in a .for line.
 */
typedef struct ForVar {
	char *name;		/* copy of the name, freed in ForLoop_Free */
	size_t nameLen;		/* number of bytes in the name */
} ForVar;
69
70 typedef struct ForLoop {
71 Buffer body; /* Unexpanded body of the loop */
72 Vector /* of ForVar */ vars; /* Iteration variables */
73 Words items; /* Substitution items */
74 Buffer curBody; /* Expanded body of the current iteration */
75 unsigned int sub_next; /* Where to continue iterating */
76 } ForLoop;
77
78
79 static ForLoop *accumFor; /* Loop being accumulated */
80 static int forLevel = 0; /* Nesting level */
81
82
83 static ForLoop *
84 ForLoop_New(void)
85 {
86 ForLoop *f = bmake_malloc(sizeof *f);
87
88 Buf_Init(&f->body);
89 Vector_Init(&f->vars, sizeof(ForVar));
90 f->items.words = NULL;
91 f->items.freeIt = NULL;
92 Buf_Init(&f->curBody);
93 f->sub_next = 0;
94
95 return f;
96 }
97
98 static void
99 ForLoop_Free(ForLoop *f)
100 {
101 Buf_Done(&f->body);
102
103 while (f->vars.len > 0) {
104 ForVar *var = Vector_Pop(&f->vars);
105 free(var->name);
106 }
107 Vector_Done(&f->vars);
108
109 Words_Free(f->items);
110 Buf_Done(&f->curBody);
111
112 free(f);
113 }
114
115 static void
116 ForLoop_AddVar(ForLoop *f, const char *name, size_t len)
117 {
118 ForVar *var = Vector_Push(&f->vars);
119 var->name = bmake_strldup(name, len);
120 var->nameLen = len;
121 }
122
123 static bool
124 ForLoop_ParseVarnames(ForLoop *f, const char **pp)
125 {
126 const char *p = *pp;
127
128 for (;;) {
129 size_t len;
130
131 cpp_skip_whitespace(&p);
132 if (*p == '\0') {
133 Parse_Error(PARSE_FATAL, "missing `in' in for");
134 return false;
135 }
136
137 /*
138 * XXX: This allows arbitrary variable names;
139 * see directive-for.mk.
140 */
141 for (len = 1; p[len] != '\0' && !ch_isspace(p[len]); len++)
142 continue;
143
144 if (len == 2 && p[0] == 'i' && p[1] == 'n') {
145 p += 2;
146 break;
147 }
148
149 ForLoop_AddVar(f, p, len);
150 p += len;
151 }
152
153 if (f->vars.len == 0) {
154 Parse_Error(PARSE_FATAL, "no iteration variables in for");
155 return false;
156 }
157
158 *pp = p;
159 return true;
160 }
161
162 static bool
163 ForLoop_ParseItems(ForLoop *f, const char *p)
164 {
165 char *items;
166
167 cpp_skip_whitespace(&p);
168
169 if (Var_Subst(p, SCOPE_GLOBAL, VARE_WANTRES, &items) != VPR_OK) {
170 Parse_Error(PARSE_FATAL, "Error in .for loop items");
171 return false;
172 }
173
174 f->items = Str_Words(items, false);
175 free(items);
176
177 if (f->items.len == 1 && f->items.words[0][0] == '\0')
178 f->items.len = 0; /* .for var in ${:U} */
179
180 if (f->items.len != 0 && f->items.len % f->vars.len != 0) {
181 Parse_Error(PARSE_FATAL,
182 "Wrong number of words (%u) in .for "
183 "substitution list with %u variables",
184 (unsigned)f->items.len, (unsigned)f->vars.len);
185 return false;
186 }
187
188 return true;
189 }
190
/* Whether p starts with the word "for", followed by whitespace. */
static bool
IsFor(const char *p)
{
	/* strncmp stops at a NUL, so p[3] is only read after "for". */
	return strncmp(p, "for", 3) == 0 && ch_isspace(p[3]);
}
196
/*
 * Whether p starts with the word "endfor", followed by either whitespace
 * or the end of the string.
 */
static bool
IsEndfor(const char *p)
{
	if (strncmp(p, "endfor", 6) != 0)
		return false;
	return p[6] == '\0' || ch_isspace(p[6]);
}
203
204 /*
205 * Evaluate the for loop in the passed line. The line looks like this:
206 * .for <varname...> in <value...>
207 *
208 * Input:
209 * line Line to parse
210 *
211 * Results:
212 * 0: Not a .for statement, parse the line
213 * 1: We found a for loop
214 * -1: A .for statement with a bad syntax error, discard.
215 */
216 int
217 For_Eval(const char *line)
218 {
219 ForLoop *f;
220 const char *p;
221
222 p = line + 1; /* skip the '.' */
223 cpp_skip_whitespace(&p);
224
225 if (!IsFor(p)) {
226 if (IsEndfor(p)) {
227 Parse_Error(PARSE_FATAL, "for-less endfor");
228 return -1;
229 }
230 return 0;
231 }
232 p += 3;
233
234 f = ForLoop_New();
235
236 if (!ForLoop_ParseVarnames(f, &p)) {
237 ForLoop_Free(f);
238 return -1;
239 }
240
241 if (!ForLoop_ParseItems(f, p)) {
242 /* Continue parsing the .for loop, but don't iterate. */
243 f->items.len = 0;
244 }
245
246 accumFor = f;
247 forLevel = 1;
248 return 1;
249 }
250
251 /*
252 * Add another line to the .for loop that is being built up.
253 * Returns false when the matching .endfor is reached.
254 */
255 bool
256 For_Accum(const char *line)
257 {
258 const char *p = line;
259
260 if (*p == '.') {
261 p++;
262 cpp_skip_whitespace(&p);
263
264 if (IsEndfor(p)) {
265 DEBUG1(FOR, "For: end for %d\n", forLevel);
266 if (--forLevel <= 0)
267 return false;
268 } else if (IsFor(p)) {
269 forLevel++;
270 DEBUG1(FOR, "For: new loop %d\n", forLevel);
271 }
272 }
273
274 Buf_AddStr(&accumFor->body, line);
275 Buf_AddByte(&accumFor->body, '\n');
276 return true;
277 }
278
279
/*
 * Determine the length of the expression that follows a '$'.
 *
 * expr points to the character after the '$'.  The result is:
 *	0	at the end of the string, or if the closing brace or
 *		parenthesis is missing; the caller escapes the '$' then
 *	1	for a single-character variable
 *	n	for a braced or parenthesized expression, counting both
 *		delimiters; nested delimiters of the same kind are balanced
 */
static size_t
ExprLen(const char *expr)
{
	const char *p;
	char opener = expr[0];
	char closer;
	int depth = 1;

	switch (opener) {
	case '\0':
		/* just escape the $ */
		return 0;
	case '(':
		closer = ')';
		break;
	case '{':
		closer = '}';
		break;
	default:
		/* Single char variable */
		return 1;
	}

	for (p = expr + 1; *p != '\0'; p++) {
		if (*p == opener) {
			depth++;
		} else if (*p == closer) {
			depth--;
			if (depth == 0)
				return (size_t)(p - expr) + 1;
		}
	}

	/* Expression end not found, escape the $ */
	return 0;
}
310
/*
 * The items of a .for loop are substituted as ${:U<value>...}.  Tell
 * whether the value contains any character that would break this syntax
 * and therefore has to be treated specially: ':', '$', a backslash, the
 * closing character endc, or a newline.
 */
static bool
NeedsEscapes(const char *value, char endc)
{
	char ch;

	while ((ch = *value++) != '\0') {
		if (ch == endc)
			return true;
		switch (ch) {
		case ':':
		case '$':
		case '\\':
		case '\n':
			return true;
		default:
			break;
		}
	}
	return false;
}
327
328 /*
329 * While expanding the body of a .for loop, write the item in the ${:U...}
330 * expression, escaping characters as needed.
331 *
332 * The result is later unescaped by ApplyModifier_Defined.
333 */
334 static void
335 Buf_AddEscaped(Buffer *cmds, const char *item, char endc)
336 {
337 char ch;
338
339 if (!NeedsEscapes(item, endc)) {
340 Buf_AddStr(cmds, item);
341 return;
342 }
343
344 /* Escape ':', '$', '\\' and 'endc' - these will be removed later by
345 * :U processing, see ApplyModifier_Defined. */
346 while ((ch = *item++) != '\0') {
347 if (ch == '$') {
348 size_t len = ExprLen(item);
349 if (len != 0) {
350 /*
351 * XXX: Should a '\' be added here?
352 * See directive-for-escape.mk, ExprLen.
353 */
354 Buf_AddBytes(cmds, item - 1, len + 1);
355 item += len;
356 continue;
357 }
358 Buf_AddByte(cmds, '\\');
359 } else if (ch == ':' || ch == '\\' || ch == endc)
360 Buf_AddByte(cmds, '\\');
361 else if (ch == '\n') {
362 Parse_Error(PARSE_FATAL, "newline in .for value");
363 ch = ' '; /* prevent newline injection */
364 }
365 Buf_AddByte(cmds, ch);
366 }
367 }
368
369 /*
370 * When expanding the body of a .for loop, replace the variable name of an
371 * expression like ${i} or ${i:...} or $(i) or $(i:...) with ":Uvalue".
372 */
373 static void
374 ForLoop_SubstVarLong(ForLoop *f, const char **pp, const char *bodyEnd,
375 char endc, const char **inout_mark)
376 {
377 size_t i;
378 const char *p = *pp;
379
380 for (i = 0; i < f->vars.len; i++) {
381 const ForVar *forVar = Vector_Get(&f->vars, i);
382 const char *varname = forVar->name;
383 size_t varnameLen = forVar->nameLen;
384
385 if (varnameLen >= (size_t)(bodyEnd - p))
386 continue;
387 if (memcmp(p, varname, varnameLen) != 0)
388 continue;
389 /* XXX: why test for backslash here? */
390 if (p[varnameLen] != ':' && p[varnameLen] != endc &&
391 p[varnameLen] != '\\')
392 continue;
393
394 /*
395 * Found a variable match. Skip over the variable name and
396 * instead add ':U<value>' to the current body.
397 */
398 Buf_AddBytesBetween(&f->curBody, *inout_mark, p);
399 Buf_AddStr(&f->curBody, ":U");
400 Buf_AddEscaped(&f->curBody,
401 f->items.words[f->sub_next + i], endc);
402
403 p += varnameLen;
404 *inout_mark = p;
405 *pp = p;
406 return;
407 }
408 }
409
410 /*
411 * When expanding the body of a .for loop, replace single-character
412 * variable expressions like $i with their ${:U...} expansion.
413 */
414 static void
415 ForLoop_SubstVarShort(ForLoop *f, const char *p, const char **inout_mark)
416 {
417 const char ch = *p;
418 const ForVar *vars;
419 size_t i;
420
421 /* Skip $$ and stupid ones. */
422 if (ch == '}' || ch == ')' || ch == ':' || ch == '$')
423 return;
424
425 vars = Vector_Get(&f->vars, 0);
426 for (i = 0; i < f->vars.len; i++) {
427 const char *varname = vars[i].name;
428 if (varname[0] == ch && varname[1] == '\0')
429 goto found;
430 }
431 return;
432
433 found:
434 Buf_AddBytesBetween(&f->curBody, *inout_mark, p);
435 *inout_mark = p + 1;
436
437 /* Replace $<ch> with ${:U<value>} */
438 Buf_AddStr(&f->curBody, "{:U");
439 Buf_AddEscaped(&f->curBody, f->items.words[f->sub_next + i], '}');
440 Buf_AddByte(&f->curBody, '}');
441 }
442
443 /*
444 * Compute the body for the current iteration by copying the unexpanded body,
445 * replacing the expressions for the iteration variables on the way.
446 *
447 * Using variable expressions ensures that the .for loop can't generate
448 * syntax, and that the later parsing will still see a variable.
449 * This code assumes that the variable with the empty name will never be
450 * defined, see unit-tests/varname-empty.mk for more details.
451 *
452 * The detection of substitutions of the loop control variables is naive.
453 * Many of the modifiers use '\$' instead of '$$' to escape '$', so it is
454 * possible to contrive a makefile where an unwanted substitution happens.
455 */
456 static void
457 ForLoop_SubstBody(ForLoop *f)
458 {
459 const char *p, *bodyEnd;
460 const char *mark; /* where the last substitution left off */
461
462 Buf_Empty(&f->curBody);
463
464 mark = f->body.data;
465 bodyEnd = f->body.data + f->body.len;
466 for (p = mark; (p = strchr(p, '$')) != NULL;) {
467 if (p[1] == '{' || p[1] == '(') {
468 char endc = p[1] == '{' ? '}' : ')';
469 p += 2;
470 ForLoop_SubstVarLong(f, &p, bodyEnd, endc, &mark);
471 } else if (p[1] != '\0') {
472 ForLoop_SubstVarShort(f, p + 1, &mark);
473 p += 2;
474 } else
475 break;
476 }
477
478 Buf_AddBytesBetween(&f->curBody, mark, bodyEnd);
479 }
480
481 /*
482 * Compute the body for the current iteration by copying the unexpanded body,
483 * replacing the expressions for the iteration variables on the way.
484 */
485 static char *
486 ForReadMore(void *v_arg, size_t *out_len)
487 {
488 ForLoop *f = v_arg;
489
490 if (f->sub_next == f->items.len) {
491 /* No more iterations */
492 ForLoop_Free(f);
493 return NULL;
494 }
495
496 ForLoop_SubstBody(f);
497 DEBUG1(FOR, "For: loop body:\n%s", f->curBody.data);
498 f->sub_next += (unsigned int)f->vars.len;
499
500 *out_len = f->curBody.len;
501 return f->curBody.data;
502 }
503
504 /* Run the .for loop, imitating the actions of an include file. */
505 void
506 For_Run(int lineno)
507 {
508 ForLoop *f = accumFor;
509 accumFor = NULL;
510
511 if (f->items.len == 0) {
512 /*
513 * Nothing to expand - possibly due to an earlier syntax
514 * error.
515 */
516 ForLoop_Free(f);
517 return;
518 }
519
520 Parse_SetInput(NULL, lineno, -1, ForReadMore, f);
521 }
522