for.c revision 1.154 1 /* $NetBSD: for.c,v 1.154 2022/01/02 01:54:43 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1992, The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Handling of .for/.endfor loops in a makefile.
34 *
35 * For loops have the form:
36 *
37 * .for <varname...> in <value...>
38 * # the body
39 * .endfor
40 *
41 * When a .for line is parsed, the following lines are copied to the body of
42 * the .for loop, until the corresponding .endfor line is reached. In this
43 * phase, the body is not yet evaluated. This also applies to any nested
44 * .for loops.
45 *
46 * After reaching the .endfor, the values from the .for line are grouped
47 * according to the number of variables. For each such group, the unexpanded
48 * body is scanned for variable expressions, and those that match the
49 * variable names are replaced with expressions of the form ${:U...}. After
50 * that, the body is treated like a file from an .include directive.
51 *
52 * Interface:
53 * For_Eval Evaluate the loop in the passed line.
54 *
55 * For_Run Run accumulated loop
56 */
57
58 #include "make.h"
59
60 /* "@(#)for.c 8.1 (Berkeley) 6/6/93" */
61 MAKE_RCSID("$NetBSD: for.c,v 1.154 2022/01/02 01:54:43 rillig Exp $");
62
63
/* A single iteration variable, named before the "in" of a .for line. */
typedef struct ForVar {
	char *name;		/* copy of the name; released in ForLoop_Free */
	size_t nameLen;		/* length that was passed when the name was added */
} ForVar;
69
70 typedef struct ForLoop {
71 Buffer body; /* Unexpanded body of the loop */
72 Vector /* of ForVar */ vars; /* Iteration variables */
73 SubstringWords items; /* Substitution items */
74 unsigned int nextItem; /* Where to continue iterating */
75 } ForLoop;
76
77
78 static ForLoop *accumFor; /* Loop being accumulated */
79 static int forLevel = 0; /* Nesting level */
80
81
82 static ForLoop *
83 ForLoop_New(void)
84 {
85 ForLoop *f = bmake_malloc(sizeof *f);
86
87 Buf_Init(&f->body);
88 Vector_Init(&f->vars, sizeof(ForVar));
89 SubstringWords_Init(&f->items);
90 f->nextItem = 0;
91
92 return f;
93 }
94
95 static void
96 ForLoop_Free(ForLoop *f)
97 {
98 Buf_Done(&f->body);
99
100 while (f->vars.len > 0) {
101 ForVar *var = Vector_Pop(&f->vars);
102 free(var->name);
103 }
104 Vector_Done(&f->vars);
105
106 SubstringWords_Free(f->items);
107
108 free(f);
109 }
110
111 static void
112 ForLoop_AddVar(ForLoop *f, const char *name, size_t len)
113 {
114 ForVar *var = Vector_Push(&f->vars);
115 var->name = bmake_strldup(name, len);
116 var->nameLen = len;
117 }
118
119 static bool
120 ForLoop_ParseVarnames(ForLoop *f, const char **pp)
121 {
122 const char *p = *pp;
123
124 for (;;) {
125 size_t len;
126
127 cpp_skip_whitespace(&p);
128 if (*p == '\0') {
129 Parse_Error(PARSE_FATAL, "missing `in' in for");
130 return false;
131 }
132
133 /*
134 * XXX: This allows arbitrary variable names;
135 * see directive-for.mk.
136 */
137 for (len = 1; p[len] != '\0' && !ch_isspace(p[len]); len++)
138 continue;
139
140 if (len == 2 && p[0] == 'i' && p[1] == 'n') {
141 p += 2;
142 break;
143 }
144
145 ForLoop_AddVar(f, p, len);
146 p += len;
147 }
148
149 if (f->vars.len == 0) {
150 Parse_Error(PARSE_FATAL, "no iteration variables in for");
151 return false;
152 }
153
154 *pp = p;
155 return true;
156 }
157
158 static bool
159 ForLoop_ParseItems(ForLoop *f, const char *p)
160 {
161 char *items;
162
163 cpp_skip_whitespace(&p);
164
165 if (Var_Subst(p, SCOPE_GLOBAL, VARE_WANTRES, &items) != VPR_OK) {
166 Parse_Error(PARSE_FATAL, "Error in .for loop items");
167 return false;
168 }
169
170 f->items = Substring_Words(items, false);
171 free(items);
172
173 if (f->items.len == 1 && Substring_IsEmpty(f->items.words[0]))
174 f->items.len = 0; /* .for var in ${:U} */
175
176 if (f->items.len != 0 && f->items.len % f->vars.len != 0) {
177 Parse_Error(PARSE_FATAL,
178 "Wrong number of words (%u) in .for "
179 "substitution list with %u variables",
180 (unsigned)f->items.len, (unsigned)f->vars.len);
181 return false;
182 }
183
184 return true;
185 }
186
/* Whether p starts with the word "for", followed by whitespace. */
static bool
IsFor(const char *p)
{
	if (strncmp(p, "for", 3) != 0)
		return false;
	return ch_isspace(p[3]);
}
192
/*
 * Whether p starts with the word "endfor", followed by whitespace or the
 * end of the string.
 */
static bool
IsEndfor(const char *p)
{
	/* Cheap first-character check before the full comparison. */
	if (p[0] != 'e' || strncmp(p, "endfor", 6) != 0)
		return false;

	return p[6] == '\0' || ch_isspace(p[6]);
}
199
200 /*
201 * Evaluate the for loop in the passed line. The line looks like this:
202 * .for <varname...> in <value...>
203 *
204 * Input:
205 * line Line to parse
206 *
207 * Results:
208 * 0: Not a .for statement, parse the line
209 * 1: We found a for loop
210 * -1: A .for statement with a bad syntax error, discard.
211 */
212 int
213 For_Eval(const char *line)
214 {
215 ForLoop *f;
216 const char *p;
217
218 p = line + 1; /* skip the '.' */
219 cpp_skip_whitespace(&p);
220
221 if (!IsFor(p)) {
222 if (IsEndfor(p)) {
223 Parse_Error(PARSE_FATAL, "for-less endfor");
224 return -1;
225 }
226 return 0;
227 }
228 p += 3;
229
230 f = ForLoop_New();
231
232 if (!ForLoop_ParseVarnames(f, &p)) {
233 ForLoop_Free(f);
234 return -1;
235 }
236
237 if (!ForLoop_ParseItems(f, p)) {
238 /* Continue parsing the .for loop, but don't iterate. */
239 f->items.len = 0;
240 }
241
242 accumFor = f;
243 forLevel = 1;
244 return 1;
245 }
246
247 /*
248 * Add another line to the .for loop that is being built up.
249 * Returns false when the matching .endfor is reached.
250 */
251 bool
252 For_Accum(const char *line)
253 {
254 const char *p = line;
255
256 if (*p == '.') {
257 p++;
258 cpp_skip_whitespace(&p);
259
260 if (IsEndfor(p)) {
261 DEBUG1(FOR, "For: end for %d\n", forLevel);
262 if (--forLevel <= 0)
263 return false;
264 } else if (IsFor(p)) {
265 forLevel++;
266 DEBUG1(FOR, "For: new loop %d\n", forLevel);
267 }
268 }
269
270 Buf_AddStr(&accumFor->body, line);
271 Buf_AddByte(&accumFor->body, '\n');
272 return true;
273 }
274
275
/*
 * Measure the variable expression that follows a '$'.
 *
 * s points just after the '$' and e is the end of the available text.
 * Returns the number of characters after the '$' that belong to the
 * expression: 1 for a single-character variable, or the length of a
 * balanced "(...)" or "{...}" group including both delimiters.
 * Returns 0 if there is no text left or the group is never closed; the
 * caller then escapes the '$' instead.
 *
 * Only the delimiter pair that opened the group is counted for nesting.
 */
static size_t
ExprLen(const char *s, const char *e)
{
	char opener, closer;
	int nesting = 1;
	const char *scan;

	if (s == e)
		return 0;	/* just escape the '$' */

	opener = *s;
	switch (opener) {
	case '(':
		closer = ')';
		break;
	case '{':
		closer = '}';
		break;
	default:
		return 1;	/* Single char variable */
	}

	for (scan = s + 1; scan != e; scan++) {
		if (*scan == opener) {
			nesting++;
			continue;
		}
		if (*scan != closer)
			continue;
		if (--nesting == 0)
			return (size_t)(scan + 1 - s);
	}

	/* Expression end not found, escape the $ */
	return 0;
}
305
306 /*
307 * The .for loop substitutes the items as ${:U<value>...}, which means
308 * that characters that break this syntax must be backslash-escaped.
309 */
310 static bool
311 NeedsEscapes(Substring value, char endc)
312 {
313 const char *p;
314
315 for (p = value.start; p != value.end; p++) {
316 if (*p == ':' || *p == '$' || *p == '\\' || *p == endc ||
317 *p == '\n')
318 return true;
319 }
320 return false;
321 }
322
323 /*
324 * While expanding the body of a .for loop, write the item in the ${:U...}
325 * expression, escaping characters as needed.
326 *
327 * The result is later unescaped by ApplyModifier_Defined.
328 */
329 static void
330 Buf_AddEscaped(Buffer *cmds, Substring item, char endc)
331 {
332 const char *p;
333 char ch;
334
335 if (!NeedsEscapes(item, endc)) {
336 Buf_AddBytesBetween(cmds, item.start, item.end);
337 return;
338 }
339
340 /*
341 * Escape ':', '$', '\\' and 'endc' - these will be removed later by
342 * :U processing, see ApplyModifier_Defined.
343 */
344 for (p = item.start; p != item.end; p++) {
345 ch = *p;
346 if (ch == '$') {
347 size_t len = ExprLen(p + 1, item.end);
348 if (len != 0) {
349 /*
350 * XXX: Should a '\' be added here?
351 * See directive-for-escape.mk, ExprLen.
352 */
353 Buf_AddBytes(cmds, p, 1 + len);
354 p += len;
355 continue;
356 }
357 Buf_AddByte(cmds, '\\');
358 } else if (ch == ':' || ch == '\\' || ch == endc)
359 Buf_AddByte(cmds, '\\');
360 else if (ch == '\n') {
361 Parse_Error(PARSE_FATAL, "newline in .for value");
362 ch = ' '; /* prevent newline injection */
363 }
364 Buf_AddByte(cmds, ch);
365 }
366 }
367
368 /*
369 * When expanding the body of a .for loop, replace the variable name of an
370 * expression like ${i} or ${i:...} or $(i) or $(i:...) with ":Uvalue".
371 */
372 static void
373 ForLoop_SubstVarLong(ForLoop *f, Buffer *body, const char **pp,
374 const char *end, char endc, const char **inout_mark)
375 {
376 size_t i;
377 const char *p = *pp;
378
379 for (i = 0; i < f->vars.len; i++) {
380 const ForVar *forVar = Vector_Get(&f->vars, i);
381 const char *varname = forVar->name;
382 size_t varnameLen = forVar->nameLen;
383
384 if (varnameLen >= (size_t)(end - p))
385 continue;
386 if (memcmp(p, varname, varnameLen) != 0)
387 continue;
388 /* XXX: why test for backslash here? */
389 if (p[varnameLen] != ':' && p[varnameLen] != endc &&
390 p[varnameLen] != '\\')
391 continue;
392
393 /*
394 * Found a variable match. Skip over the variable name and
395 * instead add ':U<value>' to the current body.
396 */
397 Buf_AddBytesBetween(body, *inout_mark, p);
398 Buf_AddStr(body, ":U");
399 Buf_AddEscaped(body, f->items.words[f->nextItem + i], endc);
400
401 p += varnameLen;
402 *inout_mark = p;
403 *pp = p;
404 return;
405 }
406 }
407
408 /*
409 * When expanding the body of a .for loop, replace single-character
410 * variable expressions like $i with their ${:U...} expansion.
411 */
412 static void
413 ForLoop_SubstVarShort(ForLoop *f, Buffer *body,
414 const char *p, const char **inout_mark)
415 {
416 const char ch = *p;
417 const ForVar *vars;
418 size_t i;
419
420 /* Skip $$ and stupid ones. */
421 if (ch == '}' || ch == ')' || ch == ':' || ch == '$')
422 return;
423
424 vars = Vector_Get(&f->vars, 0);
425 for (i = 0; i < f->vars.len; i++) {
426 const char *varname = vars[i].name;
427 if (varname[0] == ch && varname[1] == '\0')
428 goto found;
429 }
430 return;
431
432 found:
433 Buf_AddBytesBetween(body, *inout_mark, p);
434 *inout_mark = p + 1;
435
436 /* Replace $<ch> with ${:U<value>} */
437 Buf_AddStr(body, "{:U");
438 Buf_AddEscaped(body, f->items.words[f->nextItem + i], '}');
439 Buf_AddByte(body, '}');
440 }
441
442 /*
443 * Compute the body for the current iteration by copying the unexpanded body,
444 * replacing the expressions for the iteration variables on the way.
445 *
446 * Using variable expressions ensures that the .for loop can't generate
447 * syntax, and that the later parsing will still see a variable.
448 * This code assumes that the variable with the empty name will never be
449 * defined, see unit-tests/varname-empty.mk for more details.
450 *
451 * The detection of substitutions of the loop control variables is naive.
452 * Many of the modifiers use '\$' instead of '$$' to escape '$', so it is
453 * possible to contrive a makefile where an unwanted substitution happens.
454 */
455 static void
456 ForLoop_SubstBody(ForLoop *f, Buffer *body)
457 {
458 const char *p, *end;
459 const char *mark; /* where the last substitution left off */
460
461 Buf_Empty(body);
462
463 mark = f->body.data;
464 end = f->body.data + f->body.len;
465 for (p = mark; (p = strchr(p, '$')) != NULL;) {
466 if (p[1] == '{' || p[1] == '(') {
467 char endc = p[1] == '{' ? '}' : ')';
468 p += 2;
469 ForLoop_SubstVarLong(f, body, &p, end, endc, &mark);
470 } else if (p[1] != '\0') {
471 ForLoop_SubstVarShort(f, body, p + 1, &mark);
472 p += 2;
473 } else
474 break;
475 }
476
477 Buf_AddBytesBetween(body, mark, end);
478 }
479
480 /*
481 * Compute the body for the current iteration by copying the unexpanded body,
482 * replacing the expressions for the iteration variables on the way.
483 */
484 bool
485 For_NextIteration(ForLoop *f, Buffer *body)
486 {
487 if (f->nextItem == f->items.len) {
488 /* No more iterations */
489 ForLoop_Free(f);
490 return false;
491 }
492
493 ForLoop_SubstBody(f, body);
494 DEBUG1(FOR, "For: loop body:\n%s", body->data);
495 f->nextItem += (unsigned int)f->vars.len;
496 return true;
497 }
498
499 /* Run the .for loop, imitating the actions of an include file. */
500 void
501 For_Run(int lineno)
502 {
503 Buffer buf;
504 ForLoop *f = accumFor;
505 accumFor = NULL;
506
507 if (f->items.len > 0) {
508 Buf_Init(&buf);
509 Parse_PushInput(NULL, lineno, buf, f);
510 } else
511 ForLoop_Free(f);
512 }
513