directive-for-escape.mk revision 1.24 1 1.24 rillig # $NetBSD: directive-for-escape.mk,v 1.24 2024/07/05 17:41:50 rillig Exp $
2 1.1 rillig #
3 1.1 rillig # Test escaping of special characters in the iteration values of a .for loop.
4 1.1 rillig # These values get expanded later using the :U variable modifier, and this
5 1.18 rillig # escaping and unescaping must pass all characters and strings unmodified.
6 1.18 rillig
7 1.1 rillig .MAKEFLAGS: -df
8 1.1 rillig
9 1.7 rillig # Even though the .for loops take quotes into account when splitting the
10 1.7 rillig # string into words, the quotes don't need to be balanced, as of 2020-12-31.
11 1.1 rillig # This could be considered a bug.
12 1.1 rillig ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
13 1.1 rillig
14 1.18 rillig
15 1.1 rillig # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16 1.12 rillig # the loop. Not only would it need the escaping for the variable modifier
17 1.12 rillig # ':U' but also the escaping for the line-end comment.
18 1.1 rillig .for chars in ${ASCII}
19 1.24 rillig # expect: make: Unclosed expression, expecting '}' for modifier "U!"" of variable "" with value "!""
20 1.24 rillig # expect+1: !"
21 1.1 rillig . info ${chars}
22 1.1 rillig .endfor
23 1.1 rillig
24 1.1 rillig # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
25 1.1 rillig # as comment character. Using 3 backslashes doesn't help either since
26 1.1 rillig # then the situation is essentially the same as with 1 backslash.
27 1.1 rillig # This means that a '#' sign cannot be passed in the value of a .for loop
28 1.1 rillig # at all.
29 1.1 rillig ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
30 1.1 rillig .for chars in ${ASCII.2020-12-31}
31 1.24 rillig # expect: make: Unclosed expression, expecting '}' for modifier "U!"\\\\" of variable "" with value "!"\\"
32 1.24 rillig # expect+1: !"\\
33 1.1 rillig . info ${chars}
34 1.1 rillig .endfor
35 1.1 rillig
36 1.11 rillig # Cover the code in ExprLen.
37 1.1 rillig #
38 1.1 rillig # XXX: It is unexpected that the variable V gets expanded in the loop body.
39 1.11 rillig # The double '$$' should intuitively prevent exactly this. Probably nobody
40 1.11 rillig # was adventurous enough to use literal dollar signs in the values of a .for
41 1.12 rillig # loop, allowing this edge case to go unnoticed for years.
42 1.11 rillig #
43 1.11 rillig # See for.c, function ExprLen.
44 1.1 rillig V= value
45 1.1 rillig VALUES= $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
46 1.1 rillig .for i in ${VALUES}
47 1.1 rillig . info $i
48 1.1 rillig .endfor
49 1.21 rillig # expect: . info ${:U\$}
50 1.21 rillig # expect-3: $
51 1.21 rillig # expect: . info ${:U${V}}
52 1.21 rillig # expect-5: value
53 1.21 rillig # expect: . info ${:U${V:=-with-modifier}}
54 1.21 rillig # expect-7: value-with-modifier
55 1.21 rillig # expect: . info ${:U$(V)}
56 1.21 rillig # expect-9: value
57 1.21 rillig # expect: . info ${:U$(V:=-with-modifier)}
58 1.21 rillig # expect-11: value-with-modifier
59 1.21 rillig #
60 1.21 rillig # Providing the loop items directly has the same effect.
61 1.21 rillig .for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
62 1.21 rillig . info $i
63 1.21 rillig .endfor
64 1.21 rillig # expect: . info ${:U\$}
65 1.21 rillig # expect-3: $
66 1.21 rillig # expect: . info ${:U${V}}
67 1.18 rillig # expect-5: value
68 1.18 rillig # expect-6: value-with-modifier
69 1.21 rillig # expect-7: value
70 1.21 rillig # expect-8: value-with-modifier
71 1.16 rillig
72 1.11 rillig # Try to cover the code for nested '{}' in ExprLen, without success.
73 1.1 rillig #
74 1.22 rillig # The value of the variable VALUES is not meant to be an expression.
75 1.7 rillig # Instead, it is meant to represent literal text, the only escaping mechanism
76 1.7 rillig # being that each '$' is written as '$$'.
77 1.16 rillig VALUES= $${UNDEF:U\$$\$$ {{}} end}
78 1.5 rillig #
79 1.5 rillig # The .for loop splits ${VALUES} into 3 words, at the space characters, since
80 1.14 rillig # the '$$' is an ordinary character and the spaces are not escaped.
81 1.14 rillig # Word 1 is '${UNDEF:U\$\$'
82 1.14 rillig # Word 2 is '{{}}'
83 1.14 rillig # Word 3 is 'end}'
84 1.16 rillig #
85 1.16 rillig # Each of these words is now inserted in the body of the .for loop.
86 1.16 rillig .for i in ${VALUES}
87 1.16 rillig # $i
88 1.16 rillig .endfor
89 1.16 rillig #
90 1.16 rillig # When these words are injected into the body of the .for loop, each inside a
91 1.16 rillig # '${:U...}' expression, the result is:
92 1.16 rillig #
93 1.19 rillig # expect: For: loop body with i = ${UNDEF:U\$\$:
94 1.16 rillig # expect: # ${:U\${UNDEF\:U\\$\\$}
95 1.19 rillig # expect: For: loop body with i = {{}}:
96 1.16 rillig # expect: # ${:U{{\}\}}
97 1.19 rillig # expect: For: loop body with i = end}:
98 1.16 rillig # expect: # ${:Uend\}}
99 1.16 rillig # expect: For: end for 1
100 1.16 rillig #
101 1.16 rillig # The first of these expressions is the most interesting one, due to its many
102 1.16 rillig # special characters. This expression is properly balanced:
103 1.16 rillig #
104 1.16 rillig # Text Meaning Explanation
105 1.16 rillig # \$ $ escaped
106 1.16 rillig # { { ordinary text
107 1.16 rillig # UNDEF UNDEF ordinary text
108 1.16 rillig # \: : escaped
109 1.16 rillig # U U ordinary text
110 1.16 rillig # \\ \ escaped
111 1.16 rillig # $\ (expr) an expression, the variable name is '\'
112 1.16 rillig # \$ $ escaped
113 1.16 rillig #
114 1.16 rillig # To make the expression '$\' visible, define it to an actual word:
115 1.14 rillig ${:U\\}= backslash
116 1.1 rillig .for i in ${VALUES}
117 1.1 rillig . info $i
118 1.1 rillig .endfor
119 1.16 rillig #
120 1.16 rillig # expect-3: ${UNDEF:U\backslash$
121 1.16 rillig # expect-4: {{}}
122 1.16 rillig # expect-5: end}
123 1.16 rillig #
124 1.16 rillig # FIXME: There was no expression '$\' in the original text of the variable
125 1.16 rillig # 'VALUES', that's a surprise in the parser.
126 1.16 rillig
127 1.1 rillig
128 1.11 rillig # Second try to cover the code for nested '{}' in ExprLen.
129 1.5 rillig #
130 1.16 rillig # XXX: It is not the job of ExprLen to parse an expression, it is naive to
131 1.16 rillig # expect ExprLen to get all the details right in just a few lines of code.
132 1.5 rillig # Each variable modifier has its own inconsistent way of parsing nested
133 1.22 rillig # expressions, braces and parentheses. (Compare ':M', ':S', and
134 1.7 rillig # ':D' for details.) The only sensible thing to do is therefore to let
135 1.7 rillig # Var_Parse do all the parsing work.
136 1.5 rillig VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end
137 1.5 rillig .for i in ${VALUES}
138 1.5 rillig . info $i
139 1.5 rillig .endfor
140 1.18 rillig # expect-2: begin<fallback>end
141 1.5 rillig
142 1.1 rillig # A single trailing dollar doesn't happen in practice.
143 1.1 rillig # The dollar sign is correctly passed through to the body of the .for loop.
144 1.1 rillig # There, it is expanded by the .info directive, but even there a trailing
145 1.1 rillig # dollar sign is kept as-is.
146 1.1 rillig .for i in ${:U\$}
147 1.1 rillig . info ${i}
148 1.1 rillig .endfor
149 1.18 rillig # expect-2: $
150 1.2 rillig
151 1.17 rillig # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
152 1.22 rillig # could contain colons, which affected expressions having this exact
153 1.17 rillig # modifier. This possibility was neither intended nor documented.
154 1.2 rillig NUMBERS= one two three
155 1.17 rillig # expect+1: invalid character ':' in .for loop variable name
156 1.2 rillig .for NUMBERS:M*e in replaced
157 1.2 rillig . info ${NUMBERS} ${NUMBERS:M*e}
158 1.2 rillig .endfor
159 1.2 rillig
160 1.17 rillig # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
161 1.23 rillig # could contain braces, which allowed replacing sequences of
162 1.17 rillig # expressions. This possibility was neither intended nor documented.
163 1.2 rillig BASENAME= one
164 1.2 rillig EXT= .c
165 1.17 rillig # expect+1: invalid character '}' in .for loop variable name
166 1.2 rillig .for BASENAME}${EXT in replaced
167 1.2 rillig . info ${BASENAME}${EXT}
168 1.2 rillig .endfor
169 1.3 rillig
170 1.3 rillig # Demonstrate the various ways to refer to the iteration variable.
171 1.3 rillig i= outer
172 1.3 rillig i2= two
173 1.3 rillig i,= comma
174 1.3 rillig .for i in inner
175 1.3 rillig . info . $$i: $i
176 1.3 rillig . info . $${i}: ${i}
177 1.3 rillig . info . $${i:M*}: ${i:M*}
178 1.3 rillig . info . $$(i): $(i)
179 1.3 rillig . info . $$(i:M*): $(i:M*)
180 1.3 rillig . info . $${i$${:U}}: ${i${:U}}
181 1.6 rillig . info . $${i\}}: ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this
182 1.3 rillig . info . $${i2}: ${i2}
183 1.3 rillig . info . $${i,}: ${i,}
184 1.3 rillig . info . adjacent: $i${i}${i:M*}$i
185 1.3 rillig .endfor
186 1.18 rillig # expect-11: . $i: inner
187 1.18 rillig # expect-11: . ${i}: inner
188 1.18 rillig # expect-11: . ${i:M*}: inner
189 1.18 rillig # expect-11: . $(i): inner
190 1.18 rillig # expect-11: . $(i:M*): inner
191 1.18 rillig # expect-11: . ${i${:U}}: outer
192 1.18 rillig # expect-11: . ${i\}}: inner}
193 1.18 rillig # expect-11: . ${i2}: two
194 1.18 rillig # expect-11: . ${i,}: comma
195 1.18 rillig # expect-11: . adjacent: innerinnerinnerinner
196 1.4 rillig
197 1.17 rillig # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
198 1.17 rillig # since there was no check on valid variable names. ForLoop_SubstVarShort
199 1.17 rillig # skipped "stupid" variable names, but ForLoop_SubstVarLong naively
200 1.17 rillig # parsed the body of the loop, substituting each '${$}' with an actual
201 1.17 rillig # '${:Udollar}'.
202 1.18 rillig # expect+1: invalid character '$' in .for loop variable name
203 1.8 rillig .for $ in dollar
204 1.8 rillig . info eight $$$$$$$$ and no cents.
205 1.8 rillig . info eight ${$}${$}${$}${$} and no cents.
206 1.8 rillig .endfor
207 1.8 rillig # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
208 1.22 rillig # an expression. The inner '$' is followed by a '}' and is thus a
209 1.8 rillig # silent syntax error, the '$' is skipped. The variable name is thus '', and
210 1.8 rillig # since there is never a variable named '', the whole expression '${$}'
211 1.8 rillig # evaluates to an empty string.
212 1.8 rillig closing-brace= } # guard against an
213 1.8 rillig ${closing-brace}= <closing-brace> # alternative interpretation
214 1.18 rillig # expect+1: eight and no cents.
215 1.8 rillig .info eight ${$}${$}${$}${$} and no cents.
216 1.8 rillig
217 1.9 rillig # What happens if the values from the .for loop contain a literal newline?
218 1.10 rillig # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
219 1.10 rillig # body of the .for loop, where it was then interpreted as a literal newline,
220 1.10 rillig # leading to syntax errors such as "Unclosed variable expression" in the upper
221 1.10 rillig # line and "Invalid line type" in the lower line.
222 1.18 rillig #
223 1.18 rillig # The error message occurs in the line of the .for loop since that's the place
224 1.18 rillig # where the body of the .for loop is constructed, and at this point the
225 1.18 rillig # newline character gets replaced with a plain space.
226 1.18 rillig # expect+2: newline in .for value
227 1.18 rillig # expect+1: newline in .for value
228 1.9 rillig .for i in "${.newline}"
229 1.9 rillig . info short: $i
230 1.9 rillig . info long: ${i}
231 1.9 rillig .endfor
232 1.18 rillig # expect-3: short: " "
233 1.18 rillig # expect-3: long: " "
234 1.9 rillig
235 1.13 rillig # No error since the newline character is not actually used.
236 1.13 rillig .for i in "${.newline}"
237 1.13 rillig .endfor
238 1.13 rillig
239 1.13 rillig # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
240 1.13 rillig # a newline character in a .for loop led to a crash since at the point where
241 1.13 rillig # the error message including the stack trace is printed, the body of the .for
242 1.13 rillig # loop is assembled, and at that point, ForLoop.nextItem had already been
243 1.13 rillig # advanced.
244 1.13 rillig .MAKEFLAGS: -dp
245 1.18 rillig # expect+1: newline in .for value
246 1.13 rillig .for i in "${.newline}"
247 1.13 rillig : $i
248 1.13 rillig .endfor
249 1.13 rillig .MAKEFLAGS: -d0
250 1.13 rillig
251 1.14 rillig .MAKEFLAGS: -df
252 1.14 rillig .for i in \# \\\#
253 1.14 rillig # $i
254 1.14 rillig .endfor
255 1.14 rillig
256 1.14 rillig .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
257 1.14 rillig # $i
258 1.14 rillig .endfor
259 1.14 rillig
260 1.15 rillig # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
261 1.15 rillig # target-local variables. See for.c 1.45 from 2009-01-14.
262 1.15 rillig .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
263 1.15 rillig # $i
264 1.15 rillig .endfor
265 1.15 rillig # expect: # ${:U${.TARGET}}
266 1.15 rillig # XXX: Why does '$' result in the same text as '$$'?
267 1.15 rillig # expect: # ${:U${.TARGET}}
268 1.15 rillig # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
269 1.15 rillig # expect: # ${:U$${.TARGET\}}
270 1.15 rillig # XXX: Why does '$' result in the same text as '$$'?
271 1.15 rillig # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
272 1.15 rillig # expect: # ${:U$${.TARGET\}}
273 1.15 rillig
274 1.14 rillig .for i in ((( {{{ ))) }}}
275 1.14 rillig # $i
276 1.14 rillig .endfor
277 1.14 rillig
278 1.21 rillig
279 1.21 rillig # When generating the body of a .for loop, recognizing the expressions is done
280 1.21 rillig # using simple heuristics. These can go wrong in ambiguous cases like this.
281 1.21 rillig # The variable name ',' is unusual as it is not a pronounceable name, but the
282 1.21 rillig # same principle applies for other names as well. In this case, the text '$,'
283 1.21 rillig # is replaced with the expression '${:U1}', even though the text does not
284 1.21 rillig # represent an expression.
285 1.21 rillig .for , in 1
286 1.21 rillig # $$i $i
287 1.21 rillig # VAR= $$i $i ${a:S,from$,to,}
288 1.21 rillig VAR= $$i $i ${a:S,from$,to,}
289 1.21 rillig .endfor
290 1.21 rillig # expect: # $$i $i
291 1.21 rillig # expect: # VAR= $$i $i ${a:S,from${:U1}to,}
292 1.21 rillig # expect: VAR= $$i $i ${a:S,from${:U1}to,}
293 1.21 rillig #
294 1.21 rillig # When the above variable is evaluated, make will complain about the
295 1.21 rillig # unfinished modifier ':S', as it is missing a comma.
296