directive-for-escape.mk revision 1.30 1 # $NetBSD: directive-for-escape.mk,v 1.30 2025/06/28 22:39:28 rillig Exp $
2 #
3 # Test escaping of special characters in the iteration values of a .for loop.
4 # These values get expanded later using the :U variable modifier, and this
5 # escaping and unescaping must pass all characters and strings unmodified.
6
7 .MAKEFLAGS: -df
8
9 # Even though the .for loops take quotes into account when splitting the
10 # string into words, the quotes don't need to be balanced, as of 2020-12-31.
11 # This could be considered a bug.
12 ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
13
14
15 # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16 # the loop. Not only would it need the escaping for the variable modifier
17 # ':U' but also the escaping for the line-end comment.
18 # expect+3: Unclosed expression, expecting "}" for modifier "U!""
19 # expect+2: !"
20 .for chars in ${ASCII}
21 . info ${chars}
22 .endfor
23
24 # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
25 # as comment character. Using 3 backslashes doesn't help either since
26 # then the situation is essentially the same as with 1 backslash.
27 # This means that a '#' sign cannot be passed in the value of a .for loop
28 # at all.
29 ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
30 # expect+3: Unclosed expression, expecting "}" for modifier "U!"\\\\"
31 # expect+2: !"\\
32 .for chars in ${ASCII.2020-12-31}
33 . info ${chars}
34 .endfor
35
36 # Cover the code in ExprLen.
37 #
38 # XXX: It is unexpected that the variable V gets expanded in the loop body.
39 # The double '$$' should intuitively prevent exactly this. Probably nobody
40 # was adventurous enough to use literal dollar signs in the values of a .for
41 # loop, allowing this edge case to go unnoticed for years.
42 #
43 # See for.c, function ExprLen.
44 V= value
45 VALUES= $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
46 # expect: . info ${:U\$}
47 # expect+10: $
48 # expect: . info ${:U${V}}
49 # expect+8: value
50 # expect: . info ${:U${V:=-with-modifier}}
51 # expect+6: value-with-modifier
52 # expect: . info ${:U$(V)}
53 # expect+4: value
54 # expect: . info ${:U$(V:=-with-modifier)}
55 # expect+2: value-with-modifier
56 .for i in ${VALUES}
57 . info $i
58 .endfor
59 #
60 # Providing the loop items directly has the same effect.
61 # expect: . info ${:U\$}
62 # expect+7: $
63 # expect: . info ${:U${V}}
64 # expect+5: value
65 # expect+4: value-with-modifier
66 # expect+3: value
67 # expect+2: value-with-modifier
68 .for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
69 . info $i
70 .endfor
71
72 # Try to cover the code for nested '{}' in ExprLen, without success.
73 #
74 # The value of the variable VALUES is not meant to be an expression.
75 # Instead, it is meant to represent literal text, the only escaping mechanism
76 # being that each '$' is written as '$$'.
77 VALUES= $${UNDEF:U\$$\$$ {{}} end}
78 #
79 # The .for loop splits ${VALUES} into 3 words, at the space characters, since
80 # the '$$' is an ordinary character and the spaces are not escaped.
81 # Word 1 is '${UNDEF:U\$\$'
82 # Word 2 is '{{}}'
83 # Word 3 is 'end}'
84 #
85 # Each of these words is now inserted in the body of the .for loop.
86 .for i in ${VALUES}
87 # $i
88 .endfor
89 #
90 # When these words are injected into the body of the .for loop, each inside a
91 # '${:U...}' expression, the result is:
92 #
93 # expect: For: loop body with i = ${UNDEF:U\$\$:
94 # expect: # ${:U\${UNDEF\:U\\$\\$}
95 # expect: For: loop body with i = {{}}:
96 # expect: # ${:U{{\}\}}
97 # expect: For: loop body with i = end}:
98 # expect: # ${:Uend\}}
99 # expect: For: end for 1
100 #
101 # The first of these expressions is the most interesting one, due to its many
102 # special characters. This expression is properly balanced:
103 #
104 # Text Meaning Explanation
105 # \$ $ escaped
106 # { { ordinary text
107 # UNDEF UNDEF ordinary text
108 # \: : escaped
109 # U U ordinary text
110 # \\ \ escaped
111 # $\ (expr) an expression, the variable name is '\'
112 # \$ $ escaped
113 #
114 # To make the expression '$\' visible, define it to an actual word:
115 ${:U\\}= backslash
116 # expect+4: ${UNDEF:U\backslash$
117 # expect+3: {{}}
118 # expect+2: end}
119 .for i in ${VALUES}
120 . info $i
121 .endfor
122 #
123 # FIXME: There was no expression '$\' in the original text of the variable
124 # 'VALUES', that's a surprise in the parser.
125
126
127 # The second attempt to cover the code for nested '{}' in ExprLen.
128 #
129 # XXX: It is not the job of ExprLen to parse an expression, it is naive to
130 # expect ExprLen to get all the details right in just a few lines of code.
131 # Each variable modifier has its own inconsistent way of parsing nested
132 # expressions, braces and parentheses. (Compare ':M', ':S', and
133 # ':D' for details.) The only sensible thing to do is therefore to let
134 # Var_Parse do all the parsing work.
135 VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end
136 # expect+2: begin<fallback>end
137 .for i in ${VALUES}
138 . info $i
139 .endfor
140
141 # A single trailing dollar doesn't happen in practice.
142 # The dollar sign is correctly passed through to the body of the .for loop.
143 # There, it is expanded by the .info directive, but even there a trailing
144 # dollar sign is kept as-is.
145 # expect+2: $
146 .for i in ${:U\$}
147 . info ${i}
148 .endfor
149
150 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
151 # could contain colons, which affected expressions having this exact
152 # modifier. This possibility was neither intended nor documented.
153 NUMBERS= one two three
154 # expect+1: Invalid character ":" in .for loop variable name
155 .for NUMBERS:M*e in replaced
156 . info ${NUMBERS} ${NUMBERS:M*e}
157 .endfor
158
159 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
160 # could contain braces, which allowed replacing sequences of
161 # expressions. This possibility was neither intended nor documented.
162 BASENAME= one
163 EXT= .c
164 # expect+1: Invalid character "}" in .for loop variable name
165 .for BASENAME}${EXT in replaced
166 . info ${BASENAME}${EXT}
167 .endfor
168
169 # Demonstrate the various ways to refer to the iteration variable.
170 i= outer
171 i2= two
172 i,= comma
173 # expect+2: inner inner inner inner inner
174 .for i in inner
175 . info $i ${i} ${i:M*} $(i) $(i:M*)
176 .endfor
177 # expect+2: outer
178 .for i in inner
179 . info ${i${:U}}
180 .endfor
181 # expect+2: inner}
182 .for i in inner
183 . info ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this
184 .endfor
185 # expect+2: two comma innerinnerinnerinner
186 .for i in inner
187 . info ${i2} ${i,} $i${i}${i:M*}$i
188 .endfor
189
190 # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
191 # since there was no check on valid variable names. ForLoop_SubstVarShort
192 # skipped "stupid" variable names, but ForLoop_SubstVarLong naively
193 # parsed the body of the loop, substituting each '${$}' with an actual
194 # '${:Udollar}'.
195 # expect+1: Invalid character "$" in .for loop variable name
196 .for $ in dollar
197 . info eight $$$$$$$$ and no cents.
198 . info eight ${$}${$}${$}${$} and no cents.
199 .endfor
200 # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
201 # an expression. The inner '$' is followed by a '}' and is thus a
202 # silent syntax error, the '$' is skipped. The variable name is thus '', and
203 # since there is never a variable named '', the whole expression '${$}'
204 # evaluates to an empty string.
205 closing-brace= } # guard against an
206 ${closing-brace}= <closing-brace> # alternative interpretation
207 # expect+1: eight and no cents.
208 .info eight ${$}${$}${$}${$} and no cents.
209
210 # What happens if the values from the .for loop contain a literal newline?
211 # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
212 # body of the .for loop, where it was then interpreted as a literal newline,
213 # leading to syntax errors such as "Unclosed variable expression" in the upper
214 # line and "Invalid line type" in the lower line.
215 #
216 # The error message occurs in the line of the .for loop since that's the place
217 # where the body of the .for loop is constructed, and at this point the
218 # newline character gets replaced with a plain space.
219 # expect+3: newline in .for value
220 # expect+2: newline in .for value
221 # expect+2: short: " ", long: " "
222 .for i in "${.newline}"
223 . info short: $i, long: ${i}
224 .endfor
225 # No error since the newline character is not actually used in the body.
226 .for i in "${.newline}"
227 .endfor
228
229 # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
230 # a newline character in a .for loop led to a crash since at the point where
231 # the error message including the stack trace is printed, the body of the .for
232 # loop is assembled, and at that point, ForLoop.nextItem had already been
233 # advanced.
234 .MAKEFLAGS: -dp
235 # expect+1: newline in .for value
236 .for i in "${.newline}"
237 : $i
238 .endfor
239 .MAKEFLAGS: -d0
240
# Switch back to '-df' after the '-dp' / '-d0' section above.
# The loop items are a '#' preceded by 1 backslash and by 3 backslashes.
# As the loop body is only a comment, the substituted text is presumably
# visible only in the debug log, not in the regular output.
241 .MAKEFLAGS: -df
242 .for i in \# \\\#
243 # $i
244 .endfor
245
# Dollar signs in the loop items, alone and in combinations that look like
# references to the iteration variable ('$$i', '$$(i)', '$${i}') or like an
# expression with a ':U' modifier.  As the loop body is only a comment, the
# substituted text is presumably visible only in the '-df' debug log.
246 .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
247 # $i
248 .endfor
249
250 # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
251 # target-local variables. See for.c 1.45 from 2009-01-14.
252 .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
253 # $i
254 .endfor
255 # expect: # ${:U${.TARGET}}
256 # XXX: Why does '$' result in the same text as '$$'?
257 # expect: # ${:U${.TARGET}}
258 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
259 # expect: # ${:U$${.TARGET\}}
260 # XXX: Why does '$' result in the same text as '$$'?
261 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
262 # expect: # ${:U$${.TARGET\}}
263
# Runs of unbalanced parentheses and braces as loop items.  As the loop body
# is only a comment, the substituted text is presumably visible only in the
# '-df' debug log.
264 .for i in ((( {{{ ))) }}}
265 # $i
266 .endfor
267
268
269 # When generating the body of a .for loop, recognizing the expressions is done
270 # using simple heuristics. These can go wrong in ambiguous cases like this.
271 # The variable name ',' is unusual as it is not a pronounceable name, but the
272 # same principle applies for other names as well. In this case, the text '$,'
273 # is replaced with the expression '${:U1}', even though the text does not
274 # represent an expression.
275 .for , in 1
276 # $$i $i
277 # VAR= $$i $i ${a:S,from$,to,}
278 VAR= $$i $i ${a:S,from$,to,}
279 .endfor
280 # expect: # $$i $i
281 # expect: # VAR= $$i $i ${a:S,from${:U1}to,}
282 # expect: VAR= $$i $i ${a:S,from${:U1}to,}
283 #
284 # When the above variable is evaluated, make will complain about the
285 # unfinished modifier ':S', as it is missing a comma.
286