directive-for-escape.mk revision 1.20 1 # $NetBSD: directive-for-escape.mk,v 1.20 2023/06/01 20:56:35 rillig Exp $
2 #
3 # Test escaping of special characters in the iteration values of a .for loop.
4 # These values get expanded later using the :U variable modifier, and this
5 # escaping and unescaping must pass all characters and strings unmodified.
6
7 .MAKEFLAGS: -df
8
9 # Even though the .for loops take quotes into account when splitting the
10 # string into words, the quotes don't need to be balanced, as of 2020-12-31.
11 # This could be considered a bug.
12 ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
13
14
15 # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16 # the loop. Not only would it need the escaping for the variable modifier
17 # ':U' but also the escaping for the line-end comment.
18 .for chars in ${ASCII}
19 . info ${chars}
20 .endfor
21 # expect-2: !"
22
23 # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
24 # as comment character. Using 3 backslashes doesn't help either since
25 # then the situation is essentially the same as with 1 backslash.
26 # This means that a '#' sign cannot be passed in the value of a .for loop
27 # at all.
28 ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
29 .for chars in ${ASCII.2020-12-31}
30 . info ${chars}
31 .endfor
32 # expect-2: !"\\
33
34 # Cover the code in ExprLen.
35 #
36 # XXX: It is unexpected that the variable V gets expanded in the loop body.
37 # The double '$$' should intuitively prevent exactly this. Probably nobody
38 # was adventurous enough to use literal dollar signs in the values of a .for
39 # loop, allowing this edge case to go unnoticed for years.
40 #
41 # See for.c, function ExprLen.
42 V= value
43 VALUES= $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
44 .for i in ${VALUES}
45 . info $i
46 .endfor
47 # expect-2: $
48 # expect-3: value
49 # expect-4: value-with-modifier
50 # expect-5: value
51 # expect-6: value-with-modifier
52
53
54 # Try to cover the code for nested '{}' in ExprLen, without success.
55 #
56 # The value of the variable VALUES is not meant to be a variable expression.
57 # Instead, it is meant to represent literal text, the only escaping mechanism
58 # being that each '$' is written as '$$'.
59 VALUES= $${UNDEF:U\$$\$$ {{}} end}
60 #
61 # The .for loop splits ${VALUES} into 3 words, at the space characters, since
62 # the '$$' is an ordinary character and the spaces are not escaped.
63 # Word 1 is '${UNDEF:U\$\$'
64 # Word 2 is '{{}}'
65 # Word 3 is 'end}'
66 #
67 # Each of these words is now inserted in the body of the .for loop.
68 .for i in ${VALUES}
69 # $i
70 .endfor
71 #
72 # When these words are injected into the body of the .for loop, each inside a
73 # '${:U...}' expression, the result is:
74 #
75 # expect: For: loop body with i = ${UNDEF:U\$\$:
76 # expect: # ${:U\${UNDEF\:U\\$\\$}
77 # expect: For: loop body with i = {{}}:
78 # expect: # ${:U{{\}\}}
79 # expect: For: loop body with i = end}:
80 # expect: # ${:Uend\}}
81 # expect: For: end for 1
82 #
83 # The first of these expressions is the most interesting one, due to its many
84 # special characters. This expression is properly balanced:
85 #
86 # Text Meaning Explanation
87 # \$ $ escaped
88 # { { ordinary text
89 # UNDEF UNDEF ordinary text
90 # \: : escaped
91 # U U ordinary text
92 # \\ \ escaped
93 # $\ (expr) an expression, the variable name is '\'
94 # \$ $ escaped
95 #
96 # To make the expression '$\' visible, define it to an actual word:
97 ${:U\\}= backslash
98 .for i in ${VALUES}
99 . info $i
100 .endfor
101 #
102 # expect-3: ${UNDEF:U\backslash$
103 # expect-4: {{}}
104 # expect-5: end}
105 #
106 # FIXME: There was no expression '$\' in the original text of the variable
107 # 'VALUES', that's a surprise in the parser.
108
109
110 # Second try to cover the code for nested '{}' in ExprLen.
111 #
112 # XXX: It is not the job of ExprLen to parse an expression, it is naive to
113 # expect ExprLen to get all the details right in just a few lines of code.
114 # Each variable modifier has its own inconsistent way of parsing nested
115 # variable expressions, braces and parentheses. (Compare ':M', ':S', and
116 # ':D' for details.) The only sensible thing to do is therefore to let
117 # Var_Parse do all the parsing work.
118 VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end
119 .for i in ${VALUES}
120 . info $i
121 .endfor
122 # expect-2: begin<fallback>end
123
124 # A single trailing dollar doesn't happen in practice.
125 # The dollar sign is correctly passed through to the body of the .for loop.
126 # There, it is expanded by the .info directive, but even there a trailing
127 # dollar sign is kept as-is.
128 .for i in ${:U\$}
129 . info ${i}
130 .endfor
131 # expect-2: $
132
133 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
134 # could contain colons, which affected variable expressions having this exact
135 # modifier. This possibility was neither intended nor documented.
136 NUMBERS= one two three
137 # expect+1: invalid character ':' in .for loop variable name
138 .for NUMBERS:M*e in replaced
139 . info ${NUMBERS} ${NUMBERS:M*e}
140 .endfor
141
142 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
143 # could contain braces, which allowed replacing sequences of variable
144 # expressions. This possibility was neither intended nor documented.
145 BASENAME= one
146 EXT= .c
147 # expect+1: invalid character '}' in .for loop variable name
148 .for BASENAME}${EXT in replaced
149 . info ${BASENAME}${EXT}
150 .endfor
151
152 # Demonstrate the various ways to refer to the iteration variable.
153 i= outer
154 i2= two
155 i,= comma
156 .for i in inner
157 . info . $$i: $i
158 . info . $${i}: ${i}
159 . info . $${i:M*}: ${i:M*}
160 . info . $$(i): $(i)
161 . info . $$(i:M*): $(i:M*)
162 . info . $${i$${:U}}: ${i${:U}}
163 . info . $${i\}}: ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this
164 . info . $${i2}: ${i2}
165 . info . $${i,}: ${i,}
166 . info . adjacent: $i${i}${i:M*}$i
167 .endfor
168 # expect-11: . $i: inner
169 # expect-11: . ${i}: inner
170 # expect-11: . ${i:M*}: inner
171 # expect-11: . $(i): inner
172 # expect-11: . $(i:M*): inner
173 # expect-11: . ${i${:U}}: outer
174 # expect-11: . ${i\}}: inner}
175 # expect-11: . ${i2}: two
176 # expect-11: . ${i,}: comma
177 # expect-11: . adjacent: innerinnerinnerinner
178
179 # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
180 # since there was no check on valid variable names. ForLoop_SubstVarShort
181 # skipped "stupid" variable names, but ForLoop_SubstVarLong naively
182 # parsed the body of the loop, substituting each '${$}' with an actual
183 # '${:Udollar}'.
184 # expect+1: invalid character '$' in .for loop variable name
185 .for $ in dollar
186 . info eight $$$$$$$$ and no cents.
187 . info eight ${$}${$}${$}${$} and no cents.
188 .endfor
189 # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
190 # a variable expression. The inner '$' is followed by a '}' and is thus a
191 # silent syntax error, the '$' is skipped. The variable name is thus '', and
192 # since there is never a variable named '', the whole expression '${$}'
193 # evaluates to an empty string.
194 closing-brace= } # guard against an
195 ${closing-brace}= <closing-brace> # alternative interpretation
196 # expect+1: eight and no cents.
197 .info eight ${$}${$}${$}${$} and no cents.
198
199 # What happens if the values from the .for loop contain a literal newline?
200 # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
201 # body of the .for loop, where it was then interpreted as a literal newline,
202 # leading to syntax errors such as "Unclosed variable expression" in the upper
203 # line and "Invalid line type" in the lower line.
204 #
205 # The error message occurs in the line of the .for loop since that's the place
206 # where the body of the .for loop is constructed, and at this point the
207 # newline character gets replaced with a plain space.
208 # expect+2: newline in .for value
209 # expect+1: newline in .for value
210 .for i in "${.newline}"
211 . info short: $i
212 . info long: ${i}
213 .endfor
214 # expect-3: short: " "
215 # expect-3: long: " "
216
217 # No error since the newline character is not actually used.
218 .for i in "${.newline}"
219 .endfor
220
221 # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
222 # a newline character in a .for loop led to a crash since at the point where
223 # the error message including the stack trace is printed, the body of the .for
224 # loop is assembled, and at that point, ForLoop.nextItem had already been
225 # advanced.
226 .MAKEFLAGS: -dp
227 # expect+1: newline in .for value
228 .for i in "${.newline}"
229 : $i
230 .endfor
231 .MAKEFLAGS: -d0
232 # Log the loop body for '#' written with 1 and with 3 backslashes before it.
233 .MAKEFLAGS: -df
234 .for i in \# \\\#
235 # $i
236 .endfor
237 # Log the loop body for values that are written using '$$' in several forms.
238 .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
239 # $i
240 .endfor
241
242 # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
243 # target-local variables. See for.c 1.45 from 2009-01-14.
244 .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
245 # $i
246 .endfor
247 # expect: # ${:U${.TARGET}}
248 # XXX: Why does '$' result in the same text as '$$'?
249 # expect: # ${:U${.TARGET}}
250 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
251 # expect: # ${:U$${.TARGET\}}
252 # XXX: Why does '$' result in the same text as '$$'?
253 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
254 # expect: # ${:U$${.TARGET\}}
255
256 .for i in ((( {{{ ))) }}}
257 # $i
258 .endfor
259 .MAKEFLAGS: -d0
260
261 all:
262