directive-for-escape.mk revision 1.13 1 1.13 rillig # $NetBSD: directive-for-escape.mk,v 1.13 2022/01/09 14:06:00 rillig Exp $
2 1.1 rillig #
3 1.1 rillig # Test escaping of special characters in the iteration values of a .for loop.
4 1.1 rillig # These values get expanded later using the :U variable modifier, and this
5 1.1 rillig # escaping and unescaping must pass all characters and strings effectively
6 1.1 rillig # unmodified.
7 1.1 rillig
8 1.1 rillig .MAKEFLAGS: -df
9 1.1 rillig
10 1.7 rillig # Even though the .for loops take quotes into account when splitting the
11 1.7 rillig # string into words, the quotes don't need to be balanced, as of 2020-12-31.
12 1.1 rillig # This could be considered a bug.
13 1.1 rillig ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
14 1.1 rillig
15 1.1 rillig # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16 1.12 rillig # the loop. Not only would it need the escaping for the variable modifier
17 1.12 rillig # ':U' but also the escaping for the line-end comment.
18 1.1 rillig .for chars in ${ASCII}
19 1.1 rillig . info ${chars}
20 1.1 rillig .endfor
21 1.1 rillig
22 1.1 rillig # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
23 1.1 rillig # as comment character. Using 3 backslashes doesn't help either since
24 1.1 rillig # then the situation is essentially the same as with 1 backslash.
25 1.1 rillig # This means that a '#' sign cannot be passed in the value of a .for loop
26 1.1 rillig # at all.
27 1.1 rillig ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
28 1.1 rillig .for chars in ${ASCII.2020-12-31}
29 1.1 rillig . info ${chars}
30 1.1 rillig .endfor
31 1.1 rillig
32 1.11 rillig # Cover the code in ExprLen.
33 1.1 rillig #
34 1.1 rillig # XXX: It is unexpected that the variable V gets expanded in the loop body.
35 1.11 rillig # The double '$$' should intuitively prevent exactly this. Probably nobody
36 1.11 rillig # was adventurous enough to use literal dollar signs in the values of a .for
37 1.12 rillig # loop, allowing this edge case to go unnoticed for years.
38 1.11 rillig #
39 1.11 rillig # See for.c, function ExprLen.
40 1.1 rillig V= value
41 1.1 rillig VALUES= $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
42 1.1 rillig .for i in ${VALUES}
43 1.1 rillig . info $i
44 1.1 rillig .endfor
45 1.1 rillig
46 1.11 rillig # Try to cover the code for nested '{}' in ExprLen, without success.
47 1.1 rillig #
48 1.7 rillig # The value of the variable VALUES is not meant to be a variable expression.
49 1.7 rillig # Instead, it is meant to represent literal text, the only escaping mechanism
50 1.7 rillig # being that each '$' is written as '$$'.
51 1.5 rillig #
52 1.5 rillig # The .for loop splits ${VALUES} into 3 words, at the space characters, since
53 1.5 rillig # these are not escaped.
54 1.1 rillig VALUES= $${UNDEF:U\$$\$$ {{}} end}
55 1.7 rillig # XXX: Where in the code does the '\$\$' get converted into a single '\$'?
56 1.1 rillig .for i in ${VALUES}
57 1.1 rillig . info $i
58 1.1 rillig .endfor
59 1.1 rillig
60 1.11 rillig # Second try to cover the code for nested '{}' in ExprLen.
61 1.5 rillig #
62 1.11 rillig # XXX: It is wrong that ExprLen requires the braces to be balanced.
63 1.5 rillig # Each variable modifier has its own inconsistent way of parsing nested
64 1.7 rillig # variable expressions, braces and parentheses. (Compare ':M', ':S', and
65 1.7 rillig # ':D' for details.) The only sensible thing to do is therefore to let
66 1.7 rillig # Var_Parse do all the parsing work.
67 1.5 rillig VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end
68 1.5 rillig .for i in ${VALUES}
69 1.5 rillig . info $i
70 1.5 rillig .endfor
71 1.5 rillig
72 1.1 rillig # A single trailing dollar doesn't happen in practice.
73 1.1 rillig # The dollar sign is correctly passed through to the body of the .for loop.
74 1.1 rillig # There, it is expanded by the .info directive, but even there a trailing
75 1.1 rillig # dollar sign is kept as-is.
76 1.1 rillig .for i in ${:U\$}
77 1.1 rillig . info ${i}
78 1.1 rillig .endfor
79 1.2 rillig
80 1.2 rillig # As of 2020-12-31, the name of the iteration variable can even contain
81 1.2 rillig # colons, which then affects variable expressions having this exact modifier.
82 1.2 rillig # This is clearly an unintended side effect of the implementation.
83 1.2 rillig NUMBERS= one two three
84 1.2 rillig .for NUMBERS:M*e in replaced
85 1.2 rillig . info ${NUMBERS} ${NUMBERS:M*e}
86 1.2 rillig .endfor
87 1.2 rillig
88 1.2 rillig # As of 2020-12-31, the name of the iteration variable can contain braces,
89 1.2 rillig # which gets even more surprising than colons, since it allows replacing
90 1.2 rillig # sequences of variable expressions. There is no practical use case for
91 1.2 rillig # this, though.
92 1.2 rillig BASENAME= one
93 1.2 rillig EXT= .c
94 1.2 rillig .for BASENAME}${EXT in replaced
95 1.2 rillig . info ${BASENAME}${EXT}
96 1.2 rillig .endfor
97 1.3 rillig
98 1.3 rillig # Demonstrate the various ways to refer to the iteration variable.
99 1.3 rillig i= outer
100 1.3 rillig i2= two
101 1.3 rillig i,= comma
102 1.3 rillig .for i in inner
103 1.3 rillig . info . $$i: $i
104 1.3 rillig . info . $${i}: ${i}
105 1.3 rillig . info . $${i:M*}: ${i:M*}
106 1.3 rillig . info . $$(i): $(i)
107 1.3 rillig . info . $$(i:M*): $(i:M*)
108 1.3 rillig . info . $${i$${:U}}: ${i${:U}}
109 1.6 rillig . info . $${i\}}: ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this
110 1.3 rillig . info . $${i2}: ${i2}
111 1.3 rillig . info . $${i,}: ${i,}
112 1.3 rillig . info . adjacent: $i${i}${i:M*}$i
113 1.3 rillig .endfor
114 1.4 rillig
115 1.8 rillig # The variable name can be a single '$' since there is no check on valid
116 1.8 rillig # variable names. ForLoop_SubstVarShort skips "stupid" variable names,
117 1.8 rillig # but ForLoop_SubstVarLong naively parses the body of the loop, substituting
118 1.8 rillig # each '${$}' with an actual 'dollar'.
119 1.8 rillig .for $ in dollar
120 1.8 rillig . info eight $$$$$$$$ and no cents.
121 1.8 rillig . info eight ${$}${$}${$}${$} and no cents.
122 1.8 rillig .endfor
123 1.8 rillig # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
124 1.8 rillig # a variable expression. The inner '$' is followed by a '}' and is thus a
125 1.8 rillig # silent syntax error, the '$' is skipped. The variable name is thus '', and
126 1.8 rillig # since there is never a variable named '', the whole expression '${$}'
127 1.8 rillig # evaluates to an empty string.
128 1.8 rillig closing-brace= } # guard against an
129 1.8 rillig ${closing-brace}= <closing-brace> # alternative interpretation
130 1.8 rillig .info eight ${$}${$}${$}${$} and no cents.
131 1.8 rillig
132 1.9 rillig # What happens if the values from the .for loop contain a literal newline?
133 1.10 rillig # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
134 1.10 rillig # body of the .for loop, where it was then interpreted as a literal newline,
135 1.10 rillig # leading to syntax errors such as "Unclosed variable expression" in the upper
136 1.10 rillig # line and "Invalid line type" in the lower line.
137 1.9 rillig .for i in "${.newline}"
138 1.9 rillig . info short: $i
139 1.9 rillig . info long: ${i}
140 1.9 rillig .endfor
141 1.9 rillig
142 1.13 rillig # No error since the newline character is not actually used.
143 1.13 rillig .for i in "${.newline}"
144 1.13 rillig .endfor
145 1.13 rillig
146 1.13 rillig # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
147 1.13 rillig # a newline character in a .for loop led to a crash since at the point where
148 1.13 rillig # the error message including the stack trace is printed, the body of the .for
149 1.13 rillig # loop is assembled, and at that point, ForLoop.nextItem had already been
150 1.13 rillig # advanced.
151 1.13 rillig .MAKEFLAGS: -dp
152 1.13 rillig .for i in "${.newline}"
153 1.13 rillig : $i
154 1.13 rillig .endfor
155 1.13 rillig .MAKEFLAGS: -d0
156 1.13 rillig
157 1.4 rillig all:
158