directive-for-escape.mk revision 1.11 1 # $NetBSD: directive-for-escape.mk,v 1.11 2021/09/02 07:02:08 rillig Exp $
2 #
3 # Test escaping of special characters in the iteration values of a .for loop.
4 # These values get expanded later using the :U variable modifier, and this
5 # escaping and unescaping must pass all characters and strings effectively
6 # unmodified.
7
8 .MAKEFLAGS: -df
9
10 # Even though the .for loops take quotes into account when splitting the
11 # string into words, the quotes don't need to be balanced, as of 2020-12-31.
12 # This could be considered a bug.
13 ASCII= !"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
14
15 # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16 # the loop since it would need not only the escaping for the :U variable
17 # modifier but also the escaping for the line-end comment.
18 .for chars in ${ASCII}
19 . info ${chars}
20 .endfor
21
22 # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
23 # as a comment character. Using 3 backslashes doesn't help either since
24 # then the situation is essentially the same as with 1 backslash.
25 # This means that a '#' sign cannot be passed in the value of a .for loop
26 # at all.
27 ASCII.2020-12-31= !"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
28 .for chars in ${ASCII.2020-12-31}
29 . info ${chars}
30 .endfor
31
32 # Cover the code in ExprLen, using values that look like variable expressions.
33 #
34 # XXX: It is unexpected that the variable V gets expanded in the loop body.
35 # The double '$$' should intuitively prevent exactly this. Probably nobody
36 # was adventurous enough to use literal dollar signs in the values of a .for
37 # loop.
38 #
39 # See for.c, function ExprLen.
40 V= value
41 VALUES= $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
42 .for i in ${VALUES}
43 . info $i
44 .endfor
45
46 # First try to cover the code for nested '{}' in ExprLen, without success.
47 #
48 # The value of the variable VALUES is not meant to be a variable expression.
49 # Instead, it is meant to represent literal text, the only escaping mechanism
50 # being that each '$' is written as '$$'.
51 #
52 # The .for loop splits ${VALUES} into 3 words, at the space characters, since
53 # these are not escaped.
54 VALUES= $${UNDEF:U\$$\$$ {{}} end}
55 # XXX: Where in the code does the '\$\$' get converted into a single '\$'?
56 .for i in ${VALUES}
57 . info $i
58 .endfor
59
60 # Second try to cover the code for nested '{}' in ExprLen, using ':N{{{}}}'.
61 #
62 # XXX: It is wrong that ExprLen requires the braces to be balanced.
63 # Each variable modifier has its own inconsistent way of parsing nested
64 # variable expressions, braces and parentheses. (Compare ':M', ':S', and
65 # ':D' for details.) The only sensible thing to do is therefore to let
66 # Var_Parse do all the parsing work.
67 VALUES= begin<$${UNDEF:Ufallback:N{{{}}}}>end
68 .for i in ${VALUES}
69 . info $i
70 .endfor
71
72 # A single trailing dollar sign in a value doesn't happen in practice.
73 # The dollar sign is correctly passed through to the body of the .for loop.
74 # There, it is expanded by the .info directive, but even there a trailing
75 # dollar sign is kept as-is.
76 .for i in ${:U\$}
77 . info ${i}
78 .endfor
79
80 # As of 2020-12-31, the name of the iteration variable can even contain
81 # colons, which then affects expressions such as '${NUMBERS:M*e}' that have
82 # this exact modifier. This is clearly an unintended side effect.
83 NUMBERS= one two three
84 .for NUMBERS:M*e in replaced
85 . info ${NUMBERS} ${NUMBERS:M*e}
86 .endfor
87
88 # As of 2020-12-31, the name of the iteration variable can contain braces,
89 # which gets even more surprising than colons, since it allows replacing
90 # sequences of variable expressions. There is no practical use case for
91 # this, though.
92 BASENAME= one
93 EXT= .c
94 .for BASENAME}${EXT in replaced
95 . info ${BASENAME}${EXT}
96 .endfor
97
98 # Demonstrate the various ways to refer to the iteration variable 'i'.
99 i= outer
100 i2= two
101 i,= comma
102 .for i in inner
103 . info . $$i: $i
104 . info . $${i}: ${i}
105 . info . $${i:M*}: ${i:M*}
106 . info . $$(i): $(i)
107 . info . $$(i:M*): $(i:M*)
108 . info . $${i$${:U}}: ${i${:U}}
109 . info . $${i\}}: ${i\}} # XXX: unclear why ForLoop_SubstVarLong needs this
110 . info . $${i2}: ${i2}
111 . info . $${i,}: ${i,}
112 . info . adjacent: $i${i}${i:M*}$i
113 .endfor
114
115 # The variable name can be a single '$' since there is no check on valid
116 # variable names. ForLoop_SubstVarShort skips "stupid" variable names,
117 # but ForLoop_SubstVarLong naively parses the body of the loop, substituting
118 # each '${$}' with an actual 'dollar'.
119 .for $ in dollar
120 . info eight $$$$$$$$ and no cents.
121 . info eight ${$}${$}${$}${$} and no cents.
122 .endfor
123 # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
124 # a variable expression. The inner '$' is followed by a '}' and is thus a
125 # silent syntax error; the '$' is skipped. The variable name is thus '', and
126 # since there is never a variable named '', the whole expression '${$}'
127 # evaluates to an empty string.
128 closing-brace= } # guard against an
129 ${closing-brace}= <closing-brace> # alternative interpretation
130 .info eight ${$}${$}${$}${$} and no cents.
131
132 # What happens if a value from the .for loop contains a literal newline?
133 # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
134 # body of the .for loop, where it was then interpreted as a literal newline,
135 # leading to syntax errors such as "Unclosed variable expression" in the upper
136 # line and "Invalid line type" in the lower line.
137 .for i in "${.newline}"
138 . info short: $i
139 . info long: ${i}
140 .endfor
141
142 all:
143