Home | History | Annotate | Line # | Download | only in unit-tests
      1 # $NetBSD: directive-for-escape.mk,v 1.30 2025/06/28 22:39:28 rillig Exp $
      2 #
      3 # Test escaping of special characters in the iteration values of a .for loop.
      4 # These values get expanded later using the :U variable modifier, and this
      5 # escaping and unescaping must pass all characters and strings unmodified.
      6 
      7 .MAKEFLAGS: -df
      8 
      9 # Even though the .for loops take quotes into account when splitting the
     10 # string into words, the quotes don't need to be balanced, as of 2020-12-31.
     11 # This could be considered a bug.
     12 ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     13 
     14 
     15 # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
     16 # the loop.  Not only would it need the escaping for the variable modifier
     17 # ':U' but also the escaping for the line-end comment.
     18 # expect+3: Unclosed expression, expecting "}" for modifier "U!""
     19 # expect+2: !"
     20 .for chars in ${ASCII}
     21 .  info ${chars}
     22 .endfor
     23 
     24 # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
     25 # as comment character.  Using 3 backslashes doesn't help either since
     26 # then the situation is essentially the same as with 1 backslash.
     27 # This means that a '#' sign cannot be passed in the value of a .for loop
     28 # at all.
     29 ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     30 # expect+3: Unclosed expression, expecting "}" for modifier "U!"\\\\"
     31 # expect+2: !"\\
     32 .for chars in ${ASCII.2020-12-31}
     33 .  info ${chars}
     34 .endfor
     35 
     36 # Cover the code in ExprLen.
     37 #
     38 # XXX: It is unexpected that the variable V gets expanded in the loop body.
     39 # The double '$$' should intuitively prevent exactly this.  Probably nobody
     40 # was adventurous enough to use literal dollar signs in the values of a .for
     41 # loop, allowing this edge case to go unnoticed for years.
     42 #
     43 # See for.c, function ExprLen.
     44 V=		value
     45 VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     46 # expect: .  info ${:U\$}
     47 # expect+10: $
     48 # expect: .  info ${:U${V}}
     49 # expect+8: value
     50 # expect: .  info ${:U${V:=-with-modifier}}
     51 # expect+6: value-with-modifier
     52 # expect: .  info ${:U$(V)}
     53 # expect+4: value
     54 # expect: .  info ${:U$(V:=-with-modifier)}
     55 # expect+2: value-with-modifier
     56 .for i in ${VALUES}
     57 .  info $i
     58 .endfor
     59 #
     60 # Providing the loop items directly has the same effect.
     61 # expect: .  info ${:U\$}
     62 # expect+7: $
     63 # expect: .  info ${:U${V}}
     64 # expect+5: value
     65 # expect+4: value-with-modifier
     66 # expect+3: value
     67 # expect+2: value-with-modifier
     68 .for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     69 .  info $i
     70 .endfor
     71 
     72 # Try to cover the code for nested '{}' in ExprLen, without success.
     73 #
     74 # The value of the variable VALUES is not meant to be an expression.
     75 # Instead, it is meant to represent literal text, the only escaping mechanism
     76 # being that each '$' is written as '$$'.
     77 VALUES=		$${UNDEF:U\$$\$$ {{}} end}
     78 #
     79 # The .for loop splits ${VALUES} into 3 words, at the space characters, since
     80 # the '$$' is an ordinary character and the spaces are not escaped.
     81 #	Word 1 is '${UNDEF:U\$\$'
     82 #	Word 2 is '{{}}'
     83 #	Word 3 is 'end}'
     84 #
     85 # Each of these words is now inserted in the body of the .for loop.
     86 .for i in ${VALUES}
     87 # $i
     88 .endfor
     89 #
     90 # When these words are injected into the body of the .for loop, each inside a
     91 # '${:U...}' expression, the result is:
     92 #
     93 # expect: For: loop body with i = ${UNDEF:U\$\$:
     94 # expect: # ${:U\${UNDEF\:U\\$\\$}
     95 # expect: For: loop body with i = {{}}:
     96 # expect: # ${:U{{\}\}}
     97 # expect: For: loop body with i = end}:
     98 # expect: # ${:Uend\}}
     99 # expect: For: end for 1
    100 #
    101 # The first of these expressions is the most interesting one, due to its many
    102 # special characters.  This expression is properly balanced:
    103 #
    104 #	Text	Meaning		Explanation
    105 #	\$	$		escaped
    106 #	{	{		ordinary text
    107 #	UNDEF	UNDEF		ordinary text
    108 #	\:	:		escaped
    109 #	U	U		ordinary text
    110 #	\\	\		escaped
    111 #	$\	(expr)		an expression, the variable name is '\'
    112 #	\$	$		escaped
    113 #
    114 # To make the expression '$\' visible, define it to an actual word:
    115 ${:U\\}=	backslash
    116 # expect+4: ${UNDEF:U\backslash$
    117 # expect+3: {{}}
    118 # expect+2: end}
    119 .for i in ${VALUES}
    120 .  info $i
    121 .endfor
    122 #
    123 # FIXME: There was no expression '$\' in the original text of the variable
    124 # 'VALUES', that's a surprise in the parser.
    125 
    126 
    127 # The second attempt to cover the code for nested '{}' in ExprLen.
    128 #
    129 # XXX: It is not the job of ExprLen to parse an expression, it is naive to
    130 # expect ExprLen to get all the details right in just a few lines of code.
    131 # Each variable modifier has its own inconsistent way of parsing nested
    132 # expressions, braces and parentheses.  (Compare ':M', ':S', and
    133 # ':D' for details.)  The only sensible thing to do is therefore to let
    134 # Var_Parse do all the parsing work.
    135 VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
    136 # expect+2: begin<fallback>end
    137 .for i in ${VALUES}
    138 .  info $i
    139 .endfor
    140 
    141 # A single trailing dollar doesn't happen in practice.
    142 # The dollar sign is correctly passed through to the body of the .for loop.
    143 # There, it is expanded by the .info directive, but even there a trailing
    144 # dollar sign is kept as-is.
    145 # expect+2: $
    146 .for i in ${:U\$}
    147 .  info ${i}
    148 .endfor
    149 
    150 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    151 # could contain colons, which affected expressions having this exact
    152 # modifier.  This possibility was neither intended nor documented.
    153 NUMBERS=	one two three
    154 # expect+1: Invalid character ":" in .for loop variable name
    155 .for NUMBERS:M*e in replaced
    156 .  info ${NUMBERS} ${NUMBERS:M*e}
    157 .endfor
    158 
    159 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    160 # could contain braces, which allowed replacing sequences of
    161 # expressions.  This possibility was neither intended nor documented.
    162 BASENAME=	one
    163 EXT=		.c
    164 # expect+1: Invalid character "}" in .for loop variable name
    165 .for BASENAME}${EXT in replaced
    166 .  info ${BASENAME}${EXT}
    167 .endfor
    168 
    169 # Demonstrate the various ways to refer to the iteration variable.
    170 i=		outer
    171 i2=		two
    172 i,=		comma
    173 # expect+2: inner inner inner inner inner
    174 .for i in inner
    175 .  info $i ${i} ${i:M*} $(i) $(i:M*)
    176 .endfor
    177 # expect+2: outer
    178 .for i in inner
    179 .  info ${i${:U}}
    180 .endfor
    181 # expect+2: inner}
    182 .for i in inner
    183 .  info ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
    184 .endfor
    185 # expect+2: two comma innerinnerinnerinner
    186 .for i in inner
    187 .  info ${i2} ${i,} $i${i}${i:M*}$i
    188 .endfor
    189 
    190 # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
    191 # since there was no check on valid variable names.  ForLoop_SubstVarShort
    192 # skipped "stupid" variable names, but ForLoop_SubstVarLong naively
    193 # parsed the body of the loop, substituting each '${$}' with an actual
    194 # '${:Udollar}'.
    195 # expect+1: Invalid character "$" in .for loop variable name
    196 .for $ in dollar
    197 .  info eight $$$$$$$$ and no cents.
    198 .  info eight ${$}${$}${$}${$} and no cents.
    199 .endfor
    200 # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
    201 # an expression. The inner '$' is followed by a '}' and is thus a
    202 # silent syntax error, the '$' is skipped. The variable name is thus '', and
    203 # since there is never a variable named '', the whole expression '${$}'
    204 # evaluates to an empty string.
    205 closing-brace=		}		# guard against an
    206 ${closing-brace}=	<closing-brace>	# alternative interpretation
    207 # expect+1: eight  and no cents.
    208 .info eight ${$}${$}${$}${$} and no cents.
    209 
    210 # What happens if the values from the .for loop contain a literal newline?
    211 # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
    212 # body of the .for loop, where it was then interpreted as a literal newline,
    213 # leading to syntax errors such as "Unclosed variable expression" in the upper
    214 # line and "Invalid line type" in the lower line.
    215 #
    216 # The error message occurs in the line of the .for loop since that's the place
    217 # where the body of the .for loop is constructed, and at this point the
    218 # newline character gets replaced with a plain space.
    219 # expect+3: newline in .for value
    220 # expect+2: newline in .for value
    221 # expect+2: short: " ", long: " "
    222 .for i in "${.newline}"
    223 .  info short: $i, long: ${i}
    224 .endfor
    225 # No error since the newline character is not actually used in the body.
    226 .for i in "${.newline}"
    227 .endfor
    228 
    229 # Between for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
    230 # a newline character in a .for loop led to a crash since at the point where
    231 # the error message including the stack trace is printed, the body of the .for
    232 # loop is assembled, and at that point, ForLoop.nextItem had already been
    233 # advanced.
    234 .MAKEFLAGS: -dp
    235 # expect+1: newline in .for value
    236 .for i in "${.newline}"
    237 : $i
    238 .endfor
    239 .MAKEFLAGS: -d0
    240 
    241 .MAKEFLAGS: -df
    242 .for i in \# \\\#
    243 # $i
    244 .endfor
    245 
    246 .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
    247 # $i
    248 .endfor
    249 
    250 # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
    251 # target-local variables.  See for.c 1.45 from 2009-01-14.
    252 .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
    253 # $i
    254 .endfor
    255 # expect: # ${:U${.TARGET}}
    256 # XXX: Why does '$' result in the same text as '$$'?
    257 # expect: # ${:U${.TARGET}}
    258 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    259 # expect: # ${:U$${.TARGET\}}
    260 # XXX: Why does '$' result in the same text as '$$'?
    261 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    262 # expect: # ${:U$${.TARGET\}}
    263 
    264 .for i in ((( {{{ ))) }}}
    265 # $i
    266 .endfor
    267 
    268 
    269 # When generating the body of a .for loop, recognizing the expressions is done
    270 # using simple heuristics.  These can go wrong in ambiguous cases like this.
    271 # The variable name ',' is unusual as it is not a pronounceable name, but the
    272 # same principle applies for other names as well.  In this case, the text '$,'
    273 # is replaced with the expression '${:U1}', even though the text does not
    274 # represent an expression.
    275 .for , in 1
    276 # $$i $i
    277 # VAR= $$i $i ${a:S,from$,to,}
    278 VAR= $$i $i ${a:S,from$,to,}
    279 .endfor
    280 # expect: # $$i $i
    281 # expect: # VAR= $$i $i ${a:S,from${:U1}to,}
    282 # expect: VAR= $$i $i ${a:S,from${:U1}to,}
    283 #
    284 # When the above variable is evaluated, make will complain about the
    285 # unfinished modifier ':S', as it is missing a comma.
    286