Home | History | Annotate | Line # | Download | only in unit-tests
directive-for-escape.mk revision 1.23.2.1
      1  1.23.2.1  perseant # $NetBSD: directive-for-escape.mk,v 1.23.2.1 2025/08/02 05:58:33 perseant Exp $
      2       1.1    rillig #
      3       1.1    rillig # Test escaping of special characters in the iteration values of a .for loop.
      4       1.1    rillig # These values get expanded later using the :U variable modifier, and this
      5      1.18    rillig # escaping and unescaping must pass all characters and strings unmodified.
      6      1.18    rillig 
      7       1.1    rillig .MAKEFLAGS: -df
      8       1.1    rillig 
      9       1.7    rillig # Even though the .for loops take quotes into account when splitting the
     10       1.7    rillig # string into words, the quotes don't need to be balanced, as of 2020-12-31.
     11       1.1    rillig # This could be considered a bug.
     12       1.1    rillig ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     13       1.1    rillig 
     14      1.18    rillig 
     15       1.1    rillig # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
     16      1.12    rillig # the loop.  Not only would it need the escaping for the variable modifier
     17      1.12    rillig # ':U' but also the escaping for the line-end comment.
     18  1.23.2.1  perseant # expect+3: Unclosed expression, expecting "}" for modifier "U!""
     19  1.23.2.1  perseant # expect+2: !"
     20       1.1    rillig .for chars in ${ASCII}
     21       1.1    rillig .  info ${chars}
     22       1.1    rillig .endfor
     23       1.1    rillig 
     24       1.1    rillig # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
     25       1.1    rillig # as comment character.  Using 3 backslashes doesn't help either since
     26       1.1    rillig # then the situation is essentially the same as with 1 backslash.
     27       1.1    rillig # This means that a '#' sign cannot be passed in the value of a .for loop
     28       1.1    rillig # at all.
     29       1.1    rillig ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     30  1.23.2.1  perseant # expect+3: Unclosed expression, expecting "}" for modifier "U!"\\\\"
     31  1.23.2.1  perseant # expect+2: !"\\
     32       1.1    rillig .for chars in ${ASCII.2020-12-31}
     33       1.1    rillig .  info ${chars}
     34       1.1    rillig .endfor
     35       1.1    rillig 
     36      1.11    rillig # Cover the code in ExprLen.
     37       1.1    rillig #
     38       1.1    rillig # XXX: It is unexpected that the variable V gets expanded in the loop body.
     39      1.11    rillig # The double '$$' should intuitively prevent exactly this.  Probably nobody
     40      1.11    rillig # was adventurous enough to use literal dollar signs in the values of a .for
     41      1.12    rillig # loop, allowing this edge case to go unnoticed for years.
     42      1.11    rillig #
     43      1.11    rillig # See for.c, function ExprLen.
     44       1.1    rillig V=		value
     45       1.1    rillig VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     46      1.21    rillig # expect: .  info ${:U\$}
     47  1.23.2.1  perseant # expect+10: $
     48      1.21    rillig # expect: .  info ${:U${V}}
     49  1.23.2.1  perseant # expect+8: value
     50      1.21    rillig # expect: .  info ${:U${V:=-with-modifier}}
     51  1.23.2.1  perseant # expect+6: value-with-modifier
     52      1.21    rillig # expect: .  info ${:U$(V)}
     53  1.23.2.1  perseant # expect+4: value
     54      1.21    rillig # expect: .  info ${:U$(V:=-with-modifier)}
     55  1.23.2.1  perseant # expect+2: value-with-modifier
     56  1.23.2.1  perseant .for i in ${VALUES}
     57  1.23.2.1  perseant .  info $i
     58  1.23.2.1  perseant .endfor
     59      1.21    rillig #
     60      1.21    rillig # Providing the loop items directly has the same effect.
     61  1.23.2.1  perseant # expect: .  info ${:U\$}
     62  1.23.2.1  perseant # expect+7: $
     63  1.23.2.1  perseant # expect: .  info ${:U${V}}
     64  1.23.2.1  perseant # expect+5: value
     65  1.23.2.1  perseant # expect+4: value-with-modifier
     66  1.23.2.1  perseant # expect+3: value
     67  1.23.2.1  perseant # expect+2: value-with-modifier
     68      1.21    rillig .for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     69      1.21    rillig .  info $i
     70      1.21    rillig .endfor
     71      1.16    rillig 
     72      1.11    rillig # Try to cover the code for nested '{}' in ExprLen, without success.
     73       1.1    rillig #
     74      1.22    rillig # The value of the variable VALUES is not meant to be an expression.
     75       1.7    rillig # Instead, it is meant to represent literal text, the only escaping mechanism
     76       1.7    rillig # being that each '$' is written as '$$'.
     77      1.16    rillig VALUES=		$${UNDEF:U\$$\$$ {{}} end}
     78       1.5    rillig #
     79       1.5    rillig # The .for loop splits ${VALUES} into 3 words, at the space characters, since
     80      1.14    rillig # the '$$' is an ordinary character and the spaces are not escaped.
     81      1.14    rillig #	Word 1 is '${UNDEF:U\$\$'
     82      1.14    rillig #	Word 2 is '{{}}'
     83      1.14    rillig #	Word 3 is 'end}'
     84      1.16    rillig #
     85      1.16    rillig # Each of these words is now inserted in the body of the .for loop.
     86      1.16    rillig .for i in ${VALUES}
     87      1.16    rillig # $i
     88      1.16    rillig .endfor
     89      1.16    rillig #
     90      1.16    rillig # When these words are injected into the body of the .for loop, each inside a
     91      1.16    rillig # '${:U...}' expression, the result is:
     92      1.16    rillig #
     93      1.19    rillig # expect: For: loop body with i = ${UNDEF:U\$\$:
     94      1.16    rillig # expect: # ${:U\${UNDEF\:U\\$\\$}
     95      1.19    rillig # expect: For: loop body with i = {{}}:
     96      1.16    rillig # expect: # ${:U{{\}\}}
     97      1.19    rillig # expect: For: loop body with i = end}:
     98      1.16    rillig # expect: # ${:Uend\}}
     99      1.16    rillig # expect: For: end for 1
    100      1.16    rillig #
    101      1.16    rillig # The first of these expressions is the most interesting one, due to its many
    102      1.16    rillig # special characters.  This expression is properly balanced:
    103      1.16    rillig #
    104      1.16    rillig #	Text	Meaning		Explanation
    105      1.16    rillig #	\$	$		escaped
    106      1.16    rillig #	{	{		ordinary text
    107      1.16    rillig #	UNDEF	UNDEF		ordinary text
    108      1.16    rillig #	\:	:		escaped
    109      1.16    rillig #	U	U		ordinary text
    110      1.16    rillig #	\\	\		escaped
    111      1.16    rillig #	$\	(expr)		an expression, the variable name is '\'
    112      1.16    rillig #	\$	$		escaped
    113      1.16    rillig #
    114      1.16    rillig # To make the expression '$\' visible, define it to an actual word:
    115      1.14    rillig ${:U\\}=	backslash
    116  1.23.2.1  perseant # expect+4: ${UNDEF:U\backslash$
    117  1.23.2.1  perseant # expect+3: {{}}
    118  1.23.2.1  perseant # expect+2: end}
    119       1.1    rillig .for i in ${VALUES}
    120       1.1    rillig .  info $i
    121       1.1    rillig .endfor
    122      1.16    rillig #
    123      1.16    rillig # FIXME: There was no expression '$\' in the original text of the variable
    124      1.16    rillig # 'VALUES', that's a surprise in the parser.
    125      1.16    rillig 
    126       1.1    rillig 
    127  1.23.2.1  perseant # The second attempt to cover the code for nested '{}' in ExprLen.
    128       1.5    rillig #
    129      1.16    rillig # XXX: It is not the job of ExprLen to parse an expression, it is naive to
    130      1.16    rillig # expect ExprLen to get all the details right in just a few lines of code.
    131       1.5    rillig # Each variable modifier has its own inconsistent way of parsing nested
    132      1.22    rillig # expressions, braces and parentheses.  (Compare ':M', ':S', and
    133       1.7    rillig # ':D' for details.)  The only sensible thing to do is therefore to let
    134       1.7    rillig # Var_Parse do all the parsing work.
    135       1.5    rillig VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
    136  1.23.2.1  perseant # expect+2: begin<fallback>end
    137       1.5    rillig .for i in ${VALUES}
    138       1.5    rillig .  info $i
    139       1.5    rillig .endfor
    140       1.5    rillig 
    141       1.1    rillig # A single trailing dollar doesn't happen in practice.
    142       1.1    rillig # The dollar sign is correctly passed through to the body of the .for loop.
    143       1.1    rillig # There, it is expanded by the .info directive, but even there a trailing
    144       1.1    rillig # dollar sign is kept as-is.
    145  1.23.2.1  perseant # expect+2: $
    146       1.1    rillig .for i in ${:U\$}
    147       1.1    rillig .  info ${i}
    148       1.1    rillig .endfor
    149       1.2    rillig 
    150      1.17    rillig # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    151      1.22    rillig # could contain colons, which affected expressions having this exact
    152      1.17    rillig # modifier.  This possibility was neither intended nor documented.
    153       1.2    rillig NUMBERS=	one two three
    154  1.23.2.1  perseant # expect+1: Invalid character ":" in .for loop variable name
    155       1.2    rillig .for NUMBERS:M*e in replaced
    156       1.2    rillig .  info ${NUMBERS} ${NUMBERS:M*e}
    157       1.2    rillig .endfor
    158       1.2    rillig 
    159      1.17    rillig # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    160      1.23    rillig # could contain braces, which allowed replacing sequences of
    161      1.17    rillig # expressions.  This possibility was neither intended nor documented.
    162       1.2    rillig BASENAME=	one
    163       1.2    rillig EXT=		.c
    164  1.23.2.1  perseant # expect+1: Invalid character "}" in .for loop variable name
    165       1.2    rillig .for BASENAME}${EXT in replaced
    166       1.2    rillig .  info ${BASENAME}${EXT}
    167       1.2    rillig .endfor
    168       1.3    rillig 
    169       1.3    rillig # Demonstrate the various ways to refer to the iteration variable.
    170       1.3    rillig i=		outer
    171       1.3    rillig i2=		two
    172       1.3    rillig i,=		comma
    173  1.23.2.1  perseant # expect+2: inner inner inner inner inner
    174  1.23.2.1  perseant .for i in inner
    175  1.23.2.1  perseant .  info $i ${i} ${i:M*} $(i) $(i:M*)
    176  1.23.2.1  perseant .endfor
    177  1.23.2.1  perseant # expect+2: outer
    178  1.23.2.1  perseant .for i in inner
    179  1.23.2.1  perseant .  info ${i${:U}}
    180  1.23.2.1  perseant .endfor
    181  1.23.2.1  perseant # expect+2: inner}
    182       1.3    rillig .for i in inner
    183  1.23.2.1  perseant .  info ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
    184  1.23.2.1  perseant .endfor
    185  1.23.2.1  perseant # expect+2: two comma innerinnerinnerinner
    186  1.23.2.1  perseant .for i in inner
    187  1.23.2.1  perseant .  info ${i2} ${i,} $i${i}${i:M*}$i
    188  1.23.2.1  perseant .endfor
    189       1.4    rillig 
    190      1.17    rillig # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
    191      1.17    rillig # since there was no check on valid variable names.  ForLoop_SubstVarShort
    192      1.17    rillig # skipped "stupid" variable names though, but ForLoop_SubstVarLong naively
    193      1.17    rillig # parsed the body of the loop, substituting each '${$}' with an actual
    194      1.17    rillig # '${:Udollar}'.
    195  1.23.2.1  perseant # expect+1: Invalid character "$" in .for loop variable name
    196       1.8    rillig .for $ in dollar
    197       1.8    rillig .  info eight $$$$$$$$ and no cents.
    198       1.8    rillig .  info eight ${$}${$}${$}${$} and no cents.
    199       1.8    rillig .endfor
    200       1.8    rillig # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
    201      1.22    rillig # an expression. The inner '$' is followed by a '}' and is thus a
    202       1.8    rillig # silent syntax error, the '$' is skipped. The variable name is thus '', and
    203       1.8    rillig # since there is never a variable named '', the whole expression '${$}'
    204       1.8    rillig # evaluates to an empty string.
    205       1.8    rillig closing-brace=		}		# guard against an
    206       1.8    rillig ${closing-brace}=	<closing-brace>	# alternative interpretation
    207      1.18    rillig # expect+1: eight  and no cents.
    208       1.8    rillig .info eight ${$}${$}${$}${$} and no cents.
    209       1.8    rillig 
    210       1.9    rillig # What happens if the values from the .for loop contain a literal newline?
    211      1.10    rillig # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
    212      1.10    rillig # body of the .for loop, where it was then interpreted as a literal newline,
    213      1.10    rillig # leading to syntax errors such as "Unclosed variable expression" in the upper
    214      1.10    rillig # line and "Invalid line type" in the lower line.
    215      1.18    rillig #
    216      1.18    rillig # The error message occurs in the line of the .for loop since that's the place
    217      1.18    rillig # where the body of the .for loop is constructed, and at this point the
    218      1.18    rillig # newline character gets replaced with a plain space.
    219  1.23.2.1  perseant # expect+3: newline in .for value
    220      1.18    rillig # expect+2: newline in .for value
    221  1.23.2.1  perseant # expect+2: short: " ", long: " "
    222       1.9    rillig .for i in "${.newline}"
    223  1.23.2.1  perseant .  info short: $i, long: ${i}
    224       1.9    rillig .endfor
    225  1.23.2.1  perseant # No error since the newline character is not actually used in the body.
    226      1.13    rillig .for i in "${.newline}"
    227      1.13    rillig .endfor
    228      1.13    rillig 
    229      1.13    rillig # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
    230      1.13    rillig # a newline character in a .for loop led to a crash since at the point where
    231      1.13    rillig # the error message including the stack trace is printed, the body of the .for
    232      1.13    rillig # loop is assembled, and at that point, ForLoop.nextItem had already been
    233      1.13    rillig # advanced.
    234      1.13    rillig .MAKEFLAGS: -dp
    235      1.18    rillig # expect+1: newline in .for value
    236      1.13    rillig .for i in "${.newline}"
    237      1.13    rillig : $i
    238      1.13    rillig .endfor
    239      1.13    rillig .MAKEFLAGS: -d0
    240      1.13    rillig 
    241      1.14    rillig .MAKEFLAGS: -df
    242      1.14    rillig .for i in \# \\\#
    243      1.14    rillig # $i
    244      1.14    rillig .endfor
    245      1.14    rillig 
    246      1.14    rillig .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
    247      1.14    rillig # $i
    248      1.14    rillig .endfor
    249      1.14    rillig 
    250      1.15    rillig # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
    251      1.15    rillig # target-local variables.  See for.c 1.45 from 2009-01-14.
    252      1.15    rillig .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
    253      1.15    rillig # $i
    254      1.15    rillig .endfor
    255      1.15    rillig # expect: # ${:U${.TARGET}}
    256      1.15    rillig # XXX: Why does '$' result in the same text as '$$'?
    257      1.15    rillig # expect: # ${:U${.TARGET}}
    258      1.15    rillig # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    259      1.15    rillig # expect: # ${:U$${.TARGET\}}
    260      1.15    rillig # XXX: Why does '$' result in the same text as '$$'?
    261      1.15    rillig # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    262      1.15    rillig # expect: # ${:U$${.TARGET\}}
    263      1.15    rillig 
    264      1.14    rillig .for i in ((( {{{ ))) }}}
    265      1.14    rillig # $i
    266      1.14    rillig .endfor
    267      1.14    rillig 
    268      1.21    rillig 
    269      1.21    rillig # When generating the body of a .for loop, recognizing the expressions is done
    270      1.21    rillig # using simple heuristics.  These can go wrong in ambiguous cases like this.
    271      1.21    rillig # The variable name ',' is unusual as it is not a pronounceable name, but the
    272      1.21    rillig # same principle applies for other names as well.  In this case, the text '$,'
    273      1.21    rillig # is replaced with the expression '${:U1}', even though the text does not
    274      1.21    rillig # represent an expression.
    275      1.21    rillig .for , in 1
    276      1.21    rillig # $$i $i
    277      1.21    rillig # VAR= $$i $i ${a:S,from$,to,}
    278      1.21    rillig VAR= $$i $i ${a:S,from$,to,}
    279      1.21    rillig .endfor
    280      1.21    rillig # expect: # $$i $i
    281      1.21    rillig # expect: # VAR= $$i $i ${a:S,from${:U1}to,}
    282      1.21    rillig # expect: VAR= $$i $i ${a:S,from${:U1}to,}
    283      1.21    rillig #
    284      1.21    rillig # When the above variable is evaluated, make will complain about the
    285      1.21    rillig # unfinished modifier ':S', as it is missing a comma.
    286