Home | History | Annotate | Line # | Download | only in unit-tests
directive-for-escape.mk revision 1.19
      1  1.19  rillig # $NetBSD: directive-for-escape.mk,v 1.19 2023/06/01 09:02:14 rillig Exp $
      2   1.1  rillig #
      3   1.1  rillig # Test escaping of special characters in the iteration values of a .for loop.
      4   1.1  rillig # These values get expanded later using the :U variable modifier, and this
      5  1.18  rillig # escaping and unescaping must pass all characters and strings unmodified.
      6  1.18  rillig 
      7  1.18  rillig # expect-all
      8   1.1  rillig 
      9   1.1  rillig .MAKEFLAGS: -df
     10   1.1  rillig 
     11   1.7  rillig # Even though the .for loops take quotes into account when splitting the
     12   1.7  rillig # string into words, the quotes don't need to be balanced, as of 2020-12-31.
     13   1.1  rillig # This could be considered a bug.
     14   1.1  rillig ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     15   1.1  rillig 
     16  1.18  rillig 
     17   1.1  rillig # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
     18  1.12  rillig # the loop.  Not only would it need the escaping for the variable modifier
     19  1.12  rillig # ':U' but also the escaping for the line-end comment.
     20   1.1  rillig .for chars in ${ASCII}
     21   1.1  rillig .  info ${chars}
     22   1.1  rillig .endfor
     23  1.18  rillig # expect-2: !"
     24   1.1  rillig 
     25   1.1  rillig # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
     26   1.1  rillig # as comment character.  Using 3 backslashes doesn't help either since
     27   1.1  rillig # then the situation is essentially the same as with 1 backslash.
     28   1.1  rillig # This means that a '#' sign cannot be passed in the value of a .for loop
     29   1.1  rillig # at all.
     30   1.1  rillig ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     31   1.1  rillig .for chars in ${ASCII.2020-12-31}
     32   1.1  rillig .  info ${chars}
     33   1.1  rillig .endfor
     34  1.18  rillig # expect-2: !"\\
     35   1.1  rillig 
     36  1.11  rillig # Cover the code in ExprLen.
     37   1.1  rillig #
     38   1.1  rillig # XXX: It is unexpected that the variable V gets expanded in the loop body.
     39  1.11  rillig # The double '$$' should intuitively prevent exactly this.  Probably nobody
     40  1.11  rillig # was adventurous enough to use literal dollar signs in the values of a .for
     41  1.12  rillig # loop, allowing this edge case to go unnoticed for years.
     42  1.11  rillig #
     43  1.11  rillig # See for.c, function ExprLen.
     44   1.1  rillig V=		value
     45   1.1  rillig VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     46   1.1  rillig .for i in ${VALUES}
     47   1.1  rillig .  info $i
     48   1.1  rillig .endfor
     49  1.18  rillig # expect-2: $
     50  1.18  rillig # expect-3: value
     51  1.18  rillig # expect-4: value-with-modifier
     52  1.18  rillig # expect-5: value
     53  1.18  rillig # expect-6: value-with-modifier
     54   1.1  rillig 
     55  1.16  rillig 
     56  1.11  rillig # Try to cover the code for nested '{}' in ExprLen, without success.
     57   1.1  rillig #
     58   1.7  rillig # The value of the variable VALUES is not meant to be a variable expression.
     59   1.7  rillig # Instead, it is meant to represent literal text, the only escaping mechanism
     60   1.7  rillig # being that each '$' is written as '$$'.
     61  1.16  rillig VALUES=		$${UNDEF:U\$$\$$ {{}} end}
     62   1.5  rillig #
     63   1.5  rillig # The .for loop splits ${VALUES} into 3 words, at the space characters, since
     64  1.14  rillig # the '$$' is an ordinary character and the spaces are not escaped.
     65  1.14  rillig #	Word 1 is '${UNDEF:U\$\$'
     66  1.14  rillig #	Word 2 is '{{}}'
     67  1.14  rillig #	Word 3 is 'end}'
     68  1.16  rillig #
     69  1.16  rillig # Each of these words is now inserted in the body of the .for loop.
     70  1.16  rillig .for i in ${VALUES}
     71  1.16  rillig # $i
     72  1.16  rillig .endfor
     73  1.16  rillig #
     74  1.16  rillig # When these words are injected into the body of the .for loop, each inside a
     75  1.16  rillig # '${:U...}' expression, the result is:
     76  1.16  rillig #
     77  1.19  rillig # expect: For: loop body with i = ${UNDEF:U\$\$:
     78  1.16  rillig # expect: # ${:U\${UNDEF\:U\\$\\$}
     79  1.19  rillig # expect: For: loop body with i = {{}}:
     80  1.16  rillig # expect: # ${:U{{\}\}}
     81  1.19  rillig # expect: For: loop body with i = end}:
     82  1.16  rillig # expect: # ${:Uend\}}
     83  1.16  rillig # expect: For: end for 1
     84  1.16  rillig #
     85  1.16  rillig # The first of these expressions is the most interesting one, due to its many
     86  1.16  rillig # special characters.  This expression is properly balanced:
     87  1.16  rillig #
     88  1.16  rillig #	Text	Meaning		Explanation
     89  1.16  rillig #	\$	$		escaped
     90  1.16  rillig #	{	{		ordinary text
     91  1.16  rillig #	UNDEF	UNDEF		ordinary text
     92  1.16  rillig #	\:	:		escaped
     93  1.16  rillig #	U	U		ordinary text
     94  1.16  rillig #	\\	\		escaped
     95  1.16  rillig #	$\	(expr)		an expression, the variable name is '\'
     96  1.16  rillig #	\$	$		escaped
     97  1.16  rillig #
     98  1.16  rillig # To make the expression '$\' visible, define it to an actual word:
     99  1.14  rillig ${:U\\}=	backslash
    100   1.1  rillig .for i in ${VALUES}
    101   1.1  rillig .  info $i
    102   1.1  rillig .endfor
    103  1.16  rillig #
    104  1.16  rillig # expect-3: ${UNDEF:U\backslash$
    105  1.16  rillig # expect-4: {{}}
    106  1.16  rillig # expect-5: end}
    107  1.16  rillig #
    108  1.16  rillig # FIXME: There was no expression '$\' in the original text of the variable
    109  1.16  rillig # 'VALUES', that's a surprise in the parser.
    110  1.16  rillig 
    111   1.1  rillig 
    112  1.11  rillig # Second try to cover the code for nested '{}' in ExprLen.
    113   1.5  rillig #
    114  1.16  rillig # XXX: It is not the job of ExprLen to parse an expression, it is naive to
    115  1.16  rillig # expect ExprLen to get all the details right in just a few lines of code.
    116   1.5  rillig # Each variable modifier has its own inconsistent way of parsing nested
    117   1.7  rillig # variable expressions, braces and parentheses.  (Compare ':M', ':S', and
    118   1.7  rillig # ':D' for details.)  The only sensible thing to do is therefore to let
    119   1.7  rillig # Var_Parse do all the parsing work.
    120   1.5  rillig VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
    121   1.5  rillig .for i in ${VALUES}
    122   1.5  rillig .  info $i
    123   1.5  rillig .endfor
    124  1.18  rillig # expect-2: begin<fallback>end
    125   1.5  rillig 
    126   1.1  rillig # A single trailing dollar doesn't happen in practice.
    127   1.1  rillig # The dollar sign is correctly passed through to the body of the .for loop.
    128   1.1  rillig # There, it is expanded by the .info directive, but even there a trailing
    129   1.1  rillig # dollar sign is kept as-is.
    130   1.1  rillig .for i in ${:U\$}
    131   1.1  rillig .  info ${i}
    132   1.1  rillig .endfor
    133  1.18  rillig # expect-2: $
    134   1.2  rillig 
    135  1.17  rillig # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    136  1.17  rillig # could contain colons, which affected variable expressions having this exact
    137  1.17  rillig # modifier.  This possibility was neither intended nor documented.
    138   1.2  rillig NUMBERS=	one two three
    139  1.17  rillig # expect+1: invalid character ':' in .for loop variable name
    140   1.2  rillig .for NUMBERS:M*e in replaced
    141   1.2  rillig .  info ${NUMBERS} ${NUMBERS:M*e}
    142   1.2  rillig .endfor
    143   1.2  rillig 
    144  1.17  rillig # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    145  1.17  rillig # could contain braces, which allowed replacing sequences of variable
    146  1.17  rillig # expressions.  This possibility was neither intended nor documented.
    147   1.2  rillig BASENAME=	one
    148   1.2  rillig EXT=		.c
    149  1.17  rillig # expect+1: invalid character '}' in .for loop variable name
    150   1.2  rillig .for BASENAME}${EXT in replaced
    151   1.2  rillig .  info ${BASENAME}${EXT}
    152   1.2  rillig .endfor
    153   1.3  rillig 
    154   1.3  rillig # Demonstrate the various ways to refer to the iteration variable.
    155   1.3  rillig i=		outer
    156   1.3  rillig i2=		two
    157   1.3  rillig i,=		comma
    158   1.3  rillig .for i in inner
    159   1.3  rillig .  info .        $$i: $i
    160   1.3  rillig .  info .      $${i}: ${i}
    161   1.3  rillig .  info .   $${i:M*}: ${i:M*}
    162   1.3  rillig .  info .      $$(i): $(i)
    163   1.3  rillig .  info .   $$(i:M*): $(i:M*)
    164   1.3  rillig .  info . $${i$${:U}}: ${i${:U}}
    165   1.6  rillig .  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
    166   1.3  rillig .  info .     $${i2}: ${i2}
    167   1.3  rillig .  info .     $${i,}: ${i,}
    168   1.3  rillig .  info .  adjacent: $i${i}${i:M*}$i
    169   1.3  rillig .endfor
    170  1.18  rillig # expect-11: .        $i: inner
    171  1.18  rillig # expect-11: .      ${i}: inner
    172  1.18  rillig # expect-11: .   ${i:M*}: inner
    173  1.18  rillig # expect-11: .      $(i): inner
    174  1.18  rillig # expect-11: .   $(i:M*): inner
    175  1.18  rillig # expect-11: . ${i${:U}}: outer
    176  1.18  rillig # expect-11: .    ${i\}}: inner}
    177  1.18  rillig # expect-11: .     ${i2}: two
    178  1.18  rillig # expect-11: .     ${i,}: comma
    179  1.18  rillig # expect-11: .  adjacent: innerinnerinnerinner
    180   1.4  rillig 
    181  1.17  rillig # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
    182  1.17  rillig # since there was no check on valid variable names.  ForLoop_SubstVarShort
    183  1.17  rillig # skipped "stupid" variable names though, but ForLoop_SubstVarLong naively
    184  1.17  rillig # parsed the body of the loop, substituting each '${$}' with an actual
    185  1.17  rillig # '${:Udollar}'.
    186  1.18  rillig # expect+1: invalid character '$' in .for loop variable name
    187   1.8  rillig .for $ in dollar
    188   1.8  rillig .  info eight $$$$$$$$ and no cents.
    189   1.8  rillig .  info eight ${$}${$}${$}${$} and no cents.
    190   1.8  rillig .endfor
    191   1.8  rillig # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
    192   1.8  rillig # a variable expression. The inner '$' is followed by a '}' and is thus a
    193   1.8  rillig # silent syntax error, the '$' is skipped. The variable name is thus '', and
    194   1.8  rillig # since there is never a variable named '', the whole expression '${$}'
    195   1.8  rillig # evaluates to an empty string.
    196   1.8  rillig closing-brace=		}		# guard against an
    197   1.8  rillig ${closing-brace}=	<closing-brace>	# alternative interpretation
    198  1.18  rillig # expect+1: eight  and no cents.
    199   1.8  rillig .info eight ${$}${$}${$}${$} and no cents.
    200   1.8  rillig 
    201   1.9  rillig # What happens if the values from the .for loop contain a literal newline?
    202  1.10  rillig # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
    203  1.10  rillig # body of the .for loop, where it was then interpreted as a literal newline,
    204  1.10  rillig # leading to syntax errors such as "Unclosed variable expression" in the upper
    205  1.10  rillig # line and "Invalid line type" in the lower line.
    206  1.18  rillig #
    207  1.18  rillig # The error message occurs in the line of the .for loop since that's the place
    208  1.18  rillig # where the body of the .for loop is constructed, and at this point the
    209  1.18  rillig # newline character gets replaced with a plain space.
    210  1.18  rillig # expect+2: newline in .for value
    211  1.18  rillig # expect+1: newline in .for value
    212   1.9  rillig .for i in "${.newline}"
    213   1.9  rillig .  info short: $i
    214   1.9  rillig .  info long: ${i}
    215   1.9  rillig .endfor
    216  1.18  rillig # expect-3: short: " "
    217  1.18  rillig # expect-3: long: " "
    218   1.9  rillig 
    219  1.13  rillig # No error since the newline character is not actually used.
    220  1.13  rillig .for i in "${.newline}"
    221  1.13  rillig .endfor
    222  1.13  rillig 
    223  1.13  rillig # From for.c 1.161 from 2022-01-08 until before for.c 1.163 from 2022-01-09,
    224  1.13  rillig # a newline character in a .for loop led to a crash since at the point where
    225  1.13  rillig # the error message including the stack trace is printed, the body of the .for
    226  1.13  rillig # loop is assembled, and at that point, ForLoop.nextItem had already been
    227  1.13  rillig # advanced.
    228  1.13  rillig .MAKEFLAGS: -dp
    229  1.18  rillig # expect+1: newline in .for value
    230  1.13  rillig .for i in "${.newline}"
    231  1.13  rillig : $i
    232  1.13  rillig .endfor
    233  1.13  rillig .MAKEFLAGS: -d0
    234  1.13  rillig 
    235  1.14  rillig .MAKEFLAGS: -df
    236  1.14  rillig .for i in \# \\\#
    237  1.14  rillig # $i
    238  1.14  rillig .endfor
    239  1.14  rillig 
    240  1.14  rillig .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
    241  1.14  rillig # $i
    242  1.14  rillig .endfor
    243  1.14  rillig 
    244  1.15  rillig # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
    245  1.15  rillig # target-local variables.  See for.c 1.45 from 2009-01-14.
    246  1.15  rillig .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
    247  1.15  rillig # $i
    248  1.15  rillig .endfor
    249  1.15  rillig # expect: # ${:U${.TARGET}}
    250  1.15  rillig # XXX: Why does '$' result in the same text as '$$'?
    251  1.15  rillig # expect: # ${:U${.TARGET}}
    252  1.15  rillig # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    253  1.15  rillig # expect: # ${:U$${.TARGET\}}
    254  1.15  rillig # XXX: Why does '$' result in the same text as '$$'?
    255  1.15  rillig # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    256  1.15  rillig # expect: # ${:U$${.TARGET\}}
    257  1.15  rillig 
    258  1.14  rillig .for i in ((( {{{ ))) }}}
    259  1.14  rillig # $i
    260  1.14  rillig .endfor
    261  1.14  rillig .MAKEFLAGS: -d0
    262  1.14  rillig 
    263   1.4  rillig all:
    264