Home | History | Annotate | Line # | Download | only in unit-tests
directive-for-escape.mk revision 1.16
      1 # $NetBSD: directive-for-escape.mk,v 1.16 2022/06/12 16:09:21 rillig Exp $
      2 #
      3 # Test escaping of special characters in the iteration values of a .for loop.
      4 # These values get expanded later using the :U variable modifier, and this
      5 # escaping and unescaping must pass all characters and strings effectively
      6 # unmodified.
      7 
      8 .MAKEFLAGS: -df
      9 
     10 # Even though the .for loops take quotes into account when splitting the
     11 # string into words, the quotes don't need to be balanced, as of 2020-12-31.
     12 # This could be considered a bug.
     13 ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     14 
     15 # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
     16 # the loop.  Not only would it need the escaping for the variable modifier
     17 # ':U' but also the escaping for the line-end comment.
     18 .for chars in ${ASCII}
     19 .  info ${chars}
     20 .endfor
     21 
     22 # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
     23 # as comment character.  Using 3 backslashes doesn't help either since
     24 # then the situation is essentially the same as with 1 backslash.
     25 # This means that a '#' sign cannot be passed in the value of a .for loop
     26 # at all.
     27 ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     28 .for chars in ${ASCII.2020-12-31}
     29 .  info ${chars}
     30 .endfor
     31 
     32 # Cover the code in ExprLen.
     33 #
     34 # XXX: It is unexpected that the variable V gets expanded in the loop body.
     35 # The double '$$' should intuitively prevent exactly this.  Probably nobody
     36 # was adventurous enough to use literal dollar signs in the values of a .for
     37 # loop, allowing this edge case to go unnoticed for years.
     38 #
     39 # See for.c, function ExprLen.
     40 V=		value
     41 VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     42 .for i in ${VALUES}
     43 .  info $i
     44 .endfor
     45 
     46 
     47 # Try to cover the code for nested '{}' in ExprLen, without success.
     48 #
     49 # The value of the variable VALUES is not meant to be a variable expression.
     50 # Instead, it is meant to represent literal text, the only escaping mechanism
     51 # being that each '$' is written as '$$'.
     52 VALUES=		$${UNDEF:U\$$\$$ {{}} end}
     53 #
     54 # The .for loop splits ${VALUES} into 3 words, at the space characters, since
     55 # the '$$' is an ordinary character and the spaces are not escaped.
     56 #	Word 1 is '${UNDEF:U\$\$'
     57 #	Word 2 is '{{}}'
     58 #	Word 3 is 'end}'
     59 #
     60 # Each of these words is now inserted in the body of the .for loop.
     61 .for i in ${VALUES}
     62 # $i
     63 .endfor
     64 #
     65 # When these words are injected into the body of the .for loop, each inside a
     66 # '${:U...}' expression, the result is:
     67 #
     68 # expect: For: loop body:
     69 # expect: # ${:U\${UNDEF\:U\\$\\$}
     70 # expect: For: loop body:
     71 # expect: # ${:U{{\}\}}
     72 # expect: For: loop body:
     73 # expect: # ${:Uend\}}
     74 # expect: For: end for 1
     75 #
     76 # The first of these expressions is the most interesting one, due to its many
     77 # special characters.  This expression is properly balanced:
     78 #
     79 #	Text	Meaning		Explanation
     80 #	\$	$		escaped
     81 #	{	{		ordinary text
     82 #	UNDEF	UNDEF		ordinary text
     83 #	\:	:		escaped
     84 #	U	U		ordinary text
     85 #	\\	\		escaped
     86 #	$\	(expr)		an expression, the variable name is '\'
     87 #	\$	$		escaped
     88 #
     89 # To make the expression '$\' visible, define it to an actual word:
     90 ${:U\\}=	backslash
     91 .for i in ${VALUES}
     92 .  info $i
     93 .endfor
     94 #
     95 # expect-3: ${UNDEF:U\backslash$
     96 # expect-4: {{}}
     97 # expect-5: end}
     98 #
     99 # FIXME: There was no expression '$\' in the original text of the variable
    100 # 'VALUES', that's a surprise in the parser.
    101 
    102 
    103 # Second try to cover the code for nested '{}' in ExprLen.
    104 #
    105 # XXX: It is not the job of ExprLen to parse an expression, it is naive to
    106 # expect ExprLen to get all the details right in just a few lines of code.
    107 # Each variable modifier has its own inconsistent way of parsing nested
    108 # variable expressions, braces and parentheses.  (Compare ':M', ':S', and
    109 # ':D' for details.)  The only sensible thing to do is therefore to let
    110 # Var_Parse do all the parsing work.
    111 VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
    112 .for i in ${VALUES}
    113 .  info $i
    114 .endfor
    115 
    116 # A single trailing dollar doesn't happen in practice.
    117 # The dollar sign is correctly passed through to the body of the .for loop.
    118 # There, it is expanded by the .info directive, but even there a trailing
    119 # dollar sign is kept as-is.
    120 .for i in ${:U\$}
    121 .  info ${i}
    122 .endfor
    123 
    124 # As of 2020-12-31, the name of the iteration variable can even contain
    125 # colons, which then affects variable expressions having this exact modifier.
    126 # This is clearly an unintended side effect of the implementation.
    127 NUMBERS=	one two three
    128 .for NUMBERS:M*e in replaced
    129 .  info ${NUMBERS} ${NUMBERS:M*e}
    130 .endfor
    131 
    132 # As of 2020-12-31, the name of the iteration variable can contain braces,
    133 # which gets even more surprising than colons, since it allows replacing
    134 # sequences of variable expressions.  There is no practical use case for
    135 # this, though.
    136 BASENAME=	one
    137 EXT=		.c
    138 .for BASENAME}${EXT in replaced
    139 .  info ${BASENAME}${EXT}
    140 .endfor
    141 
    142 # Demonstrate the various ways to refer to the iteration variable.
    143 i=		outer
    144 i2=		two
    145 i,=		comma
    146 .for i in inner
    147 .  info .        $$i: $i
    148 .  info .      $${i}: ${i}
    149 .  info .   $${i:M*}: ${i:M*}
    150 .  info .      $$(i): $(i)
    151 .  info .   $$(i:M*): $(i:M*)
    152 .  info . $${i$${:U}}: ${i${:U}}
    153 .  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
    154 .  info .     $${i2}: ${i2}
    155 .  info .     $${i,}: ${i,}
    156 .  info .  adjacent: $i${i}${i:M*}$i
    157 .endfor
    158 
    159 # The variable name can be a single '$' since there is no check on valid
    160 # variable names. ForLoop_SubstVarShort skips "stupid" variable names,
    161 # but ForLoop_SubstVarLong naively parses the body of the loop, substituting
    162 # each '${$}' with an actual 'dollar'.
    163 .for $ in dollar
    164 .  info eight $$$$$$$$ and no cents.
    165 .  info eight ${$}${$}${$}${$} and no cents.
    166 .endfor
    167 # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
    168 # a variable expression. The inner '$' is followed by a '}' and is thus a
    169 # silent syntax error, the '$' is skipped. The variable name is thus '', and
    170 # since there is never a variable named '', the whole expression '${$}'
    171 # evaluates to an empty string.
    172 closing-brace=		}		# guard against an
    173 ${closing-brace}=	<closing-brace>	# alternative interpretation
    174 .info eight ${$}${$}${$}${$} and no cents.
    175 
    176 # What happens if the values from the .for loop contain a literal newline?
    177 # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
    178 # body of the .for loop, where it was then interpreted as a literal newline,
    179 # leading to syntax errors such as "Unclosed variable expression" in the upper
    180 # line and "Invalid line type" in the lower line.
    181 .for i in "${.newline}"
    182 .  info short: $i
    183 .  info long: ${i}
    184 .endfor
    185 
    186 # No error since the newline character is not actually used.
    187 .for i in "${.newline}"
    188 .endfor
    189 
    190 # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
    191 # a newline character in a .for loop led to a crash since at the point where
    192 # the error message including the stack trace is printed, the body of the .for
    193 # loop is assembled, and at that point, ForLoop.nextItem had already been
    194 # advanced.
    195 .MAKEFLAGS: -dp
    196 .for i in "${.newline}"
    197 : $i
    198 .endfor
    199 .MAKEFLAGS: -d0
    200 
    201 .MAKEFLAGS: -df
    202 .for i in \# \\\#
    203 # $i
    204 .endfor
    205 
    206 .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
    207 # $i
    208 .endfor
    209 
    210 # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
    211 # target-local variables.  See for.c 1.45 from 2009-01-14.
    212 .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
    213 # $i
    214 .endfor
    215 # expect: # ${:U${.TARGET}}
    216 # XXX: Why does '$' result in the same text as '$$'?
    217 # expect: # ${:U${.TARGET}}
    218 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    219 # expect: # ${:U$${.TARGET\}}
    220 # XXX: Why does '$' result in the same text as '$$'?
    221 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    222 # expect: # ${:U$${.TARGET\}}
    223 
    224 .for i in ((( {{{ ))) }}}
    225 # $i
    226 .endfor
    227 .MAKEFLAGS: -d0
    228 
    229 all:
    230