Home | History | Annotate | Line # | Download | only in unit-tests
directive-for-escape.mk revision 1.18
      1 # $NetBSD: directive-for-escape.mk,v 1.18 2023/05/09 19:43:12 rillig Exp $
      2 #
      3 # Test escaping of special characters in the iteration values of a .for loop.
      4 # These values get expanded later using the :U variable modifier, and this
      5 # escaping and unescaping must pass all characters and strings unmodified.
      6 
      7 # expect-all
      8 
      9 .MAKEFLAGS: -df
     10 
     11 # Even though the .for loops take quotes into account when splitting the
     12 # string into words, the quotes don't need to be balanced, as of 2020-12-31.
     13 # This could be considered a bug.
     14 ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     15 
     16 
     17 # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
     18 # the loop.  Not only would it need the escaping for the variable modifier
     19 # ':U' but also the escaping for the line-end comment.
     20 .for chars in ${ASCII}
     21 .  info ${chars}
     22 .endfor
     23 # expect-2: !"
     24 
     25 # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
     26 # as comment character.  Using 3 backslashes doesn't help either since
     27 # then the situation is essentially the same as with 1 backslash.
     28 # This means that a '#' sign cannot be passed in the value of a .for loop
     29 # at all.
     30 ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     31 .for chars in ${ASCII.2020-12-31}
     32 .  info ${chars}
     33 .endfor
     34 # expect-2: !"\\
     35 
     36 # Cover the code in ExprLen.
     37 #
     38 # XXX: It is unexpected that the variable V gets expanded in the loop body.
     39 # The double '$$' should intuitively prevent exactly this.  Probably nobody
     40 # was adventurous enough to use literal dollar signs in the values of a .for
     41 # loop, allowing this edge case to go unnoticed for years.
     42 #
     43 # See for.c, function ExprLen.
     44 V=		value
     45 VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     46 .for i in ${VALUES}
     47 .  info $i
     48 .endfor
     49 # expect-2: $
     50 # expect-3: value
     51 # expect-4: value-with-modifier
     52 # expect-5: value
     53 # expect-6: value-with-modifier
     54 
     55 
     56 # Try to cover the code for nested '{}' in ExprLen, without success.
     57 #
     58 # The value of the variable VALUES is not meant to be a variable expression.
     59 # Instead, it is meant to represent literal text, the only escaping mechanism
     60 # being that each '$' is written as '$$'.
     61 VALUES=		$${UNDEF:U\$$\$$ {{}} end}
     62 #
     63 # The .for loop splits ${VALUES} into 3 words, at the space characters, since
     64 # the '$$' is an ordinary character and the spaces are not escaped.
     65 #	Word 1 is '${UNDEF:U\$\$'
     66 #	Word 2 is '{{}}'
     67 #	Word 3 is 'end}'
     68 #
     69 # Each of these words is now inserted in the body of the .for loop.
     70 .for i in ${VALUES}
     71 # $i
     72 .endfor
     73 #
     74 # When these words are injected into the body of the .for loop, each inside a
     75 # '${:U...}' expression, the result is:
     76 #
     77 # expect: For: loop body:
     78 # expect: # ${:U\${UNDEF\:U\\$\\$}
     79 # expect: For: loop body:
     80 # expect: # ${:U{{\}\}}
     81 # expect: For: loop body:
     82 # expect: # ${:Uend\}}
     83 # expect: For: end for 1
     84 #
     85 # The first of these expressions is the most interesting one, due to its many
     86 # special characters.  This expression is properly balanced:
     87 #
     88 #	Text	Meaning		Explanation
     89 #	\$	$		escaped
     90 #	{	{		ordinary text
     91 #	UNDEF	UNDEF		ordinary text
     92 #	\:	:		escaped
     93 #	U	U		ordinary text
     94 #	\\	\		escaped
     95 #	$\	(expr)		an expression, the variable name is '\'
     96 #	\$	$		escaped
     97 #
     98 # To make the expression '$\' visible, define it to an actual word:
     99 ${:U\\}=	backslash
    100 .for i in ${VALUES}
    101 .  info $i
    102 .endfor
    103 #
    104 # expect-3: ${UNDEF:U\backslash$
    105 # expect-4: {{}}
    106 # expect-5: end}
    107 #
    108 # FIXME: There was no expression '$\' in the original text of the variable
    109 # 'VALUES', that's a surprise in the parser.
    110 
    111 
    112 # Second try to cover the code for nested '{}' in ExprLen.
    113 #
    114 # XXX: It is not the job of ExprLen to parse an expression, it is naive to
    115 # expect ExprLen to get all the details right in just a few lines of code.
    116 # Each variable modifier has its own inconsistent way of parsing nested
    117 # variable expressions, braces and parentheses.  (Compare ':M', ':S', and
    118 # ':D' for details.)  The only sensible thing to do is therefore to let
    119 # Var_Parse do all the parsing work.
    120 VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
    121 .for i in ${VALUES}
    122 .  info $i
    123 .endfor
    124 # expect-2: begin<fallback>end
    125 
    126 # A single trailing dollar doesn't happen in practice.
    127 # The dollar sign is correctly passed through to the body of the .for loop.
    128 # There, it is expanded by the .info directive, but even there a trailing
    129 # dollar sign is kept as-is.
    130 .for i in ${:U\$}
    131 .  info ${i}
    132 .endfor
    133 # expect-2: $
    134 
    135 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    136 # could contain colons, which affected variable expressions having this exact
    137 # modifier.  This possibility was neither intended nor documented.
    138 NUMBERS=	one two three
    139 # expect+1: invalid character ':' in .for loop variable name
    140 .for NUMBERS:M*e in replaced
    141 .  info ${NUMBERS} ${NUMBERS:M*e}
    142 .endfor
    143 
    144 # Before for.c 1.173 from 2023-05-08, the name of the iteration variable
    145 # could contain braces, which allowed replacing sequences of variable
    146 # expressions.  This possibility was neither intended nor documented.
    147 BASENAME=	one
    148 EXT=		.c
    149 # expect+1: invalid character '}' in .for loop variable name
    150 .for BASENAME}${EXT in replaced
    151 .  info ${BASENAME}${EXT}
    152 .endfor
    153 
    154 # Demonstrate the various ways to refer to the iteration variable.
    155 i=		outer
    156 i2=		two
    157 i,=		comma
    158 .for i in inner
    159 .  info .        $$i: $i
    160 .  info .      $${i}: ${i}
    161 .  info .   $${i:M*}: ${i:M*}
    162 .  info .      $$(i): $(i)
    163 .  info .   $$(i:M*): $(i:M*)
    164 .  info . $${i$${:U}}: ${i${:U}}
    165 .  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
    166 .  info .     $${i2}: ${i2}
    167 .  info .     $${i,}: ${i,}
    168 .  info .  adjacent: $i${i}${i:M*}$i
    169 .endfor
    170 # expect-11: .        $i: inner
    171 # expect-11: .      ${i}: inner
    172 # expect-11: .   ${i:M*}: inner
    173 # expect-11: .      $(i): inner
    174 # expect-11: .   $(i:M*): inner
    175 # expect-11: . ${i${:U}}: outer
    176 # expect-11: .    ${i\}}: inner}
    177 # expect-11: .     ${i2}: two
    178 # expect-11: .     ${i,}: comma
    179 # expect-11: .  adjacent: innerinnerinnerinner
    180 
    181 # Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
    182 # since there was no check on valid variable names.  ForLoop_SubstVarShort
    183 # skipped "stupid" variable names, but ForLoop_SubstVarLong naively
    184 # parsed the body of the loop, substituting each '${$}' with an actual
    185 # '${:Udollar}'.
    186 # expect+1: invalid character '$' in .for loop variable name
    187 .for $ in dollar
    188 .  info eight $$$$$$$$ and no cents.
    189 .  info eight ${$}${$}${$}${$} and no cents.
    190 .endfor
    191 # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
    192 # a variable expression. The inner '$' is followed by a '}' and is thus a
    193 # silent syntax error, the '$' is skipped. The variable name is thus '', and
    194 # since there is never a variable named '', the whole expression '${$}'
    195 # evaluates to an empty string.
    196 closing-brace=		}		# guard against an
    197 ${closing-brace}=	<closing-brace>	# alternative interpretation
    198 # expect+1: eight  and no cents.
    199 .info eight ${$}${$}${$}${$} and no cents.
    200 
    201 # What happens if the values from the .for loop contain a literal newline?
    202 # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
    203 # body of the .for loop, where it was then interpreted as a literal newline,
    204 # leading to syntax errors such as "Unclosed variable expression" in the upper
    205 # line and "Invalid line type" in the lower line.
    206 #
    207 # The error message occurs in the line of the .for loop since that's the place
    208 # where the body of the .for loop is constructed, and at this point the
    209 # newline character gets replaced with a plain space.
    210 # expect+2: newline in .for value
    211 # expect+1: newline in .for value
    212 .for i in "${.newline}"
    213 .  info short: $i
    214 .  info long: ${i}
    215 .endfor
    216 # expect-3: short: " "
    217 # expect-3: long: " "
    218 
    219 # No error since the newline character is not actually used.
    220 .for i in "${.newline}"
    221 .endfor
    222 
    223 # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
    224 # a newline character in a .for loop led to a crash since at the point where
    225 # the error message including the stack trace is printed, the body of the .for
    226 # loop is assembled, and at that point, ForLoop.nextItem had already been
    227 # advanced.
    228 .MAKEFLAGS: -dp
    229 # expect+1: newline in .for value
    230 .for i in "${.newline}"
    231 : $i
    232 .endfor
    233 .MAKEFLAGS: -d0
    234 
    235 .MAKEFLAGS: -df
    236 .for i in \# \\\#
    237 # $i
    238 .endfor
    239 
    240 .for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
    241 # $i
    242 .endfor
    243 
    244 # The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
    245 # target-local variables.  See for.c 1.45 from 2009-01-14.
    246 .for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
    247 # $i
    248 .endfor
    249 # expect: # ${:U${.TARGET}}
    250 # XXX: Why does '$' result in the same text as '$$'?
    251 # expect: # ${:U${.TARGET}}
    252 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    253 # expect: # ${:U$${.TARGET\}}
    254 # XXX: Why does '$' result in the same text as '$$'?
    255 # XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
    256 # expect: # ${:U$${.TARGET\}}
    257 
    258 .for i in ((( {{{ ))) }}}
    259 # $i
    260 .endfor
    261 .MAKEFLAGS: -d0
    262 
    263 all:
    264