Home | History | Annotate | Line # | Download | only in unit-tests
directive-for-escape.mk revision 1.13
      1  1.13  rillig # $NetBSD: directive-for-escape.mk,v 1.13 2022/01/09 14:06:00 rillig Exp $
      2   1.1  rillig #
      3   1.1  rillig # Test escaping of special characters in the iteration values of a .for loop.
      4   1.1  rillig # These values get expanded later using the :U variable modifier, and this
      5   1.1  rillig # escaping and unescaping must pass all characters and strings effectively
      6   1.1  rillig # unmodified.
      7   1.1  rillig 
      8   1.1  rillig .MAKEFLAGS: -df
      9   1.1  rillig 
     10   1.7  rillig # Even though the .for loops take quotes into account when splitting the
     11   1.7  rillig # string into words, the quotes don't need to be balanced, as of 2020-12-31.
     12   1.1  rillig # This could be considered a bug.
     13   1.1  rillig ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     14   1.1  rillig 
     15   1.1  rillig # XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
     16  1.12  rillig # the loop.  Not only would it need the escaping for the variable modifier
     17  1.12  rillig # ':U' but also the escaping for the line-end comment.
     18   1.1  rillig .for chars in ${ASCII}
     19   1.1  rillig .  info ${chars}
     20   1.1  rillig .endfor
     21   1.1  rillig 
     22   1.1  rillig # As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
     23   1.1  rillig # as comment character.  Using 3 backslashes doesn't help either since
     24   1.1  rillig # then the situation is essentially the same as with 1 backslash.
     25   1.1  rillig # This means that a '#' sign cannot be passed in the value of a .for loop
     26   1.1  rillig # at all.
     27   1.1  rillig ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
     28   1.1  rillig .for chars in ${ASCII.2020-12-31}
     29   1.1  rillig .  info ${chars}
     30   1.1  rillig .endfor
     31   1.1  rillig 
     32  1.11  rillig # Cover the code in ExprLen.
     33   1.1  rillig #
     34   1.1  rillig # XXX: It is unexpected that the variable V gets expanded in the loop body.
     35  1.11  rillig # The double '$$' should intuitively prevent exactly this.  Probably nobody
     36  1.11  rillig # was adventurous enough to use literal dollar signs in the values of a .for
     37  1.12  rillig # loop, allowing this edge case to go unnoticed for years.
     38  1.11  rillig #
     39  1.11  rillig # See for.c, function ExprLen.
     40   1.1  rillig V=		value
     41   1.1  rillig VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
     42   1.1  rillig .for i in ${VALUES}
     43   1.1  rillig .  info $i
     44   1.1  rillig .endfor
     45   1.1  rillig 
     46  1.11  rillig # Try to cover the code for nested '{}' in ExprLen, without success.
     47   1.1  rillig #
     48   1.7  rillig # The value of the variable VALUES is not meant to be a variable expression.
     49   1.7  rillig # Instead, it is meant to represent literal text, the only escaping mechanism
     50   1.7  rillig # being that each '$' is written as '$$'.
     51   1.5  rillig #
     52   1.5  rillig # The .for loop splits ${VALUES} into 3 words, at the space characters, since
     53   1.5  rillig # these are not escaped.
     54   1.1  rillig VALUES=		$${UNDEF:U\$$\$$ {{}} end}
     55   1.7  rillig # XXX: Where in the code does the '\$\$' get converted into a single '\$'?
     56   1.1  rillig .for i in ${VALUES}
     57   1.1  rillig .  info $i
     58   1.1  rillig .endfor
     59   1.1  rillig 
     60  1.11  rillig # Second try to cover the code for nested '{}' in ExprLen.
     61   1.5  rillig #
     62  1.11  rillig # XXX: It is wrong that ExprLen requires the braces to be balanced.
     63   1.5  rillig # Each variable modifier has its own inconsistent way of parsing nested
     64   1.7  rillig # variable expressions, braces and parentheses.  (Compare ':M', ':S', and
     65   1.7  rillig # ':D' for details.)  The only sensible thing to do is therefore to let
     66   1.7  rillig # Var_Parse do all the parsing work.
     67   1.5  rillig VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
     68   1.5  rillig .for i in ${VALUES}
     69   1.5  rillig .  info $i
     70   1.5  rillig .endfor
     71   1.5  rillig 
     72   1.1  rillig # A single trailing dollar doesn't happen in practice.
     73   1.1  rillig # The dollar sign is correctly passed through to the body of the .for loop.
     74   1.1  rillig # There, it is expanded by the .info directive, but even there a trailing
     75   1.1  rillig # dollar sign is kept as-is.
     76   1.1  rillig .for i in ${:U\$}
     77   1.1  rillig .  info ${i}
     78   1.1  rillig .endfor
     79   1.2  rillig 
     80   1.2  rillig # As of 2020-12-31, the name of the iteration variable can even contain
     81   1.2  rillig # colons, which then affects variable expressions having this exact modifier.
     82   1.2  rillig # This is clearly an unintended side effect of the implementation.
     83   1.2  rillig NUMBERS=	one two three
     84   1.2  rillig .for NUMBERS:M*e in replaced
     85   1.2  rillig .  info ${NUMBERS} ${NUMBERS:M*e}
     86   1.2  rillig .endfor
     87   1.2  rillig 
     88   1.2  rillig # As of 2020-12-31, the name of the iteration variable can contain braces,
     89   1.2  rillig # which gets even more surprising than colons, since it allows replacing
     90   1.2  rillig # sequences of variable expressions.  There is no practical use case for
     91   1.2  rillig # this, though.
     92   1.2  rillig BASENAME=	one
     93   1.2  rillig EXT=		.c
     94   1.2  rillig .for BASENAME}${EXT in replaced
     95   1.2  rillig .  info ${BASENAME}${EXT}
     96   1.2  rillig .endfor
     97   1.3  rillig 
     98   1.3  rillig # Demonstrate the various ways to refer to the iteration variable.
     99   1.3  rillig i=		outer
    100   1.3  rillig i2=		two
    101   1.3  rillig i,=		comma
    102   1.3  rillig .for i in inner
    103   1.3  rillig .  info .        $$i: $i
    104   1.3  rillig .  info .      $${i}: ${i}
    105   1.3  rillig .  info .   $${i:M*}: ${i:M*}
    106   1.3  rillig .  info .      $$(i): $(i)
    107   1.3  rillig .  info .   $$(i:M*): $(i:M*)
    108   1.3  rillig .  info . $${i$${:U}}: ${i${:U}}
    109   1.6  rillig .  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
    110   1.3  rillig .  info .     $${i2}: ${i2}
    111   1.3  rillig .  info .     $${i,}: ${i,}
    112   1.3  rillig .  info .  adjacent: $i${i}${i:M*}$i
    113   1.3  rillig .endfor
    114   1.4  rillig 
    115   1.8  rillig # The variable name can be a single '$' since there is no check on valid
    116   1.8  rillig # variable names. ForLoop_SubstVarShort skips "stupid" variable names,
    117   1.8  rillig # but ForLoop_SubstVarLong naively parses the body of the loop, substituting
    118   1.8  rillig # each '${$}' with an actual 'dollar'.
    119   1.8  rillig .for $ in dollar
    120   1.8  rillig .  info eight $$$$$$$$ and no cents.
    121   1.8  rillig .  info eight ${$}${$}${$}${$} and no cents.
    122   1.8  rillig .endfor
    123   1.8  rillig # Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
    124   1.8  rillig # a variable expression. The inner '$' is followed by a '}' and is thus a
    125   1.8  rillig # silent syntax error, the '$' is skipped. The variable name is thus '', and
    126   1.8  rillig # since there is never a variable named '', the whole expression '${$}'
    127   1.8  rillig # evaluates to an empty string.
    128   1.8  rillig closing-brace=		}		# guard against an
    129   1.8  rillig ${closing-brace}=	<closing-brace>	# alternative interpretation
    130   1.8  rillig .info eight ${$}${$}${$}${$} and no cents.
    131   1.8  rillig 
    132   1.9  rillig # What happens if the values from the .for loop contain a literal newline?
    133  1.10  rillig # Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
    134  1.10  rillig # body of the .for loop, where it was then interpreted as a literal newline,
    135  1.10  rillig # leading to syntax errors such as "Unclosed variable expression" in the upper
    136  1.10  rillig # line and "Invalid line type" in the lower line.
    137   1.9  rillig .for i in "${.newline}"
    138   1.9  rillig .  info short: $i
    139   1.9  rillig .  info long: ${i}
    140   1.9  rillig .endfor
    141   1.9  rillig 
    142  1.13  rillig # No error since the newline character is not actually used.
    143  1.13  rillig .for i in "${.newline}"
    144  1.13  rillig .endfor
    145  1.13  rillig 
    146  1.13  rillig # Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
    147  1.13  rillig # a newline character in a .for loop led to a crash since at the point where
    148  1.13  rillig # the error message including the stack trace is printed, the body of the .for
    149  1.13  rillig # loop is assembled, and at that point, ForLoop.nextItem had already been
    150  1.13  rillig # advanced.
    151  1.13  rillig .MAKEFLAGS: -dp
    152  1.13  rillig .for i in "${.newline}"
    153  1.13  rillig : $i
    154  1.13  rillig .endfor
    155  1.13  rillig .MAKEFLAGS: -d0
    156  1.13  rillig 
    157   1.4  rillig all:
    158