Home | History | Annotate | Line # | Download | only in unit-tests
varmod-subst-regex.mk revision 1.7
      1  1.7  rillig # $NetBSD: varmod-subst-regex.mk,v 1.7 2021/06/21 08:17:39 rillig Exp $
      2  1.1  rillig #
      3  1.2  rillig # Tests for the :C,from,to, variable modifier.
      4  1.1  rillig 
      5  1.7  rillig # Enable lint mode (-dL) so that unmatched subexpressions are reported.
      6  1.7  rillig .MAKEFLAGS: -dL
      7  1.7  rillig 
      8  1.3  rillig all: mod-regex-compile-error
      9  1.2  rillig all: mod-regex-limits
     10  1.2  rillig all: mod-regex-errors
     11  1.7  rillig all: unmatched-subexpression
     12  1.1  rillig 
     13  1.3  rillig # The variable expression expands to 4 words.  None of these words matches
     14  1.3  rillig # the regular expression "a b" since these words don't contain any
     15  1.3  rillig # whitespace; the value is therefore left unchanged.
     16  1.3  rillig .if ${:Ua b b c:C,a b,,} != "a b b c"
     17  1.4  rillig .  error
     18  1.3  rillig .endif
     19  1.2  rillig 
     20  1.3  rillig # Using the '1' modifier does not change anything since nothing matches.
     21  1.3  rillig # It just means to apply at most 1 replacement in the whole variable expression.
     22  1.3  rillig .if ${:Ua b b c:C,a b,,1} != "a b b c"
     23  1.4  rillig .  error
     24  1.3  rillig .endif
     25  1.3  rillig 
     26  1.3  rillig # The 'W' modifier treats the whole variable value as a single big word,
     27  1.3  rillig # containing whitespace.  This big word matches the regular expression,
     28  1.3  rillig # therefore the match "a b" gets removed.  The remaining whitespace is kept.
     29  1.3  rillig .if ${:Ua b b c:C,a b,,W} != " b c"
     30  1.4  rillig .  error
     31  1.3  rillig .endif
     32  1.3  rillig 
     33  1.3  rillig # The 'g' modifier does not have any effect here since none of the words
     34  1.3  rillig # contains the character 'b' more than once.
     35  1.3  rillig .if ${:Ua b b c:C,b,,g} != "a c"
     36  1.4  rillig .  error
     37  1.3  rillig .endif
     38  1.3  rillig 
     39  1.3  rillig # The first :C modifier has the 'W' modifier, which makes the whole
     40  1.3  rillig # expression a single word.  The 'g' modifier then replaces all occurrences
     41  1.3  rillig # of "1 2" with "___".  The 'W' modifier only applies to this single :C
     42  1.3  rillig # modifier.  This is demonstrated by the :C modifier that follows.  If the
     43  1.3  rillig # 'W' modifier were preserved, only a single underscore would have been
     44  1.3  rillig # replaced with an 'x'.
     45  1.3  rillig .if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
     46  1.4  rillig .  error
     47  1.3  rillig .endif
     48  1.3  rillig 
     49  1.3  rillig # The regular expression does not match the single-character first word.
     50  1.3  rillig # It matches once in the second word, and the \0\0 doubles that word.
     51  1.3  rillig # In the third word, the regular expression matches as early as possible,
     52  1.3  rillig # and since the matches must not overlap, the next possible match would
     53  1.3  rillig # start at the 6, but at that point, there is only one character left,
     54  1.3  rillig # and that cannot match the regular expression "..".  Therefore only the
     55  1.5  rillig # "45" is doubled in the third word.
     56  1.3  rillig .if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
     57  1.4  rillig .  error
     58  1.3  rillig .endif
     59  1.3  rillig 
     60  1.3  rillig # The modifier '1' applies the replacement at most once, across the whole
     61  1.5  rillig # expression value, no matter whether it is a single big word or many small
     62  1.3  rillig # words.  Here, only the first character of the first word gets doubled.
     63  1.3  rillig #
     64  1.3  rillig # Up to 2020-08-28, the manual page said that the modifiers '1' and 'g'
     65  1.5  rillig # were orthogonal, which was wrong.  It doesn't make sense to specify both
     66  1.5  rillig # 'g' and '1' at the same time.
     67  1.3  rillig .if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
     68  1.4  rillig .  error
     69  1.3  rillig .endif
     70  1.3  rillig 
     71  1.5  rillig # A regular expression that matches the empty string applies before every
     72  1.5  rillig # single character of the word.
     73  1.5  rillig # XXX: Most other places where regular expressions are used match at the end
     74  1.5  rillig # of the string as well.
     75  1.5  rillig .if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
     76  1.5  rillig .  error
     77  1.5  rillig .endif
     78  1.5  rillig 
     79  1.5  rillig # A dot in the regular expression matches any character, even a newline,
     80  1.5  rillig # unlike in most other contexts where regular expressions are used.  In make,
     81  1.5  rillig # regcomp is called without REG_NEWLINE, thus newline is an ordinary character.
     82  1.5  rillig # Here, the two quotes and the newline between them each become a dot.
     83  1.5  rillig .if ${:U"${.newline}":C,.,.,g} != "..."
     84  1.5  rillig .  error
     85  1.5  rillig .endif
     86  1.5  rillig 
     87  1.3  rillig # Multiple consecutive asterisks form an invalid regular expression.  This
     88  1.3  rillig # produces an error message and (as of 2020-08-28) stops parsing in the middle
     89  1.3  rillig # of the variable expression.  The unparsed part of the expression is then
     90  1.3  rillig # copied verbatim to the output, which is unexpected and can lead to strange
     91  1.3  rillig # shell commands being run.
     92  1.3  rillig mod-regex-compile-error:
     93  1.3  rillig 	@echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.
     94  1.3  rillig 
     95  1.3  rillig # These tests generate error messages, but as of 2020-08-28 make just continues
     96  1.3  rillig # parsing and executing as if nothing bad had happened.
     97  1.2  rillig mod-regex-limits:
     98  1.2  rillig 	@echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
     99  1.2  rillig 	@echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
    100  1.2  rillig 	@echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
    101  1.2  rillig 	@echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
    102  1.2  rillig 	@echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
    103  1.2  rillig 	# The :C modifier only handles single-digit capturing groups,
    104  1.2  rillig 	# which is more than enough for daily use.
    105  1.2  rillig 	@echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}
    106  1.2  rillig 
    107  1.2  rillig mod-regex-errors:
    108  1.2  rillig 	@echo $@: ${UNDEF:Uvalue:C,[,,}
    109  1.6  rillig 
    110  1.6  rillig 	# If the replacement pattern produces a parse error because of an
    111  1.6  rillig 	# unknown modifier, the parse error is ignored in ParseModifierPart
    112  1.6  rillig 	# and the faulty variable expression expands to "".
    113  1.6  rillig 	@echo $@: ${word:L:C,.*,x${:U:Z}y,W}
    114  1.7  rillig 
    115  1.7  rillig # In regular expressions with alternatives, not all capturing groups are
    116  1.7  rillig # always set; some may be missing.  Make calls these "unmatched
    117  1.7  rillig # subexpressions".
    118  1.7  rillig #
    119  1.7  rillig # From var.c 1.16 from 1996-12-24 until before var.c 1.933 from 2021-06-21,
    120  1.7  rillig # unmatched subexpressions produced an "error message" but did not have any
    121  1.7  rillig # further effect since the "error handling" didn't influence the exit status.
    122  1.7  rillig #
    123  1.7  rillig # Before 2021-06-21 there was no way to turn off this warning, thus the
    124  1.7  rillig # combination of alternative matches and capturing groups was seldom used, if
    125  1.7  rillig # at all.
    126  1.7  rillig #
    127  1.7  rillig # Since var.c 1.933 from 2021-06-21, the error message is only printed in lint
    128  1.7  rillig # mode (-dL), but not in default mode.
    129  1.7  rillig #
    130  1.7  rillig # As an alternative to the change from var.c 1.933 from 2021-06-21, a possible
    131  1.7  rillig # mitigation would have been to add a new modifier 'U' to the already existing
    132  1.7  rillig # '1Wg' modifiers of the ':C' modifier.  That modifier could have been used in
    133  1.7  rillig # the modifier ':C,(a.)|(b.),\1\2,U' to treat unmatched subexpressions as
    134  1.7  rillig # empty.  This approach would have created a syntactical ambiguity since the
    135  1.7  rillig # modifiers ':S' and ':C' are open-ended (see mod-subst-chain), that is, they
    136  1.7  rillig # do not need to be followed by a ':' to separate them from the next modifier.
    137  1.7  rillig # Luckily the modifier :U does not make sense after :C, therefore this case
    138  1.7  rillig # does not happen in practice.
    139  1.7  rillig unmatched-subexpression:
    140  1.7  rillig 	# In each of the following cases, if the regular expression matches at
    141  1.7  rillig 	# all, the subexpression \1 matches as well.
    142  1.7  rillig 	@echo $@.ok: ${:U1 1 2 3 5 8 13 21 34:C,1(.*),one\1,}
    143  1.7  rillig 
    144  1.7  rillig 	# In the following cases:
    145  1.7  rillig 	#	* The subexpression \1 is only defined for 1 and 13.
    146  1.7  rillig 	#	* The subexpression \2 is only defined for 2 and 21.
    147  1.7  rillig 	#	* If the regular expression does not match at all, the
    148  1.7  rillig 	#	  replacement string is not analyzed, thus no error messages.
    149  1.7  rillig 	# In total, there are 5 error messages about unmatched subexpressions.
    150  1.7  rillig 	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
    151  1.7  rillig 	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
    152  1.7  rillig 	@echo $@.2:  ${:U  2:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
    153  1.7  rillig 	@echo $@.3:  ${:U  3:C,1(.*)|2(.*),(\1)(\2),:Q}
    154  1.7  rillig 	@echo $@.5:  ${:U  5:C,1(.*)|2(.*),(\1)(\2),:Q}
    155  1.7  rillig 	@echo $@.8:  ${:U  8:C,1(.*)|2(.*),(\1)(\2),:Q}
    156  1.7  rillig 	@echo $@.13: ${:U 13:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
    157  1.7  rillig 	@echo $@.21: ${:U 21:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
    158  1.7  rillig 	@echo $@.34: ${:U 34:C,1(.*)|2(.*),(\1)(\2),:Q}
    159  1.7  rillig 
    160  1.7  rillig 	# And now all together: 5 error messages for 1, 1, 2, 13, 21.
    161  1.7  rillig 	@echo $@.all: ${:U1 1 2 3 5 8 13 21 34:C,1(.*)|2(.*),(\1)(\2),:Q}
    162