Home | History | Annotate | Line # | Download | only in unit-tests
varmod-subst-regex.mk revision 1.11.2.1
      1  1.11.2.1  perseant # $NetBSD: varmod-subst-regex.mk,v 1.11.2.1 2025/08/02 05:58:39 perseant Exp $
      2       1.1    rillig #
      3       1.2    rillig # Tests for the :C,from,to, variable modifier.
      4       1.1    rillig 
      5       1.7    rillig # Enable lint mode (-dL), in which unmatched subexpressions are reported.
      6       1.7    rillig .MAKEFLAGS: -dL
      7       1.7    rillig 
      8       1.3    rillig all: mod-regex-compile-error
      9  1.11.2.1  perseant all: mod-regex-limits-{1,2,3,4,5,6}
     10  1.11.2.1  perseant all: mod-regex-errors-{1,2}
     11       1.7    rillig all: unmatched-subexpression
     12       1.1    rillig 
     13       1.8    rillig # The expression expands to 4 words.  Of these words, none matches
     14       1.3    rillig # the regular expression "a b" since these words don't contain any
     15       1.3    rillig # whitespace.
     16       1.3    rillig .if ${:Ua b b c:C,a b,,} != "a b b c"
     17       1.4    rillig .  error
     18       1.3    rillig .endif
     19       1.2    rillig 
     20       1.3    rillig # Using the '1' modifier does not change anything.  The '1' modifier just
     21       1.8    rillig # means to apply at most 1 replacement in the whole expression.
     22       1.3    rillig .if ${:Ua b b c:C,a b,,1} != "a b b c"
     23       1.4    rillig .  error
     24       1.3    rillig .endif
     25       1.3    rillig 
     26       1.3    rillig # The 'W' modifier treats the whole variable value as a single big word,
     27       1.3    rillig # containing whitespace.  This big word matches the regular expression,
     28       1.3    rillig # therefore it gets replaced.  Whitespace is preserved after replacing.
     29       1.3    rillig .if ${:Ua b b c:C,a b,,W} != " b c"
     30       1.4    rillig .  error
     31       1.3    rillig .endif
     32       1.3    rillig 
     33       1.3    rillig # The 'g' modifier does not have any effect here since none of the words
     34       1.3    rillig # contains the character 'b' more than once.
     35       1.3    rillig .if ${:Ua b b c:C,b,,g} != "a c"
     36       1.4    rillig .  error
     37       1.3    rillig .endif
     38       1.3    rillig 
     39       1.3    rillig # The first :C modifier has the 'W' modifier, which makes the whole
     40       1.3    rillig # expression a single word.  The 'g' modifier then replaces all occurrences
     41       1.3    rillig # of "1 2" with "___".  The 'W' modifier only applies to this single :C
     42       1.3    rillig # modifier.  This is demonstrated by the :C modifier that follows.  If the
     43       1.3    rillig # 'W' modifier were preserved, only a single underscore would have been
     44       1.3    rillig # replaced with an 'x'.
     45       1.3    rillig .if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
     46       1.4    rillig .  error
     47       1.3    rillig .endif
     48       1.3    rillig 
     49       1.3    rillig # The regular expression ".." does not match in the single-character first
     50       1.3    rillig # word.  It matches once in the second word, and the \0\0 doubles that word.
     51       1.3    rillig # In the third word, the regular expression matches as early as possible,
     52       1.3    rillig # and since the matches must not overlap, the next possible match would
     53       1.3    rillig # start at the 6, but at that point, there is only one character left,
     54       1.3    rillig # and that cannot match the regular expression "..".  Therefore only the
     55       1.5    rillig # "45" is doubled in the third word.
     56       1.3    rillig .if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
     57       1.4    rillig .  error
     58       1.3    rillig .endif
     59       1.3    rillig 
     60       1.3    rillig # The modifier '1' applies the replacement at most once, across the whole
     61       1.5    rillig # expression value, no matter whether it is a single big word or many small
     62       1.3    rillig # words.
     63       1.3    rillig #
     64       1.3    rillig # Up to 2020-08-28, the manual page said that the modifiers '1' and 'g'
     65       1.5    rillig # were orthogonal, which was wrong.  It doesn't make sense to specify both
     66       1.5    rillig # 'g' and '1' at the same time.
     67       1.3    rillig .if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
     68       1.4    rillig .  error
     69       1.3    rillig .endif
     70       1.3    rillig 
     71       1.5    rillig # A regular expression that matches the empty string applies before every
     72       1.5    rillig # single character of the word.
     73       1.5    rillig # XXX: Most other places where regular expressions are used match at the end
     74       1.5    rillig # of the string as well.
     75       1.5    rillig .if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
     76       1.5    rillig .  error
     77       1.5    rillig .endif
     78       1.5    rillig 
     79       1.5    rillig # A dot in the regular expression matches any character, even a newline.
     80       1.5    rillig # In most other contexts where regular expressions are used, a dot matches
     81       1.5    rillig # any character except newline.  In make, regcomp is called without
     82       1.5    rillig # REG_NEWLINE, thus newline is an ordinary character.
     83       1.5    rillig .if ${:U"${.newline}":C,.,.,g} != "..."
     84       1.5    rillig .  error
     85       1.5    rillig .endif
     86       1.5    rillig 
     87       1.9    rillig 
     88      1.10    rillig # Like the ':S' modifier, the ':C' modifier matches on an expression
     89       1.9    rillig # that contains no words at all, but only if the regular expression matches an
     90       1.9    rillig # empty string, for example, when the regular expression is anchored at the
     91      1.11    rillig # beginning or the end of the word.  An unanchored regular expression that
     92      1.11    rillig # matches the empty string is uncommon in practice, as it would match before
     93      1.11    rillig # each character of the word.  The flags 'g' and 'W' do not change the results.
     94      1.11    rillig .if "<${:U:S,,unanchored,}> <${:U:C,.?,unanchored,}>" != "<> <unanchored>"
     95       1.9    rillig .  error
     96       1.9    rillig .endif
     97      1.11    rillig .if "<${:U:S,^,prefix,}> <${:U:C,^,prefix,}>" != "<prefix> <prefix>"
     98       1.9    rillig .  error
     99       1.9    rillig .endif
    100      1.11    rillig .if "<${:U:S,$,suffix,}> <${:U:C,$,suffix,}>" != "<suffix> <suffix>"
    101       1.9    rillig .  error
    102       1.9    rillig .endif
    103      1.11    rillig .if "<${:U:S,^$,whole,}> <${:U:C,^$,whole,}>" != "<whole> <whole>"
    104       1.9    rillig .  error
    105       1.9    rillig .endif
    106      1.11    rillig .if "<${:U:S,,unanchored,g}> <${:U:C,.?,unanchored,g}>" != "<> <unanchored>"
    107       1.9    rillig .  error
    108       1.9    rillig .endif
    109      1.11    rillig .if "<${:U:S,^,prefix,g}> <${:U:C,^,prefix,g}>" != "<prefix> <prefix>"
    110      1.11    rillig .  error
    111      1.11    rillig .endif
    112      1.11    rillig .if "<${:U:S,$,suffix,g}> <${:U:C,$,suffix,g}>" != "<suffix> <suffix>"
    113      1.11    rillig .  error
    114      1.11    rillig .endif
    115      1.11    rillig .if "<${:U:S,^$,whole,g}> <${:U:C,^$,whole,g}>" != "<whole> <whole>"
    116      1.11    rillig .  error
    117      1.11    rillig .endif
    118      1.11    rillig .if "<${:U:S,,unanchored,W}> <${:U:C,.?,unanchored,W}>" != "<> <unanchored>"
    119      1.11    rillig .  error
    120      1.11    rillig .endif
    121      1.11    rillig .if "<${:U:S,^,prefix,W}> <${:U:C,^,prefix,W}>" != "<prefix> <prefix>"
    122      1.11    rillig .  error
    123      1.11    rillig .endif
    124      1.11    rillig .if "<${:U:S,$,suffix,W}> <${:U:C,$,suffix,W}>" != "<suffix> <suffix>"
    125      1.11    rillig .  error
    126      1.11    rillig .endif
    127      1.11    rillig .if "<${:U:S,^$,whole,W}> <${:U:C,^$,whole,W}>" != "<whole> <whole>"
    128       1.9    rillig .  error
    129       1.9    rillig .endif
    130       1.9    rillig 
    131       1.9    rillig 
    132       1.3    rillig # Multiple asterisks, as in "****", form an invalid regular expression.  This
    133       1.3    rillig # produces an error message and (as of 2020-08-28) stops parsing in the middle
    134       1.8    rillig # of the expression.  The unparsed part of the expression is then copied
    135       1.3    rillig # verbatim to the output, which is unexpected and can lead to strange shell
    136       1.3    rillig # commands being run.
    137       1.3    rillig mod-regex-compile-error:
    138       1.3    rillig 	@echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.
    139       1.3    rillig 
    140       1.3    rillig # These tests cover missing capturing groups, a malformed regular expression
    141       1.3    rillig # and an unknown modifier.  They generate error messages but as of 2020-08-28
                            # just continue parsing and execution as if nothing bad had happened.
    142  1.11.2.1  perseant mod-regex-limits-1:
    143       1.2    rillig 	@echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
    144  1.11.2.1  perseant mod-regex-limits-2:
    145       1.2    rillig 	@echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
    146  1.11.2.1  perseant mod-regex-limits-3:
    147       1.2    rillig 	@echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
    148  1.11.2.1  perseant mod-regex-limits-4:
    149       1.2    rillig 	@echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
    150  1.11.2.1  perseant mod-regex-limits-5:
    151       1.2    rillig 	@echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
    152  1.11.2.1  perseant mod-regex-limits-6:
    153       1.2    rillig 	# The :C modifier only handles single-digit capturing groups,
    154      1.10    rillig 	# which is enough for all practical use cases.
    155       1.2    rillig 	@echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}
    156       1.2    rillig 
    157  1.11.2.1  perseant mod-regex-errors-1:
    158       1.2    rillig 	@echo $@: ${UNDEF:Uvalue:C,[,,}
    159       1.6    rillig 
    160  1.11.2.1  perseant mod-regex-errors-2:
    161       1.6    rillig 	# If the replacement pattern produces a parse error because of an
    162       1.6    rillig 	# unknown modifier, the parse error is ignored in ParseModifierPart
    163       1.8    rillig 	# and the faulty expression expands to "".
    164       1.6    rillig 	@echo $@: ${word:L:C,.*,x${:U:Z}y,W}
    165       1.7    rillig 
    166       1.7    rillig # In regular expressions with alternatives, not all capturing groups are
    167       1.7    rillig # always set; some may be missing.  Make calls these "unmatched
    168       1.7    rillig # subexpressions".
    169       1.7    rillig #
    170       1.7    rillig # From var.c 1.16 (1996-12-24) until before var.c 1.933 (2021-06-21),
    171       1.7    rillig # unmatched subexpressions produced an "error message" but did not have any
    172       1.7    rillig # further effect since the "error handling" didn't influence the exit status.
    173       1.7    rillig #
    174       1.7    rillig # Before 2021-06-21 there was no way to turn off this warning, thus the
    175       1.7    rillig # combination of alternative matches and capturing groups was seldom used, if
    176       1.7    rillig # at all.
    177       1.7    rillig #
    178       1.7    rillig # Since var.c 1.933 from 2021-06-21, the error message is only printed in lint
    179       1.7    rillig # mode (-dL), but not in default mode.
    180       1.7    rillig #
    181       1.7    rillig # As an alternative to the change from var.c 1.933 from 2021-06-21, a possible
    182       1.7    rillig # mitigation would have been to add a new modifier 'U' to the already existing
    183       1.7    rillig # '1Wg' modifiers of the ':C' modifier.  That modifier could have been used in
    184       1.7    rillig # the modifier ':C,(a.)|(b.),\1\2,U' to treat unmatched subexpressions as
    185       1.7    rillig # empty.  This approach would have created a syntactical ambiguity since the
    186       1.7    rillig # modifiers ':S' and ':C' are open-ended (see mod-subst-chain), that is, they
    187       1.7    rillig # do not need to be followed by a ':' to separate them from the next modifier.
    188       1.7    rillig # Luckily the modifier :U does not make sense after :C, therefore this case
    189       1.7    rillig # does not happen in practice.
    190       1.7    rillig unmatched-subexpression:
    191       1.7    rillig 	# In each of the following cases, if the regular expression matches at
    192       1.7    rillig 	# all, the subexpression \1 matches as well.
    193       1.7    rillig 	@echo $@.ok: ${:U1 1 2 3 5 8 13 21 34:C,1(.*),one\1,}
    194       1.7    rillig 
    195       1.7    rillig 	# In the following cases:
    196       1.7    rillig 	#	* The subexpression \1 is only defined for 1 and 13.
    197       1.7    rillig 	#	* The subexpression \2 is only defined for 2 and 21.
    198       1.7    rillig 	#	* If the regular expression does not match at all, the
    199       1.7    rillig 	#	  replacement string is not analyzed, thus no error messages.
    200       1.7    rillig 	# In total, there are 5 error messages about unmatched subexpressions.
    201       1.7    rillig 	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
    202       1.7    rillig 	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
    203       1.7    rillig 	@echo $@.2:  ${:U  2:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
    204       1.7    rillig 	@echo $@.3:  ${:U  3:C,1(.*)|2(.*),(\1)(\2),:Q}
    205       1.7    rillig 	@echo $@.5:  ${:U  5:C,1(.*)|2(.*),(\1)(\2),:Q}
    206       1.7    rillig 	@echo $@.8:  ${:U  8:C,1(.*)|2(.*),(\1)(\2),:Q}
    207       1.7    rillig 	@echo $@.13: ${:U 13:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
    208       1.7    rillig 	@echo $@.21: ${:U 21:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
    209       1.7    rillig 	@echo $@.34: ${:U 34:C,1(.*)|2(.*),(\1)(\2),:Q}
    210       1.7    rillig 
    211       1.7    rillig 	# And now all together: 5 error messages for 1, 1, 2, 13, 21.
    212       1.7    rillig 	@echo $@.all: ${:U1 1 2 3 5 8 13 21 34:C,1(.*)|2(.*),(\1)(\2),:Q}
    213