varmod-subst-regex.mk revision 1.12 1 1.12 rillig # $NetBSD: varmod-subst-regex.mk,v 1.12 2024/07/20 11:05:12 rillig Exp $
2 1.1 rillig #
3 1.2 rillig # Tests for the :C,from,to, variable modifier.
4 1.1 rillig
5 1.7 rillig # Enable lint mode (-dL) so that unmatched subexpressions are reported.
6 1.7 rillig .MAKEFLAGS: -dL
7 1.7 rillig
8 1.3 rillig all: mod-regex-compile-error
9 1.12 rillig all: mod-regex-limits-{1,2,3,4,5,6}
10 1.12 rillig all: mod-regex-errors-{1,2}
11 1.7 rillig all: unmatched-subexpression
12 1.1 rillig
13 1.8 rillig # The expression expands to 4 words. Of these words, none matches
14 1.3 rillig # the regular expression "a b" since these words don't contain any
15 1.3 rillig # whitespace.
16 1.3 rillig .if ${:Ua b b c:C,a b,,} != "a b b c"
17 1.4 rillig . error
18 1.3 rillig .endif
19 1.2 rillig
20 1.3 rillig # Using the '1' modifier does not change anything. The '1' modifier just
21 1.8 rillig # means to apply at most 1 replacement in the whole expression.
22 1.3 rillig .if ${:Ua b b c:C,a b,,1} != "a b b c"
23 1.4 rillig . error
24 1.3 rillig .endif
25 1.3 rillig
26 1.3 rillig # The 'W' modifier treats the whole variable value as a single big word,
27 1.3 rillig # containing whitespace. This big word matches the regular expression,
28 1.3 rillig # therefore it gets replaced. Whitespace is preserved after replacing.
29 1.3 rillig .if ${:Ua b b c:C,a b,,W} != " b c"
30 1.4 rillig . error
31 1.3 rillig .endif
32 1.3 rillig
33 1.3 rillig # The 'g' modifier does not have any effect here since each of the words
34 1.3 rillig # contains the character 'b' at most a single time.
35 1.3 rillig .if ${:Ua b b c:C,b,,g} != "a c"
36 1.4 rillig . error
37 1.3 rillig .endif
38 1.3 rillig
39 1.3 rillig # The first :C modifier has the 'W' modifier, which makes the whole
40 1.3 rillig # expression a single word. The 'g' modifier then replaces all occurrences
41 1.3 rillig # of "1 2" with "___". The 'W' modifier only applies to this single :C
42 1.3 rillig # modifier. This is demonstrated by the :C modifier that follows. If the
43 1.3 rillig # 'W' modifier were preserved, only a single underscore would have been
44 1.3 rillig # replaced with an 'x'.
45 1.3 rillig .if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
46 1.4 rillig . error
47 1.3 rillig .endif
48 1.3 rillig
49 1.3 rillig # The regular expression does not match in the first word.
50 1.3 rillig # It matches once in the second word, and the \0\0 doubles that word.
51 1.3 rillig # In the third word, the regular expression matches as early as possible,
52 1.3 rillig # and since the matches must not overlap, the next possible match would
53 1.3 rillig # start at the 6, but at that point, there is only one character left,
54 1.3 rillig # and that cannot match the regular expression "..". Therefore only the
55 1.5 rillig # "45" is doubled in the third word.
56 1.3 rillig .if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
57 1.4 rillig . error
58 1.3 rillig .endif
59 1.3 rillig
60 1.3 rillig # The modifier '1' applies the replacement at most once, across the whole
61 1.5 rillig # expression value, no matter whether it is a single big word or many small
62 1.3 rillig # words.
63 1.3 rillig #
64 1.3 rillig # Up to 2020-08-28, the manual page said that the modifiers '1' and 'g'
65 1.5 rillig # were orthogonal, which was wrong. It doesn't make sense to specify both
66 1.5 rillig # 'g' and '1' at the same time.
67 1.3 rillig .if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
68 1.4 rillig . error
69 1.3 rillig .endif
70 1.3 rillig
71 1.5 rillig # A regular expression that matches the empty string applies before every
72 1.5 rillig # single character of the word.
73 1.5 rillig # XXX: Most other places where regular expressions are used match at the end
74 1.5 rillig # of the string as well.
75 1.5 rillig .if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
76 1.5 rillig . error
77 1.5 rillig .endif
78 1.5 rillig
79 1.5 rillig # A dot in the regular expression matches any character, even a newline.
80 1.5 rillig # In most other contexts where regular expressions are used, a dot matches
81 1.5 rillig # any character except newline. In make, regcomp is called without
82 1.5 rillig # REG_NEWLINE, thus newline is an ordinary character.
83 1.5 rillig .if ${:U"${.newline}":C,.,.,g} != "..."
84 1.5 rillig . error
85 1.5 rillig .endif
86 1.5 rillig
87 1.9 rillig
88 1.10 rillig # Like the ':S' modifier, the ':C' modifier matches on an expression
89 1.9 rillig # that contains no words at all, but only if the regular expression matches an
90 1.9 rillig # empty string, for example, when the regular expression is anchored at the
91 1.11 rillig # beginning or the end of the word. An unanchored regular expression that
92 1.11 rillig # matches the empty string is uncommon in practice, as it would match before
93 1.11 rillig # each character of the word. The checks are run without flags, with 'g' and with 'W'.
94 1.11 rillig .if "<${:U:S,,unanchored,}> <${:U:C,.?,unanchored,}>" != "<> <unanchored>"
95 1.9 rillig . error
96 1.9 rillig .endif
97 1.11 rillig .if "<${:U:S,^,prefix,}> <${:U:C,^,prefix,}>" != "<prefix> <prefix>"
98 1.9 rillig . error
99 1.9 rillig .endif
100 1.11 rillig .if "<${:U:S,$,suffix,}> <${:U:C,$,suffix,}>" != "<suffix> <suffix>"
101 1.9 rillig . error
102 1.9 rillig .endif
103 1.11 rillig .if "<${:U:S,^$,whole,}> <${:U:C,^$,whole,}>" != "<whole> <whole>"
104 1.9 rillig . error
105 1.9 rillig .endif
106 1.11 rillig .if "<${:U:S,,unanchored,g}> <${:U:C,.?,unanchored,g}>" != "<> <unanchored>"
107 1.9 rillig . error
108 1.9 rillig .endif
109 1.11 rillig .if "<${:U:S,^,prefix,g}> <${:U:C,^,prefix,g}>" != "<prefix> <prefix>"
110 1.11 rillig . error
111 1.11 rillig .endif
112 1.11 rillig .if "<${:U:S,$,suffix,g}> <${:U:C,$,suffix,g}>" != "<suffix> <suffix>"
113 1.11 rillig . error
114 1.11 rillig .endif
115 1.11 rillig .if "<${:U:S,^$,whole,g}> <${:U:C,^$,whole,g}>" != "<whole> <whole>"
116 1.11 rillig . error
117 1.11 rillig .endif
118 1.11 rillig .if "<${:U:S,,unanchored,W}> <${:U:C,.?,unanchored,W}>" != "<> <unanchored>"
119 1.11 rillig . error
120 1.11 rillig .endif
121 1.11 rillig .if "<${:U:S,^,prefix,W}> <${:U:C,^,prefix,W}>" != "<prefix> <prefix>"
122 1.11 rillig . error
123 1.11 rillig .endif
124 1.11 rillig .if "<${:U:S,$,suffix,W}> <${:U:C,$,suffix,W}>" != "<suffix> <suffix>"
125 1.11 rillig . error
126 1.11 rillig .endif
127 1.11 rillig .if "<${:U:S,^$,whole,W}> <${:U:C,^$,whole,W}>" != "<whole> <whole>"
128 1.9 rillig . error
129 1.9 rillig .endif
130 1.9 rillig
131 1.9 rillig
132 1.3 rillig # Multiple consecutive asterisks ('****') form an invalid regular expression. This produces an
133 1.3 rillig # error message and (as of 2020-08-28) stops parsing in the middle of the
134 1.8 rillig # expression. The unparsed part of the expression is then copied
135 1.3 rillig # verbatim to the output, which is unexpected and can lead to strange shell
136 1.3 rillig # commands being run.
137 1.3 rillig mod-regex-compile-error:
138 1.3 rillig @echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.
139 1.3 rillig
140 1.3 rillig # These tests mostly provoke error messages (missing capturing group, invalid regex, unknown
141 1.3 rillig # modifier) but as of 2020-08-28 just continue parsing and execution as if nothing bad had happened.
142 1.12 rillig mod-regex-limits-1:
143 1.2 rillig @echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
144 1.12 rillig mod-regex-limits-2:
145 1.2 rillig @echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
146 1.12 rillig mod-regex-limits-3:
147 1.2 rillig @echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
148 1.12 rillig mod-regex-limits-4:
149 1.2 rillig @echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
150 1.12 rillig mod-regex-limits-5:
151 1.2 rillig @echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
152 1.12 rillig mod-regex-limits-6:
153 1.2 rillig # The :C modifier only handles single-digit capturing groups,
154 1.10 rillig # which is enough for all practical use cases.
155 1.2 rillig @echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}
156 1.2 rillig
157 1.12 rillig mod-regex-errors-1:
158 1.2 rillig @echo $@: ${UNDEF:Uvalue:C,[,,}
159 1.6 rillig
160 1.12 rillig mod-regex-errors-2:
161 1.6 rillig # If the replacement pattern produces a parse error because of an
162 1.6 rillig # unknown modifier, the parse error is ignored in ParseModifierPart
163 1.8 rillig # and the faulty expression expands to "".
164 1.6 rillig @echo $@: ${word:L:C,.*,x${:U:Z}y,W}
165 1.7 rillig
166 1.7 rillig # In regular expressions with alternatives, not all capturing groups are
167 1.7 rillig # always set; some may be missing. Make calls these "unmatched
168 1.7 rillig # subexpressions".
169 1.7 rillig #
170 1.7 rillig # From var.c 1.16 (1996-12-24) until before var.c 1.933 (2021-06-21),
171 1.7 rillig # unmatched subexpressions produced an "error message" but did not have any
172 1.7 rillig # further effect since the "error handling" didn't influence the exit status.
173 1.7 rillig #
174 1.7 rillig # Before 2021-06-21 there was no way to turn off this warning, thus the
175 1.7 rillig # combination of alternative matches and capturing groups was seldom used, if
176 1.7 rillig # at all.
177 1.7 rillig #
178 1.7 rillig # Since var.c 1.933 from 2021-06-21, the error message is only printed in lint
179 1.7 rillig # mode (-dL), but not in default mode.
180 1.7 rillig #
181 1.7 rillig # As an alternative to the change from var.c 1.933 from 2021-06-21, a possible
182 1.7 rillig # mitigation would have been to add a new modifier 'U' to the already existing
183 1.7 rillig # '1Wg' modifiers of the ':C' modifier. That modifier could have been used in
184 1.7 rillig # the modifier ':C,(a.)|(b.),\1\2,U' to treat unmatched subexpressions as
185 1.7 rillig # empty. This approach would have created a syntactical ambiguity since the
186 1.7 rillig # modifiers ':S' and ':C' are open-ended (see mod-subst-chain), that is, they
187 1.7 rillig # do not need to be followed by a ':' to separate them from the next modifier.
188 1.7 rillig # Luckily the modifier :U does not make sense after :C, therefore this case
189 1.7 rillig # does not happen in practice.
190 1.7 rillig unmatched-subexpression:
191 1.7 rillig # In each of the following cases, if the regular expression matches at
192 1.7 rillig # all, the subexpression \1 matches as well.
193 1.7 rillig @echo $@.ok: ${:U1 1 2 3 5 8 13 21 34:C,1(.*),one\1,}
194 1.7 rillig
195 1.7 rillig # In the following cases:
196 1.7 rillig # * The subexpression \1 is only defined for 1 and 13.
197 1.7 rillig # * The subexpression \2 is only defined for 2 and 21.
198 1.7 rillig # * If the regular expression does not match at all, the
199 1.7 rillig # replacement string is not analyzed, thus no error messages.
200 1.7 rillig # In total, there are 5 error messages about unmatched subexpressions.
201 1.7 rillig @echo $@.1: ${:U 1:C,1(.*)|2(.*),(\1)(\2),:Q} # missing \2
202 1.7 rillig @echo $@.1: ${:U 1:C,1(.*)|2(.*),(\1)(\2),:Q} # missing \2
203 1.7 rillig @echo $@.2: ${:U 2:C,1(.*)|2(.*),(\1)(\2),:Q} # missing \1
204 1.7 rillig @echo $@.3: ${:U 3:C,1(.*)|2(.*),(\1)(\2),:Q}
205 1.7 rillig @echo $@.5: ${:U 5:C,1(.*)|2(.*),(\1)(\2),:Q}
206 1.7 rillig @echo $@.8: ${:U 8:C,1(.*)|2(.*),(\1)(\2),:Q}
207 1.7 rillig @echo $@.13: ${:U 13:C,1(.*)|2(.*),(\1)(\2),:Q} # missing \2
208 1.7 rillig @echo $@.21: ${:U 21:C,1(.*)|2(.*),(\1)(\2),:Q} # missing \1
209 1.7 rillig @echo $@.34: ${:U 34:C,1(.*)|2(.*),(\1)(\2),:Q}
210 1.7 rillig
211 1.7 rillig # And now all together: 5 error messages for 1, 1, 2, 13, 21.
212 1.7 rillig @echo $@.all: ${:U1 1 2 3 5 8 13 21 34:C,1(.*)|2(.*),(\1)(\2),:Q}
213