# $NetBSD: varmod-match.mk,v 1.32 2025/06/29 09:40:13 rillig Exp $
#
# Tests for the ':M' modifier, which keeps only those words that match the
# given pattern.
#
# Each test is a self-checking conditional: if the expression does not
# evaluate to the expected string, the '.error' (or '.warning') directive in
# the body of the conditional reports the failure.
#
# Table of contents
#
# 1. Pattern characters '*', '?' and '\'
# 2. Character lists and character ranges
# 3. Parsing and escaping
# 4. Interaction with other modifiers
# 5. Performance
# 6. Error handling
# 7. Historical bugs
#
# See also:
#	char-005c-reverse-solidus.mk
#	ApplyModifier_Match
#	ParseModifier_Match
#	ModifyWord_Match
#	Str_Match


# 1. Pattern characters '*', '?' and '\'
#
#	*	matches 0 or more characters
#	?	matches 1 character
#	\x	matches the character 'x'

# The pattern is anchored both at the beginning and at the end of the word.
# Since the pattern 'e' does not contain any pattern matching characters, it
# matches exactly the word 'e', which occurs twice in the word list.
.if ${a c e aa cc ee e f g:L:Me} != "e e"
. error
.endif

# The pattern character '?' matches exactly 1 character, the pattern character
# '*' matches 0 or more characters.  The whole pattern matches all words that
# start with 's' and have 3 or more characters.
.if ${One Two Three Four five six seven so s:L:Ms??*} != "six seven"
. error
.endif

# A pattern without placeholders only matches itself.
.if ${a aa aaa b ba baa bab:L:Ma} != "a"
. error
.endif

# A pattern that does not start with '*' is anchored at the beginning.
.if ${a aa aaa b ba baa bab:L:Ma*} != "a aa aaa"
. error
.endif

# A pattern that does not end with '*' is anchored at the end.
.if ${a aa aaa b ba baa bab:L:M*a} != "a aa aaa ba baa"
. error
.endif

# Test the fast code path for '*' followed by a regular character.
.if ${:U file.c file.*c file.h file\.c :M*.c} != "file.c file\\.c"
. error
.endif
# Ensure that the fast code path correctly handles the backslash.
.if ${:U file.c file.*c file.h file\.c :M*\.c} != "file.c file\\.c"
. error
.endif
# Ensure that the fast code path correctly handles '\*', which matches a
# literal asterisk instead of acting as a wildcard.
.if ${:U file.c file.*c file.h file\.c :M*\*c} != "file.*c"
. error
.endif
# Ensure that the partial match '.c' doesn't confuse the fast code path.
.if ${:U file.c.cc file.cc.cc file.cc.c :M*.cc} != "file.c.cc file.cc.cc"
. error
.endif
# Ensure that the substring '.cc' doesn't confuse the fast code path for '.c'.
.if ${:U file.c.cc file.cc.cc file.cc.c :M*.c} != "file.cc.c"
. error
.endif


# 2. Character lists and character ranges
#
#	[...]	matches 1 character from the listed characters
#	[^...]	matches 1 character from the unlisted characters
#	[a-z]	matches 1 character from the range 'a' to 'z'
#	[z-a]	matches 1 character from the range 'a' to 'z'; the bounds of
#		a range may be given in either order

# Only keep words that start with an uppercase letter.
.if ${One Two Three Four five six seven:L:M[A-Z]*} != "One Two Three Four"
. error
.endif

# Only keep words that start with a character other than an uppercase letter.
.if ${One Two Three Four five six seven:L:M[^A-Z]*} != "five six seven"
. error
.endif

#	[]	The empty character list never matches.
.if ${ ab a[]b a[b a b :L:M[]} != ""
. error
.endif

#	a[]b	Never matches, not even the literal word 'a[]b'.
.if ${ ab a[]b a[b a b [ ] :L:Ma[]b} != ""
. error
.endif

#	[^]	The negated empty character list matches exactly 1 arbitrary
#		character.
.if ${ ab a[]b a[b a b [ ] :L:M[^]} != "a b [ ]"
. error
.endif

#	a[^]b	Matches 'a', then exactly 1 arbitrary character, then 'b'.
.if ${ ab a[]b a[b a b :L:Ma[^]b} != "a[b"
. error
.endif

#	[Nn0]	Matches exactly 1 character from the set 'N', 'n', '0'.
.if ${ a b N n 0 Nn0 [ ] :L:M[Nn0]} != "N n 0"
. error
.endif

#	[a-c]	Matches exactly 1 character from the range 'a' to 'c'.
.if ${ A B C a b c d [a-c] [a] :L:M[a-c]} != "a b c"
. error
.endif

#	[c-a]	Matches the same as [a-c].
.if ${ A B C a b c d [a-c] [a] :L:M[c-a]} != "a b c"
. error
.endif

#	[^a-c67]
#		Matches a single character, except for 'a', 'b', 'c', '6' or
#		'7'.
.if ${ A B C a b c d 5 6 7 8 [a-c] [a] :L:M[^a-c67]} != "A B C d 5 8"
. error
.endif

#	[\]	Matches a single backslash; no escaping takes place in
#		character ranges.
# Without the 'b' in the below words, the backslash would end a word and thus
# influence how the string is split into words.
WORDS= a\b a[\]b ab a\\b
.if ${WORDS:Ma[\]b} != "a\\b"
. error
.endif

#	[[-]]	May look like it would match a single '[', '\' or ']', but
#		the inner ']' has two roles: it is the upper bound of the
#		character range as well as the closing character of the
#		character list.  The outer ']' is just a regular character.
WORDS= [ ] [] \] ]]
.if ${WORDS:M[[-]]} != "[] \\] ]]"
. error
.endif

#	[b[-]a]
#		Same as for '[[-]]': the character list stops at the first
#		']', and the 'a]' is treated as a literal string.
WORDS= [a \a ]a []a \]a ]]a [a] \a] ]a] ba]
.if ${WORDS:M[b[-]a]} != "[a] \\a] ]a] ba]"
. error
.endif

#	[-]	Matches a single '-' since the '-' only becomes part of a
#		character range if it is preceded and followed by another
#		character.
WORDS= - -]
.if ${WORDS:M[-]} != "-"
. error
.endif

# Only keep words that don't start with s and at the same time end with
# either of [ex].
#
# This test case ensures that the negation from the first character list
# '[^s]' does not propagate to the second character list '[ex]'.
.if ${One Two Three Four five six seven:L:M[^s]*[ex]} != "One Three five"
. error
.endif


# 3. Parsing and escaping
#
#	*	matches 0 or more characters
#	?	matches 1 character
#	\	outside a character list, escapes the following character
#	[	starts a character list for matching 1 character
#	]	ends a character list for matching 1 character
#	-	in a character list, forms a character range
#	^	at the beginning of a character list, negates the list
#	(	while parsing the pattern, starts a nesting level
#	)	while parsing the pattern, ends a nesting level
#	{	while parsing the pattern, starts a nesting level
#	}	while parsing the pattern, ends a nesting level
#	:	while parsing the pattern, terminates the pattern
#	$	while parsing the pattern, starts a nested expression
#	#	in a line except a shell command, starts a comment

# The pattern can come from an expression.  For single-letter
# variables, either the short form or the long form can be used, just as
# everywhere else.
PRIMES= 2 3 5 7 11
n= 2
.if ${PRIMES:M$n} != "2"
. error
.endif
.if ${PRIMES:M${n}} != "2"
. error
.endif
.if ${PRIMES:M${:U2}} != "2"
. error
.endif

#	:	Terminates the pattern; the resulting pattern is empty and
#		matches none of the words.
.if ${ A * :L:M:} != ""
. error
.endif

#	\:	Matches a colon.
.if ${ ${:U\: \:\:} :L:M\:} != ":"
. error
.endif

#	${:U\:}	Matches a colon.
.if ${ ${:U\:} ${:U\:\:} :L:M${:U\:}} != ":"
. error
.endif

# To match a dollar sign in a word, double it.
#
# This is different from the :S and :C modifiers, where a '$' has to be
# escaped as '\$'.
.if ${:Ua \$ sign:M*$$*} != "\$"
. error
.endif

# In the :M modifier, '\$' does not escape a dollar.  Instead it is
# interpreted as a backslash followed by whatever expression the
# '$' starts.
#
# This differs from the :S, :C and several other modifiers.
#
# The assignment below defines a variable whose name is '*', so that the
# '$*' in the pattern of the following condition has a defined value.
${:U*}= asterisk
.if ${:Ua \$ sign any-asterisk:M*\$*} != "any-asterisk"
. error
.endif

# TODO: ${VAR:M(((}}}}
# TODO: ${VAR:M{{{)))}
# TODO: ${VAR:M${UNBALANCED}}
# TODO: ${VAR:M${:U(((\}\}\}}}


# 4. Interaction with other modifiers

# The modifier ':tW' prevents splitting at whitespace.  Even leading and
# trailing whitespace is preserved.
.if ${ plain string :L:tW:M*} != " plain string "
. error
.endif

# Without the modifier ':tW', the string is split into words.  Whitespace
# around the words is discarded, and whitespace between the words is
# normalized to a single space.
.if ${ plain string :L:M*} != "plain string"
. error
.endif


# 5. Performance
#
# The conditions in this section have empty bodies: the expressions only need
# to be evaluated, their results are not compared against anything.

# Before 2020-06-13, this expression called Str_Match 601,080,390 times.
# Since 2020-06-13, this expression calls Str_Match 1 time.
.if ${:U****************:M****************b}
.endif

# Before 2023-06-22, this expression called Str_Match 2,621,112 times.
# Adding another '*?' to the pattern called Str_Match 20,630,572 times.
# Adding another '*?' to the pattern called Str_Match 136,405,672 times.
# Adding another '*?' to the pattern called Str_Match 773,168,722 times.
# Adding another '*?' to the pattern called Str_Match 3,815,481,072 times.
# Since 2023-06-22, Str_Match no longer backtracks.
.if ${:U..................................................b:M*?*?*?*?*?a}
.endif


# 6. Error handling
#
# NOTE(review): the '# expect+N:' comments in this section appear to be
# line-relative annotations that name the diagnostic expected N lines further
# down; presumably they are read by the test harness, so keep each of them
# verbatim and directly above the line it refers to -- confirm.

#	[	Incomplete empty character list, never matches.
WORDS= a a[
# expect+1: Unfinished character list in pattern "a[" of modifier ":M"
.if ${WORDS:Ma[} != ""
. error
.endif

#	[^	Incomplete negated empty character list, matches any single
#		character.
WORDS= a a[ aX
# expect+1: Unfinished character list in pattern "a[^" of modifier ":M"
.if ${WORDS:Ma[^} != "a[ aX"
. error
.endif

#	[-x1-3	Incomplete character list, matches those elements that can be
#		parsed without lookahead.
WORDS= - + x xx 0 1 2 3 4 [x1-3
# expect+1: Unfinished character list in pattern "[-x1-3" of modifier ":M"
.if ${WORDS:M[-x1-3} != "- x 1 2 3"
. error
.endif

#	*[-x1-3	Incomplete character list after a wildcard, matches those
#		words that end with one of the characters from the list.
#
# NOTE(review): unlike the other tests in this file, a mismatch is reported
# here via '.warning' together with the actual result of the expression,
# rather than via a bare '.error'.
WORDS= - + x xx 0 1 2 3 4 00 01 10 11 000 001 010 011 100 101 110 111 [x1-3
# expect+1: Unfinished character list in pattern "*[-x1-3" of modifier ":M"
.if ${WORDS:M*[-x1-3} != "- x xx 1 2 3 01 11 001 011 101 111 [x1-3"
. warning ${WORDS:M*[-x1-3}
.endif

#	[^-x1-3
#		Incomplete negated character list, matches any character
#		except those elements that can be parsed without lookahead.
WORDS= - + x xx 0 1 2 3 4 [x1-3
# expect+1: Unfinished character list in pattern "[^-x1-3" of modifier ":M"
.if ${WORDS:M[^-x1-3} != "+ 0 4"
. error
.endif

#	[\	Incomplete character list containing a single '\'.
#
#		A word can only end with a backslash if the preceding
#		character is a backslash as well; in all other cases the final
#		backslash would escape the following space, making the space
#		part of the word.  Only the very last word of a string can be
#		'\', as there is no following space that could be escaped.
WORDS= \\ \a ${:Ux\\}
PATTERN= ${:U?[\\}
# expect+1: Unfinished character list in pattern "?[\" of modifier ":M"
.if ${WORDS:M${PATTERN}} != "\\\\ x\\"
. error
.endif

#	[x-	Incomplete character list containing an incomplete character
#		range, matches only the 'x'.
WORDS= [x- x x- y
# expect+1: Unfinished character range in pattern "[x-" of modifier ":M"
.if ${WORDS:M[x-} != "x"
. error
.endif

#	[^x-	Incomplete negated character list containing an incomplete
#		character range; matches each word that does not have an 'x'
#		at the position of the character list.
#
#		XXX: Even matches strings that are longer than a single
#		character.
WORDS= [x- x x- y yyyyy
# expect+1: Unfinished character range in pattern "[^x-" of modifier ":M"
.if ${WORDS:M[^x-} != "[x- y yyyyy"
. error
.endif

#	[:]	Never matches, since the ':' terminates the pattern and starts
#		the next modifier.  The pattern is thus the unfinished
#		character list '[', and ':]' is parsed as a separate, unknown
#		modifier.
#
# NOTE(review): both branches of the conditional contain '.error';
# presumably neither of them is reached because the condition cannot be
# evaluated due to the errors in the expression -- confirm.
# expect+2: Unfinished character list in pattern "[" of modifier ":M"
# expect+1: Unknown modifier ":]"
.if ${ ${:U\:} ${:U\:\:} :L:M[:]} != ":"
. error
.else
. error
.endif


# 7. Historical bugs

# Before var.c 1.1031 from 2022-08-24, the following expressions caused an
# out-of-bounds read beyond the indirect ':M' modifiers.
#
# The argument to the inner ':U' is unescaped to 'M\'.
# This 'M\' becomes an indirect modifier ':M' with the pattern '\'.
# The pattern '\' never matches.
.if ${:U:${:UM\\}}
. error
.endif
# The argument to the inner ':U' is unescaped to 'M\:\'.
# This 'M\:\' becomes an indirect modifier ':M' with the pattern ':\'.
# The pattern ':\' never matches.
.if ${:U:${:UM\\\:\\}}
. error
.endif