xref: /freebsd/contrib/bmake/unit-tests/varmod-match.mk (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
1# $NetBSD: varmod-match.mk,v 1.15 2023/06/23 04:56:54 rillig Exp $
2#
3# Tests for the :M variable modifier, which filters words that match the
4# given pattern.
5#
6# See ApplyModifier_Match and ModifyWord_Match for the implementation.
7
8.MAKEFLAGS: -dc		# log the evaluation of conditions (debug flag 'c')
9
10NUMBERS=	One Two Three Four five six seven
11
12# '[A-Z]*' only keeps the words that start with an uppercase letter.
13.if ${NUMBERS:M[A-Z]*} != "One Two Three Four"
14.  error
15.endif
16
17# '[^A-Z]*' only keeps words whose first character is not an uppercase letter.
18.if ${NUMBERS:M[^A-Z]*} != "five six seven"
19.  error
20.endif
21
22# Only keep words that don't start with 's' and at the same time end with
23# either 'e' or 'x'.
24#
25# This test case ensures that the negation from the first character class
26# does not propagate to the second character class ('six' and 'seven' are
26# rejected by '[^s]', 'Two' and 'Four' by '[ex]').
27.if ${NUMBERS:M[^s]*[ex]} != "One Three five"
28.  error
29.endif
30
31# Many consecutive '*': before 2020-06-13, this expression called Str_Match
32# 601,080,390 times; since 2020-06-13, it calls Str_Match 1 time.
33.if ${:U****************:M****************b}
34.endif				# nothing to assert, see the call counts above
35
36# Before 2023-06-22, this expression called Str_Match 2,621,112 times.
37# Adding another '*?' to the pattern called Str_Match 20,630,572 times.
38# Adding another '*?' to the pattern called Str_Match 136,405,672 times.
39# Adding another '*?' to the pattern called Str_Match 773,168,722 times.
40# Adding another '*?' to the pattern called Str_Match 3,815,481,072 times.
41# Since 2023-06-22, Str_Match no longer backtracks.
42.if ${:U..................................................b:M*?*?*?*?*?a}
43.endif				# nothing to assert, see the call counts above
44
45# To match a dollar sign in a word, double it in the pattern ('$$').
46#
47# This is different from the :S and :C variable modifiers, where a '$'
48# has to be escaped as '\$'.
49.if ${:Ua \$ sign:M*$$*} != "\$"
50.  error
51.endif
52
53# In the :M modifier, '\$' does not escape a dollar.  Instead it is
54# interpreted as a backslash followed by whatever expression the '$' starts.
55# Below, '$*' expands to the value of the variable named '*', "asterisk",
56# so the effective pattern is '*\asterisk'.
57# This differs from the :S, :C and several other variable modifiers.
58${:U*}=		asterisk
59.if ${:Ua \$ sign any-asterisk:M*\$*} != "any-asterisk"
60.  error
61.endif
62
63# TODO: ${VAR:M(((}}}}		(unbalanced parentheses and braces)
64# TODO: ${VAR:M{{{)))}
65# TODO: ${VAR:M${UNBALANCED}}	(unbalanced pattern from a variable)
66# TODO: ${VAR:M${:U(((\}\}\}}}
67
68.MAKEFLAGS: -d0		# NOTE(review): presumably resets the debug flags set by -dc -- confirm
69
70# Special characters:
71#	*	matches 0 or more arbitrary characters
72#	?	matches a single arbitrary character
73#	\	starts an escape sequence, only outside sets ('[...]')
74#	[	starts a set for matching a single character
75#	]	ends a set for matching a single character
76#	-	in a set, forms a range of characters
77#	^	as the first character in a set, negates the set
78#	(	during parsing of the pattern, starts a nesting level
79#	)	during parsing of the pattern, ends a nesting level
80#	{	during parsing of the pattern, starts a nesting level
81#	}	during parsing of the pattern, ends a nesting level
82#	:	during parsing of the pattern, finishes the pattern
83#	$	during parsing of the pattern, starts a nested expression
84#	#	in a line except a shell command, starts a comment
85#
86# Pattern parts:
87#	*	matches 0 or more arbitrary characters
88#	?	matches exactly 1 arbitrary character
89#	\x	matches exactly the character 'x'
90#	[...]	matches exactly 1 character from the set
91#	[^...]	matches exactly 1 character outside the set
92#	[a-z]	matches exactly 1 character from the range 'a' to 'z'
93#
94
95#	[]	never matches, the set is empty (':L' takes the words from the name)
96.if ${ ab a[]b a[b a b :L:M[]} != ""
97.  error
98.endif
99
100#	a[]b	never matches, not even the literal word 'a[]b'
101.if ${ ab a[]b a[b a b [ ] :L:Ma[]b} != ""
102.  error
103.endif
104
105#	[^]	matches exactly 1 arbitrary character, including '[' and ']'
106.if ${ ab a[]b a[b a b [ ] :L:M[^]} != "a b [ ]"
107.  error
108.endif
109
110#	a[^]b	matches 'a', then exactly 1 arbitrary character, then 'b'
111.if ${ ab a[]b a[b a b :L:Ma[^]b} != "a[b"
112.  error
113.endif
114
115#	[Nn0]	matches exactly 1 character from the set 'N', 'n', '0'; 'Nn0' is too long
116.if ${ a b N n 0 Nn0 [ ] :L:M[Nn0]} != "N n 0"
117.  error
118.endif
119
120#	[a-c]	matches exactly 1 character from the range 'a' to 'c', case-sensitively
121.if ${ A B C a b c d [a-c] [a] :L:M[a-c]} != "a b c"
122.  error
123.endif
124
125#	[c-a]	matches the same as [a-c]; the bounds of a range may be reversed
126.if ${ A B C a b c d [a-c] [a] :L:M[c-a]} != "a b c"
127.  error
128.endif
129
130#	[^a-c67]
131#		matches a single character other than 'a', 'b', 'c', '6' or
132#		'7'; a negated set may mix a range with single characters
133.if ${ A B C a b c d 5 6 7 8 [a-c] [a] :L:M[^a-c67]} != "A B C d 5 8"
134.  error
135.endif
136
137#	[\]	matches a single backslash; the word 'a[\]b' itself does not match
138WORDS=		a\b a[\]b ab
139.if ${WORDS:Ma[\]b} != "a\\b"
140.  error
141.endif
142
143#	:	terminates the pattern; the empty pattern matches none of the words
144.if ${ A * :L:M:} != ""
145.  error
146.endif
147
148#	\:	matches a single colon, but not the word '::'
149.if ${ ${:U\: \:\:} :L:M\:} != ":"
150.  error
151.endif
152
153#	${:U\:}	matches a single colon, the pattern comes from a nested expression
154.if ${ ${:U\:} ${:U\:\:} :L:M${:U\:}} != ":"
155.  error
156.endif
157
158#	[:]	never matches: the ':' ends the pattern '[' and starts a modifier ']'
159# expect+3: warning: Unfinished character list in pattern '[' of modifier ':M'
160# expect+2: Unknown modifier "]"
161# expect+1: Malformed conditional (${ ${:U\:} ${:U\:\:} :L:M[:]} != ":")
162.if ${ ${:U\:} ${:U\:\:} :L:M[:]} != ":"
163.  error		# NOTE(review): presumably unreachable, the condition is malformed
164.else
165.  error		# NOTE(review): presumably unreachable as well -- confirm
166.endif
167
168#	[\]	matches exactly a backslash; no escaping takes place in
169#		character lists
170# Without the trailing 'a' in the words below, the backslash would be the last
171# character of a word and thus influence how the string is split into words.
172WORDS=		1\a 2\\a
173.if ${WORDS:M?[\]a} != "1\\a"
174.  error
175.endif
176
177#	[[-]]	May look like it would match a single '[', '\' or ']', but
178#		the inner ']' has two roles: it is the upper bound of the
179#		character range as well as the closing character of the
180#		character list.  The outer ']' is just a regular character.
181WORDS=		[ ] [] \] ]]
182.if ${WORDS:M[[-]]} != "[] \\] ]]"
183.  error
184.endif
185
186#	[b[-]a]
187#		Same as for '[[-]]': the character list stops at the first
188#		']', and the 'a]' is treated as a literal string.
189WORDS=		[a \a ]a []a \]a ]]a [a] \a] ]a] ba]
190.if ${WORDS:M[b[-]a]} != "[a] \\a] ]a] ba]"
191.  error
192.endif
193
194#	[-]	Matches a single '-' since the '-' only becomes part of a
195#		character range if it is preceded and followed by another
196#		character.  The word '-]' is too long to match.
197WORDS=		- -]
198.if ${WORDS:M[-]} != "-"
199.  error
200.endif
201
202#	[	Incomplete empty character list, never matches, not even 'a['.
203WORDS=		a a[
204# expect+1: warning: Unfinished character list in pattern 'a[' of modifier ':M'
205.if ${WORDS:Ma[} != ""
206.  error
207.endif
208
209#	[^	Incomplete negated empty character list, matches any single
210#		character; the word 'a' alone is too short for 'a[^'.
211WORDS=		a a[ aX
212# expect+1: warning: Unfinished character list in pattern 'a[^' of modifier ':M'
213.if ${WORDS:Ma[^} != "a[ aX"
214.  error
215.endif
216
217#	[-x1-3	Incomplete character list, matches those elements that can be
218#		parsed without lookahead: '-', 'x' and the range '1' to '3'.
219WORDS=		- + x xx 0 1 2 3 4 [x1-3
220# expect+1: warning: Unfinished character list in pattern '[-x1-3' of modifier ':M'
221.if ${WORDS:M[-x1-3} != "- x 1 2 3"
222.  error
223.endif
224
225#	*[-x1-3	Incomplete character list after a wildcard, matches those
226#		words that end with one of '-', 'x', '1', '2' or '3'.
227WORDS=		- + x xx 0 1 2 3 4 00 01 10 11 000 001 010 011 100 101 110 111 [x1-3
228# expect+1: warning: Unfinished character list in pattern '*[-x1-3' of modifier ':M'
229.if ${WORDS:M*[-x1-3} != "- x xx 1 2 3 01 11 001 011 101 111 [x1-3"
230.  warning ${WORDS:M*[-x1-3}	# NOTE(review): only warns, the other cases use '.error'
231.endif
232
233#	[^-x1-3
234#		Incomplete negated character list, matches any single character
235#		except those elements that can be parsed without lookahead.
236WORDS=		- + x xx 0 1 2 3 4 [x1-3
237# expect+1: warning: Unfinished character list in pattern '[^-x1-3' of modifier ':M'
238.if ${WORDS:M[^-x1-3} != "+ 0 4"
239.  error
240.endif
241
242#	[\	Incomplete character list containing a single '\'.
243#		NOTE(review): the pattern below is '?[\]', whose list is closed
244#		by ']', so the incomplete form is not exercised here -- confirm.
245#		A word can only end with a backslash if the character before
246#		it is a backslash as well; otherwise the final backslash would
247#		escape the following space, making the space part of the word.
248#		Only the very last word of a string can be '\': no space follows.
249WORDS=		\\ \a ${:Ux\\}
250.if ${WORDS:M?[\]} != "\\\\ x\\"
251.  error
252.endif
253
254#	[x-	Incomplete character list containing an incomplete character
255#		range, matches only the single character 'x'.
256WORDS=		[x- x x- y
257# expect+1: warning: Unfinished character range in pattern '[x-' of modifier ':M'
258.if ${WORDS:M[x-} != "x"
259.  error
260.endif
261
262#	[^x-	Incomplete negated character list containing an incomplete
263#		character range; matches each word that does not have an 'x'
264#		at the position of the character list.
265#
266#		XXX: Even matches words that are longer than a single
267#		character, such as '[x-' and 'yyyyy'.
268WORDS=		[x- x x- y yyyyy
269# expect+1: warning: Unfinished character range in pattern '[^x-' of modifier ':M'
270.if ${WORDS:M[^x-} != "[x- y yyyyy"
271.  error
272.endif
273
274
275# The modifier ':tW' prevents splitting at whitespace, so ':M*' sees the whole
276# string as a single word.  Even leading and trailing whitespace is preserved.
277.if ${   plain   string   :L:tW:M*} != "   plain   string   "
278.  error
279.endif
280
281# Without the modifier ':tW', the string is split into words.  Leading and
282# trailing whitespace is dropped, whitespace between words becomes 1 space.
283.if ${   plain    string   :L:M*} != "plain string"
284.  error
285.endif
286
287
288# The pattern can come from an expression.  For single-letter variables,
289# either the short form ($n) or the long form (${n}) can be used, just as
290# everywhere else.  An expression with only modifiers (${:U2}) works as well.
291PRIMES=	2 3 5 7 11
292n=	2
293.if ${PRIMES:M$n} != "2"
294.  error
295.endif
296.if ${PRIMES:M${n}} != "2"
297.  error
298.endif
299.if ${PRIMES:M${:U2}} != "2"
300.  error
301.endif
302
303
304# Before var.c 1.1031 from 2022-08-24, the following expressions caused an
305# out-of-bounds read beyond the indirect ':M' modifiers (from '${:UM...}').
306.if ${:U:${:UM\\}}		# The ':M' pattern needs no unescaping; the
307.  error			# resulting pattern is '\', which never
308.endif				# matches anything.
309.if ${:U:${:UM\\\:\\}}		# The ':M' pattern must be unescaped; the
310.  error			# resulting pattern is ':\', which never
311.endif				# matches anything.
312