# xref: /freebsd/contrib/bmake/unit-tests/varmod-subst-regex.mk
# (revision 6a7405f5a6b639682cacf01e35d561411ff556aa)
# $NetBSD: varmod-subst-regex.mk,v 1.12 2024/07/20 11:05:12 rillig Exp $
#
# Tests for the :C,from,to, variable modifier.

# Report unmatched subexpressions.  These messages are only printed in lint
# mode (-dL); the target 'unmatched-subexpression' below relies on them.
.MAKEFLAGS: -dL

# The targets of this test.  The brace lists name the numbered targets that
# are defined further below.
all: mod-regex-compile-error
all: mod-regex-limits-{1,2,3,4,5,6}
all: mod-regex-errors-{1,2}
all: unmatched-subexpression

# By default, the :C modifier is applied to each word separately.
# The expression expands to 4 words.  Of these words, none matches
# the regular expression "a b" since these words don't contain any
# whitespace.  The value is therefore left unchanged.
.if ${:Ua b b c:C,a b,,} != "a b b c"
.  error
.endif

# Using the '1' modifier does not change anything.  The '1' modifier just
# means to apply at most 1 replacement in the whole expression; it does not
# affect how the value is split into words, so still no word matches "a b".
.if ${:Ua b b c:C,a b,,1} != "a b b c"
.  error
.endif

# The 'W' modifier treats the whole variable value as a single big word,
# containing whitespace.  This big word matches the regular expression,
# therefore it gets replaced.  Whitespace is preserved after replacing:
# removing the leading "a b" leaves " b c", including the leading space.
.if ${:Ua b b c:C,a b,,W} != " b c"
.  error
.endif

# The 'g' modifier does not have any effect here since none of the words
# contains the character 'b' more than once.  The two words "b" become
# empty and therefore do not show up in the result.
.if ${:Ua b b c:C,b,,g} != "a c"
.  error
.endif

# The first :C modifier has the 'W' modifier, which makes the whole
# expression a single word.  The 'g' modifier then replaces all occurrences
# of "1 2" with "___".  The 'W' modifier only applies to this single :C
# modifier.  This is demonstrated by the :C modifier that follows: it
# replaces the first underscore in each of the words.  If the 'W' modifier
# were preserved, only a single underscore in the whole value would have
# been replaced with an 'x'.
.if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
.  error
.endif

# In the replacement, \0 refers to the text matched by the whole regular
# expression.
#
# The regular expression does not match in the first word.
# It matches once in the second word, and the \0\0 doubles that word.
# In the third word, the regular expression matches as early as possible,
# and since the matches must not overlap, the next possible match would
# start at the 6, but at that point, there is only one character left,
# and that cannot match the regular expression "..".  Therefore only the
# "45" is doubled in the third word.  (Without the 'g' modifier, only the
# first match in each word is replaced anyway.)
.if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
.  error
.endif

# The modifier '1' applies the replacement at most once, across the whole
# expression value, no matter whether it is a single big word or many small
# words.  Here, only the first character of the first word is doubled; the
# second word stays as it is.
#
# Up to 2020-08-28, the manual page said that the modifiers '1' and 'g'
# were orthogonal, which was wrong.  It doesn't make sense to specify both
# 'g' and '1' at the same time.
.if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
.  error
.endif

# A regular expression that matches the empty string applies before every
# single character of the word.  At the position of the 'a', the regular
# expression matches that 'a' instead of the empty string, so the 'a' is
# replaced as well.
# XXX: Most other places where regular expressions are used match at the end
# of the string as well.
.if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
.  error
.endif

# A dot in the regular expression matches any character, even a newline.
# In most other contexts where regular expressions are used, a dot matches
# any character except newline.  In make, regcomp is called without
# REG_NEWLINE, thus newline is an ordinary character.
#
# The value consists of 3 characters: a quote, a newline and another quote.
# Each of them is replaced with a literal dot.
.if ${:U"${.newline}":C,.,.,g} != "..."
.  error
.endif


# Like the ':S' modifier, the ':C' modifier matches on an expression
# that contains no words at all, but only if the regular expression matches an
# empty string, for example, when the regular expression is anchored at the
# beginning or the end of the word.  An unanchored regular expression that
# matches the empty string is uncommon in practice, as it would match before
# each character of the word.
#
# The conditions below compare ':S' and ':C' side by side, in 3 groups of 4:
# first without any flag, then with 'g', then with 'W'.  In the anchored
# cases, both modifiers produce the same result.  In the "unanchored" cases
# they differ: ':S' with an empty pattern leaves the empty value as it is,
# while ':C' with the pattern '.?' matches the empty string and replaces it.

# No flag.
.if "<${:U:S,,unanchored,}> <${:U:C,.?,unanchored,}>" != "<> <unanchored>"
.  error
.endif
.if "<${:U:S,^,prefix,}> <${:U:C,^,prefix,}>" != "<prefix> <prefix>"
.  error
.endif
.if "<${:U:S,$,suffix,}> <${:U:C,$,suffix,}>" != "<suffix> <suffix>"
.  error
.endif
.if "<${:U:S,^$,whole,}> <${:U:C,^$,whole,}>" != "<whole> <whole>"
.  error
.endif
# Flag 'g'.
.if "<${:U:S,,unanchored,g}> <${:U:C,.?,unanchored,g}>" != "<> <unanchored>"
.  error
.endif
.if "<${:U:S,^,prefix,g}> <${:U:C,^,prefix,g}>" != "<prefix> <prefix>"
.  error
.endif
.if "<${:U:S,$,suffix,g}> <${:U:C,$,suffix,g}>" != "<suffix> <suffix>"
.  error
.endif
.if "<${:U:S,^$,whole,g}> <${:U:C,^$,whole,g}>" != "<whole> <whole>"
.  error
.endif
# Flag 'W'.
.if "<${:U:S,,unanchored,W}> <${:U:C,.?,unanchored,W}>" != "<> <unanchored>"
.  error
.endif
.if "<${:U:S,^,prefix,W}> <${:U:C,^,prefix,W}>" != "<prefix> <prefix>"
.  error
.endif
.if "<${:U:S,$,suffix,W}> <${:U:C,$,suffix,W}>" != "<suffix> <suffix>"
.  error
.endif
.if "<${:U:S,^$,whole,W}> <${:U:C,^$,whole,W}>" != "<whole> <whole>"
.  error
.endif


# Multiple asterisks form an invalid regular expression.  This produces an
# error message and (as of 2020-08-28) stops parsing in the middle of the
# expression.  The unparsed part of the expression is then copied
# verbatim to the output, which is unexpected and can lead to strange shell
# commands being run.
#
# In this expression, the invalid regular expression "****" is in the first
# of the two :C modifiers.
mod-regex-compile-error:
	@echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.

# These tests generate error messages but as of 2020-08-28 just continue
# parsing and execution as if nothing bad had happened.
#
# The targets mod-regex-limits-1 to mod-regex-limits-5 apply replacements
# that refer to the capturing groups \1 or \2 to the words "1 23 456".  The
# labels "missing" and "ok" tell whether the regular expression defines the
# capturing group that the replacement refers to.

# The regular expression has no capturing group at all, so \1 is missing.
mod-regex-limits-1:
	@echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
# The regular expression has exactly 1 capturing group, so \1 is available.
mod-regex-limits-2:
	@echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
# The regular expression has no capturing group at all, so \2 is missing.
mod-regex-limits-3:
	@echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
# The regular expression has only 1 capturing group, so \2 is still missing.
mod-regex-limits-4:
	@echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
# The regular expression has 2 capturing groups, so \2 is available.
mod-regex-limits-5:
	@echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
# The regular expression has 20 capturing groups, and the replacement ends
# with \10\11\12, which looks like references to the groups 10 to 12.
mod-regex-limits-6:
	# The :C modifier only handles single-digit capturing groups,
	# which is enough for all practical use cases.
	@echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}

# The regular expression "[" is an unterminated bracket expression and thus
# cannot be compiled.
mod-regex-errors-1:
	@echo $@: ${UNDEF:Uvalue:C,[,,}

# The regular expression is valid, but the replacement contains the nested
# expression ${:U:Z}, which uses the unknown modifier ':Z'.
mod-regex-errors-2:
	# If the replacement pattern produces a parse error because of an
	# unknown modifier, the parse error is ignored in ParseModifierPart
	# and the faulty expression expands to "".
	@echo $@: ${word:L:C,.*,x${:U:Z}y,W}

# In regular expressions with alternatives, not all capturing groups are
# always set; some may be missing.  Make calls these "unmatched
# subexpressions".
#
# Between var.c 1.16 from 1996-12-24 until before var.c 1.933 from 2021-06-21,
# unmatched subexpressions produced an "error message" but did not have any
# further effect since the "error handling" didn't influence the exit status.
#
# Before 2021-06-21 there was no way to turn off this warning, thus the
# combination of alternative matches and capturing groups was seldom used, if
# at all.
#
# Since var.c 1.933 from 2021-06-21, the error message is only printed in lint
# mode (-dL), but not in default mode.
#
# As an alternative to the change from var.c 1.933 from 2021-06-21, a possible
# mitigation would have been to add a new modifier 'U' to the already existing
# '1Wg' modifiers of the ':C' modifier.  That modifier could have been used in
# the modifier ':C,(a.)|(b.),\1\2,U' to treat unmatched subexpressions as
# empty.  This approach would have created a syntactical ambiguity since the
# modifiers ':S' and ':C' are open-ended (see mod-subst-chain), that is, they
# do not need to be followed by a ':' to separate them from the next modifier.
# Luckily the modifier :U does not make sense after :C, therefore this case
# does not happen in practice.
#
# The words used below are the first Fibonacci numbers, which is why the
# word "1" occurs twice, both in the combined expressions and as a separate
# command.
unmatched-subexpression:
	# In each of the following cases, if the regular expression matches at
	# all, the subexpression \1 matches as well.
	@echo $@.ok: ${:U1 1 2 3 5 8 13 21 34:C,1(.*),one\1,}

	# In the following cases:
	#	* The subexpression \1 is only defined for 1 and 13.
	#	* The subexpression \2 is only defined for 2 and 21.
	#	* If the regular expression does not match at all, the
	#	  replacement string is not analyzed, thus no error messages.
	# In total, there are 5 error messages about unmatched subexpressions.
	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
	@echo $@.1:  ${:U  1:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
	@echo $@.2:  ${:U  2:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
	@echo $@.3:  ${:U  3:C,1(.*)|2(.*),(\1)(\2),:Q}
	@echo $@.5:  ${:U  5:C,1(.*)|2(.*),(\1)(\2),:Q}
	@echo $@.8:  ${:U  8:C,1(.*)|2(.*),(\1)(\2),:Q}
	@echo $@.13: ${:U 13:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \2
	@echo $@.21: ${:U 21:C,1(.*)|2(.*),(\1)(\2),:Q}		# missing \1
	@echo $@.34: ${:U 34:C,1(.*)|2(.*),(\1)(\2),:Q}

	# And now all together: 5 error messages for 1, 1, 2, 13, 21.
	@echo $@.all: ${:U1 1 2 3 5 8 13 21 34:C,1(.*)|2(.*),(\1)(\2),:Q}
