xref: /freebsd/contrib/bmake/unit-tests/varmod-subst-regex.mk (revision 06b9b3e0ad0dc3f0166b3e8f26ced68c271cf527)
1# $NetBSD: varmod-subst-regex.mk,v 1.6 2020/12/05 18:13:44 rillig Exp $
2#
3# Tests for the :C,from,to, variable modifier.
4
# The 'all' target depends on the three recipe-based test targets that are
# defined further down in this file.
5all: mod-regex-compile-error
6all: mod-regex-limits
7all: mod-regex-errors
8
9# The variable expression expands to 4 words ("a", "b", "b", "c").  None of
10# these words matches the regular expression "a b" since no single word
11# contains whitespace, so the value is left unchanged.
12.if ${:Ua b b c:C,a b,,} != "a b b c"
13.  error
14.endif
15
16# Adding the '1' modifier does not change the result.  The '1' modifier only
17# limits the modifier to at most 1 replacement in the whole expression value.
18.if ${:Ua b b c:C,a b,,1} != "a b b c"
19.  error
20.endif
21
22# The 'W' modifier treats the whole variable value as a single big word
23# that contains whitespace.  This big word matches the regular expression,
24# so the match "a b" is removed.  The remaining whitespace is preserved.
25.if ${:Ua b b c:C,a b,,W} != " b c"
26.  error
27.endif
28
29# The 'g' modifier does not have any effect here since each word contains
30# the character 'b' at most once.  Words that become empty are left out.
31.if ${:Ua b b c:C,b,,g} != "a c"
32.  error
33.endif
34
35# The first :C modifier has the 'W' modifier, which makes the whole
36# expression a single word.  The 'g' modifier then replaces all occurrences
37# of "1 2" with "___".  The 'W' modifier only applies to this single :C
38# modifier.  This is demonstrated by the :C modifier that follows.  If the
39# 'W' modifier were preserved, only a single underscore would have been
40# replaced with an 'x'.
41.if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
42.  error
43.endif
44
45# The first word has only one character and thus cannot match "..".
46# The expression matches once in the second word; \0\0 doubles that word.
47# In the third word, the regular expression matches as early as possible,
48# and since the matches must not overlap, the next possible match would
49# start at the 6, but at that point, there is only one character left,
50# and that cannot match the regular expression "..".  Therefore only the
51# "45" is doubled in the third word.
52.if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
53.  error
54.endif
55
56# The modifier '1' applies the replacement at most once, across the whole
57# expression value, no matter whether it is a single big word or many small
58# words.  Here, only the first character of the first word is doubled.
59#
60# Up to 2020-08-28, the manual page said that the modifiers '1' and 'g'
61# were orthogonal, which was wrong.  It doesn't make sense to specify both
62# 'g' and '1' at the same time.
63.if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
64.  error
65.endif
66
67# A regular expression that matches the empty string applies before every
68# single character of the word.
69# XXX: Most other places where regular expressions are used also match at
70# the end of the string.
71.if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
72.  error
73.endif
74
75# A dot in the regular expression matches any character, even a newline;
76# here the quote, the newline and the quote each become a dot.  In most other
77# contexts, a dot matches any character except newline.  In make, regcomp is
78# called without REG_NEWLINE, thus newline is an ordinary character.
79.if ${:U"${.newline}":C,.,.,g} != "..."
80.  error
81.endif
82
83# Multiple consecutive asterisks form an invalid regular expression.  This
84# produces an error message and (as of 2020-08-28) stops parsing in the
85# middle of the variable expression.  The unparsed part of the expression is
86# then copied verbatim to the output, which is unexpected and can lead to
87# strange shell commands being run.
88mod-regex-compile-error:
89	@echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.
90
91# These tests generate error messages but as of 2020-08-28 just continue
92# parsing and execution as if nothing bad had happened.
# In the cases labeled "missing", the replacement refers to a capturing
# group (\1 or \2) that the regular expression does not define.  In the
# cases labeled "ok", the referenced group exists.
93mod-regex-limits:
94	@echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
95	@echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
96	@echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
97	@echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
98	@echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
99	# The :C modifier only handles single-digit capturing groups,
100	# which is more than enough for daily use.
	# NOTE(review): \10, \11 and \12 are therefore presumably read as \1
	# followed by a literal digit; confirm against the expected output.
101	@echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}
102
# The first command uses the regular expression "[", an unterminated
# bracket expression, which is expected to fail to compile.
103mod-regex-errors:
104	@echo $@: ${UNDEF:Uvalue:C,[,,}
105
106	# If the replacement pattern produces a parse error because of an
107	# unknown modifier (here ':Z'), the parse error is ignored in
108	# ParseModifierPart and the faulty variable expression expands to "".
109	@echo $@: ${word:L:C,.*,x${:U:Z}y,W}
110