# $NetBSD: varmod-subst-regex.mk,v 1.7 2021/06/21 08:17:39 rillig Exp $
#
# Tests for the :C,from,to, variable modifier.

# Run in lint mode (-dL) so that unmatched subexpressions get reported.
.MAKEFLAGS: -dL

all: mod-regex-compile-error mod-regex-limits mod-regex-errors \
    unmatched-subexpression

# The value is split into 4 words.  The pattern "a b" contains a space,
# and no single word contains whitespace, so nothing matches and the
# value stays as it is.
.if ${:Ua b b c:C,a b,,} != "a b b c"
.  error
.endif

# Adding the '1' flag makes no difference here.  It merely limits the
# whole variable expression to at most 1 replacement.
.if ${:Ua b b c:C,a b,,1} != "a b b c"
.  error
.endif

# With the 'W' flag, the entire value, whitespace included, counts as one
# big word.  That word matches the pattern, so the matching part is
# replaced.  The whitespace that remains is kept.
.if ${:Ua b b c:C,a b,,W} != " b c"
.  error
.endif

# The 'g' flag is irrelevant here, as the character 'b' occurs only once
# in each of the words.
.if ${:Ua b b c:C,b,,g} != "a c"
.  error
.endif

# In the first :C modifier, 'W' turns the whole expression into a single
# word and 'g' replaces every occurrence of "1 2" with "___".  The 'W'
# flag is local to that one :C modifier, which the second :C modifier
# demonstrates: had 'W' carried over, only a single underscore would
# have been replaced with an 'x'.
.if ${:U1 2 3 1 2 3:C,1 2,___,Wg:C,_,x,} != "x__ 3 x__ 3"
.  error
.endif

# The regular expression does not match in the first word.
# It matches once in the second word, and the \0\0 doubles that word.
# In the third word, the regular expression matches as early as possible,
# and since the matches must not overlap, the next possible match would
# start at the 6, but at that point, there is only one character left,
# and that cannot match the regular expression "..".  Therefore only the
# "45" is doubled in the third word.
.if ${:U1 23 456:C,..,\0\0,} != "1 2323 45456"
.  error
.endif

# The '1' flag allows at most one replacement in the entire expression
# value, whether that value is treated as one big word or as many small
# words.
#
# Until 2020-08-28 the manual page claimed that the flags '1' and 'g'
# were orthogonal.  That was wrong: specifying both 'g' and '1' at the
# same time makes no sense.
.if ${:U12345 12345:C,.,\0\0,1} != "112345 12345"
.  error
.endif

# A regular expression that can match the empty string is applied in
# front of every single character of the word.
# XXX: In most other places where regular expressions are used, such a
# pattern matches at the end of the string as well.
.if ${:U1a2b3c:C,a*,*,g} != "*1**2*b*3*c"
.  error
.endif

# The dot in a regular expression matches every character, a newline
# included.  Most other contexts that use regular expressions exclude
# the newline from the dot.  Make calls regcomp without REG_NEWLINE, so
# a newline is an ordinary character.
.if ${:U"${.newline}":C,.,.,g} != "..."
.  error
.endif

# Several asterisks in a row are not a valid regular expression.  Make
# prints an error message and (as of 2020-08-28) stops parsing in the
# middle of the variable expression.  The part that was not parsed is
# copied verbatim to the output, which is unexpected and can lead to
# strange shell commands being run.
mod-regex-compile-error:
	@echo $@: ${:Uword1 word2:C,****,____,g:C,word,____,:Q}.
# These tests produce error messages, but as of 2020-08-28 parsing and
# execution simply go on as if nothing bad had happened.
mod-regex-limits:
	@echo $@:11-missing:${:U1 23 456:C,..,\1\1,:Q}
	@echo $@:11-ok:${:U1 23 456:C,(.).,\1\1,:Q}
	@echo $@:22-missing:${:U1 23 456:C,..,\2\2,:Q}
	@echo $@:22-missing:${:U1 23 456:C,(.).,\2\2,:Q}
	@echo $@:22-ok:${:U1 23 456:C,(.)(.),\2\2,:Q}
	# The :C modifier only handles single-digit capturing groups,
	# which is more than enough for daily use.
	@echo $@:capture:${:UabcdefghijABCDEFGHIJrest:C,(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.),\9\8\7\6\5\4\3\2\1\0\10\11\12,}

mod-regex-errors:
	@echo $@: ${UNDEF:Uvalue:C,[,,}

	# If the replacement pattern produces a parse error because of an
	# unknown modifier, the parse error is ignored in ParseModifierPart
	# and the faulty variable expression expands to "".
	@echo $@: ${word:L:C,.*,x${:U:Z}y,W}

# When a regular expression has alternatives, some of its capturing
# groups may not be set by a given match.  Make calls these "unmatched
# subexpressions".
#
# From var.c 1.16 (1996-12-24) until just before var.c 1.933 (2021-06-21),
# an unmatched subexpression produced an "error message" and nothing
# more, since that "error handling" had no influence on the exit status.
#
# Before 2021-06-21 this warning could not be turned off, so alternative
# matches were seldom, if ever, combined with capturing groups.
#
# Since var.c 1.933 from 2021-06-21, the message is printed only in lint
# mode (-dL), not in default mode.
#
# Instead of the change from var.c 1.933, a possible mitigation would
# have been a new flag 'U' next to the existing '1Wg' flags of the ':C'
# modifier, used as in ':C,(a.)|(b.),\1\2,U' to treat unmatched
# subexpressions as empty.  That approach would have been syntactically
# ambiguous, because the modifiers ':S' and ':C' are open-ended (see
# mod-subst-chain): no ':' is needed to separate them from the next
# modifier.  Luckily the modifier :U does not make sense after :C, so
# this case does not happen in practice.
unmatched-subexpression:
	# In each of the following cases, if the regular expression matches at
	# all, the subexpression \1 matches as well.
	@echo $@.ok: ${:U1 1 2 3 5 8 13 21 34:C,1(.*),one\1,}

	# In the following cases:
	# * The subexpression \1 is only defined for 1 and 13.
	# * The subexpression \2 is only defined for 2 and 21.
	# * If the regular expression does not match at all, the
	#   replacement string is not analyzed, thus no error messages.
	# In total, there are 5 error messages about unmatched subexpressions.
	@echo $@.1: ${:U 1:C,1(.*)|2(.*),(\1)(\2),:Q}	# missing \2
	@echo $@.1: ${:U 1:C,1(.*)|2(.*),(\1)(\2),:Q}	# missing \2
	@echo $@.2: ${:U 2:C,1(.*)|2(.*),(\1)(\2),:Q}	# missing \1
	@echo $@.3: ${:U 3:C,1(.*)|2(.*),(\1)(\2),:Q}
	@echo $@.5: ${:U 5:C,1(.*)|2(.*),(\1)(\2),:Q}
	@echo $@.8: ${:U 8:C,1(.*)|2(.*),(\1)(\2),:Q}
	@echo $@.13: ${:U 13:C,1(.*)|2(.*),(\1)(\2),:Q}	# missing \2
	@echo $@.21: ${:U 21:C,1(.*)|2(.*),(\1)(\2),:Q}	# missing \1
	@echo $@.34: ${:U 34:C,1(.*)|2(.*),(\1)(\2),:Q}

	# And now all together: 5 error messages for 1, 1, 2, 13, 21.
	@echo $@.all: ${:U1 1 2 3 5 8 13 21 34:C,1(.*)|2(.*),(\1)(\2),:Q}