xref: /freebsd/contrib/bmake/unit-tests/directive-for-escape.mk (revision 18054d0220cfc8df9c9568c437bd6fbb59d53c3c)
1# $NetBSD: directive-for-escape.mk,v 1.15 2022/01/27 20:15:14 rillig Exp $
2#
3# Test escaping of special characters in the iteration values of a .for loop.
4# These values get expanded later using the :U variable modifier, and this
5# escaping and unescaping must pass all characters and strings effectively
6# unmodified.
7
8.MAKEFLAGS: -df
9
10# Even though the .for loops take quotes into account when splitting the
11# string into words, the quotes don't need to be balanced, as of 2020-12-31.
12# This could be considered a bug.
13ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
14
15# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16# the loop.  Not only would it need the escaping for the variable modifier
17# ':U' but also the escaping for the line-end comment.
18.for chars in ${ASCII}
19.  info ${chars}
20.endfor
21
22# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
23# as comment character.  Using 3 backslashes doesn't help either since
24# then the situation is essentially the same as with 1 backslash.
25# This means that a '#' sign cannot be passed in the value of a .for loop
26# at all.
27ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
28.for chars in ${ASCII.2020-12-31}
29.  info ${chars}
30.endfor
31
32# Cover the code in ExprLen.
33#
34# XXX: It is unexpected that the variable V gets expanded in the loop body.
35# The double '$$' should intuitively prevent exactly this.  Probably nobody
36# was adventurous enough to use literal dollar signs in the values of a .for
37# loop, allowing this edge case to go unnoticed for years.
38#
39# See for.c, function ExprLen.
40V=		value
41VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
42.for i in ${VALUES}
43.  info $i
44.endfor
45
46# Try to cover the code for nested '{}' in ExprLen, without success.
47#
48# The value of the variable VALUES is not meant to be a variable expression.
49# Instead, it is meant to represent literal text, the only escaping mechanism
50# being that each '$' is written as '$$'.
51#
52# The .for loop splits ${VALUES} into 3 words, at the space characters, since
53# the '$$' is an ordinary character and the spaces are not escaped.
54#	Word 1 is '${UNDEF:U\$\$'
55#	Word 2 is '{{}}'
56#	Word 3 is 'end}'
57# The first iteration expands the body of the .for loop to:
58# expect: .  info ${:U\${UNDEF\:U\\$\\$}
59# The modifier ':U' unescapes the '\$' to a simple '$'.
60# The modifier ':U' unescapes the '\:' to a simple ':'.
61# The modifier ':U' unescapes the '\\' to a simple '\'.
62# The modifier ':U' resolves the expression '$\' to the word 'backslash', due
63# to the following variable definition.
64${:U\\}=	backslash
65# FIXME: There was no expression '$\' in the original text of the previous
66# line, that's a surprise in the parser.
67# The modifier ':U' unescapes the '\$' to a simple '$'.
68# expect+4: ${UNDEF:U\backslash$
69VALUES=		$${UNDEF:U\$$\$$ {{}} end}
70# XXX: Where in the code does the '\$\$' get converted into a single '\$'?
71.for i in ${VALUES}
72.  info $i
73.endfor
74
75# Second try to cover the code for nested '{}' in ExprLen.
76#
77# XXX: It is wrong that ExprLen requires the braces to be balanced.
78# Each variable modifier has its own inconsistent way of parsing nested
79# variable expressions, braces and parentheses.  (Compare ':M', ':S', and
80# ':D' for details.)  The only sensible thing to do is therefore to let
81# Var_Parse do all the parsing work.
82VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
83.for i in ${VALUES}
84.  info $i
85.endfor
86
87# A single trailing dollar doesn't happen in practice.
88# The dollar sign is correctly passed through to the body of the .for loop.
89# There, it is expanded by the .info directive, but even there a trailing
90# dollar sign is kept as-is.
91.for i in ${:U\$}
92.  info ${i}
93.endfor
94
95# As of 2020-12-31, the name of the iteration variable can even contain
96# colons, which then affects variable expressions having this exact modifier.
97# This is clearly an unintended side effect of the implementation.
98NUMBERS=	one two three
99.for NUMBERS:M*e in replaced
100.  info ${NUMBERS} ${NUMBERS:M*e}
101.endfor
102
103# As of 2020-12-31, the name of the iteration variable can contain braces,
104# which gets even more surprising than colons, since it allows replacing
105# sequences of variable expressions.  There is no practical use case for
106# this, though.
107BASENAME=	one
108EXT=		.c
109.for BASENAME}${EXT in replaced
110.  info ${BASENAME}${EXT}
111.endfor
112
113# Demonstrate the various ways to refer to the iteration variable.
114i=		outer
115i2=		two
116i,=		comma
117.for i in inner
118.  info .        $$i: $i
119.  info .      $${i}: ${i}
120.  info .   $${i:M*}: ${i:M*}
121.  info .      $$(i): $(i)
122.  info .   $$(i:M*): $(i:M*)
123.  info . $${i$${:U}}: ${i${:U}}
124.  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
125.  info .     $${i2}: ${i2}
126.  info .     $${i,}: ${i,}
127.  info .  adjacent: $i${i}${i:M*}$i
128.endfor
129
130# The variable name can be a single '$' since there is no check on valid
131# variable names. ForLoop_SubstVarShort skips "stupid" variable names,
132# but ForLoop_SubstVarLong naively parses the body of the loop, substituting
133# each '${$}' with an actual 'dollar'.
134.for $ in dollar
135.  info eight $$$$$$$$ and no cents.
136.  info eight ${$}${$}${$}${$} and no cents.
137.endfor
138# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
139# a variable expression. The inner '$' is followed by a '}' and is thus a
140# silent syntax error, the '$' is skipped. The variable name is thus '', and
141# since there is never a variable named '', the whole expression '${$}'
142# evaluates to an empty string.
143closing-brace=		}		# guard against an
144${closing-brace}=	<closing-brace>	# alternative interpretation
145.info eight ${$}${$}${$}${$} and no cents.
146
147# What happens if the values from the .for loop contain a literal newline?
148# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
149# body of the .for loop, where it was then interpreted as a literal newline,
150# leading to syntax errors such as "Unclosed variable expression" in the upper
151# line and "Invalid line type" in the lower line.
152.for i in "${.newline}"
153.  info short: $i
154.  info long: ${i}
155.endfor
156
157# No error since the newline character is not actually used.
158.for i in "${.newline}"
159.endfor
160
161# Between for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
162# a newline character in a .for loop led to a crash since at the point where
163# the error message including the stack trace is printed, the body of the .for
164# loop is assembled, and at that point, ForLoop.nextItem had already been
165# advanced.
166.MAKEFLAGS: -dp
167.for i in "${.newline}"
168: $i
169.endfor
170.MAKEFLAGS: -d0
171
172.MAKEFLAGS: -df
173.for i in \# \\\#
174# $i
175.endfor
176
177.for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
178# $i
179.endfor
180
181# The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
182# target-local variables.  See for.c 1.45 from 2009-01-14.
183.for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
184# $i
185.endfor
186# expect: # ${:U${.TARGET}}
187# XXX: Why does '$' result in the same text as '$$'?
188# expect: # ${:U${.TARGET}}
189# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
190# expect: # ${:U$${.TARGET\}}
191# XXX: Why does '$' result in the same text as '$$'?
192# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
193# expect: # ${:U$${.TARGET\}}
194
195.for i in ((( {{{ ))) }}}
196# $i
197.endfor
198.MAKEFLAGS: -d0
199
200all:
201