xref: /freebsd/contrib/bmake/unit-tests/directive-for-escape.mk (revision a91a246563dffa876a52f53a98de4af9fa364c52)
1# $NetBSD: directive-for-escape.mk,v 1.23 2023/11/19 22:32:44 rillig Exp $
2#
3# Test escaping of special characters in the iteration values of a .for loop.
4# These values get expanded later using the :U variable modifier, and this
5# escaping and unescaping must pass all characters and strings unmodified.
6
7.MAKEFLAGS: -df
8
9# Even though the .for loops take quotes into account when splitting the
10# string into words, the quotes don't need to be balanced, as of 2020-12-31.
11# This could be considered a bug.
12ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
13
14
15# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16# the loop.  Not only would it need the escaping for the variable modifier
17# ':U' but also the escaping for the line-end comment.
18.for chars in ${ASCII}
19.  info ${chars}
20.endfor
21# expect-2: !"
22
23# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
24# as a comment character.  Using 3 backslashes doesn't help either since
25# then the situation is essentially the same as with 1 backslash.
26# This means that a '#' sign cannot be passed in the value of a .for loop
27# at all.
28ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
29.for chars in ${ASCII.2020-12-31}
30.  info ${chars}
31.endfor
32# expect-2: !"\\
33
34# Cover the code in ExprLen.
35#
36# XXX: It is unexpected that the variable V gets expanded in the loop body.
37# The double '$$' should intuitively prevent exactly this.  Probably nobody
38# was adventurous enough to use literal dollar signs in the values of a .for
39# loop, allowing this edge case to go unnoticed for years.
40#
41# See for.c, function ExprLen.
42V=		value
43VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
44.for i in ${VALUES}
45.  info $i
46.endfor
47# expect: .  info ${:U\$}
48# expect-3: $
49# expect: .  info ${:U${V}}
50# expect-5: value
51# expect: .  info ${:U${V:=-with-modifier}}
52# expect-7: value-with-modifier
53# expect: .  info ${:U$(V)}
54# expect-9: value
55# expect: .  info ${:U$(V:=-with-modifier)}
56# expect-11: value-with-modifier
57#
58# Providing the loop items directly has the same effect.
59.for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
60.  info $i
61.endfor
62# expect: .  info ${:U\$}
63# expect-3: $
64# expect: .  info ${:U${V}}
65# expect-5: value
66# expect-6: value-with-modifier
67# expect-7: value
68# expect-8: value-with-modifier
69
70# Try to cover the code for nested '{}' in ExprLen, without success.
71#
72# The value of the variable VALUES is not meant to be an expression.
73# Instead, it is meant to represent literal text, the only escaping mechanism
74# being that each '$' is written as '$$'.
75VALUES=		$${UNDEF:U\$$\$$ {{}} end}
76#
77# The .for loop splits ${VALUES} into 3 words, at the space characters, since
78# the '$$' is an ordinary character and the spaces are not escaped.
79#	Word 1 is '${UNDEF:U\$\$'
80#	Word 2 is '{{}}'
81#	Word 3 is 'end}'
82#
83# Each of these words is now inserted in the body of the .for loop.
84.for i in ${VALUES}
85# $i
86.endfor
87#
88# When these words are injected into the body of the .for loop, each inside a
89# '${:U...}' expression, the result is:
90#
91# expect: For: loop body with i = ${UNDEF:U\$\$:
92# expect: # ${:U\${UNDEF\:U\\$\\$}
93# expect: For: loop body with i = {{}}:
94# expect: # ${:U{{\}\}}
95# expect: For: loop body with i = end}:
96# expect: # ${:Uend\}}
97# expect: For: end for 1
98#
99# The first of these expressions is the most interesting one, due to its many
100# special characters.  This expression is properly balanced:
101#
102#	Text	Meaning		Explanation
103#	\$	$		escaped
104#	{	{		ordinary text
105#	UNDEF	UNDEF		ordinary text
106#	\:	:		escaped
107#	U	U		ordinary text
108#	\\	\		escaped
109#	$\	(expr)		an expression, the variable name is '\'
110#	\$	$		escaped
111#
112# To make the expression '$\' visible, define it to an actual word:
113${:U\\}=	backslash
114.for i in ${VALUES}
115.  info $i
116.endfor
117#
118# expect-3: ${UNDEF:U\backslash$
119# expect-4: {{}}
120# expect-5: end}
121#
122# FIXME: There was no expression '$\' in the original text of the variable
123# 'VALUES', that's a surprise in the parser.
124
125
126# Second try to cover the code for nested '{}' in ExprLen.
127#
128# XXX: It is not the job of ExprLen to parse an expression, it is naive to
129# expect ExprLen to get all the details right in just a few lines of code.
130# Each variable modifier has its own inconsistent way of parsing nested
131# expressions, braces and parentheses.  (Compare ':M', ':S', and
132# ':D' for details.)  The only sensible thing to do is therefore to let
133# Var_Parse do all the parsing work.
134VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
135.for i in ${VALUES}
136.  info $i
137.endfor
138# expect-2: begin<fallback>end
139
140# A single trailing dollar doesn't happen in practice.
141# The dollar sign is correctly passed through to the body of the .for loop.
142# There, it is expanded by the .info directive, but even there a trailing
143# dollar sign is kept as-is.
144.for i in ${:U\$}
145.  info ${i}
146.endfor
147# expect-2: $
148
149# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
150# could contain colons, which affected expressions having this exact
151# modifier.  This possibility was neither intended nor documented.
152NUMBERS=	one two three
153# expect+1: invalid character ':' in .for loop variable name
154.for NUMBERS:M*e in replaced
155.  info ${NUMBERS} ${NUMBERS:M*e}
156.endfor
157
158# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
159# could contain braces, which allowed replacing sequences of
160# expressions.  This possibility was neither intended nor documented.
161BASENAME=	one
162EXT=		.c
163# expect+1: invalid character '}' in .for loop variable name
164.for BASENAME}${EXT in replaced
165.  info ${BASENAME}${EXT}
166.endfor
167
168# Demonstrate the various ways to refer to the iteration variable.
169i=		outer
170i2=		two
171i,=		comma
172.for i in inner
173.  info .        $$i: $i
174.  info .      $${i}: ${i}
175.  info .   $${i:M*}: ${i:M*}
176.  info .      $$(i): $(i)
177.  info .   $$(i:M*): $(i:M*)
178.  info . $${i$${:U}}: ${i${:U}}
179.  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
180.  info .     $${i2}: ${i2}
181.  info .     $${i,}: ${i,}
182.  info .  adjacent: $i${i}${i:M*}$i
183.endfor
184# expect-11: .        $i: inner
185# expect-11: .      ${i}: inner
186# expect-11: .   ${i:M*}: inner
187# expect-11: .      $(i): inner
188# expect-11: .   $(i:M*): inner
189# expect-11: . ${i${:U}}: outer
190# expect-11: .    ${i\}}: inner}
191# expect-11: .     ${i2}: two
192# expect-11: .     ${i,}: comma
193# expect-11: .  adjacent: innerinnerinnerinner
194
195# Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
196# since there was no check on valid variable names.  ForLoop_SubstVarShort
197# skipped "stupid" variable names, but ForLoop_SubstVarLong naively
198# parsed the body of the loop, substituting each '${$}' with an actual
199# '${:Udollar}'.
200# expect+1: invalid character '$' in .for loop variable name
201.for $ in dollar
202.  info eight $$$$$$$$ and no cents.
203.  info eight ${$}${$}${$}${$} and no cents.
204.endfor
205# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
206# an expression. The inner '$' is followed by a '}' and is thus a
207# silent syntax error; the '$' is skipped. The variable name is thus '', and
208# since there is never a variable named '', the whole expression '${$}'
209# evaluates to an empty string.
210closing-brace=		}		# guard against an
211${closing-brace}=	<closing-brace>	# alternative interpretation
212# expect+1: eight  and no cents.
213.info eight ${$}${$}${$}${$} and no cents.
214
215# What happens if the values from the .for loop contain a literal newline?
216# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
217# body of the .for loop, where it was then interpreted as a literal newline,
218# leading to syntax errors such as "Unclosed variable expression" in the upper
219# line and "Invalid line type" in the lower line.
220#
221# The error message occurs in the line of the .for loop since that's the place
222# where the body of the .for loop is constructed, and at this point the
223# newline character gets replaced with a plain space.
224# expect+2: newline in .for value
225# expect+1: newline in .for value
226.for i in "${.newline}"
227.  info short: $i
228.  info long: ${i}
229.endfor
230# expect-3: short: " "
231# expect-3: long: " "
232
233# No error since the newline character is not actually used.
234.for i in "${.newline}"
235.endfor
236
237# Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
238# a newline character in a .for loop led to a crash since at the point where
239# the error message including the stack trace is printed, the body of the .for
240# loop is assembled, and at that point, ForLoop.nextItem had already been
241# advanced.
242.MAKEFLAGS: -dp
243# expect+1: newline in .for value
244.for i in "${.newline}"
245: $i
246.endfor
247.MAKEFLAGS: -d0
248
249.MAKEFLAGS: -df
250.for i in \# \\\#
251# $i
252.endfor
253
254.for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
255# $i
256.endfor
257
258# The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
259# target-local variables.  See for.c 1.45 from 2009-01-14.
260.for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
261# $i
262.endfor
263# expect: # ${:U${.TARGET}}
264# XXX: Why does '$' result in the same text as '$$'?
265# expect: # ${:U${.TARGET}}
266# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
267# expect: # ${:U$${.TARGET\}}
268# XXX: Why does '$' result in the same text as '$$'?
269# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
270# expect: # ${:U$${.TARGET\}}
271
272.for i in ((( {{{ ))) }}}
273# $i
274.endfor
275
276
277# When generating the body of a .for loop, recognizing the expressions is done
278# using simple heuristics.  These can go wrong in ambiguous cases like this.
279# The variable name ',' is unusual as it is not a pronounceable name, but the
280# same principle applies for other names as well.  In this case, the text '$,'
281# is replaced with the expression '${:U1}', even though the text does not
282# represent an expression.
283.for , in 1
284# $$i $i
285# VAR= $$i $i ${a:S,from$,to,}
286VAR= $$i $i ${a:S,from$,to,}
287.endfor
288# expect: # $$i $i
289# expect: # VAR= $$i $i ${a:S,from${:U1}to,}
290# expect: VAR= $$i $i ${a:S,from${:U1}to,}
291#
292# When the above variable is evaluated, make will complain about the
293# unfinished modifier ':S', as it is missing a comma.
294