xref: /freebsd/contrib/bmake/unit-tests/directive-for-escape.mk (revision d91f8db5f1822c43cd256f19aae1d059e4b25a26)
1# $NetBSD: directive-for-escape.mk,v 1.18 2023/05/09 19:43:12 rillig Exp $
2#
3# Test escaping of special characters in the iteration values of a .for loop.
4# These values get expanded later using the :U variable modifier, and this
5# escaping and unescaping must pass all characters and strings unmodified.
6
7# expect-all
8
9.MAKEFLAGS: -df
10
11# Even though the .for loops take quotes into account when splitting the
12# string into words, the quotes don't need to be balanced, as of 2020-12-31.
13# This could be considered a bug.
14ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
15
16
17# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
18# the loop.  Not only would it need the escaping for the variable modifier
19# ':U' but also the escaping for the line-end comment.
20.for chars in ${ASCII}
21.  info ${chars}
22.endfor
23# expect-2: !"
24
25# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
26# as comment character.  Using 3 backslashes doesn't help either since
27# then the situation is essentially the same as with 1 backslash.
28# This means that a '#' sign cannot be passed in the value of a .for loop
29# at all.
30ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
31.for chars in ${ASCII.2020-12-31}
32.  info ${chars}
33.endfor
34# expect-2: !"\\
35
36# Cover the code in ExprLen.
37#
38# XXX: It is unexpected that the variable V gets expanded in the loop body.
39# The double '$$' should intuitively prevent exactly this.  Probably nobody
40# was adventurous enough to use literal dollar signs in the values of a .for
41# loop, allowing this edge case to go unnoticed for years.
42#
43# See for.c, function ExprLen.
44V=		value
45VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
46.for i in ${VALUES}
47.  info $i
48.endfor
49# expect-2: $
50# expect-3: value
51# expect-4: value-with-modifier
52# expect-5: value
53# expect-6: value-with-modifier
54
55
56# Try to cover the code for nested '{}' in ExprLen, without success.
57#
58# The value of the variable VALUES is not meant to be a variable expression.
59# Instead, it is meant to represent literal text, the only escaping mechanism
60# being that each '$' is written as '$$'.
61VALUES=		$${UNDEF:U\$$\$$ {{}} end}
62#
63# The .for loop splits ${VALUES} into 3 words, at the space characters, since
64# the '$$' is an ordinary character and the spaces are not escaped.
65#	Word 1 is '${UNDEF:U\$\$'
66#	Word 2 is '{{}}'
67#	Word 3 is 'end}'
68#
69# Each of these words is now inserted in the body of the .for loop.
70.for i in ${VALUES}
71# $i
72.endfor
73#
74# When these words are injected into the body of the .for loop, each inside a
75# '${:U...}' expression, the result is:
76#
77# expect: For: loop body:
78# expect: # ${:U\${UNDEF\:U\\$\\$}
79# expect: For: loop body:
80# expect: # ${:U{{\}\}}
81# expect: For: loop body:
82# expect: # ${:Uend\}}
83# expect: For: end for 1
84#
85# The first of these expressions is the most interesting one, due to its many
86# special characters.  This expression is properly balanced:
87#
88#	Text	Meaning		Explanation
89#	\$	$		escaped
90#	{	{		ordinary text
91#	UNDEF	UNDEF		ordinary text
92#	\:	:		escaped
93#	U	U		ordinary text
94#	\\	\		escaped
95#	$\	(expr)		an expression, the variable name is '\'
96#	\$	$		escaped
97#
98# To make the expression '$\' visible, define it to an actual word:
99${:U\\}=	backslash
100.for i in ${VALUES}
101.  info $i
102.endfor
103#
104# expect-3: ${UNDEF:U\backslash$
105# expect-4: {{}}
106# expect-5: end}
107#
108# FIXME: There was no expression '$\' in the original text of the variable
109# 'VALUES', that's a surprise in the parser.
110
111
112# Second try to cover the code for nested '{}' in ExprLen.
113#
114# XXX: It is not the job of ExprLen to parse an expression, it is naive to
115# expect ExprLen to get all the details right in just a few lines of code.
116# Each variable modifier has its own inconsistent way of parsing nested
117# variable expressions, braces and parentheses.  (Compare ':M', ':S', and
118# ':D' for details.)  The only sensible thing to do is therefore to let
119# Var_Parse do all the parsing work.
120VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
121.for i in ${VALUES}
122.  info $i
123.endfor
124# expect-2: begin<fallback>end
125
126# A single trailing dollar doesn't happen in practice.
127# The dollar sign is correctly passed through to the body of the .for loop.
128# There, it is expanded by the .info directive, but even there a trailing
129# dollar sign is kept as-is.
130.for i in ${:U\$}
131.  info ${i}
132.endfor
133# expect-2: $
134
135# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
136# could contain colons, which affected variable expressions having this exact
137# modifier.  This possibility was neither intended nor documented.
138NUMBERS=	one two three
139# expect+1: invalid character ':' in .for loop variable name
140.for NUMBERS:M*e in replaced
141.  info ${NUMBERS} ${NUMBERS:M*e}
142.endfor
143
144# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
145# could contain braces, which allowed replacing sequences of variable
146# expressions.  This possibility was neither intended nor documented.
147BASENAME=	one
148EXT=		.c
149# expect+1: invalid character '}' in .for loop variable name
150.for BASENAME}${EXT in replaced
151.  info ${BASENAME}${EXT}
152.endfor
153
154# Demonstrate the various ways to refer to the iteration variable.
155i=		outer
156i2=		two
157i,=		comma
158.for i in inner
159.  info .        $$i: $i
160.  info .      $${i}: ${i}
161.  info .   $${i:M*}: ${i:M*}
162.  info .      $$(i): $(i)
163.  info .   $$(i:M*): $(i:M*)
164.  info . $${i$${:U}}: ${i${:U}}
165.  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
166.  info .     $${i2}: ${i2}
167.  info .     $${i,}: ${i,}
168.  info .  adjacent: $i${i}${i:M*}$i
169.endfor
170# expect-11: .        $i: inner
171# expect-11: .      ${i}: inner
172# expect-11: .   ${i:M*}: inner
173# expect-11: .      $(i): inner
174# expect-11: .   $(i:M*): inner
175# expect-11: . ${i${:U}}: outer
176# expect-11: .    ${i\}}: inner}
177# expect-11: .     ${i2}: two
178# expect-11: .     ${i,}: comma
179# expect-11: .  adjacent: innerinnerinnerinner
180
181# Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
182# since there was no check on valid variable names.  ForLoop_SubstVarShort
183# skipped "stupid" variable names, but ForLoop_SubstVarLong naively
184# parsed the body of the loop, substituting each '${$}' with an actual
185# '${:Udollar}'.
186# expect+1: invalid character '$' in .for loop variable name
187.for $ in dollar
188.  info eight $$$$$$$$ and no cents.
189.  info eight ${$}${$}${$}${$} and no cents.
190.endfor
191# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
192# a variable expression. The inner '$' is followed by a '}' and is thus a
193# silent syntax error, the '$' is skipped. The variable name is thus '', and
194# since there is never a variable named '', the whole expression '${$}'
195# evaluates to an empty string.
196closing-brace=		}		# guard against an
197${closing-brace}=	<closing-brace>	# alternative interpretation
198# expect+1: eight  and no cents.
199.info eight ${$}${$}${$}${$} and no cents.
200
201# What happens if the values from the .for loop contain a literal newline?
202# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
203# body of the .for loop, where it was then interpreted as a literal newline,
204# leading to syntax errors such as "Unclosed variable expression" in the upper
205# line and "Invalid line type" in the lower line.
206#
207# The error message occurs in the line of the .for loop since that's the place
208# where the body of the .for loop is constructed, and at this point the
209# newline character gets replaced with a plain space.
210# expect+2: newline in .for value
211# expect+1: newline in .for value
212.for i in "${.newline}"
213.  info short: $i
214.  info long: ${i}
215.endfor
216# expect-3: short: " "
217# expect-3: long: " "
218
219# No error since the newline character is not actually used.
220.for i in "${.newline}"
221.endfor
222
223# Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
224# a newline character in a .for loop led to a crash since at the point where
225# the error message including the stack trace is printed, the body of the .for
226# loop is assembled, and at that point, ForLoop.nextItem had already been
227# advanced.
228.MAKEFLAGS: -dp
229# expect+1: newline in .for value
230.for i in "${.newline}"
231: $i
232.endfor
233.MAKEFLAGS: -d0
234
# Switch the .for loop debug log (-df) back on after the preceding '-d0', so
# that the expanded loop bodies of the following loops show up in the output.
235.MAKEFLAGS: -df
# Iteration values that contain a '#' preceded by 1 and by 3 backslashes.
# The loop body consists of a comment only, so nothing is evaluated; the
# only visible effect is the loop body text in the debug log, which shows
# how each value is escaped inside the generated '${:U...}' expression.
236.for i in \# \\\#
237# $i
238.endfor
239
# Iteration values built from escaped dollar signs ('$$'): alone, repeated,
# and followed by text that resembles a reference to the iteration variable
# 'i' or a ':U' expression.  As the loop body is only a comment, the values
# are merely substituted into the body text, which is visible in the debug
# log while '-df' is in effect.
240.for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
241# $i
242.endfor
243
244# The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
245# target-local variables.  See for.c 1.45 from 2009-01-14.
246.for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
247# $i
248.endfor
249# expect: # ${:U${.TARGET}}
250# XXX: Why does '$' result in the same text as '$$'?
251# expect: # ${:U${.TARGET}}
252# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
253# expect: # ${:U$${.TARGET\}}
254# XXX: Why does '$' result in the same text as '$$'?
255# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
256# expect: # ${:U$${.TARGET\}}
257
# Iteration values consisting solely of unbalanced parentheses and braces.
# The comment-only loop body is not evaluated; the debug log shows which of
# these characters get escaped inside the generated '${:U...}' expression.
258.for i in ((( {{{ ))) }}}
259# $i
260.endfor
# End of the section that is logged with '-df'; '-d0' is used throughout this
# file to switch the debug output off again.
261.MAKEFLAGS: -d0
262
# Target without prerequisites or commands.  The directives above ('.for',
# '.info', '.MAKEFLAGS') are all processed while the makefile is read, so
# nothing remains to be done when this target is made.
263all:
264