xref: /freebsd/contrib/bmake/unit-tests/directive-for-escape.mk (revision 5956d97f4b3204318ceb6aa9c77bd0bc6ea87a41)
1# $NetBSD: directive-for-escape.mk,v 1.16 2022/06/12 16:09:21 rillig Exp $
2#
3# Test escaping of special characters in the iteration values of a .for loop.
4# These values get expanded later using the :U variable modifier, and this
5# escaping and unescaping must pass all characters and strings effectively
6# unmodified.
7
8.MAKEFLAGS: -df
9
10# Even though the .for loops take quotes into account when splitting the
11# string into words, the quotes don't need to be balanced, as of 2020-12-31.
12# This could be considered a bug.
13ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
14
15# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16# the loop.  Not only would it need the escaping for the variable modifier
17# ':U' but also the escaping for the line-end comment.
18.for chars in ${ASCII}
19.  info ${chars}
20.endfor
21
22# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
23# as comment character.  Using 3 backslashes doesn't help either since
24# then the situation is essentially the same as with 1 backslash.
25# This means that a '#' sign cannot be passed in the value of a .for loop
26# at all.
27ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
28.for chars in ${ASCII.2020-12-31}
29.  info ${chars}
30.endfor
31
32# Cover the code in ExprLen.
33#
34# XXX: It is unexpected that the variable V gets expanded in the loop body.
35# The double '$$' should intuitively prevent exactly this.  Probably nobody
36# was adventurous enough to use literal dollar signs in the values of a .for
37# loop, allowing this edge case to go unnoticed for years.
38#
39# See for.c, function ExprLen.
40V=		value
41VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
42.for i in ${VALUES}
43.  info $i
44.endfor
45
46
47# Try to cover the code for nested '{}' in ExprLen, without success.
48#
49# The value of the variable VALUES is not meant to be a variable expression.
50# Instead, it is meant to represent literal text, the only escaping mechanism
51# being that each '$' is written as '$$'.
52VALUES=		$${UNDEF:U\$$\$$ {{}} end}
53#
54# The .for loop splits ${VALUES} into 3 words, at the space characters, since
55# the '$$' is an ordinary character and the spaces are not escaped.
56#	Word 1 is '${UNDEF:U\$\$'
57#	Word 2 is '{{}}'
58#	Word 3 is 'end}'
59#
60# Each of these words is now inserted in the body of the .for loop.
61.for i in ${VALUES}
62# $i
63.endfor
64#
65# When these words are injected into the body of the .for loop, each inside a
66# '${:U...}' expression, the result is:
67#
68# expect: For: loop body:
69# expect: # ${:U\${UNDEF\:U\\$\\$}
70# expect: For: loop body:
71# expect: # ${:U{{\}\}}
72# expect: For: loop body:
73# expect: # ${:Uend\}}
74# expect: For: end for 1
75#
76# The first of these expressions is the most interesting one, due to its many
77# special characters.  This expression is properly balanced:
78#
79#	Text	Meaning		Explanation
80#	\$	$		escaped
81#	{	{		ordinary text
82#	UNDEF	UNDEF		ordinary text
83#	\:	:		escaped
84#	U	U		ordinary text
85#	\\	\		escaped
86#	$\	(expr)		an expression, the variable name is '\'
87#	\$	$		escaped
88#
89# To make the expression '$\' visible, define it to an actual word:
90${:U\\}=	backslash
91.for i in ${VALUES}
92.  info $i
93.endfor
94#
95# expect-3: ${UNDEF:U\backslash$
96# expect-4: {{}}
97# expect-5: end}
98#
99# FIXME: There was no expression '$\' in the original text of the variable
100# 'VALUES', that's a surprise in the parser.
101
102
103# Second try to cover the code for nested '{}' in ExprLen.
104#
105# XXX: It is not the job of ExprLen to parse an expression, it is naive to
106# expect ExprLen to get all the details right in just a few lines of code.
107# Each variable modifier has its own inconsistent way of parsing nested
108# variable expressions, braces and parentheses.  (Compare ':M', ':S', and
109# ':D' for details.)  The only sensible thing to do is therefore to let
110# Var_Parse do all the parsing work.
111VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
112.for i in ${VALUES}
113.  info $i
114.endfor
115
116# A single trailing dollar doesn't happen in practice.
117# The dollar sign is correctly passed through to the body of the .for loop.
118# There, it is expanded by the .info directive, but even there a trailing
119# dollar sign is kept as-is.
120.for i in ${:U\$}
121.  info ${i}
122.endfor
123
124# As of 2020-12-31, the name of the iteration variable can even contain
125# colons, which then affects variable expressions having this exact modifier.
126# This is clearly an unintended side effect of the implementation.
127NUMBERS=	one two three
128.for NUMBERS:M*e in replaced
129.  info ${NUMBERS} ${NUMBERS:M*e}
130.endfor
131
132# As of 2020-12-31, the name of the iteration variable can contain braces,
133# which gets even more surprising than colons, since it allows replacing
134# sequences of variable expressions.  There is no practical use case for
135# this, though.
136BASENAME=	one
137EXT=		.c
138.for BASENAME}${EXT in replaced
139.  info ${BASENAME}${EXT}
140.endfor
141
142# Demonstrate the various ways to refer to the iteration variable.
143i=		outer
144i2=		two
145i,=		comma
146.for i in inner
147.  info .        $$i: $i
148.  info .      $${i}: ${i}
149.  info .   $${i:M*}: ${i:M*}
150.  info .      $$(i): $(i)
151.  info .   $$(i:M*): $(i:M*)
152.  info . $${i$${:U}}: ${i${:U}}
153.  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
154.  info .     $${i2}: ${i2}
155.  info .     $${i,}: ${i,}
156.  info .  adjacent: $i${i}${i:M*}$i
157.endfor
158
159# The variable name can be a single '$' since there is no check on valid
160# variable names. ForLoop_SubstVarShort skips "stupid" variable names though,
161# but ForLoop_SubstVarLong naively parses the body of the loop, substituting
162# each '${$}' with an actual 'dollar'.
163.for $ in dollar
164.  info eight $$$$$$$$ and no cents.
165.  info eight ${$}${$}${$}${$} and no cents.
166.endfor
167# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
168# a variable expression. The inner '$' is followed by a '}' and is thus a
169# silent syntax error, the '$' is skipped. The variable name is thus '', and
170# since there is never a variable named '', the whole expression '${$}'
171# evaluates to an empty string.
172closing-brace=		}		# guard against an
173${closing-brace}=	<closing-brace>	# alternative interpretation
174.info eight ${$}${$}${$}${$} and no cents.
175
176# What happens if the values from the .for loop contain a literal newline?
177# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
178# body of the .for loop, where it was then interpreted as a literal newline,
179# leading to syntax errors such as "Unclosed variable expression" in the upper
180# line and "Invalid line type" in the lower line.
181.for i in "${.newline}"
182.  info short: $i
183.  info long: ${i}
184.endfor
185
186# No error since the newline character is not actually used.
187.for i in "${.newline}"
188.endfor
189
190# Since for.c 1.161 from 2022-01-08 and before for.c 1.163 from 2022-01-09,
191# a newline character in a .for loop led to a crash since at the point where
192# the error message including the stack trace is printed, the body of the .for
193# loop is assembled, and at that point, ForLoop.nextItem had already been
194# advanced.
195.MAKEFLAGS: -dp
196.for i in "${.newline}"
197: $i
198.endfor
199.MAKEFLAGS: -d0
200
201.MAKEFLAGS: -df
202.for i in \# \\\#
203# $i
204.endfor
205
206.for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
207# $i
208.endfor
209
210# The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
211# target-local variables.  See for.c 1.45 from 2009-01-14.
212.for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
213# $i
214.endfor
215# expect: # ${:U${.TARGET}}
216# XXX: Why does '$' result in the same text as '$$'?
217# expect: # ${:U${.TARGET}}
218# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
219# expect: # ${:U$${.TARGET\}}
220# XXX: Why does '$' result in the same text as '$$'?
221# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
222# expect: # ${:U$${.TARGET\}}
223
224.for i in ((( {{{ ))) }}}
225# $i
226.endfor
227.MAKEFLAGS: -d0
228
229all:
230