xref: /freebsd/contrib/bmake/unit-tests/directive-for-escape.mk (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1# $NetBSD: directive-for-escape.mk,v 1.28 2024/07/07 11:20:10 rillig Exp $
2#
3# Test escaping of special characters in the iteration values of a .for loop.
4# These values get expanded later using the :U variable modifier, and this
5# escaping and unescaping must pass all characters and strings unmodified.
6
7.MAKEFLAGS: -df
8
9# Even though the .for loops take quotes into account when splitting the
10# string into words, the quotes don't need to be balanced, as of 2020-12-31.
11# This could be considered a bug.
12ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
13
14
15# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16# the loop.  Not only would it need the escaping for the variable modifier
17# ':U' but also the escaping for the line-end comment.
18# expect+3: while evaluating "${:U!"" with value "!"": Unclosed expression, expecting '}' for modifier "U!""
19# expect+2: !"
20.for chars in ${ASCII}
21.  info ${chars}
22.endfor
23
24# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
25# as comment character.  Using 3 backslashes doesn't help either since
26# then the situation is essentially the same as with 1 backslash.
27# This means that a '#' sign cannot be passed in the value of a .for loop
28# at all.
29ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
30# expect+3: while evaluating "${:U!"\\\\" with value "!"\\": Unclosed expression, expecting '}' for modifier "U!"\\\\"
31# expect+2: !"\\
32.for chars in ${ASCII.2020-12-31}
33.  info ${chars}
34.endfor
35
36# Cover the code in ExprLen.
37#
38# XXX: It is unexpected that the variable V gets expanded in the loop body.
39# The double '$$' should intuitively prevent exactly this.  Probably nobody
40# was adventurous enough to use literal dollar signs in the values of a .for
41# loop, allowing this edge case to go unnoticed for years.
42#
43# See for.c, function ExprLen.
44V=		value
45VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
46# expect: .  info ${:U\$}
47# expect+10: $
48# expect: .  info ${:U${V}}
49# expect+8: value
50# expect: .  info ${:U${V:=-with-modifier}}
51# expect+6: value-with-modifier
52# expect: .  info ${:U$(V)}
53# expect+4: value
54# expect: .  info ${:U$(V:=-with-modifier)}
55# expect+2: value-with-modifier
56.for i in ${VALUES}
57.  info $i
58.endfor
59#
60# Providing the loop items directly has the same effect.
61# expect: .  info ${:U\$}
62# expect+7: $
63# expect: .  info ${:U${V}}
64# expect+5: value
65# expect+4: value-with-modifier
66# expect+3: value
67# expect+2: value-with-modifier
68.for i in $$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
69.  info $i
70.endfor
71
72# Try to cover the code for nested '{}' in ExprLen, without success.
73#
74# The value of the variable VALUES is not meant to be an expression.
75# Instead, it is meant to represent literal text, the only escaping mechanism
76# being that each '$' is written as '$$'.
77VALUES=		$${UNDEF:U\$$\$$ {{}} end}
78#
79# The .for loop splits ${VALUES} into 3 words, at the space characters, since
80# the '$$' is an ordinary character and the spaces are not escaped.
81#	Word 1 is '${UNDEF:U\$\$'
82#	Word 2 is '{{}}'
83#	Word 3 is 'end}'
84#
85# Each of these words is now inserted in the body of the .for loop.
86.for i in ${VALUES}
87# $i
88.endfor
89#
90# When these words are injected into the body of the .for loop, each inside a
91# '${:U...}' expression, the result is:
92#
93# expect: For: loop body with i = ${UNDEF:U\$\$:
94# expect: # ${:U\${UNDEF\:U\\$\\$}
95# expect: For: loop body with i = {{}}:
96# expect: # ${:U{{\}\}}
97# expect: For: loop body with i = end}:
98# expect: # ${:Uend\}}
99# expect: For: end for 1
100#
101# The first of these expressions is the most interesting one, due to its many
102# special characters.  This expression is properly balanced:
103#
104#	Text	Meaning		Explanation
105#	\$	$		escaped
106#	{	{		ordinary text
107#	UNDEF	UNDEF		ordinary text
108#	\:	:		escaped
109#	U	U		ordinary text
110#	\\	\		escaped
111#	$\	(expr)		an expression, the variable name is '\'
112#	\$	$		escaped
113#
114# To make the expression '$\' visible, define it to an actual word:
115${:U\\}=	backslash
116# expect+4: ${UNDEF:U\backslash$
117# expect+3: {{}}
118# expect+2: end}
119.for i in ${VALUES}
120.  info $i
121.endfor
122#
123# FIXME: There was no expression '$\' in the original text of the variable
124# 'VALUES', that's a surprise in the parser.
125
126
127# The second attempt to cover the code for nested '{}' in ExprLen.
128#
129# XXX: It is not the job of ExprLen to parse an expression, it is naive to
130# expect ExprLen to get all the details right in just a few lines of code.
131# Each variable modifier has its own inconsistent way of parsing nested
132# expressions, braces and parentheses.  (Compare ':M', ':S', and
133# ':D' for details.)  The only sensible thing to do is therefore to let
134# Var_Parse do all the parsing work.
135VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
136# expect+2: begin<fallback>end
137.for i in ${VALUES}
138.  info $i
139.endfor
140
141# A single trailing dollar doesn't happen in practice.
142# The dollar sign is correctly passed through to the body of the .for loop.
143# There, it is expanded by the .info directive, but even there a trailing
144# dollar sign is kept as-is.
145# expect+2: $
146.for i in ${:U\$}
147.  info ${i}
148.endfor
149
150# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
151# could contain colons, which affected expressions having this exact
152# modifier.  This possibility was neither intended nor documented.
153NUMBERS=	one two three
154# expect+1: invalid character ':' in .for loop variable name
155.for NUMBERS:M*e in replaced
156.  info ${NUMBERS} ${NUMBERS:M*e}
157.endfor
158
159# Before for.c 1.173 from 2023-05-08, the name of the iteration variable
160# could contain braces, which allowed replacing sequences of
161# expressions.  This possibility was neither intended nor documented.
162BASENAME=	one
163EXT=		.c
164# expect+1: invalid character '}' in .for loop variable name
165.for BASENAME}${EXT in replaced
166.  info ${BASENAME}${EXT}
167.endfor
168
169# Demonstrate the various ways to refer to the iteration variable.
170i=		outer
171i2=		two
172i,=		comma
173# expect+2: inner inner inner inner inner
174.for i in inner
175.  info $i ${i} ${i:M*} $(i) $(i:M*)
176.endfor
177# expect+2: outer
178.for i in inner
179.  info ${i${:U}}
180.endfor
181# expect+2: inner}
182.for i in inner
183.  info ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
184.endfor
185# expect+2: two comma innerinnerinnerinner
186.for i in inner
187.  info ${i2} ${i,} $i${i}${i:M*}$i
188.endfor
189
190# Before for.c 1.173 from 2023-05-08, the variable name could be a single '$'
191# since there was no check on valid variable names.  ForLoop_SubstVarShort
192# skipped "stupid" variable names, but ForLoop_SubstVarLong naively
193# parsed the body of the loop, substituting each '${$}' with an actual
194# '${:Udollar}'.
195# expect+1: invalid character '$' in .for loop variable name
196.for $ in dollar
197.  info eight $$$$$$$$ and no cents.
198.  info eight ${$}${$}${$}${$} and no cents.
199.endfor
200# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
201# an expression. The inner '$' is followed by a '}' and is thus a
202# silent syntax error, the '$' is skipped. The variable name is thus '', and
203# since there is never a variable named '', the whole expression '${$}'
204# evaluates to an empty string.
205closing-brace=		}		# guard against an
206${closing-brace}=	<closing-brace>	# alternative interpretation
207# expect+1: eight  and no cents.
208.info eight ${$}${$}${$}${$} and no cents.
209
210# What happens if the values from the .for loop contain a literal newline?
211# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
212# body of the .for loop, where it was then interpreted as a literal newline,
213# leading to syntax errors such as "Unclosed variable expression" in the upper
214# line and "Invalid line type" in the lower line.
215#
216# The error message occurs in the line of the .for loop since that's the place
217# where the body of the .for loop is constructed, and at this point the
218# newline character gets replaced with a plain space.
219# expect+3: newline in .for value
220# expect+2: newline in .for value
221# expect+2: short: " ", long: " "
222.for i in "${.newline}"
223.  info short: $i, long: ${i}
224.endfor
225# No error since the newline character is not actually used in the body.
226.for i in "${.newline}"
227.endfor
228
229# Starting with for.c 1.161 from 2022-01-08 and before for.c 1.163 from
230# 2022-01-09, a newline character in a .for loop led to a crash since at the
231# point where the error message including the stack trace is printed, the
232# body of the .for loop is assembled, and at that point, ForLoop.nextItem had
233# already been advanced.
234.MAKEFLAGS: -dp
235# expect+1: newline in .for value
236.for i in "${.newline}"
237: $i
238.endfor
239.MAKEFLAGS: -d0
240
241.MAKEFLAGS: -df
242.for i in \# \\\#
243# $i
244.endfor
245
246.for i in $$ $$i $$(i) $${i} $$$$ $$$$$$$$ $${:U\$$\$$}
247# $i
248.endfor
249
250# The expression '${.TARGET}' must be preserved as it is one of the 7 built-in
251# target-local variables.  See for.c 1.45 from 2009-01-14.
252.for i in ${.TARGET} $${.TARGET} $$${.TARGET} $$$${.TARGET}
253# $i
254.endfor
255# expect: # ${:U${.TARGET}}
256# XXX: Why does '$' result in the same text as '$$'?
257# expect: # ${:U${.TARGET}}
258# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
259# expect: # ${:U$${.TARGET\}}
260# XXX: Why does '$' result in the same text as '$$'?
261# XXX: Why does the '$$' before the '${.TARGET}' lead to an escaped '}'?
262# expect: # ${:U$${.TARGET\}}
263
264.for i in ((( {{{ ))) }}}
265# $i
266.endfor
267
268
269# When generating the body of a .for loop, recognizing the expressions is done
270# using simple heuristics.  These can go wrong in ambiguous cases like this.
271# The variable name ',' is unusual as it is not a pronounceable name, but the
272# same principle applies for other names as well.  In this case, the text '$,'
273# is replaced with the expression '${:U1}', even though the text does not
274# represent an expression.
275.for , in 1
276# $$i $i
277# VAR= $$i $i ${a:S,from$,to,}
278VAR= $$i $i ${a:S,from$,to,}
279.endfor
280# expect: # $$i $i
281# expect: # VAR= $$i $i ${a:S,from${:U1}to,}
282# expect: VAR= $$i $i ${a:S,from${:U1}to,}
283#
284# When the above variable is evaluated, make will complain about the
285# unfinished modifier ':S', as it is missing a comma.
286