xref: /freebsd/contrib/bmake/unit-tests/directive-for-escape.mk (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1# $NetBSD: directive-for-escape.mk,v 1.12 2021/12/05 11:40:03 rillig Exp $
2#
3# Test escaping of special characters in the iteration values of a .for loop.
4# These values get expanded later using the :U variable modifier, and this
5# escaping and unescaping must pass all characters and strings effectively
6# unmodified.
7
8.MAKEFLAGS: -df
9
10# Even though the .for loops take quotes into account when splitting the
11# string into words, the quotes don't need to be balanced, as of 2020-12-31.
12# This could be considered a bug.
13ASCII=	!"\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
14
15# XXX: As of 2020-12-31, the '#' is not preserved in the expanded body of
16# the loop.  Not only would it need the escaping for the variable modifier
17# ':U' but also the escaping for the line-end comment.
18.for chars in ${ASCII}
19.  info ${chars}
20.endfor
21
22# As of 2020-12-31, using 2 backslashes before the '#' would treat the '#'
23# as comment character.  Using 3 backslashes doesn't help either since
24# then the situation is essentially the same as with 1 backslash.
25# This means that a '#' sign cannot be passed in the value of a .for loop
26# at all.
27ASCII.2020-12-31=	!"\\\#$$%&'()*+,-./0-9:;<=>?@A-Z[\]_^a-z{|}~
28.for chars in ${ASCII.2020-12-31}
29.  info ${chars}
30.endfor
31
32# Cover the code in ExprLen.
33#
34# XXX: It is unexpected that the variable V gets expanded in the loop body.
35# The double '$$' should intuitively prevent exactly this.  Probably nobody
36# was adventurous enough to use literal dollar signs in the values of a .for
37# loop, allowing this edge case to go unnoticed for years.
38#
39# See for.c, function ExprLen.
40V=		value
41VALUES=		$$ $${V} $${V:=-with-modifier} $$(V) $$(V:=-with-modifier)
42.for i in ${VALUES}
43.  info $i
44.endfor
45
46# Try to cover the code for nested '{}' in ExprLen, without success.
47#
48# The value of the variable VALUES is not meant to be a variable expression.
49# Instead, it is meant to represent literal text, the only escaping mechanism
50# being that each '$' is written as '$$'.
51#
52# The .for loop splits ${VALUES} into 3 words, at the space characters, since
53# these are not escaped.
54VALUES=		$${UNDEF:U\$$\$$ {{}} end}
55# XXX: Where in the code does the '\$\$' get converted into a single '\$'?
56.for i in ${VALUES}
57.  info $i
58.endfor
59
60# Second try to cover the code for nested '{}' in ExprLen.
61#
62# XXX: It is wrong that ExprLen requires the braces to be balanced.
63# Each variable modifier has its own inconsistent way of parsing nested
64# variable expressions, braces and parentheses.  (Compare ':M', ':S', and
65# ':D' for details.)  The only sensible thing to do is therefore to let
66# Var_Parse do all the parsing work.
67VALUES=		begin<$${UNDEF:Ufallback:N{{{}}}}>end
68.for i in ${VALUES}
69.  info $i
70.endfor
71
72# A single trailing dollar doesn't happen in practice.
73# The dollar sign is correctly passed through to the body of the .for loop.
74# There, it is expanded by the .info directive, but even there a trailing
75# dollar sign is kept as-is.
76.for i in ${:U\$}
77.  info ${i}
78.endfor
79
80# As of 2020-12-31, the name of the iteration variable can even contain
81# colons, which then affects variable expressions having this exact modifier.
82# This is clearly an unintended side effect of the implementation.
83NUMBERS=	one two three
84.for NUMBERS:M*e in replaced
85.  info ${NUMBERS} ${NUMBERS:M*e}
86.endfor
87
88# As of 2020-12-31, the name of the iteration variable can contain braces,
89# which gets even more surprising than colons, since it allows replacing
90# sequences of variable expressions.  There is no practical use case for
91# this, though.
92BASENAME=	one
93EXT=		.c
94.for BASENAME}${EXT in replaced
95.  info ${BASENAME}${EXT}
96.endfor
97
98# Demonstrate the various ways to refer to the iteration variable.
99i=		outer
100i2=		two
101i,=		comma
102.for i in inner
103.  info .        $$i: $i
104.  info .      $${i}: ${i}
105.  info .   $${i:M*}: ${i:M*}
106.  info .      $$(i): $(i)
107.  info .   $$(i:M*): $(i:M*)
108.  info . $${i$${:U}}: ${i${:U}}
109.  info .    $${i\}}: ${i\}}	# XXX: unclear why ForLoop_SubstVarLong needs this
110.  info .     $${i2}: ${i2}
111.  info .     $${i,}: ${i,}
112.  info .  adjacent: $i${i}${i:M*}$i
113.endfor
114
115# The variable name can be a single '$' since there is no check on valid
116# variable names. ForLoop_SubstVarShort skips "stupid" variable names,
117# but ForLoop_SubstVarLong naively parses the body of the loop, substituting
118# each '${$}' with an actual 'dollar'.
119.for $ in dollar
120.  info eight $$$$$$$$ and no cents.
121.  info eight ${$}${$}${$}${$} and no cents.
122.endfor
123# Outside a .for loop, '${$}' is interpreted differently. The outer '$' starts
124# a variable expression. The inner '$' is followed by a '}' and is thus a
125# silent syntax error, the '$' is skipped. The variable name is thus '', and
126# since there is never a variable named '', the whole expression '${$}'
127# evaluates to an empty string.
128closing-brace=		}		# guard against an
129${closing-brace}=	<closing-brace>	# alternative interpretation
130.info eight ${$}${$}${$}${$} and no cents.
131
132# What happens if the values from the .for loop contain a literal newline?
133# Before for.c 1.144 from 2021-06-25, the newline was passed verbatim to the
134# body of the .for loop, where it was then interpreted as a literal newline,
135# leading to syntax errors such as "Unclosed variable expression" in the upper
136# line and "Invalid line type" in the lower line.
137.for i in "${.newline}"
138.  info short: $i
139.  info long: ${i}
140.endfor
141
142all:
143