xref: /freebsd/sys/kern/imgact_shell.c (revision 7afc53b8dfcc7d5897920ce6cc7e842fbb4ab813)
1 /*-
2  * Copyright (c) 1993, David Greenman
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/vnode.h>
32 #include <sys/proc.h>
33 #include <sys/systm.h>
34 #include <sys/sysproto.h>
35 #include <sys/exec.h>
36 #include <sys/imgact.h>
37 #include <sys/kernel.h>
38 
/*
 * Compile in the temporary fallback that selects the older option-parsing
 * behavior for scripts whose `#!' is immediately followed by OLD_MAGIC.
 */
#define	KEEP_OLDCODE	1

/*
 * Two-character magic values, written so that they compare equal to a
 * native-endian 16-bit load of the characters named in each comment.
 * DBG_MAGIC and OLD_MAGIC are temporary, for the OLD_CODE kludge.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123		/* #! */
#define	DBG_MAGIC	0x2B23		/* #+ */
#define	OLD_MAGIC	0x3C23		/* #< */
#else
#define	SHELLMAGIC	0x2321		/* #! */
#define	DBG_MAGIC	0x232B		/* #+ */
#define	OLD_MAGIC	0x233C		/* #< */
#endif
53 
/*
 * The caller maps in only a single page of the file being examined, so
 * the whole interpreter line has to fit within PAGE_SIZE bytes.  (At the
 * time of writing MAXSHELLCMDLEN == PAGE_SIZE.)  Teaching this code to
 * read a second page would lift the restriction; until then, refuse to
 * build with a larger limit.
 */
#if PAGE_SIZE < MAXSHELLCMDLEN
#error "MAXSHELLCMDLEN is larger than a single page!"
#endif
63 
64 /**
65  * Shell interpreter image activator. An interpreter name beginning at
66  * imgp->args->begin_argv is the minimal successful exit requirement.
67  *
68  * If the given file is a shell-script, then the first line will start
69  * with the two characters `#!' (aka SHELLMAGIC), followed by the name
70  * of the shell-interpreter to run, followed by zero or more tokens.
71  *
72  * The interpreter is then started up such that it will see:
73  *    arg[0] -> The name of interpreter as specified after `#!' in the
74  *		first line of the script.  The interpreter name must
75  *		not be longer than MAXSHELLCMDLEN bytes.
76  *    arg[1] -> *If* there are any additional tokens on the first line,
77  *		then we add a new arg[1], which is a copy of the rest of
78  *		that line.  The copy starts at the first token after the
79  *		interpreter name.  We leave it to the interpreter to
80  *		parse the tokens in that value.
81  *    arg[x] -> the full pathname of the script.  This will either be
82  *		arg[2] or arg[1], depending on whether or not tokens
83  *		were found after the interpreter name.
84  *  arg[x+1] -> all the arguments that were specified on the original
85  *		command line.
86  *
87  * This processing is described in the execve(2) man page.
88  */
89 
90 /*
91  * HISTORICAL NOTE: From 1993 to mid-2005, FreeBSD parsed out the tokens as
 * found on the first line of the script, and set up each token as a separate
93  * value in arg[].  This extra processing did not match the behavior of other
94  * OS's, and caused a few subtle problems.  For one, it meant the kernel was
95  * deciding how those values should be parsed (wrt characters for quoting or
96  * comments, etc), while the interpreter might have other rules for parsing.
97  * It also meant the interpreter had no way of knowing which arguments came
98  * from the first line of the shell script, and which arguments were specified
99  * by the user on the command line.
100  *
 * Only a few things in the base system might depend on that non-standard
102  * processing (mainly /bin/sh and /usr/bin/env).  And for programs which are
103  * not in the base system, the "newer" behavior matches how NetBSD, OpenBSD,
104  * Linux, Solaris, AIX, IRIX, and many other Unixes have set up the arg-list
105  * for the interpreter.  So if a program can handle this behavior on those
106  * other OS's, it should be able to handle it for FreeBSD too.
107  */
108 int
109 exec_shell_imgact(imgp)
110 	struct image_params *imgp;
111 {
112 	const char *image_header = imgp->image_header;
113 	const char *ihp, *interpb, *interpe, *maxp, *optb, *opte;
114 	int error, offset;
115 	size_t length, clength;
116 	struct vattr vattr;
117 
118 	/* a shell script? */
119 	if (((const short *) image_header)[0] != SHELLMAGIC)
120 		return(-1);
121 
122 	/*
123 	 * Don't allow a shell script to be the shell for a shell
124 	 *	script. :-)
125 	 */
126 	if (imgp->interpreted)
127 		return(ENOEXEC);
128 
129 	imgp->interpreted = 1;
130 
131 	/*
132 	 * At this point we have the first page of the file mapped.
133 	 * However, we don't know how far into the page the contents are
134 	 * valid -- the actual file might be much shorter than the page.
135 	 * So find out the file size.
136  	 */
137 	error = VOP_GETATTR(imgp->vp, &vattr, imgp->proc->p_ucred, curthread);
138 	if (error)
139 		return (error);
140 
141 	/*
142 	 * Copy shell name and arguments from image_header into a string
143 	 *	buffer.  Remember that the caller has mapped only the
144 	 *	first page of the file into memory.
145 	 */
146 	clength = (vattr.va_size > PAGE_SIZE) ? PAGE_SIZE : vattr.va_size;
147 
148 	maxp = &image_header[clength];
149 	ihp = &image_header[2];
150 #if KEEP_OLDCODE
151 	/*
152 	 * XXX - Temporarily provide a quick-and-dirty way to get the
153 	 * older, non-standard option-parsing behavior, just in case
154 	 * someone finds themselves in an emergency where they need it.
155 	 * This will not be documented.  It is only for initial testing.
156 	 */
157 	if (*(const short *)ihp == OLD_MAGIC)
158 		ihp += 2;
159 	else
160 		goto new_code;
161 	interpb = ihp;
162 
163 	/*
164 	 * Figure out the number of bytes that need to be reserved in the
165 	 * argument string to copy the contents of the interpreter's command
166 	 * line into the argument string.
167 	 */
168 	ihp = interpb;
169 	offset = 0;
170 	while (ihp < &image_header[clength]) {
171 		/* Skip any whitespace */
172 		if ((*ihp == ' ') || (*ihp == '\t')) {
173 			ihp++;
174 			continue;
175 		}
176 
177 		/* End of line? */
178 		if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0'))
179 			break;
180 
181 		/* Found a token */
182 		do {
183 			offset++;
184 			ihp++;
185 		} while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') &&
186 		    (*ihp != '#') && (*ihp != '\0') &&
187 		    (ihp < &image_header[clength]));
188 		/* Include terminating nulls in the offset */
189 		offset++;
190 	}
191 
192 	/* If the script gives a null line as the interpreter, we bail */
193 	if (offset == 0)
194 		return (ENOEXEC);
195 
196 	/* Check that we aren't too big */
197 	if (ihp == &image_header[MAXSHELLCMDLEN])
198 		return (ENAMETOOLONG);
199 
200 	/*
201 	 * The full path name of the original script file must be tagged
202 	 * onto the end, adjust the offset to deal with it.
203 	 *
204 	 * The original argv[0] is being replaced, set 'length' to the number
205 	 * of bytes being removed.  So 'offset' is the number of bytes being
206 	 * added and 'length' is the number of bytes being removed.
207 	 */
208 	offset += strlen(imgp->args->fname) + 1;	/* add fname */
209 	length = (imgp->args->argc == 0) ? 0 :
210 	    strlen(imgp->args->begin_argv) + 1;		/* bytes to delete */
211 
212 	if (offset - length > imgp->args->stringspace)
213 		return (E2BIG);
214 
215 	bcopy(imgp->args->begin_argv + length, imgp->args->begin_argv + offset,
216 	    imgp->args->endp - (imgp->args->begin_argv + length));
217 
218 	offset -= length;		/* calculate actual adjustment */
219 	imgp->args->begin_envv += offset;
220 	imgp->args->endp += offset;
221 	imgp->args->stringspace -= offset;
222 
223 	/*
224 	 * If there were no arguments then we've added one, otherwise
225 	 * decr argc remove old argv[0], incr argc for fname add, net 0
226 	 */
227 	if (imgp->args->argc == 0)
228 		imgp->args->argc = 1;
229 
230 	/*
231 	 * Loop through the interpreter name yet again, copying as
232 	 * we go.
233 	 */
234 	ihp = interpb;
235 	offset = 0;
236 	while (ihp < &image_header[clength]) {
237 		/* Skip whitespace */
238 		if ((*ihp == ' ') || (*ihp == '\t')) {
239 			ihp++;
240 			continue;
241 		}
242 
243 		/* End of line? */
244 		if ((*ihp == '\n') || (*ihp == '#') || (*ihp == '\0'))
245 			break;
246 
247 		/* Found a token, copy it */
248 		do {
249 			imgp->args->begin_argv[offset++] = *ihp++;
250 		} while ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n') &&
251 		    (*ihp != '#') && (*ihp != '\0') &&
252 		    (ihp < &image_header[MAXSHELLCMDLEN]));
253 		imgp->args->begin_argv[offset++] = '\0';
254 		imgp->args->argc++;
255 	}
256 	goto common_end;
257 new_code:
258 #endif
259 	/*
260 	 * Find the beginning and end of the interpreter_name.  If the
261 	 * line does not include any interpreter, or if the name which
262 	 * was found is too long, we bail out.
263 	 */
264 	while (ihp < maxp && ((*ihp == ' ') || (*ihp == '\t')))
265 		ihp++;
266 	interpb = ihp;
267 	while (ihp < maxp && ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n')
268 	    && (*ihp != '\0')))
269 		ihp++;
270 	interpe = ihp;
271 	if (interpb == interpe)
272 		return (ENOEXEC);
273 	if ((interpe - interpb) >= MAXSHELLCMDLEN)
274 		return (ENAMETOOLONG);
275 
276 	/*
277 	 * Find the beginning of the options (if any), and the end-of-line.
278 	 * Then trim the trailing blanks off the value.  Note that some
279 	 * other operating systems do *not* trim the trailing whitespace...
280 	 */
281 	while (ihp < maxp && ((*ihp == ' ') || (*ihp == '\t')))
282 		ihp++;
283 	optb = ihp;
284 	while (ihp < maxp && ((*ihp != '\n') && (*ihp != '\0')))
285 		ihp++;
286 	opte = ihp;
287 	while (--ihp > interpe && ((*ihp == ' ') || (*ihp == '\t')))
288 		opte = ihp;
289 
290 	/*
291 	 * We need to "pop" (remove) the present value of arg[0], and "push"
292 	 * either two or three new values in the arg[] list.  To do this,
293 	 * we first shift all the other values in the `begin_argv' area to
294 	 * provide the exact amount of room for the values added.  Set up
295 	 * `offset' as the number of bytes to be added to the `begin_argv'
296 	 * area, and 'length' as the number of bytes being removed.
297 	 */
298 	offset = interpe - interpb + 1;			/* interpreter */
299 	if (opte != optb)				/* options (if any) */
300 		offset += opte - optb + 1;
301 	offset += strlen(imgp->args->fname) + 1;	/* fname of script */
302 	length = (imgp->args->argc == 0) ? 0 :
303 	    strlen(imgp->args->begin_argv) + 1;		/* bytes to delete */
304 
305 	if (offset - length > imgp->args->stringspace)
306 		return (E2BIG);
307 
308 	bcopy(imgp->args->begin_argv + length, imgp->args->begin_argv + offset,
309 	    imgp->args->endp - (imgp->args->begin_argv + length));
310 
311 	offset -= length;		/* calculate actual adjustment */
312 	imgp->args->begin_envv += offset;
313 	imgp->args->endp += offset;
314 	imgp->args->stringspace -= offset;
315 
316 	/*
317 	 * If there was no arg[0] when we started, then the interpreter_name
318 	 * is adding an argument (instead of replacing the arg[0] we started
319 	 * with).  And we're always adding an argument when we include the
320 	 * full pathname of the original script.
321 	 */
322 	if (imgp->args->argc == 0)
323 		imgp->args->argc = 1;
324 	imgp->args->argc++;
325 
326 	/*
327 	 * The original arg[] list has been shifted appropriately.  Copy in
328 	 * the interpreter name and options-string.
329 	 */
330 	length = interpe - interpb;
331 	bcopy(interpb, imgp->args->buf, length);
332 	*(imgp->args->buf + length) = '\0';
333 	offset = length + 1;
334 	if (opte != optb) {
335 		length = opte - optb;
336 		bcopy(optb, imgp->args->buf + offset, length);
337 		*(imgp->args->buf + offset + length) = '\0';
338 		offset += length + 1;
339 		imgp->args->argc++;
340 	}
341 
342 #if KEEP_OLDCODE
343 common_end:
344 #endif
345 	/*
346 	 * Finally, add the filename onto the end for the interpreter to
347 	 * use and copy the interpreter's name to imgp->interpreter_name
348 	 * for exec to use.
349 	 */
350 	error = copystr(imgp->args->fname, imgp->args->buf + offset,
351 	    imgp->args->stringspace, &length);
352 
353 	if (error == 0)
354 		error = copystr(imgp->args->begin_argv, imgp->interpreter_name,
355 		    MAXSHELLCMDLEN, &length);
356 
357 	return (error);
358 }
359 
360 /*
361  * Tell kern_execve.c about it, with a little help from the linker.
362  */
363 static struct execsw shell_execsw = { exec_shell_imgact, "#!" };
364 EXEC_SET(shell, shell_execsw);
365