xref: /freebsd/sys/kern/imgact_shell.c (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1993, David Greenman
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/vnode.h>
34 #include <sys/proc.h>
35 #include <sys/sbuf.h>
36 #include <sys/systm.h>
37 #include <sys/sysproto.h>
38 #include <sys/exec.h>
39 #include <sys/imgact.h>
40 #include <sys/kernel.h>
41 
/*
 * A script starts with the bytes `#' (0x23) and `!' (0x21).
 * exec_shell_imgact() reads them as a single native-endian `short', so
 * the value it compares against depends on the byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif

/*
 * At the time of this writing, MAXSHELLCMDLEN == PAGE_SIZE.  This is
 * significant because the caller has only mapped in one page of the
 * file we're reading.
 */
#if MAXSHELLCMDLEN > PAGE_SIZE
#error "MAXSHELLCMDLEN is larger than a single page!"
#endif

/*
 * MAXSHELLCMDLEN must be at least MAXINTERP plus the size of the `#!'
 * prefix and terminating newline.
 */
CTASSERT(MAXSHELLCMDLEN >= MAXINTERP + 3);
62 
63 /**
64  * Shell interpreter image activator. An interpreter name beginning at
65  * imgp->args->begin_argv is the minimal successful exit requirement.
66  *
67  * If the given file is a shell-script, then the first line will start
68  * with the two characters `#!' (aka SHELLMAGIC), followed by the name
69  * of the shell-interpreter to run, followed by zero or more tokens.
70  *
71  * The interpreter is then started up such that it will see:
 *    arg[0] -> The name of the interpreter as specified after `#!' in
 *		the first line of the script.  The interpreter name must
 *		be shorter than MAXINTERP bytes, and the first line must
 *		end within the first MAXSHELLCMDLEN bytes of the file.
75  *    arg[1] -> *If* there are any additional tokens on the first line,
76  *		then we add a new arg[1], which is a copy of the rest of
77  *		that line.  The copy starts at the first token after the
78  *		interpreter name.  We leave it to the interpreter to
79  *		parse the tokens in that value.
80  *    arg[x] -> the full pathname of the script.  This will either be
81  *		arg[2] or arg[1], depending on whether or not tokens
82  *		were found after the interpreter name.
83  *  arg[x+1] -> all the arguments that were specified on the original
84  *		command line.
85  *
86  * This processing is described in the execve(2) man page.
87  */
88 
89 /*
 * HISTORICAL NOTE: From 1993 to mid-2005, FreeBSD parsed out the tokens as
 * found on the first line of the script, and set up each token as a separate
 * value in arg[].  This extra processing did not match the behavior of other
93  * OS's, and caused a few subtle problems.  For one, it meant the kernel was
94  * deciding how those values should be parsed (wrt characters for quoting or
95  * comments, etc), while the interpreter might have other rules for parsing.
96  * It also meant the interpreter had no way of knowing which arguments came
97  * from the first line of the shell script, and which arguments were specified
98  * by the user on the command line.  That extra processing was dropped in the
99  * 6.x branch on May 28, 2005 (matching __FreeBSD_version 600029).
100  */
101 int
102 exec_shell_imgact(struct image_params *imgp)
103 {
104 	const char *image_header = imgp->image_header;
105 	const char *ihp, *interpb, *interpe, *maxp, *optb, *opte, *fname;
106 	int error, offset;
107 	size_t length;
108 	struct vattr vattr;
109 	struct sbuf *sname;
110 
111 	/* a shell script? */
112 	if (((const short *)image_header)[0] != SHELLMAGIC)
113 		return (-1);
114 
115 	/*
116 	 * Don't allow a shell script to be the shell for a shell
117 	 *	script. :-)
118 	 */
119 	if (imgp->interpreted & IMGACT_SHELL)
120 		return (ENOEXEC);
121 
122 	imgp->interpreted |= IMGACT_SHELL;
123 
124 	/*
125 	 * At this point we have the first page of the file mapped.
126 	 * However, we don't know how far into the page the contents are
127 	 * valid -- the actual file might be much shorter than the page.
128 	 * So find out the file size.
129 	 */
130 	error = VOP_GETATTR(imgp->vp, &vattr, imgp->proc->p_ucred);
131 	if (error)
132 		return (error);
133 
134 	/*
135 	 * Copy shell name and arguments from image_header into a string
136 	 * buffer.
137 	 */
138 	maxp = &image_header[MIN(vattr.va_size, MAXSHELLCMDLEN)];
139 	ihp = &image_header[2];
140 
141 	/*
142 	 * Find the beginning and end of the interpreter_name.  If the
143 	 * line does not include any interpreter, or if the name which
144 	 * was found is too long, we bail out.
145 	 */
146 	while (ihp < maxp && ((*ihp == ' ') || (*ihp == '\t')))
147 		ihp++;
148 	interpb = ihp;
149 	while (ihp < maxp && ((*ihp != ' ') && (*ihp != '\t') && (*ihp != '\n')
150 	    && (*ihp != '\0')))
151 		ihp++;
152 	interpe = ihp;
153 	if (interpb == interpe)
154 		return (ENOEXEC);
155 	if (interpe - interpb >= MAXINTERP)
156 		return (ENAMETOOLONG);
157 
158 	/*
159 	 * Find the beginning of the options (if any), and the end-of-line.
160 	 * Then trim the trailing blanks off the value.  Note that some
161 	 * other operating systems do *not* trim the trailing whitespace...
162 	 */
163 	while (ihp < maxp && ((*ihp == ' ') || (*ihp == '\t')))
164 		ihp++;
165 	optb = ihp;
166 	while (ihp < maxp && ((*ihp != '\n') && (*ihp != '\0')))
167 		ihp++;
168 	opte = ihp;
169 	if (opte == maxp)
170 		return (ENOEXEC);
171 	while (--ihp > optb && ((*ihp == ' ') || (*ihp == '\t')))
172 		opte = ihp;
173 
174 	if (imgp->args->fname != NULL) {
175 		fname = imgp->args->fname;
176 		sname = NULL;
177 	} else {
178 		sname = sbuf_new_auto();
179 		sbuf_printf(sname, "/dev/fd/%d", imgp->args->fd);
180 		sbuf_finish(sname);
181 		fname = sbuf_data(sname);
182 	}
183 
184 	/*
185 	 * We need to "pop" (remove) the present value of arg[0], and "push"
186 	 * either two or three new values in the arg[] list.  To do this,
187 	 * we first shift all the other values in the `begin_argv' area to
188 	 * provide the exact amount of room for the values added.  Set up
189 	 * `offset' as the number of bytes to be added to the `begin_argv'
190 	 * area, and 'length' as the number of bytes being removed.
191 	 */
192 	offset = interpe - interpb + 1;			/* interpreter */
193 	if (opte > optb)				/* options (if any) */
194 		offset += opte - optb + 1;
195 	offset += strlen(fname) + 1;			/* fname of script */
196 	length = (imgp->args->argc == 0) ? 0 :
197 	    strlen(imgp->args->begin_argv) + 1;		/* bytes to delete */
198 
199 	error = exec_args_adjust_args(imgp->args, length, offset);
200 	if (error != 0) {
201 		if (sname != NULL)
202 			sbuf_delete(sname);
203 		return (error);
204 	}
205 
206 	/*
207 	 * If there was no arg[0] when we started, then the interpreter_name
208 	 * is adding an argument (instead of replacing the arg[0] we started
209 	 * with).  And we're always adding an argument when we include the
210 	 * full pathname of the original script.
211 	 */
212 	if (imgp->args->argc == 0)
213 		imgp->args->argc = 1;
214 	imgp->args->argc++;
215 
216 	/*
217 	 * The original arg[] list has been shifted appropriately.  Copy in
218 	 * the interpreter name and options-string.
219 	 */
220 	length = interpe - interpb;
221 	bcopy(interpb, imgp->args->begin_argv, length);
222 	*(imgp->args->begin_argv + length) = '\0';
223 	offset = length + 1;
224 	if (opte > optb) {
225 		length = opte - optb;
226 		bcopy(optb, imgp->args->begin_argv + offset, length);
227 		*(imgp->args->begin_argv + offset + length) = '\0';
228 		offset += length + 1;
229 		imgp->args->argc++;
230 	}
231 
232 	/*
233 	 * Finally, add the filename onto the end for the interpreter to
234 	 * use and copy the interpreter's name to imgp->interpreter_name
235 	 * for exec to use.
236 	 */
237 	error = copystr(fname, imgp->args->begin_argv + offset,
238 	    imgp->args->stringspace, NULL);
239 
240 	if (error == 0)
241 		imgp->interpreter_name = imgp->args->begin_argv;
242 
243 	if (sname != NULL)
244 		sbuf_delete(sname);
245 	return (error);
246 }
247 
248 /*
249  * Tell kern_execve.c about it, with a little help from the linker.
250  */
251 static struct execsw shell_execsw = {
252 	.ex_imgact = exec_shell_imgact,
253 	.ex_name = "#!"
254 };
255 EXEC_SET(shell, shell_execsw);
256