xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision c8589f13ba961772dd5a0d699c5bb926f3006c33)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * zlogin provides three types of login which allow users in the global
30  * zone to access non-global zones.
31  *
32  * - "interactive login" is similar to rlogin(1); for example, the user could
33  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
34  *   granted a new pty (which is then shoved into the zone), and an I/O
35  *   loop between parent and child processes takes care of the interactive
36  *   session.  In this mode, login(1) (and its -c option, which means
37  *   "already authenticated") is employed to take care of the initialization
38  *   of the user's session.
39  *
40  * - "non-interactive login" is similar to su(1M); the user could issue
41  *   'zlogin my-zone ls -l' and the command would be run as specified.
42  *   In this mode, zlogin sets up pipes as the communication channel, and
43  *   'su' is used to do the login setup work.
44  *
45  * - "console login" is the equivalent to accessing the tip line for a
46  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
47  *   In this mode, zlogin contacts the zoneadmd process via unix domain
48  *   socket.  If zoneadmd is not running, it starts it.  This allows the
49  *   console to be available anytime the zone is installed, regardless of
50  *   whether it is running.
51  */
52 
53 #include <sys/socket.h>
54 #include <sys/termios.h>
55 #include <sys/utsname.h>
56 #include <sys/stat.h>
57 #include <sys/types.h>
58 #include <sys/contract/process.h>
59 #include <sys/ctfs.h>
60 #include <sys/brand.h>
61 
62 #include <alloca.h>
63 #include <assert.h>
64 #include <ctype.h>
65 #include <door.h>
66 #include <errno.h>
67 #include <poll.h>
68 #include <priv.h>
69 #include <pwd.h>
70 #include <unistd.h>
71 #include <utmpx.h>
72 #include <sac.h>
73 #include <signal.h>
74 #include <stdarg.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <strings.h>
79 #include <stropts.h>
80 #include <wait.h>
81 #include <zone.h>
82 #include <fcntl.h>
83 #include <libdevinfo.h>
84 #include <libintl.h>
85 #include <locale.h>
86 #include <libzonecfg.h>
87 #include <libcontract.h>
88 #include <libbrand.h>
89 
/*
 * File-scope state shared by the routines in this file.
 */

/* Master side of the session: the console socket or the /dev/ptmx fd. */
static int masterfd;
/* Terminal settings captured from the user's tty; reset_tty() restores them. */
static struct termios save_termios;
/* Raw-mode settings with the saved VEOF/VEOL put back; see set_tty_rawmode(). */
static struct termios effective_termios;
/* Descriptor whose settings reset_tty() restores. */
static int save_fd;
/* Window size read from stdin in get_master_pty(), applied to the slave pty. */
static struct winsize winsize;
/* Set by sigcld() once the watched child has exited or been killed. */
static volatile int dead;
/* Child watched by sigcld() and signalled by sig_forward(); -1 means none. */
static volatile pid_t child_pid = -1;
/* Consulted by prefork_dropprivs(): when zero, a few privileges are kept. */
static int interactive = 0;
/* Privilege set built by prefork_dropprivs(), applied by postfork_dropprivs(). */
static priv_set_t *dropprivs;

/* When nonzero, process_user_input() does not look for the command char. */
static int nocmdchar = 0;
/* When nonzero, FAILSAFESHELL is run directly and pty setup errors are tolerated. */
static int failsafe = 0;
/* Escape character recognized at the start of a line; see set_cmdchar(). */
static char cmdchar = '~';

/* revents value recorded by doio() when an unexpected poll event ends the loop. */
static int pollerr = 0;

/* Program name used as the prefix of diagnostics; set by getpname(). */
static const char *pname;

#if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
#endif

/* Programs and defaults used when building the argv/envp of the new process. */
#define	SUPATH	"/usr/bin/su"
#define	FAILSAFESHELL	"/sbin/sh"
#define	DEFAULTSHELL	"/sbin/sh"
#define	DEF_PATH	"/usr/sbin:/usr/bin"

/* Size of the buffers used to shuttle I/O between the two sides. */
#define	ZLOGIN_BUFSIZ	8192

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define	CANONIFY_LEN 5
124 
125 static void
126 usage(void)
127 {
128 	(void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
129 	    "[-l user] zonename [command [args ...] ]\n"), pname);
130 	exit(2);
131 }
132 
133 static const char *
134 getpname(const char *arg0)
135 {
136 	const char *p = strrchr(arg0, '/');
137 
138 	if (p == NULL)
139 		p = arg0;
140 	else
141 		p++;
142 
143 	pname = p;
144 	return (p);
145 }
146 
147 static void
148 zerror(const char *fmt, ...)
149 {
150 	va_list alist;
151 
152 	(void) fprintf(stderr, "%s: ", pname);
153 	va_start(alist, fmt);
154 	(void) vfprintf(stderr, fmt, alist);
155 	va_end(alist);
156 	(void) fprintf(stderr, "\n");
157 }
158 
159 static void
160 zperror(const char *str)
161 {
162 	const char *estr;
163 
164 	if ((estr = strerror(errno)) != NULL)
165 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
166 	else
167 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
168 }
169 
170 /*
171  * The first part of our privilege dropping scheme needs to be called before
172  * fork(), since we must have it for security; we don't want to be surprised
173  * later that we couldn't allocate the privset.
174  */
175 static int
176 prefork_dropprivs()
177 {
178 	if ((dropprivs = priv_allocset()) == NULL)
179 		return (1);
180 	priv_emptyset(dropprivs);
181 
182 	/*
183 	 * We need these privileges in order to query session information and
184 	 * send signals.
185 	 */
186 	if (interactive == 0) {
187 		if (priv_addset(dropprivs, "proc_session") == -1)
188 			return (1);
189 		if (priv_addset(dropprivs, "proc_zone") == -1)
190 			return (1);
191 		if (priv_addset(dropprivs, "proc_owner") == -1)
192 			return (1);
193 	}
194 
195 	return (0);
196 }
197 
198 /*
199  * The second part of the privilege drop.  We are paranoid about being attacked
200  * by the zone, so we drop all privileges.  This should prevent a compromise
201  * which gets us to fork(), exec(), symlink(), etc.
202  */
203 static void
204 postfork_dropprivs()
205 {
206 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
207 		zperror(gettext("Warning: could not set permitted privileges"));
208 	}
209 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
210 		zperror(gettext("Warning: could not set limit privileges"));
211 	}
212 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
213 		zperror(gettext("Warning: could not set inheritable "
214 		    "privileges"));
215 	}
216 }
217 
218 /*
219  * Create the unix domain socket and call the zoneadmd server; handshake
220  * with it to determine whether it will allow us to connect.
221  */
222 static int
223 get_console_master(const char *zname)
224 {
225 	int sockfd = -1;
226 	struct sockaddr_un servaddr;
227 	char clientid[MAXPATHLEN];
228 	char handshake[MAXPATHLEN], c;
229 	int msglen;
230 	int i = 0, err = 0;
231 
232 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
233 		zperror(gettext("could not create socket"));
234 		return (-1);
235 	}
236 
237 	bzero(&servaddr, sizeof (servaddr));
238 	servaddr.sun_family = AF_UNIX;
239 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
240 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
241 
242 	if (connect(sockfd, (struct sockaddr *)&servaddr,
243 	    sizeof (servaddr)) == -1) {
244 		zperror(gettext("Could not connect to zone console"));
245 		goto bad;
246 	}
247 	masterfd = sockfd;
248 
249 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
250 	    getpid(), setlocale(LC_MESSAGES, NULL));
251 
252 	if (msglen >= sizeof (clientid) || msglen < 0) {
253 		zerror("protocol error");
254 		goto bad;
255 	}
256 
257 	if (write(masterfd, clientid, msglen) != msglen) {
258 		zerror("protocol error");
259 		goto bad;
260 	}
261 
262 	bzero(handshake, sizeof (handshake));
263 
264 	/*
265 	 * Take care not to accumulate more than our fill, and leave room for
266 	 * the NUL at the end.
267 	 */
268 	while ((err = read(masterfd, &c, 1)) == 1) {
269 		if (i >= (sizeof (handshake) - 1))
270 			break;
271 		if (c == '\n')
272 			break;
273 		handshake[i] = c;
274 		i++;
275 	}
276 
277 	/*
278 	 * If something went wrong during the handshake we bail; perhaps
279 	 * the server died off.
280 	 */
281 	if (err == -1) {
282 		zperror(gettext("Could not connect to zone console"));
283 		goto bad;
284 	}
285 
286 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
287 		return (0);
288 
289 	zerror(gettext("Console is already in use by process ID %s."),
290 	    handshake);
291 bad:
292 	(void) close(sockfd);
293 	masterfd = -1;
294 	return (-1);
295 }
296 
297 
298 /*
299  * Routines to handle pty creation upon zone entry and to shuttle I/O back
300  * and forth between the two terminals.  We also compute and store the
301  * name of the slave terminal associated with the master side.
302  */
303 static int
304 get_master_pty()
305 {
306 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
307 		zperror(gettext("failed to obtain a pseudo-tty"));
308 		return (-1);
309 	}
310 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
311 		zperror(gettext("failed to get terminal settings from stdin"));
312 		return (-1);
313 	}
314 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
315 
316 	return (0);
317 }
318 
319 /*
320  * This is a bit tricky; normally a pts device will belong to the zone it
321  * is granted to.  But in the case of "entering" a zone, we need to establish
322  * the pty before entering the zone so that we can vector I/O to and from it
323  * from the global zone.
324  *
325  * We use the zonept() call to let the ptm driver know what we are up to;
326  * the only other hairy bit is the setting of zoneslavename (which happens
327  * above, in get_master_pty()).
328  */
329 static int
330 init_slave_pty(zoneid_t zoneid, char *devroot)
331 {
332 	int slavefd = -1;
333 	char *slavename, zoneslavename[MAXPATHLEN];
334 
335 	/*
336 	 * Set slave permissions, zone the pts, then unlock it.
337 	 */
338 	if (grantpt(masterfd) != 0) {
339 		zperror(gettext("grantpt failed"));
340 		return (-1);
341 	}
342 
343 	if (unlockpt(masterfd) != 0) {
344 		zperror(gettext("unlockpt failed"));
345 		return (-1);
346 	}
347 
348 	/*
349 	 * We must open the slave side before zoning this pty; otherwise
350 	 * the kernel would refuse us the open-- zoning a pty makes it
351 	 * inaccessible to the global zone.  Note we are trying to open
352 	 * the device node via the $ZONEROOT/dev path for this pty.
353 	 *
354 	 * Later we'll close the slave out when once we've opened it again
355 	 * from within the target zone.  Blarg.
356 	 */
357 	if ((slavename = ptsname(masterfd)) == NULL) {
358 		zperror(gettext("failed to get name for pseudo-tty"));
359 		return (-1);
360 	}
361 
362 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
363 	    devroot, slavename);
364 
365 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
366 		zerror(gettext("failed to open %s: %s"), zoneslavename,
367 		    strerror(errno));
368 		return (-1);
369 	}
370 
371 	/*
372 	 * Push hardware emulation (ptem), line discipline (ldterm),
373 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
374 	 */
375 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
376 		zperror(gettext("failed to push ptem module"));
377 		if (!failsafe)
378 			goto bad;
379 	}
380 
381 	/*
382 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
383 	 * this prior to entering the zone so that we can detect any errors
384 	 * early, and so that we can set the anchor from the global zone.
385 	 */
386 	if (ioctl(slavefd, I_ANCHOR) == -1) {
387 		zperror(gettext("failed to set stream anchor"));
388 		if (!failsafe)
389 			goto bad;
390 	}
391 
392 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
393 		zperror(gettext("failed to push ldterm module"));
394 		if (!failsafe)
395 			goto bad;
396 	}
397 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
398 		zperror(gettext("failed to push ttcompat module"));
399 		if (!failsafe)
400 			goto bad;
401 	}
402 
403 	/*
404 	 * Propagate terminal settings from the external term to the new one.
405 	 */
406 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
407 		zperror(gettext("failed to set terminal settings"));
408 		if (!failsafe)
409 			goto bad;
410 	}
411 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
412 
413 	if (zonept(masterfd, zoneid) != 0) {
414 		zperror(gettext("could not set zoneid of pty"));
415 		goto bad;
416 	}
417 
418 	return (slavefd);
419 
420 bad:
421 	(void) close(slavefd);
422 	return (-1);
423 }
424 
425 /*
426  * Place terminal into raw mode.
427  */
428 static int
429 set_tty_rawmode(int fd)
430 {
431 	struct termios term;
432 	if (tcgetattr(fd, &term) < 0) {
433 		zperror(gettext("failed to get user terminal settings"));
434 		return (-1);
435 	}
436 
437 	/* Stash for later, so we can revert back to previous mode */
438 	save_termios = term;
439 	save_fd = fd;
440 
441 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
442 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
443 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
444 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
445 	/* disable output post-processing */
446 	term.c_oflag &= ~OPOST;
447 	/* disable canonical mode, signal chars, echo & extended functions */
448 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
449 
450 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
451 	term.c_cc[VTIME] = 0;
452 
453 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
454 		zperror(gettext("failed to set user terminal to raw mode"));
455 		return (-1);
456 	}
457 
458 	/*
459 	 * We need to know the value of VEOF so that we can properly process for
460 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
461 	 * because VMIN overloads the same array slot in non-canonical mode.
462 	 * Stupid @&^%!
463 	 *
464 	 * So here we construct the "effective" termios from the current
465 	 * terminal settings, and the corrected VEOF and VEOL settings.
466 	 */
467 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
468 		zperror(gettext("failed to get user terminal settings"));
469 		return (-1);
470 	}
471 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
472 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
473 
474 	return (0);
475 }
476 
477 /*
478  * Copy terminal window size from our terminal to the pts.
479  */
480 /*ARGSUSED*/
481 static void
482 sigwinch(int s)
483 {
484 	struct winsize ws;
485 
486 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
487 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
488 }
489 
490 static void
491 /*ARGSUSED*/
492 sigcld(int s)
493 {
494 	int status;
495 	pid_t pid;
496 
497 	/*
498 	 * Peek at the exit status.  If this isn't the process we cared
499 	 * about, then just reap it.
500 	 */
501 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
502 		if (pid == child_pid &&
503 		    (WIFEXITED(status) || WIFSIGNALED(status)))
504 			dead = 1;
505 		else
506 			(void) waitpid(pid, &status, WNOHANG);
507 	}
508 }
509 
510 /*
511  * Some signals (currently, SIGINT) must be forwarded on to the process
512  * group of the child process.
513  */
514 static void
515 sig_forward(int s)
516 {
517 	if (child_pid != -1) {
518 		pid_t pgid = getpgid(child_pid);
519 		if (pgid != -1)
520 			(void) sigsend(P_PGID, pgid, s);
521 	}
522 }
523 
524 /*
525  * reset terminal settings for global environment
526  */
527 static void
528 reset_tty()
529 {
530 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
531 }
532 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *   - printable characters are copied as-is;
 *   - control characters 0 through 31 become '^' followed by the character
 *     offset by '@' (so 4 becomes "^D");
 *   - everything else becomes a backslash and three octal digits.
 *
 * cc must have room for at least CANONIFY_LEN (5) bytes; the result is
 * always NUL-terminated.
 *
 * The byte is examined as an unsigned char: passing a negative char to
 * isprint() is undefined, and shifting a negative value would yield the
 * wrong octal digits for bytes of 0x80 and above.
 */
static void
canonify(char c, char *cc)
{
	const unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
555 
556 /*
557  * process_user_input watches the input stream for the escape sequence for
558  * 'quit' (by default, tilde-period).  Because we might be fed just one
559  * keystroke at a time, state associated with the user input (are we at the
560  * beginning of the line?  are we locally echoing the next character?) is
561  * maintained by beginning_of_line and local_echo across calls to the routine.
562  * If the write to outfd fails, we'll try to read from infd in an attempt
563  * to prevent deadlock between the two processes.
564  *
565  * This routine returns -1 when the 'quit' escape sequence has been issued,
566  * and 0 otherwise.
567  */
568 static int
569 process_user_input(int outfd, int infd, char *buf, size_t nbytes)
570 {
571 	static boolean_t beginning_of_line = B_TRUE;
572 	static boolean_t local_echo = B_FALSE;
573 
574 	char c = *buf;
575 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
576 		buf++;
577 		if (beginning_of_line && !nocmdchar) {
578 			beginning_of_line = B_FALSE;
579 			if (c == cmdchar) {
580 				local_echo = B_TRUE;
581 				continue;
582 			}
583 		} else if (local_echo) {
584 			local_echo = B_FALSE;
585 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
586 				char cc[CANONIFY_LEN];
587 
588 				canonify(c, cc);
589 				(void) write(STDOUT_FILENO, &cmdchar, 1);
590 				(void) write(STDOUT_FILENO, cc, strlen(cc));
591 				return (-1);
592 			}
593 		}
594 retry:
595 		if (write(outfd, &c, 1) <= 0) {
596 			/*
597 			 * Since the fd we are writing to is opened with
598 			 * O_NONBLOCK it is possible to get EAGAIN if the
599 			 * pipe is full.  One way this could happen is if we
600 			 * are writing a lot of data into the pipe in this loop
601 			 * and the application on the other end is echoing that
602 			 * data back out to its stdout.  The output pipe can
603 			 * fill up since we are stuck here in this loop and not
604 			 * draining the other pipe.  We can try to read some of
605 			 * the data to see if we can drain the pipe so that the
606 			 * application can continue to make progress.  The read
607 			 * is non-blocking so we won't hang here.  We also wait
608 			 * a bit before retrying since there could be other
609 			 * reasons why the pipe is full and we don't want to
610 			 * continuously retry.
611 			 */
612 			if (errno == EAGAIN) {
613 				struct timespec rqtp;
614 				int ln;
615 				char ibuf[ZLOGIN_BUFSIZ];
616 
617 				if ((ln = read(infd, ibuf, ZLOGIN_BUFSIZ)) > 0)
618 					(void) write(STDOUT_FILENO, ibuf, ln);
619 
620 				/* sleep for 10 milliseconds */
621 				rqtp.tv_sec = 0;
622 				rqtp.tv_nsec = 10 * (NANOSEC / MILLISEC);
623 				(void) nanosleep(&rqtp, NULL);
624 				if (!dead)
625 					goto retry;
626 			}
627 
628 			return (-1);
629 		}
630 		beginning_of_line = (c == '\r' || c == '\n' ||
631 		    c == effective_termios.c_cc[VKILL] ||
632 		    c == effective_termios.c_cc[VEOL] ||
633 		    c == effective_termios.c_cc[VSUSP] ||
634 		    c == effective_termios.c_cc[VINTR]);
635 	}
636 	return (0);
637 }
638 
639 /*
640  * This is the main I/O loop, and is shared across all zlogin modes.
641  * Parameters:
642  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
643  *	           the zone will be written here.
644  *
645  *	stdout_fd: The fd representing 'stdout' for the slave side; output
646  *	           from the zone will arrive here.
647  *
648  *	stderr_fd: The fd representing 'stderr' for the slave side; output
649  *	           from the zone will arrive here.
650  *
651  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
652  *	           be performed on the input coming from STDIN.
653  *
654  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
655  * mode supplies a stderr).
656  *
657  */
658 static void
659 doio(int stdin_fd, int stdout_fd, int stderr_fd, boolean_t raw_mode)
660 {
661 	struct pollfd pollfds[3];
662 	char ibuf[ZLOGIN_BUFSIZ];
663 	int cc, ret;
664 
665 	/* read from stdout of zone and write to stdout of global zone */
666 	pollfds[0].fd = stdout_fd;
667 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
668 
669 	/* read from stderr of zone and write to stderr of global zone */
670 	pollfds[1].fd = stderr_fd;
671 	pollfds[1].events = pollfds[0].events;
672 
673 	/* read from stdin of global zone and write to stdin of zone */
674 	pollfds[2].fd = STDIN_FILENO;
675 	pollfds[2].events = pollfds[0].events;
676 
677 	for (;;) {
678 		pollfds[0].revents = pollfds[1].revents =
679 		    pollfds[2].revents = 0;
680 
681 		if (dead)
682 			break;
683 
684 		ret = poll(pollfds,
685 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
686 		if (ret == -1 && errno != EINTR) {
687 			perror("poll failed");
688 			break;
689 		}
690 
691 		if (errno == EINTR && dead) {
692 			break;
693 		}
694 
695 		/* event from master side stdout */
696 		if (pollfds[0].revents) {
697 			if (pollfds[0].revents &
698 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
699 				cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ);
700 				if (cc == -1 && (errno != EINTR || dead))
701 					break;
702 				if (cc == 0)	/* EOF */
703 					break;
704 				(void) write(STDOUT_FILENO, ibuf, cc);
705 			} else {
706 				pollerr = pollfds[0].revents;
707 				break;
708 			}
709 		}
710 
711 		/* event from master side stderr */
712 		if (pollfds[1].revents) {
713 			if (pollfds[1].revents &
714 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
715 				cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ);
716 				if (cc == -1 && (errno != EINTR || dead))
717 					break;
718 				if (cc == 0)	/* EOF */
719 					break;
720 				(void) write(STDERR_FILENO, ibuf, cc);
721 			} else {
722 				pollerr = pollfds[1].revents;
723 				break;
724 			}
725 		}
726 
727 		/* event from user STDIN side */
728 		if (pollfds[2].revents) {
729 			if (pollfds[2].revents &
730 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
731 				cc = read(STDIN_FILENO, ibuf, ZLOGIN_BUFSIZ);
732 				if (cc == -1 && (errno != EINTR || dead))
733 					break;
734 
735 				/*
736 				 * stdin fd is stdin of the target; so,
737 				 * the thing we'll write the user data *to*.
738 				 *
739 				 * Also, unlike on the output side, we
740 				 * propagate zero-length messages to the
741 				 * other side.
742 				 */
743 				if (raw_mode == B_TRUE) {
744 					if (write(stdin_fd, ibuf, cc) == -1)
745 						break;
746 				} else {
747 					if (process_user_input(stdin_fd,
748 					    stdout_fd, ibuf, cc) == -1)
749 						break;
750 				}
751 			} else if (raw_mode == B_TRUE &&
752 			    pollfds[2].revents & POLLHUP) {
753 				/*
754 				 * It's OK to get a POLLHUP on STDIN-- it
755 				 * always happens if you do:
756 				 *
757 				 * echo foo | zlogin <zone> <command>
758 				 *
759 				 * We reset fd to -1 in this case to clear
760 				 * the condition and write an EOF to the
761 				 * other side in order to wrap things up.
762 				 */
763 				pollfds[2].fd = -1;
764 				(void) write(stdin_fd, ibuf, 0);
765 			} else {
766 				pollerr = pollfds[2].revents;
767 				break;
768 			}
769 		}
770 	}
771 
772 	/*
773 	 * We are in the midst of dying, but try to poll with a short
774 	 * timeout to see if we can catch the last bit of I/O from the
775 	 * children.
776 	 */
777 	pollfds[0].revents = pollfds[1].revents = pollfds[2].revents = 0;
778 	(void) poll(pollfds,
779 	    sizeof (pollfds) / sizeof (struct pollfd), 100);
780 	if (pollfds[0].revents &
781 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
782 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0)
783 			(void) write(STDOUT_FILENO, ibuf, cc);
784 	}
785 	if (pollfds[1].revents &
786 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
787 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0)
788 			(void) write(STDERR_FILENO, ibuf, cc);
789 	}
790 }
791 
792 static char **
793 zone_login_cmd(brand_handle_t bh, const char *login)
794 {
795 	static char result_buf[ARG_MAX];
796 	char **new_argv, *ptr, *lasts;
797 	int n, a;
798 
799 	/* Get the login command for the target zone. */
800 	bzero(result_buf, sizeof (result_buf));
801 	if (brand_get_login_cmd(bh, login,
802 	    result_buf, sizeof (result_buf)) != 0)
803 		return (NULL);
804 
805 	/*
806 	 * We got back a string that we'd like to execute.  But since
807 	 * we're not doing the execution via a shell we'll need to convert
808 	 * the exec string to an array of strings.  We'll do that here
809 	 * but we're going to be very simplistic about it and break stuff
810 	 * up based on spaces.  We're not even going to support any kind
811 	 * of quoting or escape characters.  It's truly amazing that
812 	 * there is no library function in OpenSolaris to do this for us.
813 	 */
814 
815 	/*
816 	 * Be paranoid.  Since we're deliniating based on spaces make
817 	 * sure there are no adjacent spaces.
818 	 */
819 	if (strstr(result_buf, "  ") != NULL)
820 		return (NULL);
821 
822 	/* Remove any trailing whitespace.  */
823 	n = strlen(result_buf);
824 	if (result_buf[n - 1] == ' ')
825 		result_buf[n - 1] = '\0';
826 
827 	/* Count how many elements there are in the exec string. */
828 	ptr = result_buf;
829 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
830 		;
831 
832 	/* Allocate the argv array that we're going to return. */
833 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
834 		return (NULL);
835 
836 	/* Tokenize the exec string and return. */
837 	a = 0;
838 	new_argv[a++] = result_buf;
839 	if (n > 2) {
840 		(void) strtok_r(result_buf, " ", &lasts);
841 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
842 			;
843 	} else {
844 		new_argv[a++] = NULL;
845 	}
846 	assert(n == a);
847 	return (new_argv);
848 }
849 
850 /*
851  * Prepare argv array for exec'd process; if we're passing commands to the
852  * new process, then use su(1M) to do the invocation.  Otherwise, use
853  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
854  * login that we're coming from another zone, and to disregard its CONSOLE
855  * checks).
856  */
857 static char **
858 prep_args(brand_handle_t bh, const char *login, char **argv)
859 {
860 	int argc = 0, a = 0, i, n = -1;
861 	char **new_argv;
862 
863 	if (argv != NULL) {
864 		size_t subshell_len = 1;
865 		char *subshell;
866 
867 		while (argv[argc] != NULL)
868 			argc++;
869 
870 		for (i = 0; i < argc; i++) {
871 			subshell_len += strlen(argv[i]) + 1;
872 		}
873 		if ((subshell = calloc(1, subshell_len)) == NULL)
874 			return (NULL);
875 
876 		for (i = 0; i < argc; i++) {
877 			(void) strcat(subshell, argv[i]);
878 			(void) strcat(subshell, " ");
879 		}
880 
881 		if (failsafe) {
882 			n = 4;
883 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
884 				return (NULL);
885 
886 			new_argv[a++] = FAILSAFESHELL;
887 		} else {
888 			n = 5;
889 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
890 				return (NULL);
891 
892 			new_argv[a++] = SUPATH;
893 			new_argv[a++] = (char *)login;
894 		}
895 		new_argv[a++] = "-c";
896 		new_argv[a++] = subshell;
897 		new_argv[a++] = NULL;
898 		assert(a == n);
899 	} else {
900 		if (failsafe) {
901 			n = 2;
902 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
903 				return (NULL);
904 			new_argv[a++] = FAILSAFESHELL;
905 			new_argv[a++] = NULL;
906 			assert(n == a);
907 		} else {
908 			new_argv = zone_login_cmd(bh, login);
909 		}
910 	}
911 
912 	return (new_argv);
913 }
914 
/*
 * Helper routine for building environment arrays: returns a newly allocated
 * "name=value" string, or NULL if memory cannot be allocated.  The caller
 * owns the returned string.
 */
static char *
add_env(char *name, char *value)
{
	const size_t name_len = strlen(name);
	const size_t value_len = strlen(value);
	char *entry;

	/* name, '=', value, NUL */
	entry = malloc(name_len + value_len + 2);
	if (entry == NULL)
		return (NULL);

	(void) memcpy(entry, name, name_len);
	entry[name_len] = '=';
	/* Copy the value together with its terminating NUL. */
	(void) memcpy(entry + name_len + 1, value, value_len + 1);
	return (entry);
}
930 
931 /*
932  * Prepare envp array for exec'd process.
933  */
934 static char **
935 prep_env()
936 {
937 	int e = 0, size = 1;
938 	char **new_env, *estr;
939 	char *term = getenv("TERM");
940 
941 	size++;	/* for $PATH */
942 	if (term != NULL)
943 		size++;
944 
945 	/*
946 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
947 	 * We also set $SHELL, since neither login nor su will be around to do
948 	 * it.
949 	 */
950 	if (failsafe)
951 		size += 2;
952 
953 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
954 		return (NULL);
955 
956 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
957 		return (NULL);
958 	new_env[e++] = estr;
959 
960 	if (term != NULL) {
961 		if ((estr = add_env("TERM", term)) == NULL)
962 			return (NULL);
963 		new_env[e++] = estr;
964 	}
965 
966 	if (failsafe) {
967 		if ((estr = add_env("HOME", "/")) == NULL)
968 			return (NULL);
969 		new_env[e++] = estr;
970 
971 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
972 			return (NULL);
973 		new_env[e++] = estr;
974 	}
975 
976 	new_env[e++] = NULL;
977 
978 	assert(e == size);
979 
980 	return (new_env);
981 }
982 
983 /*
984  * Finish the preparation of the envp array for exec'd non-interactive
985  * zlogins.  This is called in the child process *after* we zone_enter(), since
986  * it derives things we can only know within the zone, such as $HOME, $SHELL,
987  * etc.  We need only do this in the non-interactive, mode, since otherwise
988  * login(1) will do it.  We don't do this in failsafe mode, since it presents
989  * additional ways in which the command could fail, and we'd prefer to avoid
990  * that.
991  */
992 static char **
993 prep_env_noninteractive(char *login, char **env)
994 {
995 	size_t size;
996 	struct passwd *pw;
997 	char **new_env;
998 	int e, i;
999 	char *estr;
1000 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1001 
1002 	assert(env != NULL);
1003 	assert(failsafe == 0);
1004 
1005 	/*
1006 	 * Get existing envp size.
1007 	 */
1008 	for (size = 0; env[size] != NULL; size++)
1009 		;
1010 	e = size;
1011 
1012 	/*
1013 	 * Finish filling out the environment; we duplicate the environment
1014 	 * setup described in login(1), for lack of a better precedent.
1015 	 */
1016 	if ((pw = getpwnam(login)) != NULL) {
1017 		size += 3;	/* LOGNAME, HOME, MAIL */
1018 	}
1019 	size++;	/* always fill in SHELL */
1020 	size++; /* terminating NULL */
1021 
1022 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1023 		goto malloc_fail;
1024 
1025 	/*
1026 	 * Copy existing elements of env into new_env.
1027 	 */
1028 	for (i = 0; env[i] != NULL; i++) {
1029 		if ((new_env[i] = strdup(env[i])) == NULL)
1030 			goto malloc_fail;
1031 	}
1032 	assert(e == i);
1033 
1034 	if (pw != NULL) {
1035 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1036 			goto malloc_fail;
1037 		new_env[e++] = estr;
1038 
1039 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1040 			goto malloc_fail;
1041 		new_env[e++] = estr;
1042 
1043 		if (chdir(pw->pw_dir) != 0)
1044 			zerror(gettext("Could not chdir to home directory "
1045 			    "%s: %s"), pw->pw_dir, strerror(errno));
1046 
1047 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1048 		    pw->pw_name);
1049 		if ((estr = add_env("MAIL", varmail)) == NULL)
1050 			goto malloc_fail;
1051 		new_env[e++] = estr;
1052 	}
1053 
1054 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1055 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1056 			goto malloc_fail;
1057 		new_env[e++] = estr;
1058 	} else {
1059 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1060 			goto malloc_fail;
1061 		new_env[e++] = estr;
1062 	}
1063 
1064 	new_env[e++] = NULL;	/* add terminating NULL */
1065 
1066 	assert(e == size);
1067 	return (new_env);
1068 
1069 malloc_fail:
1070 	zperror(gettext("failed to allocate memory for process environment"));
1071 	return (NULL);
1072 }
1073 
/*
 * Descriptor-walk callback (handed to fdwalk() by main()): closes fd unless
 * it is the descriptor whose number is stored in the int that slavefd points
 * to.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep = *(int *)slavefd;

	/* The descriptor we were asked to preserve is left untouched. */
	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
1081 
1082 static void
1083 set_cmdchar(char *cmdcharstr)
1084 {
1085 	char c;
1086 	long lc;
1087 
1088 	if ((c = *cmdcharstr) != '\\') {
1089 		cmdchar = c;
1090 		return;
1091 	}
1092 
1093 	c = cmdcharstr[1];
1094 	if (c == '\0' || c == '\\') {
1095 		cmdchar = '\\';
1096 		return;
1097 	}
1098 
1099 	if (c < '0' || c > '7') {
1100 		zerror(gettext("Unrecognized escape character option %s"),
1101 		    cmdcharstr);
1102 		usage();
1103 	}
1104 
1105 	lc = strtol(cmdcharstr + 1, NULL, 8);
1106 	if (lc < 0 || lc > 255) {
1107 		zerror(gettext("Octal escape character '%s' too large"),
1108 		    cmdcharstr);
1109 		usage();
1110 	}
1111 	cmdchar = (char)lc;
1112 }
1113 
1114 static int
1115 setup_utmpx(char *slavename)
1116 {
1117 	struct utmpx ut;
1118 
1119 	bzero(&ut, sizeof (ut));
1120 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1121 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1122 	ut.ut_pid = getpid();
1123 	ut.ut_id[0] = 'z';
1124 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1125 	ut.ut_type = LOGIN_PROCESS;
1126 	(void) time(&ut.ut_tv.tv_sec);
1127 
1128 	if (makeutx(&ut) == NULL) {
1129 		zerror(gettext("makeutx failed"));
1130 		return (-1);
1131 	}
1132 	return (0);
1133 }
1134 
/*
 * Counterpart to grab_lock_file(): closes the lock file descriptor.  The
 * result of close() is deliberately ignored.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1140 
1141 static int
1142 grab_lock_file(const char *zone_name, int *lockfd)
1143 {
1144 	char pathbuf[PATH_MAX];
1145 	struct flock flock;
1146 
1147 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1148 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1149 		    strerror(errno));
1150 		return (-1);
1151 	}
1152 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1153 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1154 	    ZONES_TMPDIR, zone_name);
1155 
1156 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1157 		zerror(gettext("could not open %s: %s"), pathbuf,
1158 		    strerror(errno));
1159 		return (-1);
1160 	}
1161 	/*
1162 	 * Lock the file to synchronize with other zoneadmds
1163 	 */
1164 	flock.l_type = F_WRLCK;
1165 	flock.l_whence = SEEK_SET;
1166 	flock.l_start = (off_t)0;
1167 	flock.l_len = (off_t)0;
1168 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1169 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1170 		    strerror(errno));
1171 		release_lock_file(*lockfd);
1172 		return (-1);
1173 	}
1174 	return (Z_OK);
1175 }
1176 
1177 static int
1178 start_zoneadmd(const char *zone_name)
1179 {
1180 	pid_t retval;
1181 	int pstatus = 0, error = -1, lockfd, doorfd;
1182 	struct door_info info;
1183 	char doorpath[MAXPATHLEN];
1184 
1185 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1186 
1187 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1188 		return (-1);
1189 	/*
1190 	 * We must do the door check with the lock held.  Otherwise, we
1191 	 * might race against another zoneadm/zlogin process and wind
1192 	 * up with two processes trying to start zoneadmd at the same
1193 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1194 	 * to be as seamless as is practical, from a user perspective.
1195 	 */
1196 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1197 		if (errno != ENOENT) {
1198 			zerror("failed to open %s: %s", doorpath,
1199 			    strerror(errno));
1200 			goto out;
1201 		}
1202 	} else {
1203 		/*
1204 		 * Seems to be working ok.
1205 		 */
1206 		if (door_info(doorfd, &info) == 0 &&
1207 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1208 			error = 0;
1209 			goto out;
1210 		}
1211 	}
1212 
1213 	if ((child_pid = fork()) == -1) {
1214 		zperror(gettext("could not fork"));
1215 		goto out;
1216 	} else if (child_pid == 0) {
1217 		/* child process */
1218 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1219 		    zone_name, NULL);
1220 		zperror(gettext("could not exec zoneadmd"));
1221 		_exit(1);
1222 	}
1223 
1224 	/* parent process */
1225 	do {
1226 		retval = waitpid(child_pid, &pstatus, 0);
1227 	} while (retval != child_pid);
1228 	if (WIFSIGNALED(pstatus) ||
1229 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1230 		zerror(gettext("could not start %s"), "zoneadmd");
1231 		goto out;
1232 	}
1233 	error = 0;
1234 out:
1235 	release_lock_file(lockfd);
1236 	(void) close(doorfd);
1237 	return (error);
1238 }
1239 
1240 static int
1241 init_template(void)
1242 {
1243 	int fd;
1244 	int err = 0;
1245 
1246 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1247 	if (fd == -1)
1248 		return (-1);
1249 
1250 	/*
1251 	 * zlogin doesn't do anything with the contract.
1252 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1253 	 */
1254 	err |= ct_tmpl_set_critical(fd, 0);
1255 	err |= ct_tmpl_set_informative(fd, 0);
1256 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1257 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1258 	if (err || ct_tmpl_activate(fd)) {
1259 		(void) close(fd);
1260 		return (-1);
1261 	}
1262 
1263 	return (fd);
1264 }
1265 
/*
 * Run a command in the zone without allocating a pty.
 *
 * Three pipes are created to carry the child's stdin, stdout and stderr.
 * The child hooks one end of each pipe up as descriptors 0, 1 and 2, enters
 * the zone identified by zoneid, and execve()s new_args[0] with new_args.
 * Unless failsafe is set, the child first replaces new_env with the result
 * of prep_env_noninteractive(login, new_env).  The parent relays data via
 * doio() and then reaps the child.
 *
 * zonename is used only in error messages.
 *
 * Returns 1 if any setup step up to and including fork() fails; otherwise
 * returns WEXITSTATUS() of the child's wait status (the status is treated
 * as 0 if waitpid() itself fails).
 */
static int
noninteractive_login(char *zonename, zoneid_t zoneid, char *login,
    char **new_args, char **new_env)
{
	pid_t retval;
	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2];
	int child_status;
	int tmpl_fd;
	sigset_t block_cld;

	if ((tmpl_fd = init_template()) == -1) {
		reset_tty();
		zperror(gettext("could not create contract"));
		return (1);
	}

	if (pipe(stdin_pipe) != 0) {
		zperror(gettext("could not create STDIN pipe"));
		return (1);
	}
	/*
	 * When the user types ^D, we get a zero length message on STDIN.
	 * We need to echo that down the pipe to send it to the other side;
	 * but by default, pipes don't propagate zero-length messages.  We
	 * change that default by setting SNDZERO with I_SWROPT.  See
	 * streamio(7i).
	 */
	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
		zperror(gettext("could not configure STDIN pipe"));
		return (1);

	}
	if (pipe(stdout_pipe) != 0) {
		zperror(gettext("could not create STDOUT pipe"));
		return (1);
	}
	if (pipe(stderr_pipe) != 0) {
		zperror(gettext("could not create STDERR pipe"));
		return (1);
	}

	/*
	 * If any of the pipe FD's winds up being less than STDERR, then we
	 * have a mess on our hands-- and we are lacking some of the I/O
	 * streams we would expect anyway.  So we bail.
	 */
	if (stdin_pipe[0] <= STDERR_FILENO ||
	    stdin_pipe[1] <= STDERR_FILENO ||
	    stdout_pipe[0] <= STDERR_FILENO ||
	    stdout_pipe[1] <= STDERR_FILENO ||
	    stderr_pipe[0] <= STDERR_FILENO ||
	    stderr_pipe[1] <= STDERR_FILENO) {
		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
		return (1);
	}

	if (prefork_dropprivs() != 0) {
		zperror(gettext("could not allocate privilege set"));
		return (1);
	}

	/*
	 * Install the SIGCLD handler, but keep the signal blocked until
	 * after fork() has returned; it is unblocked again in both the
	 * child and the parent below.
	 */
	(void) sigset(SIGCLD, sigcld);
	(void) sigemptyset(&block_cld);
	(void) sigaddset(&block_cld, SIGCLD);
	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);

	if ((child_pid = fork()) == -1) {
		(void) ct_tmpl_clear(tmpl_fd);
		(void) close(tmpl_fd);
		zperror(gettext("could not fork"));
		return (1);
	} else if (child_pid == 0) { /* child process */
		(void) ct_tmpl_clear(tmpl_fd);

		/*
		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
		 * The child takes index [1] of each pipe; the parent uses
		 * index [0] in its doio() call.  closefrom() then discards
		 * every other descriptor, including tmpl_fd.
		 */
		(void) close(STDIN_FILENO);
		(void) close(STDOUT_FILENO);
		(void) close(STDERR_FILENO);
		(void) dup2(stdin_pipe[1], STDIN_FILENO);
		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
		(void) dup2(stderr_pipe[1], STDERR_FILENO);
		(void) closefrom(STDERR_FILENO + 1);

		(void) sigset(SIGCLD, SIG_DFL);
		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
		/*
		 * In case any of stdin, stdout or stderr are streams,
		 * anchor them to prevent malicious I_POPs.
		 */
		(void) ioctl(STDIN_FILENO, I_ANCHOR);
		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
		(void) ioctl(STDERR_FILENO, I_ANCHOR);

		if (zone_enter(zoneid) == -1) {
			zerror(gettext("could not enter zone %s: %s"),
			    zonename, strerror(errno));
			_exit(1);
		}

		/*
		 * The environment is completed only after zone_enter(), and
		 * is left as passed in when running in failsafe mode.
		 */
		if (!failsafe)
			new_env = prep_env_noninteractive(login, new_env);

		if (new_env == NULL) {
			_exit(1);
		}

		/*
		 * Move into a new process group; the zone_enter will have
		 * placed us into zsched's session, and we want to be in
		 * a unique process group.
		 */
		(void) setpgid(getpid(), getpid());

		(void) execve(new_args[0], new_args, new_env);
		zperror(gettext("exec failure"));
		_exit(1);
	}
	/* parent */
	(void) sigset(SIGINT, sig_forward);

	postfork_dropprivs();

	/* The template is no longer needed once the child exists. */
	(void) ct_tmpl_clear(tmpl_fd);
	(void) close(tmpl_fd);

	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
	doio(stdin_pipe[0], stdout_pipe[0], stderr_pipe[0], B_TRUE);
	/*
	 * Reap the child.  A failed waitpid() zeroes the status; the loop
	 * ends once the child has been collected or errno is ECHILD.
	 */
	do {
		retval = waitpid(child_pid, &child_status, 0);
		if (retval == -1) {
			child_status = 0;
		}
	} while (retval != child_pid && errno != ECHILD);

	return (WEXITSTATUS(child_status));
}
1403 
1404 int
1405 main(int argc, char **argv)
1406 {
1407 	int arg, console = 0;
1408 	zoneid_t zoneid;
1409 	zone_state_t st;
1410 	char *login = "root";
1411 	int lflag = 0;
1412 	char *zonename = NULL;
1413 	char **proc_args = NULL;
1414 	char **new_args, **new_env;
1415 	sigset_t block_cld;
1416 	char devroot[MAXPATHLEN];
1417 	char *slavename, slaveshortname[MAXPATHLEN];
1418 	priv_set_t *privset;
1419 	int tmpl_fd;
1420 	char zonebrand[MAXNAMELEN];
1421 	struct stat sb;
1422 	char kernzone[ZONENAME_MAX];
1423 	brand_handle_t bh;
1424 
1425 	(void) setlocale(LC_ALL, "");
1426 	(void) textdomain(TEXT_DOMAIN);
1427 
1428 	(void) getpname(argv[0]);
1429 
1430 	while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
1431 		switch (arg) {
1432 		case 'C':
1433 			console = 1;
1434 			break;
1435 		case 'E':
1436 			nocmdchar = 1;
1437 			break;
1438 		case 'R':	/* undocumented */
1439 			if (*optarg != '/') {
1440 				zerror(gettext("root path must be absolute."));
1441 				exit(2);
1442 			}
1443 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1444 				zerror(
1445 				    gettext("root path must be a directory."));
1446 				exit(2);
1447 			}
1448 			zonecfg_set_root(optarg);
1449 			break;
1450 		case 'S':
1451 			failsafe = 1;
1452 			break;
1453 		case 'e':
1454 			set_cmdchar(optarg);
1455 			break;
1456 		case 'l':
1457 			login = optarg;
1458 			lflag = 1;
1459 			break;
1460 		default:
1461 			usage();
1462 		}
1463 	}
1464 
1465 	if (console != 0 && lflag != 0) {
1466 		zerror(gettext("-l may not be specified for console login"));
1467 		usage();
1468 	}
1469 
1470 	if (console != 0 && failsafe != 0) {
1471 		zerror(gettext("-S may not be specified for console login"));
1472 		usage();
1473 	}
1474 
1475 	if (console != 0 && zonecfg_in_alt_root()) {
1476 		zerror(gettext("-R may not be specified for console login"));
1477 		exit(2);
1478 	}
1479 
1480 	if (failsafe != 0 && lflag != 0) {
1481 		zerror(gettext("-l may not be specified for failsafe login"));
1482 		usage();
1483 	}
1484 
1485 	if (optind == (argc - 1)) {
1486 		/*
1487 		 * zone name, no process name; this should be an interactive
1488 		 * as long as STDIN is really a tty.
1489 		 */
1490 		if (isatty(STDIN_FILENO))
1491 			interactive = 1;
1492 		zonename = argv[optind];
1493 	} else if (optind < (argc - 1)) {
1494 		if (console) {
1495 			zerror(gettext("Commands may not be specified for "
1496 			    "console login."));
1497 			usage();
1498 		}
1499 		/* zone name and process name, and possibly some args */
1500 		zonename = argv[optind];
1501 		proc_args = &argv[optind + 1];
1502 		interactive = 0;
1503 	} else {
1504 		usage();
1505 	}
1506 
1507 	if (getzoneid() != GLOBAL_ZONEID) {
1508 		zerror(gettext("'%s' may only be used from the global zone"),
1509 		    pname);
1510 		return (1);
1511 	}
1512 
1513 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1514 		zerror(gettext("'%s' not applicable to the global zone"),
1515 		    pname);
1516 		return (1);
1517 	}
1518 
1519 	if (zone_get_state(zonename, &st) != Z_OK) {
1520 		zerror(gettext("zone '%s' unknown"), zonename);
1521 		return (1);
1522 	}
1523 
1524 	if (st < ZONE_STATE_INSTALLED) {
1525 		zerror(gettext("cannot login to a zone which is '%s'"),
1526 		    zone_state_str(st));
1527 		return (1);
1528 	}
1529 
1530 	/*
1531 	 * In both console and non-console cases, we require all privs.
1532 	 * In the console case, because we may need to startup zoneadmd.
1533 	 * In the non-console case in order to do zone_enter(2), zonept()
1534 	 * and other tasks.
1535 	 *
1536 	 * Future work: this solution is temporary.  Ultimately, we need to
1537 	 * move to a flexible system which allows the global admin to
1538 	 * designate that a particular user can zlogin (and probably zlogin
1539 	 * -C) to a particular zone.  This all-root business we have now is
1540 	 * quite sketchy.
1541 	 */
1542 	if ((privset = priv_allocset()) == NULL) {
1543 		zperror(gettext("priv_allocset failed"));
1544 		return (1);
1545 	}
1546 
1547 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1548 		zperror(gettext("getppriv failed"));
1549 		priv_freeset(privset);
1550 		return (1);
1551 	}
1552 
1553 	if (priv_isfullset(privset) == B_FALSE) {
1554 		zerror(gettext("You lack sufficient privilege to run "
1555 		    "this command (all privs required)"));
1556 		priv_freeset(privset);
1557 		return (1);
1558 	}
1559 	priv_freeset(privset);
1560 
1561 	/*
1562 	 * The console is a separate case from the rest of the code; handle
1563 	 * it first.
1564 	 */
1565 	if (console) {
1566 
1567 		/*
1568 		 * Ensure that zoneadmd for this zone is running.
1569 		 */
1570 		if (start_zoneadmd(zonename) == -1)
1571 			return (1);
1572 
1573 		/*
1574 		 * Make contact with zoneadmd.
1575 		 */
1576 		if (get_console_master(zonename) == -1)
1577 			return (1);
1578 
1579 		(void) printf(gettext("[Connected to zone '%s' console]\n"),
1580 		    zonename);
1581 
1582 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1583 			reset_tty();
1584 			zperror(gettext("failed to set stdin pty to raw mode"));
1585 			return (1);
1586 		}
1587 
1588 		(void) sigset(SIGWINCH, sigwinch);
1589 		(void) sigwinch(0);
1590 
1591 		/*
1592 		 * Run the I/O loop until we get disconnected.
1593 		 */
1594 		doio(masterfd, masterfd, -1, B_FALSE);
1595 		reset_tty();
1596 		(void) printf(gettext("\n[Connection to zone '%s' console "
1597 		    "closed]\n"), zonename);
1598 
1599 		return (0);
1600 	}
1601 
1602 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1603 		zerror(gettext("login allowed only to running zones "
1604 		    "(%s is '%s')."), zonename, zone_state_str(st));
1605 		return (1);
1606 	}
1607 
1608 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1609 	if (zonecfg_in_alt_root()) {
1610 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
1611 
1612 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1613 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1614 			zerror(gettext("cannot find scratch zone %s"),
1615 			    zonename);
1616 			if (fp != NULL)
1617 				zonecfg_close_scratch(fp);
1618 			return (1);
1619 		}
1620 		zonecfg_close_scratch(fp);
1621 	}
1622 
1623 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1624 		zerror(gettext("failed to get zoneid for zone '%s'"),
1625 		    zonename);
1626 		return (1);
1627 	}
1628 
1629 	/*
1630 	 * We need the zone root path only if we are setting up a pty.
1631 	 */
1632 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1633 		zerror(gettext("could not get dev path for zone %s"),
1634 		    zonename);
1635 		return (1);
1636 	}
1637 
1638 	/* Get a handle to the brand info for this zone */
1639 	if ((zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) ||
1640 	    ((bh = brand_open(zonebrand)) == NULL)) {
1641 		zerror(gettext("could not get brand for zone %s"), zonename);
1642 		return (1);
1643 	}
1644 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
1645 		zperror(gettext("could not assemble new arguments"));
1646 		brand_close(bh);
1647 		return (1);
1648 	}
1649 	brand_close(bh);
1650 
1651 	if ((new_env = prep_env()) == NULL) {
1652 		zperror(gettext("could not assemble new environment"));
1653 		return (1);
1654 	}
1655 
1656 	if (!interactive)
1657 		return (noninteractive_login(zonename, zoneid, login, new_args,
1658 		    new_env));
1659 
1660 	if (zonecfg_in_alt_root()) {
1661 		zerror(gettext("cannot use interactive login with scratch "
1662 		    "zone"));
1663 		return (1);
1664 	}
1665 
1666 	/*
1667 	 * Things are more complex in interactive mode; we get the
1668 	 * master side of the pty, then place the user's terminal into
1669 	 * raw mode.
1670 	 */
1671 	if (get_master_pty() == -1) {
1672 		zerror(gettext("could not setup master pty device"));
1673 		return (1);
1674 	}
1675 
1676 	/*
1677 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
1678 	 */
1679 	if ((slavename = ptsname(masterfd)) == NULL) {
1680 		zperror(gettext("failed to get name for pseudo-tty"));
1681 		return (1);
1682 	}
1683 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
1684 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
1685 		    sizeof (slaveshortname));
1686 	else
1687 		(void) strlcpy(slaveshortname, slavename,
1688 		    sizeof (slaveshortname));
1689 
1690 	(void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
1691 	    slaveshortname);
1692 
1693 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
1694 		reset_tty();
1695 		zperror(gettext("failed to set stdin pty to raw mode"));
1696 		return (1);
1697 	}
1698 
1699 	if (prefork_dropprivs() != 0) {
1700 		reset_tty();
1701 		zperror(gettext("could not allocate privilege set"));
1702 		return (1);
1703 	}
1704 
1705 	/*
1706 	 * We must mask SIGCLD until after we have coped with the fork
1707 	 * sufficiently to deal with it; otherwise we can race and receive the
1708 	 * signal before child_pid has been initialized (yes, this really
1709 	 * happens).
1710 	 */
1711 	(void) sigset(SIGCLD, sigcld);
1712 	(void) sigemptyset(&block_cld);
1713 	(void) sigaddset(&block_cld, SIGCLD);
1714 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1715 
1716 	/*
1717 	 * We activate the contract template at the last minute to
1718 	 * avoid intermediate functions that could be using fork(2)
1719 	 * internally.
1720 	 */
1721 	if ((tmpl_fd = init_template()) == -1) {
1722 		reset_tty();
1723 		zperror(gettext("could not create contract"));
1724 		return (1);
1725 	}
1726 
1727 	if ((child_pid = fork()) == -1) {
1728 		(void) ct_tmpl_clear(tmpl_fd);
1729 		reset_tty();
1730 		zperror(gettext("could not fork"));
1731 		return (1);
1732 	} else if (child_pid == 0) { /* child process */
1733 		int slavefd, newslave;
1734 
1735 		(void) ct_tmpl_clear(tmpl_fd);
1736 		(void) close(tmpl_fd);
1737 
1738 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1739 
1740 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
1741 			return (1);
1742 
1743 		/*
1744 		 * Close all fds except for the slave pty.
1745 		 */
1746 		(void) fdwalk(close_func, &slavefd);
1747 
1748 		/*
1749 		 * Temporarily dup slavefd to stderr; that way if we have
1750 		 * to print out that zone_enter failed, the output will
1751 		 * have somewhere to go.
1752 		 */
1753 		if (slavefd != STDERR_FILENO)
1754 			(void) dup2(slavefd, STDERR_FILENO);
1755 
1756 		if (zone_enter(zoneid) == -1) {
1757 			zerror(gettext("could not enter zone %s: %s"),
1758 			    zonename, strerror(errno));
1759 			return (1);
1760 		}
1761 
1762 		if (slavefd != STDERR_FILENO)
1763 			(void) close(STDERR_FILENO);
1764 
1765 		/*
1766 		 * We take pains to get this process into a new process
1767 		 * group, and subsequently a new session.  In this way,
1768 		 * we'll have a session which doesn't yet have a controlling
1769 		 * terminal.  When we open the slave, it will become the
1770 		 * controlling terminal; no PIDs concerning pgrps or sids
1771 		 * will leak inappropriately into the zone.
1772 		 */
1773 		(void) setpgrp();
1774 
1775 		/*
1776 		 * We need the slave pty to be referenced from the zone's
1777 		 * /dev in order to ensure that the devt's, etc are all
1778 		 * correct.  Otherwise we break ttyname and the like.
1779 		 */
1780 		if ((newslave = open(slavename, O_RDWR)) == -1) {
1781 			(void) close(slavefd);
1782 			return (1);
1783 		}
1784 		(void) close(slavefd);
1785 		slavefd = newslave;
1786 
1787 		/*
1788 		 * dup the slave to the various FDs, so that when the
1789 		 * spawned process does a write/read it maps to the slave
1790 		 * pty.
1791 		 */
1792 		(void) dup2(slavefd, STDIN_FILENO);
1793 		(void) dup2(slavefd, STDOUT_FILENO);
1794 		(void) dup2(slavefd, STDERR_FILENO);
1795 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
1796 		    slavefd != STDERR_FILENO) {
1797 			(void) close(slavefd);
1798 		}
1799 
1800 		/*
1801 		 * In failsafe mode, we don't use login(1), so don't try
1802 		 * setting up a utmpx entry.
1803 		 *
1804 		 * A branded zone may have very different utmpx semantics.
1805 		 * At the moment, we only have two brand types:
1806 		 * Solaris-like (native, sn1) and Linux.  In the Solaris
1807 		 * case, we know exactly how to do the necessary utmpx
1808 		 * setup.  Fortunately for us, the Linux /bin/login is
1809 		 * prepared to deal with a non-initialized utmpx entry, so
1810 		 * we can simply skip it.  If future brands don't fall into
1811 		 * either category, we'll have to add a per-brand utmpx
1812 		 * setup hook.
1813 		 */
1814 		if (!failsafe && (strcmp(zonebrand, "lx") != 0))
1815 			if (setup_utmpx(slaveshortname) == -1)
1816 				return (1);
1817 
1818 		(void) execve(new_args[0], new_args, new_env);
1819 		zperror(gettext("exec failure"));
1820 		return (1);
1821 	}
1822 	(void) ct_tmpl_clear(tmpl_fd);
1823 	(void) close(tmpl_fd);
1824 
1825 	/*
1826 	 * The rest is only for the parent process.
1827 	 */
1828 	(void) sigset(SIGWINCH, sigwinch);
1829 
1830 	postfork_dropprivs();
1831 
1832 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1833 	doio(masterfd, masterfd, -1, B_FALSE);
1834 
1835 	reset_tty();
1836 	(void) fprintf(stderr,
1837 	    gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
1838 	    slaveshortname);
1839 
1840 	if (pollerr != 0) {
1841 		(void) fprintf(stderr, gettext("Error: connection closed due "
1842 		    "to unexpected pollevents=0x%x.\n"), pollerr);
1843 		return (1);
1844 	}
1845 
1846 	return (0);
1847 }
1848