xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision facf4a8d7b59fde89a8662b4f4c73a758e6c402c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * zlogin provides three types of login which allow users in the global
30  * zone to access non-global zones.
31  *
32  * - "interactive login" is similar to rlogin(1); for example, the user could
33  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
34  *   granted a new pty (which is then shoved into the zone), and an I/O
35  *   loop between parent and child processes takes care of the interactive
36  *   session.  In this mode, login(1) (and its -c option, which means
37  *   "already authenticated") is employed to take care of the initialization
38  *   of the user's session.
39  *
40  * - "non-interactive login" is similar to su(1M); the user could issue
41  *   'zlogin my-zone ls -l' and the command would be run as specified.
42  *   In this mode, zlogin sets up pipes as the communication channel, and
43  *   'su' is used to do the login setup work.
44  *
45  * - "console login" is the equivalent to accessing the tip line for a
46  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
47  *   In this mode, zlogin contacts the zoneadmd process via unix domain
48  *   socket.  If zoneadmd is not running, it starts it.  This allows the
49  *   console to be available anytime the zone is installed, regardless of
50  *   whether it is running.
51  */
52 
53 #include <sys/socket.h>
54 #include <sys/termios.h>
55 #include <sys/utsname.h>
56 #include <sys/stat.h>
57 #include <sys/types.h>
58 #include <sys/contract/process.h>
59 #include <sys/ctfs.h>
60 
61 #include <alloca.h>
62 #include <assert.h>
63 #include <ctype.h>
64 #include <door.h>
65 #include <errno.h>
66 #include <poll.h>
67 #include <priv.h>
68 #include <pwd.h>
69 #include <unistd.h>
70 #include <utmpx.h>
71 #include <sac.h>
72 #include <signal.h>
73 #include <stdarg.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <strings.h>
78 #include <stropts.h>
79 #include <wait.h>
80 #include <zone.h>
81 #include <fcntl.h>
82 #include <libdevinfo.h>
83 #include <libintl.h>
84 #include <locale.h>
85 #include <libzonecfg.h>
86 #include <libcontract.h>
87 
/*
 * File-scope state shared by the login modes below.
 */

/* Master side of the session: the pty master or the console socket fd. */
static int masterfd;
/* Attributes of the user's terminal, kept so reset_tty() can restore them. */
static struct termios save_termios;
/* Raw-mode attributes with VEOF/VEOL patched back in; see set_tty_rawmode(). */
static struct termios effective_termios;
/* Descriptor whose attributes reset_tty() restores. */
static int save_fd;
/* Window size read from stdin and propagated to the slave pty. */
static struct winsize winsize;
/* Set by sigcld() once the tracked child has exited; polled by doio(). */
static volatile int dead;
/* Pid of the child being tracked by the signal handlers; -1 when none. */
static volatile pid_t child_pid = -1;
/* When zero, prefork_dropprivs() retains the proc_* privileges. */
static int interactive = 0;
/* Privilege set built by prefork_dropprivs(), applied after fork(). */
static priv_set_t *dropprivs;

/* Nonzero disables escape-character processing in process_user_input(). */
static int nocmdchar = 0;
/* Nonzero selects failsafe mode (FAILSAFESHELL instead of login/su). */
static int failsafe = 0;
/* Escape character recognized at the beginning of a line; see set_cmdchar(). */
static char cmdchar = '~';

/* Unexpected poll() revents recorded by doio() when it bails out. */
static int pollerr = 0;

/* Program name used as the prefix for diagnostics; set by getpname(). */
static const char *pname;

#if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
#endif

/* Paths of the programs exec'd for the session, and the default $PATH. */
#define	SUPATH	"/usr/bin/su"
#define	FAILSAFESHELL	"/sbin/sh"
#define	DEFAULTSHELL	"/sbin/sh"
#define	LOGINPATH	"/usr/bin/login"
#define	DEF_PATH	"/usr/sbin:/usr/bin"

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define	CANONIFY_LEN 5
121 
122 static void
123 usage(void)
124 {
125 	(void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
126 	    "[-l user] zonename [command [args ...] ]\n"), pname);
127 	exit(2);
128 }
129 
130 static const char *
131 getpname(const char *arg0)
132 {
133 	const char *p = strrchr(arg0, '/');
134 
135 	if (p == NULL)
136 		p = arg0;
137 	else
138 		p++;
139 
140 	pname = p;
141 	return (p);
142 }
143 
144 static void
145 zerror(const char *fmt, ...)
146 {
147 	va_list alist;
148 
149 	(void) fprintf(stderr, "%s: ", pname);
150 	va_start(alist, fmt);
151 	(void) vfprintf(stderr, fmt, alist);
152 	va_end(alist);
153 	(void) fprintf(stderr, "\n");
154 }
155 
156 static void
157 zperror(const char *str)
158 {
159 	const char *estr;
160 
161 	if ((estr = strerror(errno)) != NULL)
162 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
163 	else
164 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
165 }
166 
167 /*
168  * The first part of our privilege dropping scheme needs to be called before
169  * fork(), since we must have it for security; we don't want to be surprised
170  * later that we couldn't allocate the privset.
171  */
172 static int
173 prefork_dropprivs()
174 {
175 	if ((dropprivs = priv_allocset()) == NULL)
176 		return (1);
177 	priv_emptyset(dropprivs);
178 
179 	/*
180 	 * We need these privileges in order to query session information and
181 	 * send signals.
182 	 */
183 	if (interactive == 0) {
184 		if (priv_addset(dropprivs, "proc_session") == -1)
185 			return (1);
186 		if (priv_addset(dropprivs, "proc_zone") == -1)
187 			return (1);
188 		if (priv_addset(dropprivs, "proc_owner") == -1)
189 			return (1);
190 	}
191 
192 	return (0);
193 }
194 
195 /*
196  * The second part of the privilege drop.  We are paranoid about being attacked
197  * by the zone, so we drop all privileges.  This should prevent a compromise
198  * which gets us to fork(), exec(), symlink(), etc.
199  */
200 static void
201 postfork_dropprivs()
202 {
203 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
204 		zperror(gettext("Warning: could not set permitted privileges"));
205 	}
206 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
207 		zperror(gettext("Warning: could not set limit privileges"));
208 	}
209 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
210 		zperror(gettext("Warning: could not set inheritable "
211 		    "privileges"));
212 	}
213 }
214 
215 /*
216  * Create the unix domain socket and call the zoneadmd server; handshake
217  * with it to determine whether it will allow us to connect.
218  */
219 static int
220 get_console_master(const char *zname)
221 {
222 	int sockfd = -1;
223 	struct sockaddr_un servaddr;
224 	char clientid[MAXPATHLEN];
225 	char handshake[MAXPATHLEN], c;
226 	int msglen;
227 	int i = 0, err = 0;
228 
229 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
230 		zperror(gettext("could not create socket"));
231 		return (-1);
232 	}
233 
234 	bzero(&servaddr, sizeof (servaddr));
235 	servaddr.sun_family = AF_UNIX;
236 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
237 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
238 
239 	if (connect(sockfd, (struct sockaddr *)&servaddr,
240 	    sizeof (servaddr)) == -1) {
241 		zperror(gettext("Could not connect to zone console"));
242 		goto bad;
243 	}
244 	masterfd = sockfd;
245 
246 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
247 	    getpid(), setlocale(LC_MESSAGES, NULL));
248 
249 	if (msglen >= sizeof (clientid) || msglen < 0) {
250 		zerror("protocol error");
251 		goto bad;
252 	}
253 
254 	if (write(masterfd, clientid, msglen) != msglen) {
255 		zerror("protocol error");
256 		goto bad;
257 	}
258 
259 	bzero(handshake, sizeof (handshake));
260 
261 	/*
262 	 * Take care not to accumulate more than our fill, and leave room for
263 	 * the NUL at the end.
264 	 */
265 	while ((err = read(masterfd, &c, 1)) == 1) {
266 		if (i >= (sizeof (handshake) - 1))
267 			break;
268 		if (c == '\n')
269 			break;
270 		handshake[i] = c;
271 		i++;
272 	}
273 
274 	/*
275 	 * If something went wrong during the handshake we bail; perhaps
276 	 * the server died off.
277 	 */
278 	if (err == -1) {
279 		zperror(gettext("Could not connect to zone console"));
280 		goto bad;
281 	}
282 
283 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
284 		return (0);
285 
286 	zerror(gettext("Console is already in use by process ID %s."),
287 	    handshake);
288 bad:
289 	(void) close(sockfd);
290 	masterfd = -1;
291 	return (-1);
292 }
293 
294 
295 /*
296  * Routines to handle pty creation upon zone entry and to shuttle I/O back
297  * and forth between the two terminals.  We also compute and store the
298  * name of the slave terminal associated with the master side.
299  */
300 static int
301 get_master_pty()
302 {
303 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
304 		zperror(gettext("failed to obtain a pseudo-tty"));
305 		return (-1);
306 	}
307 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
308 		zperror(gettext("failed to get terminal settings from stdin"));
309 		return (-1);
310 	}
311 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
312 
313 	return (0);
314 }
315 
316 /*
317  * This is a bit tricky; normally a pts device will belong to the zone it
318  * is granted to.  But in the case of "entering" a zone, we need to establish
319  * the pty before entering the zone so that we can vector I/O to and from it
320  * from the global zone.
321  *
322  * We use the zonept() call to let the ptm driver know what we are up to;
323  * the only other hairy bit is the setting of zoneslavename (which happens
324  * above, in get_master_pty()).
325  */
326 static int
327 init_slave_pty(zoneid_t zoneid, char *devroot)
328 {
329 	int slavefd = -1;
330 	char *slavename, zoneslavename[MAXPATHLEN];
331 
332 	/*
333 	 * Set slave permissions, zone the pts, then unlock it.
334 	 */
335 	if (grantpt(masterfd) != 0) {
336 		zperror(gettext("grantpt failed"));
337 		return (-1);
338 	}
339 
340 	if (unlockpt(masterfd) != 0) {
341 		zperror(gettext("unlockpt failed"));
342 		return (-1);
343 	}
344 
345 	/*
346 	 * We must open the slave side before zoning this pty; otherwise
347 	 * the kernel would refuse us the open-- zoning a pty makes it
348 	 * inaccessible to the global zone.  Note we are trying to open
349 	 * the device node via the $ZONEROOT/dev path for this pty.
350 	 *
351 	 * Later we'll close the slave out when once we've opened it again
352 	 * from within the target zone.  Blarg.
353 	 */
354 	if ((slavename = ptsname(masterfd)) == NULL) {
355 		zperror(gettext("failed to get name for pseudo-tty"));
356 		return (-1);
357 	}
358 
359 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
360 	    devroot, slavename);
361 
362 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
363 		zerror(gettext("failed to open %s: %s"), zoneslavename,
364 		    strerror(errno));
365 		return (-1);
366 	}
367 
368 	/*
369 	 * Push hardware emulation (ptem), line discipline (ldterm),
370 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
371 	 */
372 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
373 		zperror(gettext("failed to push ptem module"));
374 		if (!failsafe)
375 			goto bad;
376 	}
377 
378 	/*
379 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
380 	 * this prior to entering the zone so that we can detect any errors
381 	 * early, and so that we can set the anchor from the global zone.
382 	 */
383 	if (ioctl(slavefd, I_ANCHOR) == -1) {
384 		zperror(gettext("failed to set stream anchor"));
385 		if (!failsafe)
386 			goto bad;
387 	}
388 
389 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
390 		zperror(gettext("failed to push ldterm module"));
391 		if (!failsafe)
392 			goto bad;
393 	}
394 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
395 		zperror(gettext("failed to push ttcompat module"));
396 		if (!failsafe)
397 			goto bad;
398 	}
399 
400 	/*
401 	 * Propagate terminal settings from the external term to the new one.
402 	 */
403 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
404 		zperror(gettext("failed to set terminal settings"));
405 		if (!failsafe)
406 			goto bad;
407 	}
408 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
409 
410 	if (zonept(masterfd, zoneid) != 0) {
411 		zperror(gettext("could not set zoneid of pty"));
412 		goto bad;
413 	}
414 
415 	return (slavefd);
416 
417 bad:
418 	(void) close(slavefd);
419 	return (-1);
420 }
421 
422 /*
423  * Place terminal into raw mode.
424  */
425 static int
426 set_tty_rawmode(int fd)
427 {
428 	struct termios term;
429 	if (tcgetattr(fd, &term) < 0) {
430 		zperror(gettext("failed to get user terminal settings"));
431 		return (-1);
432 	}
433 
434 	/* Stash for later, so we can revert back to previous mode */
435 	save_termios = term;
436 	save_fd = fd;
437 
438 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
439 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
440 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
441 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
442 	/* disable output post-processing */
443 	term.c_oflag &= ~OPOST;
444 	/* disable canonical mode, signal chars, echo & extended functions */
445 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
446 
447 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
448 	term.c_cc[VTIME] = 0;
449 
450 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
451 		zperror(gettext("failed to set user terminal to raw mode"));
452 		return (-1);
453 	}
454 
455 	/*
456 	 * We need to know the value of VEOF so that we can properly process for
457 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
458 	 * because VMIN overloads the same array slot in non-canonical mode.
459 	 * Stupid @&^%!
460 	 *
461 	 * So here we construct the "effective" termios from the current
462 	 * terminal settings, and the corrected VEOF and VEOL settings.
463 	 */
464 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
465 		zperror(gettext("failed to get user terminal settings"));
466 		return (-1);
467 	}
468 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
469 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
470 
471 	return (0);
472 }
473 
474 /*
475  * Copy terminal window size from our terminal to the pts.
476  */
477 /*ARGSUSED*/
478 static void
479 sigwinch(int s)
480 {
481 	struct winsize ws;
482 
483 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
484 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
485 }
486 
487 static void
488 /*ARGSUSED*/
489 sigcld(int s)
490 {
491 	int status;
492 	pid_t pid;
493 
494 	/*
495 	 * Peek at the exit status.  If this isn't the process we cared
496 	 * about, then just reap it.
497 	 */
498 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
499 		if (pid == child_pid &&
500 		    (WIFEXITED(status) || WIFSIGNALED(status)))
501 			dead = 1;
502 		else
503 			(void) waitpid(pid, &status, WNOHANG);
504 	}
505 }
506 
507 /*
508  * Some signals (currently, SIGINT) must be forwarded on to the process
509  * group of the child process.
510  */
511 static void
512 sig_forward(int s)
513 {
514 	if (child_pid != -1) {
515 		pid_t pgid = getpgid(child_pid);
516 		if (pgid != -1)
517 			(void) sigsend(P_PGID, pgid, s);
518 	}
519 }
520 
521 /*
522  * reset terminal settings for global environment
523  */
524 static void
525 reset_tty()
526 {
527 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
528 }
529 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *	printable characters	-> the character itself
 *	0 through 31		-> caret notation, ^@ through ^_
 *	everything else		-> backslash and three octal digits
 *
 * 'cc' must have room for at least CANONIFY_LEN bytes; the result is always
 * NUL-terminated.
 *
 * The character is examined as an unsigned char.  Where plain char is
 * signed, bytes with the high bit set are negative; passing those to
 * isprint() is undefined, and shifting them right would smear the sign bit
 * into the leading octal digit.
 */
static void
canonify(char c, char *cc)
{
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
552 
553 /*
554  * process_user_input watches the input stream for the escape sequence for
555  * 'quit' (by default, tilde-period).  Because we might be fed just one
556  * keystroke at a time, state associated with the user input (are we at the
557  * beginning of the line?  are we locally echoing the next character?) is
558  * maintained by beginning_of_line and local_echo across calls to the routine.
559  *
560  * This routine returns -1 when the 'quit' escape sequence has been issued,
561  * and 0 otherwise.
562  */
563 static int
564 process_user_input(int outfd, char *buf, size_t nbytes)
565 {
566 	static boolean_t beginning_of_line = B_TRUE;
567 	static boolean_t local_echo = B_FALSE;
568 
569 	char c = *buf;
570 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
571 		buf++;
572 		if (beginning_of_line && !nocmdchar) {
573 			beginning_of_line = B_FALSE;
574 			if (c == cmdchar) {
575 				local_echo = B_TRUE;
576 				continue;
577 			}
578 		} else if (local_echo) {
579 			local_echo = B_FALSE;
580 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
581 				char cc[CANONIFY_LEN];
582 				canonify(c, cc);
583 				(void) write(STDOUT_FILENO, &cmdchar, 1);
584 				(void) write(STDOUT_FILENO, cc, strlen(cc));
585 				return (-1);
586 			}
587 		}
588 		if (write(outfd, &c, 1) <= 0)
589 			return (-1);
590 		beginning_of_line = (c == '\r' || c == '\n' ||
591 		    c == effective_termios.c_cc[VKILL] ||
592 		    c == effective_termios.c_cc[VEOL] ||
593 		    c == effective_termios.c_cc[VSUSP] ||
594 		    c == effective_termios.c_cc[VINTR]);
595 	}
596 	return (0);
597 }
598 
599 /*
600  * This is the main I/O loop, and is shared across all zlogin modes.
601  * Parameters:
602  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
603  *	           the zone will be written here.
604  *
605  *	stdout_fd: The fd representing 'stdout' for the slave side; output
606  *	           from the zone will arrive here.
607  *
608  *	stderr_fd: The fd representing 'stderr' for the slave side; output
609  *	           from the zone will arrive here.
610  *
611  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
612  *	           be performed on the input coming from STDIN.
613  *
614  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
615  * mode supplies a stderr).
616  *
617  */
618 static void
619 doio(int stdin_fd, int stdout_fd, int stderr_fd, boolean_t raw_mode)
620 {
621 	struct pollfd pollfds[3];
622 	char ibuf[BUFSIZ];
623 	int cc, ret;
624 
625 	/* read from stdout of zone and write to stdout of global zone */
626 	pollfds[0].fd = stdout_fd;
627 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
628 
629 	/* read from stderr of zone and write to stderr of global zone */
630 	pollfds[1].fd = stderr_fd;
631 	pollfds[1].events = pollfds[0].events;
632 
633 	/* read from stdin of global zone and write to stdin of zone */
634 	pollfds[2].fd = STDIN_FILENO;
635 	pollfds[2].events = pollfds[0].events;
636 
637 	for (;;) {
638 		pollfds[0].revents = pollfds[1].revents =
639 		    pollfds[2].revents = 0;
640 
641 		if (dead)
642 			break;
643 
644 		ret = poll(pollfds,
645 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
646 		if (ret == -1 && errno != EINTR) {
647 			perror("poll failed");
648 			break;
649 		}
650 
651 		if (errno == EINTR && dead) {
652 			break;
653 		}
654 
655 		/* event from master side stdout */
656 		if (pollfds[0].revents) {
657 			if (pollfds[0].revents &
658 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
659 				cc = read(stdout_fd, ibuf, BUFSIZ);
660 				if (cc == -1 && (errno != EINTR || dead))
661 					break;
662 				if (cc == 0)	/* EOF */
663 					break;
664 				(void) write(STDOUT_FILENO, ibuf, cc);
665 			} else {
666 				pollerr = pollfds[0].revents;
667 				break;
668 			}
669 		}
670 
671 		/* event from master side stderr */
672 		if (pollfds[1].revents) {
673 			if (pollfds[1].revents &
674 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
675 				cc = read(stderr_fd, ibuf, BUFSIZ);
676 				if (cc == -1 && (errno != EINTR || dead))
677 					break;
678 				if (cc == 0)	/* EOF */
679 					break;
680 				(void) write(STDERR_FILENO, ibuf, cc);
681 			} else {
682 				pollerr = pollfds[1].revents;
683 				break;
684 			}
685 		}
686 
687 		/* event from user STDIN side */
688 		if (pollfds[2].revents) {
689 			if (pollfds[2].revents &
690 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
691 				cc = read(STDIN_FILENO, ibuf, BUFSIZ);
692 				if (cc == -1 && (errno != EINTR || dead))
693 					break;
694 
695 				/*
696 				 * stdin fd is stdin of the target; so,
697 				 * the thing we'll write the user data *to*.
698 				 *
699 				 * Also, unlike on the output side, we
700 				 * propagate zero-length messages to the
701 				 * other side.
702 				 */
703 				if (raw_mode == B_TRUE) {
704 					if (write(stdin_fd, ibuf, cc) == -1)
705 						break;
706 				} else {
707 					if (process_user_input(stdin_fd, ibuf,
708 					    cc) == -1)
709 						break;
710 				}
711 			} else if (raw_mode == B_TRUE &&
712 			    pollfds[2].revents & POLLHUP) {
713 				/*
714 				 * It's OK to get a POLLHUP on STDIN-- it
715 				 * always happens if you do:
716 				 *
717 				 * echo foo | zlogin <zone> <command>
718 				 *
719 				 * We reset fd to -1 in this case to clear
720 				 * the condition and write an EOF to the
721 				 * other side in order to wrap things up.
722 				 */
723 				pollfds[2].fd = -1;
724 				(void) write(stdin_fd, ibuf, 0);
725 			} else {
726 				pollerr = pollfds[2].revents;
727 				break;
728 			}
729 		}
730 	}
731 
732 	/*
733 	 * We are in the midst of dying, but try to poll with a short
734 	 * timeout to see if we can catch the last bit of I/O from the
735 	 * children.
736 	 */
737 	pollfds[0].revents = pollfds[1].revents = pollfds[2].revents = 0;
738 	(void) poll(pollfds,
739 	    sizeof (pollfds) / sizeof (struct pollfd), 100);
740 	if (pollfds[0].revents &
741 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
742 		if ((cc = read(stdout_fd, ibuf, BUFSIZ)) > 0)
743 			(void) write(STDOUT_FILENO, ibuf, cc);
744 	}
745 	if (pollfds[1].revents &
746 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
747 		if ((cc = read(stderr_fd, ibuf, BUFSIZ)) > 0)
748 			(void) write(STDERR_FILENO, ibuf, cc);
749 	}
750 }
751 
752 /*
753  * Prepare argv array for exec'd process; if we're passing commands to the
754  * new process, then use su(1M) to do the invocation.  Otherwise, use
755  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
756  * login that we're coming from another zone, and to disregard its CONSOLE
757  * checks).
758  */
759 static char **
760 prep_args(char *login, char **argv)
761 {
762 	int argc = 0, a = 0, i, n = -1;
763 	char **new_argv;
764 
765 	if (argv != NULL) {
766 		size_t subshell_len = 1;
767 		char *subshell;
768 
769 		while (argv[argc] != NULL)
770 			argc++;
771 
772 		for (i = 0; i < argc; i++) {
773 			subshell_len += strlen(argv[i]) + 1;
774 		}
775 		if ((subshell = calloc(1, subshell_len)) == NULL)
776 			return (NULL);
777 
778 		for (i = 0; i < argc; i++) {
779 			(void) strcat(subshell, argv[i]);
780 			(void) strcat(subshell, " ");
781 		}
782 
783 		if (failsafe) {
784 			n = 4;
785 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
786 				return (NULL);
787 
788 			new_argv[a++] = FAILSAFESHELL;
789 		} else {
790 			n = 5;
791 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
792 				return (NULL);
793 
794 			new_argv[a++] = SUPATH;
795 			new_argv[a++] = login;
796 		}
797 		new_argv[a++] = "-c";
798 		new_argv[a++] = subshell;
799 		new_argv[a++] = NULL;
800 		assert(a == n);
801 	} else {
802 		if (failsafe) {
803 			n = 2;
804 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
805 				return (NULL);
806 			new_argv[a++] = FAILSAFESHELL;
807 			new_argv[a++] = NULL;
808 		} else {
809 			n = 6;
810 
811 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
812 				return (NULL);
813 
814 			new_argv[a++] = LOGINPATH;
815 			new_argv[a++] = "-z";
816 			new_argv[a++] = "global";	/* hardcode, for now */
817 			new_argv[a++] = "-f";
818 			new_argv[a++] = login;
819 			new_argv[a++] = NULL;
820 		}
821 	}
822 	/*
823 	 * If this assert ever trips, it's because we've botched the setup
824 	 * of ARGV above-- it's too large or too small.
825 	 */
826 	assert(n == a);
827 	return (new_argv);
828 }
829 
/*
 * Helper routine for the environment builders below: return a freshly
 * allocated "name=value" string, or NULL if memory is exhausted.  The caller
 * owns the result.
 */
static char *
add_env(char *name, char *value)
{
	size_t namelen = strlen(name);
	size_t vallen = strlen(value);
	char *entry;

	/* name, '=', value, NUL */
	entry = malloc(namelen + 1 + vallen + 1);
	if (entry != NULL) {
		(void) memcpy(entry, name, namelen);
		entry[namelen] = '=';
		/* the +1 carries value's terminating NUL along */
		(void) memcpy(entry + namelen + 1, value, vallen + 1);
	}
	return (entry);
}
845 
846 /*
847  * Prepare envp array for exec'd process.
848  */
849 static char **
850 prep_env()
851 {
852 	int e = 0, size = 1;
853 	char **new_env, *estr;
854 	char *term = getenv("TERM");
855 
856 	size++;	/* for $PATH */
857 	if (term != NULL)
858 		size++;
859 
860 	/*
861 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
862 	 * We also set $SHELL, since neither login nor su will be around to do
863 	 * it.
864 	 */
865 	if (failsafe)
866 		size += 2;
867 
868 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
869 		return (NULL);
870 
871 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
872 		return (NULL);
873 	new_env[e++] = estr;
874 
875 	if (term != NULL) {
876 		if ((estr = add_env("TERM", term)) == NULL)
877 			return (NULL);
878 		new_env[e++] = estr;
879 	}
880 
881 	if (failsafe) {
882 		if ((estr = add_env("HOME", "/")) == NULL)
883 			return (NULL);
884 		new_env[e++] = estr;
885 
886 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
887 			return (NULL);
888 		new_env[e++] = estr;
889 	}
890 
891 	new_env[e++] = NULL;
892 
893 	assert(e == size);
894 
895 	return (new_env);
896 }
897 
898 /*
899  * Finish the preparation of the envp array for exec'd non-interactive
900  * zlogins.  This is called in the child process *after* we zone_enter(), since
901  * it derives things we can only know within the zone, such as $HOME, $SHELL,
902  * etc.  We need only do this in the non-interactive, mode, since otherwise
903  * login(1) will do it.  We don't do this in failsafe mode, since it presents
904  * additional ways in which the command could fail, and we'd prefer to avoid
905  * that.
906  */
907 static char **
908 prep_env_noninteractive(char *login, char **env)
909 {
910 	size_t size;
911 	struct passwd *pw;
912 	char **new_env;
913 	int e, i;
914 	char *estr;
915 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
916 
917 	assert(env != NULL);
918 	assert(failsafe == 0);
919 
920 	/*
921 	 * Get existing envp size.
922 	 */
923 	for (size = 0; env[size] != NULL; size++)
924 		;
925 	e = size;
926 
927 	/*
928 	 * Finish filling out the environment; we duplicate the environment
929 	 * setup described in login(1), for lack of a better precedent.
930 	 */
931 	if ((pw = getpwnam(login)) != NULL) {
932 		size += 3;	/* LOGNAME, HOME, MAIL */
933 	}
934 	size++;	/* always fill in SHELL */
935 	size++; /* terminating NULL */
936 
937 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
938 		goto malloc_fail;
939 
940 	/*
941 	 * Copy existing elements of env into new_env.
942 	 */
943 	for (i = 0; env[i] != NULL; i++) {
944 		if ((new_env[i] = strdup(env[i])) == NULL)
945 			goto malloc_fail;
946 	}
947 	assert(e == i);
948 
949 	if (pw != NULL) {
950 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
951 			goto malloc_fail;
952 		new_env[e++] = estr;
953 
954 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
955 			goto malloc_fail;
956 		new_env[e++] = estr;
957 
958 		if (chdir(pw->pw_dir) != 0)
959 			zerror(gettext("Could not chdir to home directory "
960 			    "%s: %s"), pw->pw_dir, strerror(errno));
961 
962 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
963 		    pw->pw_name);
964 		if ((estr = add_env("MAIL", varmail)) == NULL)
965 			goto malloc_fail;
966 		new_env[e++] = estr;
967 	}
968 
969 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
970 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
971 			goto malloc_fail;
972 		new_env[e++] = estr;
973 	} else {
974 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
975 			goto malloc_fail;
976 		new_env[e++] = estr;
977 	}
978 
979 	new_env[e++] = NULL;	/* add terminating NULL */
980 
981 	assert(e == size);
982 	return (new_env);
983 
984 malloc_fail:
985 	zperror(gettext("failed to allocate memory for process environment"));
986 	return (NULL);
987 }
988 
/*
 * Descriptor-walk callback: close 'fd' unless it is the descriptor whose
 * number is stored in the int that 'slavefd' points to.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	int keep = *(int *)slavefd;

	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
996 
997 static void
998 set_cmdchar(char *cmdcharstr)
999 {
1000 	char c;
1001 	long lc;
1002 
1003 	if ((c = *cmdcharstr) != '\\') {
1004 		cmdchar = c;
1005 		return;
1006 	}
1007 
1008 	c = cmdcharstr[1];
1009 	if (c == '\0' || c == '\\') {
1010 		cmdchar = '\\';
1011 		return;
1012 	}
1013 
1014 	if (c < '0' || c > '7') {
1015 		zerror(gettext("Unrecognized escape character option %s"),
1016 		    cmdcharstr);
1017 		usage();
1018 	}
1019 
1020 	lc = strtol(cmdcharstr + 1, NULL, 8);
1021 	if (lc < 0 || lc > 255) {
1022 		zerror(gettext("Octal escape character '%s' too large"),
1023 		    cmdcharstr);
1024 		usage();
1025 	}
1026 	cmdchar = (char)lc;
1027 }
1028 
1029 static int
1030 setup_utmpx(char *slavename)
1031 {
1032 	struct utmpx ut;
1033 
1034 	bzero(&ut, sizeof (ut));
1035 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1036 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1037 	ut.ut_pid = getpid();
1038 	ut.ut_id[0] = 'z';
1039 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1040 	ut.ut_type = LOGIN_PROCESS;
1041 	(void) time(&ut.ut_tv.tv_sec);
1042 
1043 	if (makeutx(&ut) == NULL) {
1044 		zerror(gettext("makeutx failed"));
1045 		return (-1);
1046 	}
1047 	return (0);
1048 }
1049 
/*
 * Give up the lock file obtained from grab_lock_file() by closing its
 * descriptor.  Any error from close() is ignored.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1055 
1056 static int
1057 grab_lock_file(const char *zone_name, int *lockfd)
1058 {
1059 	char pathbuf[PATH_MAX];
1060 	struct flock flock;
1061 
1062 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1063 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1064 		    strerror(errno));
1065 		return (-1);
1066 	}
1067 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1068 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1069 	    ZONES_TMPDIR, zone_name);
1070 
1071 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1072 		zerror(gettext("could not open %s: %s"), pathbuf,
1073 		    strerror(errno));
1074 		return (-1);
1075 	}
1076 	/*
1077 	 * Lock the file to synchronize with other zoneadmds
1078 	 */
1079 	flock.l_type = F_WRLCK;
1080 	flock.l_whence = SEEK_SET;
1081 	flock.l_start = (off_t)0;
1082 	flock.l_len = (off_t)0;
1083 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1084 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1085 		    strerror(errno));
1086 		release_lock_file(*lockfd);
1087 		return (-1);
1088 	}
1089 	return (Z_OK);
1090 }
1091 
1092 static int
1093 start_zoneadmd(const char *zone_name)
1094 {
1095 	pid_t retval;
1096 	int pstatus = 0, error = -1, lockfd, doorfd;
1097 	struct door_info info;
1098 	char doorpath[MAXPATHLEN];
1099 
1100 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1101 
1102 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1103 		return (-1);
1104 	/*
1105 	 * We must do the door check with the lock held.  Otherwise, we
1106 	 * might race against another zoneadm/zlogin process and wind
1107 	 * up with two processes trying to start zoneadmd at the same
1108 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1109 	 * to be as seamless as is practical, from a user perspective.
1110 	 */
1111 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1112 		if (errno != ENOENT) {
1113 			zerror("failed to open %s: %s", doorpath,
1114 			    strerror(errno));
1115 			goto out;
1116 		}
1117 	} else {
1118 		/*
1119 		 * Seems to be working ok.
1120 		 */
1121 		if (door_info(doorfd, &info) == 0 &&
1122 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1123 			error = 0;
1124 			goto out;
1125 		}
1126 	}
1127 
1128 	if ((child_pid = fork()) == -1) {
1129 		zperror(gettext("could not fork"));
1130 		goto out;
1131 	} else if (child_pid == 0) {
1132 		/* child process */
1133 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1134 		    zone_name, NULL);
1135 		zperror(gettext("could not exec zoneadmd"));
1136 		_exit(1);
1137 	}
1138 
1139 	/* parent process */
1140 	do {
1141 		retval = waitpid(child_pid, &pstatus, 0);
1142 	} while (retval != child_pid);
1143 	if (WIFSIGNALED(pstatus) ||
1144 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1145 		zerror(gettext("could not start %s"), "zoneadmd");
1146 		goto out;
1147 	}
1148 	error = 0;
1149 out:
1150 	release_lock_file(lockfd);
1151 	(void) close(doorfd);
1152 	return (error);
1153 }
1154 
1155 static int
1156 init_template(void)
1157 {
1158 	int fd;
1159 	int err = 0;
1160 
1161 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1162 	if (fd == -1)
1163 		return (-1);
1164 
1165 	/*
1166 	 * zlogin doesn't do anything with the contract.
1167 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1168 	 */
1169 	err |= ct_tmpl_set_critical(fd, 0);
1170 	err |= ct_tmpl_set_informative(fd, 0);
1171 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1172 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1173 	if (err || ct_tmpl_activate(fd)) {
1174 		(void) close(fd);
1175 		return (-1);
1176 	}
1177 
1178 	return (fd);
1179 }
1180 
1181 static int
1182 noninteractive_login(char *zonename, zoneid_t zoneid, char *login,
1183     char **new_args, char **new_env)
1184 {
1185 	pid_t retval;
1186 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2];
1187 	int child_status;
1188 	int tmpl_fd;
1189 	sigset_t block_cld;
1190 
1191 	if ((tmpl_fd = init_template()) == -1) {
1192 		reset_tty();
1193 		zperror(gettext("could not create contract"));
1194 		return (1);
1195 	}
1196 
1197 	if (pipe(stdin_pipe) != 0) {
1198 		zperror(gettext("could not create STDIN pipe"));
1199 		return (1);
1200 	}
1201 	/*
1202 	 * When the user types ^D, we get a zero length message on STDIN.
1203 	 * We need to echo that down the pipe to send it to the other side;
1204 	 * but by default, pipes don't propagate zero-length messages.  We
1205 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1206 	 */
1207 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1208 		zperror(gettext("could not configure STDIN pipe"));
1209 		return (1);
1210 
1211 	}
1212 	if (pipe(stdout_pipe) != 0) {
1213 		zperror(gettext("could not create STDOUT pipe"));
1214 		return (1);
1215 	}
1216 	if (pipe(stderr_pipe) != 0) {
1217 		zperror(gettext("could not create STDERR pipe"));
1218 		return (1);
1219 	}
1220 
1221 	/*
1222 	 * If any of the pipe FD's winds up being less than STDERR, then we
1223 	 * have a mess on our hands-- and we are lacking some of the I/O
1224 	 * streams we would expect anyway.  So we bail.
1225 	 */
1226 	if (stdin_pipe[0] <= STDERR_FILENO ||
1227 	    stdin_pipe[1] <= STDERR_FILENO ||
1228 	    stdout_pipe[0] <= STDERR_FILENO ||
1229 	    stdout_pipe[1] <= STDERR_FILENO ||
1230 	    stderr_pipe[0] <= STDERR_FILENO ||
1231 	    stderr_pipe[1] <= STDERR_FILENO) {
1232 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1233 		return (1);
1234 	}
1235 
1236 	if (prefork_dropprivs() != 0) {
1237 		zperror(gettext("could not allocate privilege set"));
1238 		return (1);
1239 	}
1240 
1241 	(void) sigset(SIGCLD, sigcld);
1242 	(void) sigemptyset(&block_cld);
1243 	(void) sigaddset(&block_cld, SIGCLD);
1244 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1245 
1246 	if ((child_pid = fork()) == -1) {
1247 		(void) ct_tmpl_clear(tmpl_fd);
1248 		(void) close(tmpl_fd);
1249 		zperror(gettext("could not fork"));
1250 		return (1);
1251 	} else if (child_pid == 0) { /* child process */
1252 		(void) ct_tmpl_clear(tmpl_fd);
1253 
1254 		/*
1255 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1256 		 */
1257 		(void) close(STDIN_FILENO);
1258 		(void) close(STDOUT_FILENO);
1259 		(void) close(STDERR_FILENO);
1260 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1261 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1262 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1263 		(void) closefrom(STDERR_FILENO + 1);
1264 
1265 		(void) sigset(SIGCLD, SIG_DFL);
1266 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1267 		/*
1268 		 * In case any of stdin, stdout or stderr are streams,
1269 		 * anchor them to prevent malicious I_POPs.
1270 		 */
1271 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1272 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1273 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1274 
1275 		if (zone_enter(zoneid) == -1) {
1276 			zerror(gettext("could not enter zone %s: %s"),
1277 			    zonename, strerror(errno));
1278 			_exit(1);
1279 		}
1280 
1281 		if (!failsafe)
1282 			new_env = prep_env_noninteractive(login, new_env);
1283 
1284 		if (new_env == NULL) {
1285 			_exit(1);
1286 		}
1287 
1288 		/*
1289 		 * Move into a new process group; the zone_enter will have
1290 		 * placed us into zsched's session, and we want to be in
1291 		 * a unique process group.
1292 		 */
1293 		(void) setpgid(getpid(), getpid());
1294 
1295 		(void) execve(new_args[0], new_args, new_env);
1296 		zperror(gettext("exec failure"));
1297 		_exit(1);
1298 	}
1299 	/* parent */
1300 	(void) sigset(SIGINT, sig_forward);
1301 
1302 	postfork_dropprivs();
1303 
1304 	(void) ct_tmpl_clear(tmpl_fd);
1305 	(void) close(tmpl_fd);
1306 
1307 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1308 	doio(stdin_pipe[0], stdout_pipe[0], stderr_pipe[0], B_TRUE);
1309 	do {
1310 		retval = waitpid(child_pid, &child_status, 0);
1311 		if (retval == -1) {
1312 			child_status = 0;
1313 		}
1314 	} while (retval != child_pid && errno != ECHILD);
1315 
1316 	return (WEXITSTATUS(child_status));
1317 }
1318 
1319 int
1320 main(int argc, char **argv)
1321 {
1322 	int arg, console = 0;
1323 	zoneid_t zoneid;
1324 	zone_state_t st;
1325 	char *login = "root";
1326 	int lflag = 0;
1327 	char *zonename = NULL;
1328 	char **proc_args = NULL;
1329 	char **new_args, **new_env;
1330 	sigset_t block_cld;
1331 	char devroot[MAXPATHLEN];
1332 	char *slavename, slaveshortname[MAXPATHLEN];
1333 	priv_set_t *privset;
1334 	int tmpl_fd;
1335 	struct stat sb;
1336 	char kernzone[ZONENAME_MAX];
1337 
1338 	(void) setlocale(LC_ALL, "");
1339 	(void) textdomain(TEXT_DOMAIN);
1340 
1341 	(void) getpname(argv[0]);
1342 
1343 	while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
1344 		switch (arg) {
1345 		case 'C':
1346 			console = 1;
1347 			break;
1348 		case 'E':
1349 			nocmdchar = 1;
1350 			break;
1351 		case 'R':	/* undocumented */
1352 			if (*optarg != '/') {
1353 				zerror(gettext("root path must be absolute."));
1354 				exit(2);
1355 			}
1356 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1357 				zerror(
1358 				    gettext("root path must be a directory."));
1359 				exit(2);
1360 			}
1361 			zonecfg_set_root(optarg);
1362 			break;
1363 		case 'S':
1364 			failsafe = 1;
1365 			break;
1366 		case 'e':
1367 			set_cmdchar(optarg);
1368 			break;
1369 		case 'l':
1370 			login = optarg;
1371 			lflag = 1;
1372 			break;
1373 		default:
1374 			usage();
1375 		}
1376 	}
1377 
1378 	if (console != 0 && lflag != 0) {
1379 		zerror(gettext("-l may not be specified for console login"));
1380 		usage();
1381 	}
1382 
1383 	if (console != 0 && failsafe != 0) {
1384 		zerror(gettext("-S may not be specified for console login"));
1385 		usage();
1386 	}
1387 
1388 	if (console != 0 && zonecfg_in_alt_root()) {
1389 		zerror(gettext("-R may not be specified for console login"));
1390 		exit(2);
1391 	}
1392 
1393 	if (failsafe != 0 && lflag != 0) {
1394 		zerror(gettext("-l may not be specified for failsafe login"));
1395 		usage();
1396 	}
1397 
1398 	if (optind == (argc - 1)) {
1399 		/*
1400 		 * zone name, no process name; this should be an interactive
1401 		 * as long as STDIN is really a tty.
1402 		 */
1403 		if (isatty(STDIN_FILENO))
1404 			interactive = 1;
1405 		zonename = argv[optind];
1406 	} else if (optind < (argc - 1)) {
1407 		if (console) {
1408 			zerror(gettext("Commands may not be specified for "
1409 			    "console login."));
1410 			usage();
1411 		}
1412 		/* zone name and process name, and possibly some args */
1413 		zonename = argv[optind];
1414 		proc_args = &argv[optind + 1];
1415 		interactive = 0;
1416 	} else {
1417 		usage();
1418 	}
1419 
1420 	if (getzoneid() != GLOBAL_ZONEID) {
1421 		zerror(gettext("'%s' may only be used from the global zone"),
1422 		    pname);
1423 		return (1);
1424 	}
1425 
1426 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1427 		zerror(gettext("'%s' not applicable to the global zone"),
1428 		    pname);
1429 		return (1);
1430 	}
1431 
1432 	if (zone_get_state(zonename, &st) != Z_OK) {
1433 		zerror(gettext("zone '%s' unknown"), zonename);
1434 		return (1);
1435 	}
1436 
1437 	if (st < ZONE_STATE_INSTALLED) {
1438 		zerror(gettext("cannot login to a zone which is '%s'"),
1439 		    zone_state_str(st));
1440 		return (1);
1441 	}
1442 
1443 	/*
1444 	 * In both console and non-console cases, we require all privs.
1445 	 * In the console case, because we may need to startup zoneadmd.
1446 	 * In the non-console case in order to do zone_enter(2), zonept()
1447 	 * and other tasks.
1448 	 *
1449 	 * Future work: this solution is temporary.  Ultimately, we need to
1450 	 * move to a flexible system which allows the global admin to
1451 	 * designate that a particular user can zlogin (and probably zlogin
1452 	 * -C) to a particular zone.  This all-root business we have now is
1453 	 * quite sketchy.
1454 	 */
1455 	if ((privset = priv_allocset()) == NULL) {
1456 		zperror(gettext("priv_allocset failed"));
1457 		return (1);
1458 	}
1459 
1460 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1461 		zperror(gettext("getppriv failed"));
1462 		priv_freeset(privset);
1463 		return (1);
1464 	}
1465 
1466 	if (priv_isfullset(privset) == B_FALSE) {
1467 		zerror(gettext("You lack sufficient privilege to run "
1468 		    "this command (all privs required)"));
1469 		priv_freeset(privset);
1470 		return (1);
1471 	}
1472 	priv_freeset(privset);
1473 
1474 	/*
1475 	 * The console is a separate case from the rest of the code; handle
1476 	 * it first.
1477 	 */
1478 	if (console) {
1479 
1480 		/*
1481 		 * Ensure that zoneadmd for this zone is running.
1482 		 */
1483 		if (start_zoneadmd(zonename) == -1)
1484 			return (1);
1485 
1486 		/*
1487 		 * Make contact with zoneadmd.
1488 		 */
1489 		if (get_console_master(zonename) == -1)
1490 			return (1);
1491 
1492 		(void) printf(gettext("[Connected to zone '%s' console]\n"),
1493 		    zonename);
1494 
1495 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1496 			reset_tty();
1497 			zperror(gettext("failed to set stdin pty to raw mode"));
1498 			return (1);
1499 		}
1500 
1501 		(void) sigset(SIGWINCH, sigwinch);
1502 		(void) sigwinch(0);
1503 
1504 		/*
1505 		 * Run the I/O loop until we get disconnected.
1506 		 */
1507 		doio(masterfd, masterfd, -1, B_FALSE);
1508 		reset_tty();
1509 		(void) printf(gettext("\n[Connection to zone '%s' console "
1510 		    "closed]\n"), zonename);
1511 
1512 		return (0);
1513 	}
1514 
1515 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1516 		zerror(gettext("login allowed only to running zones "
1517 		    "(%s is '%s')."), zonename, zone_state_str(st));
1518 		return (1);
1519 	}
1520 
1521 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1522 	if (zonecfg_in_alt_root()) {
1523 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
1524 
1525 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1526 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1527 			zerror(gettext("cannot find scratch zone %s"),
1528 			    zonename);
1529 			if (fp != NULL)
1530 				zonecfg_close_scratch(fp);
1531 			return (1);
1532 		}
1533 		zonecfg_close_scratch(fp);
1534 	}
1535 
1536 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1537 		zerror(gettext("failed to get zoneid for zone '%s'"),
1538 		    zonename);
1539 		return (1);
1540 	}
1541 
1542 	/*
1543 	 * We need the zone root path only if we are setting up a pty.
1544 	 */
1545 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1546 		zerror(gettext("could not get dev path for zone %s"),
1547 		    zonename);
1548 		return (1);
1549 	}
1550 
1551 	if ((new_args = prep_args(login, proc_args)) == NULL) {
1552 		zperror(gettext("could not assemble new arguments"));
1553 		return (1);
1554 	}
1555 
1556 	if ((new_env = prep_env()) == NULL) {
1557 		zperror(gettext("could not assemble new environment"));
1558 		return (1);
1559 	}
1560 
1561 	if (!interactive)
1562 		return (noninteractive_login(zonename, zoneid, login, new_args,
1563 		    new_env));
1564 
1565 	if (zonecfg_in_alt_root()) {
1566 		zerror(gettext("cannot use interactive login with scratch "
1567 		    "zone"));
1568 		return (1);
1569 	}
1570 
1571 	/*
1572 	 * Things are more complex in interactive mode; we get the
1573 	 * master side of the pty, then place the user's terminal into
1574 	 * raw mode.
1575 	 */
1576 	if (get_master_pty() == -1) {
1577 		zerror(gettext("could not setup master pty device"));
1578 		return (1);
1579 	}
1580 
1581 	/*
1582 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
1583 	 */
1584 	if ((slavename = ptsname(masterfd)) == NULL) {
1585 		zperror(gettext("failed to get name for pseudo-tty"));
1586 		return (1);
1587 	}
1588 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
1589 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
1590 		    sizeof (slaveshortname));
1591 	else
1592 		(void) strlcpy(slaveshortname, slavename,
1593 		    sizeof (slaveshortname));
1594 
1595 	(void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
1596 	    slaveshortname);
1597 
1598 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
1599 		reset_tty();
1600 		zperror(gettext("failed to set stdin pty to raw mode"));
1601 		return (1);
1602 	}
1603 
1604 	if (prefork_dropprivs() != 0) {
1605 		reset_tty();
1606 		zperror(gettext("could not allocate privilege set"));
1607 		return (1);
1608 	}
1609 
1610 	/*
1611 	 * We must mask SIGCLD until after we have coped with the fork
1612 	 * sufficiently to deal with it; otherwise we can race and receive the
1613 	 * signal before child_pid has been initialized (yes, this really
1614 	 * happens).
1615 	 */
1616 	(void) sigset(SIGCLD, sigcld);
1617 	(void) sigemptyset(&block_cld);
1618 	(void) sigaddset(&block_cld, SIGCLD);
1619 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1620 
1621 	/*
1622 	 * We activate the contract template at the last minute to
1623 	 * avoid intermediate functions that could be using fork(2)
1624 	 * internally.
1625 	 */
1626 	if ((tmpl_fd = init_template()) == -1) {
1627 		reset_tty();
1628 		zperror(gettext("could not create contract"));
1629 		return (1);
1630 	}
1631 
1632 	if ((child_pid = fork()) == -1) {
1633 		(void) ct_tmpl_clear(tmpl_fd);
1634 		reset_tty();
1635 		zperror(gettext("could not fork"));
1636 		return (1);
1637 	} else if (child_pid == 0) { /* child process */
1638 		int slavefd, newslave;
1639 
1640 		(void) ct_tmpl_clear(tmpl_fd);
1641 		(void) close(tmpl_fd);
1642 
1643 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1644 
1645 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
1646 			return (1);
1647 
1648 		/*
1649 		 * Close all fds except for the slave pty.
1650 		 */
1651 		(void) fdwalk(close_func, &slavefd);
1652 
1653 		/*
1654 		 * Temporarily dup slavefd to stderr; that way if we have
1655 		 * to print out that zone_enter failed, the output will
1656 		 * have somewhere to go.
1657 		 */
1658 		if (slavefd != STDERR_FILENO)
1659 			(void) dup2(slavefd, STDERR_FILENO);
1660 
1661 		if (zone_enter(zoneid) == -1) {
1662 			zerror(gettext("could not enter zone %s: %s"),
1663 			    zonename, strerror(errno));
1664 			return (1);
1665 		}
1666 
1667 		if (slavefd != STDERR_FILENO)
1668 			(void) close(STDERR_FILENO);
1669 
1670 		/*
1671 		 * We take pains to get this process into a new process
1672 		 * group, and subsequently a new session.  In this way,
1673 		 * we'll have a session which doesn't yet have a controlling
1674 		 * terminal.  When we open the slave, it will become the
1675 		 * controlling terminal; no PIDs concerning pgrps or sids
1676 		 * will leak inappropriately into the zone.
1677 		 */
1678 		(void) setpgrp();
1679 
1680 		/*
1681 		 * We need the slave pty to be referenced from the zone's
1682 		 * /dev in order to ensure that the devt's, etc are all
1683 		 * correct.  Otherwise we break ttyname and the like.
1684 		 */
1685 		if ((newslave = open(slavename, O_RDWR)) == -1) {
1686 			(void) close(slavefd);
1687 			return (1);
1688 		}
1689 		(void) close(slavefd);
1690 		slavefd = newslave;
1691 
1692 		/*
1693 		 * dup the slave to the various FDs, so that when the
1694 		 * spawned process does a write/read it maps to the slave
1695 		 * pty.
1696 		 */
1697 		(void) dup2(slavefd, STDIN_FILENO);
1698 		(void) dup2(slavefd, STDOUT_FILENO);
1699 		(void) dup2(slavefd, STDERR_FILENO);
1700 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
1701 		    slavefd != STDERR_FILENO) {
1702 			(void) close(slavefd);
1703 		}
1704 
1705 		/*
1706 		 * In failsafe mode, we don't use login(1), so don't try
1707 		 * setting up a utmpx entry.
1708 		 */
1709 		if (!failsafe) {
1710 			if (setup_utmpx(slaveshortname) == -1)
1711 				return (1);
1712 		}
1713 
1714 		(void) execve(new_args[0], new_args, new_env);
1715 		zperror(gettext("exec failure"));
1716 		return (1);
1717 	}
1718 	(void) ct_tmpl_clear(tmpl_fd);
1719 	(void) close(tmpl_fd);
1720 
1721 	/*
1722 	 * The rest is only for the parent process.
1723 	 */
1724 	(void) sigset(SIGWINCH, sigwinch);
1725 
1726 	postfork_dropprivs();
1727 
1728 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1729 	doio(masterfd, masterfd, -1, B_FALSE);
1730 
1731 	reset_tty();
1732 	(void) fprintf(stderr,
1733 	    gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
1734 	    slaveshortname);
1735 
1736 	if (pollerr != 0) {
1737 		(void) fprintf(stderr, gettext("Error: connection closed due "
1738 		    "to unexpected pollevents=0x%x.\n"), pollerr);
1739 		return (1);
1740 	}
1741 
1742 	return (0);
1743 }
1744