xref: /titanic_41/usr/src/cmd/zlogin/zlogin.c (revision 60405de4d8688d96dd05157c28db3ade5c9bc234)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * zlogin provides three types of login which allow users in the global
30  * zone to access non-global zones.
31  *
32  * - "interactive login" is similar to rlogin(1); for example, the user could
33  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
34  *   granted a new pty (which is then shoved into the zone), and an I/O
35  *   loop between parent and child processes takes care of the interactive
36  *   session.  In this mode, login(1) (and its -c option, which means
37  *   "already authenticated") is employed to take care of the initialization
38  *   of the user's session.
39  *
40  * - "non-interactive login" is similar to su(1M); the user could issue
41  *   'zlogin my-zone ls -l' and the command would be run as specified.
42  *   In this mode, zlogin sets up pipes as the communication channel, and
43  *   'su' is used to do the login setup work.
44  *
45  * - "console login" is the equivalent to accessing the tip line for a
46  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
47  *   In this mode, zlogin contacts the zoneadmd process via unix domain
48  *   socket.  If zoneadmd is not running, it starts it.  This allows the
49  *   console to be available anytime the zone is installed, regardless of
50  *   whether it is running.
51  */
52 
53 #include <sys/socket.h>
54 #include <sys/termios.h>
55 #include <sys/utsname.h>
56 #include <sys/stat.h>
57 #include <sys/types.h>
58 #include <sys/contract/process.h>
59 #include <sys/ctfs.h>
60 #include <sys/brand.h>
61 
62 #include <alloca.h>
63 #include <assert.h>
64 #include <ctype.h>
65 #include <door.h>
66 #include <errno.h>
67 #include <poll.h>
68 #include <priv.h>
69 #include <pwd.h>
70 #include <unistd.h>
71 #include <utmpx.h>
72 #include <sac.h>
73 #include <signal.h>
74 #include <stdarg.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <strings.h>
79 #include <stropts.h>
80 #include <wait.h>
81 #include <zone.h>
82 #include <fcntl.h>
83 #include <libdevinfo.h>
84 #include <libintl.h>
85 #include <locale.h>
86 #include <libzonecfg.h>
87 #include <libcontract.h>
88 #include <libbrand.h>
89 
/* Master side of the session: the /dev/ptmx fd or the zone console socket. */
static int masterfd;
/* Terminal settings captured from the user's tty; restored by reset_tty(). */
static struct termios save_termios;
/* Raw-mode settings with the saved VEOF/VEOL put back (set_tty_rawmode()). */
static struct termios effective_termios;
/* Descriptor that reset_tty() restores save_termios to. */
static int save_fd;
/* Window size read from stdin; copied to the slave by init_slave_pty(). */
static struct winsize winsize;
/* Set by sigcld() once the watched child has exited or been killed. */
static volatile int dead;
/* Child watched by sigcld() and sig_forward(); -1 while there is none. */
static volatile pid_t child_pid = -1;
/* Consulted by prefork_dropprivs() when choosing the privileges to keep. */
static int interactive = 0;
/* Privilege set built before fork() and applied by postfork_dropprivs(). */
static priv_set_t *dropprivs;

/* Non-zero disables escape-sequence handling in process_user_input(). */
static int nocmdchar = 0;
/* Non-zero selects failsafe mode (FAILSAFESHELL rather than su/login). */
static int failsafe = 0;
/* Escape character that introduces a local command such as '~.'. */
static char cmdchar = '~';

/* poll(2) revents recorded by doio() when it stops on an unexpected event. */
static int pollerr = 0;

/* Program name used as the prefix of diagnostics; set by getpname(). */
static const char *pname;

#if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
#endif

/* Program placed in argv[0] by prep_args() to run a command as a user. */
#define	SUPATH	"/usr/bin/su"
/* Shell used for argv[0] and $SHELL in failsafe mode. */
#define	FAILSAFESHELL	"/sbin/sh"
/* $SHELL fallback used by prep_env_noninteractive(). */
#define	DEFAULTSHELL	"/sbin/sh"
/* $PATH handed to the new process by prep_env(). */
#define	DEF_PATH	"/usr/sbin:/usr/bin"

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define	CANONIFY_LEN 5
122 
123 static void
124 usage(void)
125 {
126 	(void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
127 	    "[-l user] zonename [command [args ...] ]\n"), pname);
128 	exit(2);
129 }
130 
131 static const char *
132 getpname(const char *arg0)
133 {
134 	const char *p = strrchr(arg0, '/');
135 
136 	if (p == NULL)
137 		p = arg0;
138 	else
139 		p++;
140 
141 	pname = p;
142 	return (p);
143 }
144 
145 static void
146 zerror(const char *fmt, ...)
147 {
148 	va_list alist;
149 
150 	(void) fprintf(stderr, "%s: ", pname);
151 	va_start(alist, fmt);
152 	(void) vfprintf(stderr, fmt, alist);
153 	va_end(alist);
154 	(void) fprintf(stderr, "\n");
155 }
156 
157 static void
158 zperror(const char *str)
159 {
160 	const char *estr;
161 
162 	if ((estr = strerror(errno)) != NULL)
163 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
164 	else
165 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
166 }
167 
168 /*
169  * The first part of our privilege dropping scheme needs to be called before
170  * fork(), since we must have it for security; we don't want to be surprised
171  * later that we couldn't allocate the privset.
172  */
173 static int
174 prefork_dropprivs()
175 {
176 	if ((dropprivs = priv_allocset()) == NULL)
177 		return (1);
178 	priv_emptyset(dropprivs);
179 
180 	/*
181 	 * We need these privileges in order to query session information and
182 	 * send signals.
183 	 */
184 	if (interactive == 0) {
185 		if (priv_addset(dropprivs, "proc_session") == -1)
186 			return (1);
187 		if (priv_addset(dropprivs, "proc_zone") == -1)
188 			return (1);
189 		if (priv_addset(dropprivs, "proc_owner") == -1)
190 			return (1);
191 	}
192 
193 	return (0);
194 }
195 
196 /*
197  * The second part of the privilege drop.  We are paranoid about being attacked
198  * by the zone, so we drop all privileges.  This should prevent a compromise
199  * which gets us to fork(), exec(), symlink(), etc.
200  */
201 static void
202 postfork_dropprivs()
203 {
204 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
205 		zperror(gettext("Warning: could not set permitted privileges"));
206 	}
207 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
208 		zperror(gettext("Warning: could not set limit privileges"));
209 	}
210 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
211 		zperror(gettext("Warning: could not set inheritable "
212 		    "privileges"));
213 	}
214 }
215 
216 /*
217  * Create the unix domain socket and call the zoneadmd server; handshake
218  * with it to determine whether it will allow us to connect.
219  */
220 static int
221 get_console_master(const char *zname)
222 {
223 	int sockfd = -1;
224 	struct sockaddr_un servaddr;
225 	char clientid[MAXPATHLEN];
226 	char handshake[MAXPATHLEN], c;
227 	int msglen;
228 	int i = 0, err = 0;
229 
230 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
231 		zperror(gettext("could not create socket"));
232 		return (-1);
233 	}
234 
235 	bzero(&servaddr, sizeof (servaddr));
236 	servaddr.sun_family = AF_UNIX;
237 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
238 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
239 
240 	if (connect(sockfd, (struct sockaddr *)&servaddr,
241 	    sizeof (servaddr)) == -1) {
242 		zperror(gettext("Could not connect to zone console"));
243 		goto bad;
244 	}
245 	masterfd = sockfd;
246 
247 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
248 	    getpid(), setlocale(LC_MESSAGES, NULL));
249 
250 	if (msglen >= sizeof (clientid) || msglen < 0) {
251 		zerror("protocol error");
252 		goto bad;
253 	}
254 
255 	if (write(masterfd, clientid, msglen) != msglen) {
256 		zerror("protocol error");
257 		goto bad;
258 	}
259 
260 	bzero(handshake, sizeof (handshake));
261 
262 	/*
263 	 * Take care not to accumulate more than our fill, and leave room for
264 	 * the NUL at the end.
265 	 */
266 	while ((err = read(masterfd, &c, 1)) == 1) {
267 		if (i >= (sizeof (handshake) - 1))
268 			break;
269 		if (c == '\n')
270 			break;
271 		handshake[i] = c;
272 		i++;
273 	}
274 
275 	/*
276 	 * If something went wrong during the handshake we bail; perhaps
277 	 * the server died off.
278 	 */
279 	if (err == -1) {
280 		zperror(gettext("Could not connect to zone console"));
281 		goto bad;
282 	}
283 
284 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
285 		return (0);
286 
287 	zerror(gettext("Console is already in use by process ID %s."),
288 	    handshake);
289 bad:
290 	(void) close(sockfd);
291 	masterfd = -1;
292 	return (-1);
293 }
294 
295 
296 /*
297  * Routines to handle pty creation upon zone entry and to shuttle I/O back
298  * and forth between the two terminals.  We also compute and store the
299  * name of the slave terminal associated with the master side.
300  */
301 static int
302 get_master_pty()
303 {
304 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
305 		zperror(gettext("failed to obtain a pseudo-tty"));
306 		return (-1);
307 	}
308 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
309 		zperror(gettext("failed to get terminal settings from stdin"));
310 		return (-1);
311 	}
312 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
313 
314 	return (0);
315 }
316 
317 /*
318  * This is a bit tricky; normally a pts device will belong to the zone it
319  * is granted to.  But in the case of "entering" a zone, we need to establish
320  * the pty before entering the zone so that we can vector I/O to and from it
321  * from the global zone.
322  *
323  * We use the zonept() call to let the ptm driver know what we are up to;
324  * the only other hairy bit is the setting of zoneslavename (which happens
325  * above, in get_master_pty()).
326  */
327 static int
328 init_slave_pty(zoneid_t zoneid, char *devroot)
329 {
330 	int slavefd = -1;
331 	char *slavename, zoneslavename[MAXPATHLEN];
332 
333 	/*
334 	 * Set slave permissions, zone the pts, then unlock it.
335 	 */
336 	if (grantpt(masterfd) != 0) {
337 		zperror(gettext("grantpt failed"));
338 		return (-1);
339 	}
340 
341 	if (unlockpt(masterfd) != 0) {
342 		zperror(gettext("unlockpt failed"));
343 		return (-1);
344 	}
345 
346 	/*
347 	 * We must open the slave side before zoning this pty; otherwise
348 	 * the kernel would refuse us the open-- zoning a pty makes it
349 	 * inaccessible to the global zone.  Note we are trying to open
350 	 * the device node via the $ZONEROOT/dev path for this pty.
351 	 *
352 	 * Later we'll close the slave out when once we've opened it again
353 	 * from within the target zone.  Blarg.
354 	 */
355 	if ((slavename = ptsname(masterfd)) == NULL) {
356 		zperror(gettext("failed to get name for pseudo-tty"));
357 		return (-1);
358 	}
359 
360 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
361 	    devroot, slavename);
362 
363 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
364 		zerror(gettext("failed to open %s: %s"), zoneslavename,
365 		    strerror(errno));
366 		return (-1);
367 	}
368 
369 	/*
370 	 * Push hardware emulation (ptem), line discipline (ldterm),
371 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
372 	 */
373 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
374 		zperror(gettext("failed to push ptem module"));
375 		if (!failsafe)
376 			goto bad;
377 	}
378 
379 	/*
380 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
381 	 * this prior to entering the zone so that we can detect any errors
382 	 * early, and so that we can set the anchor from the global zone.
383 	 */
384 	if (ioctl(slavefd, I_ANCHOR) == -1) {
385 		zperror(gettext("failed to set stream anchor"));
386 		if (!failsafe)
387 			goto bad;
388 	}
389 
390 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
391 		zperror(gettext("failed to push ldterm module"));
392 		if (!failsafe)
393 			goto bad;
394 	}
395 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
396 		zperror(gettext("failed to push ttcompat module"));
397 		if (!failsafe)
398 			goto bad;
399 	}
400 
401 	/*
402 	 * Propagate terminal settings from the external term to the new one.
403 	 */
404 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
405 		zperror(gettext("failed to set terminal settings"));
406 		if (!failsafe)
407 			goto bad;
408 	}
409 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
410 
411 	if (zonept(masterfd, zoneid) != 0) {
412 		zperror(gettext("could not set zoneid of pty"));
413 		goto bad;
414 	}
415 
416 	return (slavefd);
417 
418 bad:
419 	(void) close(slavefd);
420 	return (-1);
421 }
422 
423 /*
424  * Place terminal into raw mode.
425  */
426 static int
427 set_tty_rawmode(int fd)
428 {
429 	struct termios term;
430 	if (tcgetattr(fd, &term) < 0) {
431 		zperror(gettext("failed to get user terminal settings"));
432 		return (-1);
433 	}
434 
435 	/* Stash for later, so we can revert back to previous mode */
436 	save_termios = term;
437 	save_fd = fd;
438 
439 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
440 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
441 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
442 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
443 	/* disable output post-processing */
444 	term.c_oflag &= ~OPOST;
445 	/* disable canonical mode, signal chars, echo & extended functions */
446 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
447 
448 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
449 	term.c_cc[VTIME] = 0;
450 
451 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
452 		zperror(gettext("failed to set user terminal to raw mode"));
453 		return (-1);
454 	}
455 
456 	/*
457 	 * We need to know the value of VEOF so that we can properly process for
458 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
459 	 * because VMIN overloads the same array slot in non-canonical mode.
460 	 * Stupid @&^%!
461 	 *
462 	 * So here we construct the "effective" termios from the current
463 	 * terminal settings, and the corrected VEOF and VEOL settings.
464 	 */
465 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
466 		zperror(gettext("failed to get user terminal settings"));
467 		return (-1);
468 	}
469 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
470 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
471 
472 	return (0);
473 }
474 
475 /*
476  * Copy terminal window size from our terminal to the pts.
477  */
478 /*ARGSUSED*/
479 static void
480 sigwinch(int s)
481 {
482 	struct winsize ws;
483 
484 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
485 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
486 }
487 
488 static void
489 /*ARGSUSED*/
490 sigcld(int s)
491 {
492 	int status;
493 	pid_t pid;
494 
495 	/*
496 	 * Peek at the exit status.  If this isn't the process we cared
497 	 * about, then just reap it.
498 	 */
499 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
500 		if (pid == child_pid &&
501 		    (WIFEXITED(status) || WIFSIGNALED(status)))
502 			dead = 1;
503 		else
504 			(void) waitpid(pid, &status, WNOHANG);
505 	}
506 }
507 
508 /*
509  * Some signals (currently, SIGINT) must be forwarded on to the process
510  * group of the child process.
511  */
512 static void
513 sig_forward(int s)
514 {
515 	if (child_pid != -1) {
516 		pid_t pgid = getpgid(child_pid);
517 		if (pgid != -1)
518 			(void) sigsend(P_PGID, pgid, s);
519 	}
520 }
521 
522 /*
523  * reset terminal settings for global environment
524  */
525 static void
526 reset_tty()
527 {
528 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
529 }
530 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *   printable characters	-> themselves
 *   0 through 31		-> caret notation, ^@ through ^_
 *   everything else		-> backslash plus three octal digits
 *
 * cc must have room for CANONIFY_LEN bytes.
 *
 * The character is examined as an unsigned char: isprint() is only defined
 * for values representable as unsigned char (or EOF), and shifting a
 * negative char would produce the wrong leading octal digit for bytes
 * >= 0x80.
 */
static void
canonify(char c, char *cc)
{
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
553 
554 /*
555  * process_user_input watches the input stream for the escape sequence for
556  * 'quit' (by default, tilde-period).  Because we might be fed just one
557  * keystroke at a time, state associated with the user input (are we at the
558  * beginning of the line?  are we locally echoing the next character?) is
559  * maintained by beginning_of_line and local_echo across calls to the routine.
560  *
561  * This routine returns -1 when the 'quit' escape sequence has been issued,
562  * and 0 otherwise.
563  */
564 static int
565 process_user_input(int outfd, char *buf, size_t nbytes)
566 {
567 	static boolean_t beginning_of_line = B_TRUE;
568 	static boolean_t local_echo = B_FALSE;
569 
570 	char c = *buf;
571 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
572 		buf++;
573 		if (beginning_of_line && !nocmdchar) {
574 			beginning_of_line = B_FALSE;
575 			if (c == cmdchar) {
576 				local_echo = B_TRUE;
577 				continue;
578 			}
579 		} else if (local_echo) {
580 			local_echo = B_FALSE;
581 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
582 				char cc[CANONIFY_LEN];
583 				canonify(c, cc);
584 				(void) write(STDOUT_FILENO, &cmdchar, 1);
585 				(void) write(STDOUT_FILENO, cc, strlen(cc));
586 				return (-1);
587 			}
588 		}
589 		if (write(outfd, &c, 1) <= 0)
590 			return (-1);
591 		beginning_of_line = (c == '\r' || c == '\n' ||
592 		    c == effective_termios.c_cc[VKILL] ||
593 		    c == effective_termios.c_cc[VEOL] ||
594 		    c == effective_termios.c_cc[VSUSP] ||
595 		    c == effective_termios.c_cc[VINTR]);
596 	}
597 	return (0);
598 }
599 
600 /*
601  * This is the main I/O loop, and is shared across all zlogin modes.
602  * Parameters:
603  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
604  *	           the zone will be written here.
605  *
606  *	stdout_fd: The fd representing 'stdout' for the slave side; output
607  *	           from the zone will arrive here.
608  *
609  *	stderr_fd: The fd representing 'stderr' for the slave side; output
610  *	           from the zone will arrive here.
611  *
612  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
613  *	           be performed on the input coming from STDIN.
614  *
615  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
616  * mode supplies a stderr).
617  *
618  */
619 static void
620 doio(int stdin_fd, int stdout_fd, int stderr_fd, boolean_t raw_mode)
621 {
622 	struct pollfd pollfds[3];
623 	char ibuf[BUFSIZ];
624 	int cc, ret;
625 
626 	/* read from stdout of zone and write to stdout of global zone */
627 	pollfds[0].fd = stdout_fd;
628 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
629 
630 	/* read from stderr of zone and write to stderr of global zone */
631 	pollfds[1].fd = stderr_fd;
632 	pollfds[1].events = pollfds[0].events;
633 
634 	/* read from stdin of global zone and write to stdin of zone */
635 	pollfds[2].fd = STDIN_FILENO;
636 	pollfds[2].events = pollfds[0].events;
637 
638 	for (;;) {
639 		pollfds[0].revents = pollfds[1].revents =
640 		    pollfds[2].revents = 0;
641 
642 		if (dead)
643 			break;
644 
645 		ret = poll(pollfds,
646 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
647 		if (ret == -1 && errno != EINTR) {
648 			perror("poll failed");
649 			break;
650 		}
651 
652 		if (errno == EINTR && dead) {
653 			break;
654 		}
655 
656 		/* event from master side stdout */
657 		if (pollfds[0].revents) {
658 			if (pollfds[0].revents &
659 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
660 				cc = read(stdout_fd, ibuf, BUFSIZ);
661 				if (cc == -1 && (errno != EINTR || dead))
662 					break;
663 				if (cc == 0)	/* EOF */
664 					break;
665 				(void) write(STDOUT_FILENO, ibuf, cc);
666 			} else {
667 				pollerr = pollfds[0].revents;
668 				break;
669 			}
670 		}
671 
672 		/* event from master side stderr */
673 		if (pollfds[1].revents) {
674 			if (pollfds[1].revents &
675 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
676 				cc = read(stderr_fd, ibuf, BUFSIZ);
677 				if (cc == -1 && (errno != EINTR || dead))
678 					break;
679 				if (cc == 0)	/* EOF */
680 					break;
681 				(void) write(STDERR_FILENO, ibuf, cc);
682 			} else {
683 				pollerr = pollfds[1].revents;
684 				break;
685 			}
686 		}
687 
688 		/* event from user STDIN side */
689 		if (pollfds[2].revents) {
690 			if (pollfds[2].revents &
691 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
692 				cc = read(STDIN_FILENO, ibuf, BUFSIZ);
693 				if (cc == -1 && (errno != EINTR || dead))
694 					break;
695 
696 				/*
697 				 * stdin fd is stdin of the target; so,
698 				 * the thing we'll write the user data *to*.
699 				 *
700 				 * Also, unlike on the output side, we
701 				 * propagate zero-length messages to the
702 				 * other side.
703 				 */
704 				if (raw_mode == B_TRUE) {
705 					if (write(stdin_fd, ibuf, cc) == -1)
706 						break;
707 				} else {
708 					if (process_user_input(stdin_fd, ibuf,
709 					    cc) == -1)
710 						break;
711 				}
712 			} else if (raw_mode == B_TRUE &&
713 			    pollfds[2].revents & POLLHUP) {
714 				/*
715 				 * It's OK to get a POLLHUP on STDIN-- it
716 				 * always happens if you do:
717 				 *
718 				 * echo foo | zlogin <zone> <command>
719 				 *
720 				 * We reset fd to -1 in this case to clear
721 				 * the condition and write an EOF to the
722 				 * other side in order to wrap things up.
723 				 */
724 				pollfds[2].fd = -1;
725 				(void) write(stdin_fd, ibuf, 0);
726 			} else {
727 				pollerr = pollfds[2].revents;
728 				break;
729 			}
730 		}
731 	}
732 
733 	/*
734 	 * We are in the midst of dying, but try to poll with a short
735 	 * timeout to see if we can catch the last bit of I/O from the
736 	 * children.
737 	 */
738 	pollfds[0].revents = pollfds[1].revents = pollfds[2].revents = 0;
739 	(void) poll(pollfds,
740 	    sizeof (pollfds) / sizeof (struct pollfd), 100);
741 	if (pollfds[0].revents &
742 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
743 		if ((cc = read(stdout_fd, ibuf, BUFSIZ)) > 0)
744 			(void) write(STDOUT_FILENO, ibuf, cc);
745 	}
746 	if (pollfds[1].revents &
747 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
748 		if ((cc = read(stderr_fd, ibuf, BUFSIZ)) > 0)
749 			(void) write(STDERR_FILENO, ibuf, cc);
750 	}
751 }
752 
753 static char **
754 zone_login_cmd(brand_handle_t bh, const char *login)
755 {
756 	static char result_buf[ARG_MAX];
757 	char **new_argv, *ptr, *lasts;
758 	int n, a;
759 
760 	/* Get the login command for the target zone. */
761 	bzero(result_buf, sizeof (result_buf));
762 	if (brand_get_login_cmd(bh, login,
763 	    result_buf, sizeof (result_buf)) != 0)
764 		return (NULL);
765 
766 	/*
767 	 * We got back a string that we'd like to execute.  But since
768 	 * we're not doing the execution via a shell we'll need to convert
769 	 * the exec string to an array of strings.  We'll do that here
770 	 * but we're going to be very simplistic about it and break stuff
771 	 * up based on spaces.  We're not even going to support any kind
772 	 * of quoting or escape characters.  It's truly amazing that
773 	 * there is no library function in OpenSolaris to do this for us.
774 	 */
775 
776 	/*
777 	 * Be paranoid.  Since we're deliniating based on spaces make
778 	 * sure there are no adjacent spaces.
779 	 */
780 	if (strstr(result_buf, "  ") != NULL)
781 		return (NULL);
782 
783 	/* Remove any trailing whitespace.  */
784 	n = strlen(result_buf);
785 	if (result_buf[n - 1] == ' ')
786 		result_buf[n - 1] = '\0';
787 
788 	/* Count how many elements there are in the exec string. */
789 	ptr = result_buf;
790 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
791 		;
792 
793 	/* Allocate the argv array that we're going to return. */
794 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
795 		return (NULL);
796 
797 	/* Tokenize the exec string and return. */
798 	a = 0;
799 	new_argv[a++] = result_buf;
800 	if (n > 2) {
801 		(void) strtok_r(result_buf, " ", &lasts);
802 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
803 			;
804 	} else {
805 		new_argv[a++] = NULL;
806 	}
807 	assert(n == a);
808 	return (new_argv);
809 }
810 
811 /*
812  * Prepare argv array for exec'd process; if we're passing commands to the
813  * new process, then use su(1M) to do the invocation.  Otherwise, use
814  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
815  * login that we're coming from another zone, and to disregard its CONSOLE
816  * checks).
817  */
818 static char **
819 prep_args(brand_handle_t bh, const char *login, char **argv)
820 {
821 	int argc = 0, a = 0, i, n = -1;
822 	char **new_argv;
823 
824 	if (argv != NULL) {
825 		size_t subshell_len = 1;
826 		char *subshell;
827 
828 		while (argv[argc] != NULL)
829 			argc++;
830 
831 		for (i = 0; i < argc; i++) {
832 			subshell_len += strlen(argv[i]) + 1;
833 		}
834 		if ((subshell = calloc(1, subshell_len)) == NULL)
835 			return (NULL);
836 
837 		for (i = 0; i < argc; i++) {
838 			(void) strcat(subshell, argv[i]);
839 			(void) strcat(subshell, " ");
840 		}
841 
842 		if (failsafe) {
843 			n = 4;
844 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
845 				return (NULL);
846 
847 			new_argv[a++] = FAILSAFESHELL;
848 		} else {
849 			n = 5;
850 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
851 				return (NULL);
852 
853 			new_argv[a++] = SUPATH;
854 			new_argv[a++] = (char *)login;
855 		}
856 		new_argv[a++] = "-c";
857 		new_argv[a++] = subshell;
858 		new_argv[a++] = NULL;
859 		assert(a == n);
860 	} else {
861 		if (failsafe) {
862 			n = 2;
863 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
864 				return (NULL);
865 			new_argv[a++] = FAILSAFESHELL;
866 			new_argv[a++] = NULL;
867 			assert(n == a);
868 		} else {
869 			new_argv = zone_login_cmd(bh, login);
870 		}
871 	}
872 
873 	return (new_argv);
874 }
875 
/*
 * Helper routine for prep_env below: build a freshly malloc'd "name=value"
 * string.  Returns NULL if the allocation fails; the caller owns the
 * result.
 */
static char *
add_env(char *name, char *value)
{
	size_t nlen = strlen(name);
	size_t vlen = strlen(value);
	char *entry = malloc(nlen + vlen + 2);	/* name, =, value, NUL */

	if (entry == NULL)
		return (NULL);

	(void) memcpy(entry, name, nlen);
	entry[nlen] = '=';
	(void) memcpy(entry + nlen + 1, value, vlen + 1);
	return (entry);
}
891 
892 /*
893  * Prepare envp array for exec'd process.
894  */
895 static char **
896 prep_env()
897 {
898 	int e = 0, size = 1;
899 	char **new_env, *estr;
900 	char *term = getenv("TERM");
901 
902 	size++;	/* for $PATH */
903 	if (term != NULL)
904 		size++;
905 
906 	/*
907 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
908 	 * We also set $SHELL, since neither login nor su will be around to do
909 	 * it.
910 	 */
911 	if (failsafe)
912 		size += 2;
913 
914 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
915 		return (NULL);
916 
917 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
918 		return (NULL);
919 	new_env[e++] = estr;
920 
921 	if (term != NULL) {
922 		if ((estr = add_env("TERM", term)) == NULL)
923 			return (NULL);
924 		new_env[e++] = estr;
925 	}
926 
927 	if (failsafe) {
928 		if ((estr = add_env("HOME", "/")) == NULL)
929 			return (NULL);
930 		new_env[e++] = estr;
931 
932 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
933 			return (NULL);
934 		new_env[e++] = estr;
935 	}
936 
937 	new_env[e++] = NULL;
938 
939 	assert(e == size);
940 
941 	return (new_env);
942 }
943 
944 /*
945  * Finish the preparation of the envp array for exec'd non-interactive
946  * zlogins.  This is called in the child process *after* we zone_enter(), since
947  * it derives things we can only know within the zone, such as $HOME, $SHELL,
948  * etc.  We need only do this in the non-interactive, mode, since otherwise
949  * login(1) will do it.  We don't do this in failsafe mode, since it presents
950  * additional ways in which the command could fail, and we'd prefer to avoid
951  * that.
952  */
953 static char **
954 prep_env_noninteractive(char *login, char **env)
955 {
956 	size_t size;
957 	struct passwd *pw;
958 	char **new_env;
959 	int e, i;
960 	char *estr;
961 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
962 
963 	assert(env != NULL);
964 	assert(failsafe == 0);
965 
966 	/*
967 	 * Get existing envp size.
968 	 */
969 	for (size = 0; env[size] != NULL; size++)
970 		;
971 	e = size;
972 
973 	/*
974 	 * Finish filling out the environment; we duplicate the environment
975 	 * setup described in login(1), for lack of a better precedent.
976 	 */
977 	if ((pw = getpwnam(login)) != NULL) {
978 		size += 3;	/* LOGNAME, HOME, MAIL */
979 	}
980 	size++;	/* always fill in SHELL */
981 	size++; /* terminating NULL */
982 
983 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
984 		goto malloc_fail;
985 
986 	/*
987 	 * Copy existing elements of env into new_env.
988 	 */
989 	for (i = 0; env[i] != NULL; i++) {
990 		if ((new_env[i] = strdup(env[i])) == NULL)
991 			goto malloc_fail;
992 	}
993 	assert(e == i);
994 
995 	if (pw != NULL) {
996 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
997 			goto malloc_fail;
998 		new_env[e++] = estr;
999 
1000 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1001 			goto malloc_fail;
1002 		new_env[e++] = estr;
1003 
1004 		if (chdir(pw->pw_dir) != 0)
1005 			zerror(gettext("Could not chdir to home directory "
1006 			    "%s: %s"), pw->pw_dir, strerror(errno));
1007 
1008 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1009 		    pw->pw_name);
1010 		if ((estr = add_env("MAIL", varmail)) == NULL)
1011 			goto malloc_fail;
1012 		new_env[e++] = estr;
1013 	}
1014 
1015 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1016 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1017 			goto malloc_fail;
1018 		new_env[e++] = estr;
1019 	} else {
1020 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1021 			goto malloc_fail;
1022 		new_env[e++] = estr;
1023 	}
1024 
1025 	new_env[e++] = NULL;	/* add terminating NULL */
1026 
1027 	assert(e == size);
1028 	return (new_env);
1029 
1030 malloc_fail:
1031 	zperror(gettext("failed to allocate memory for process environment"));
1032 	return (NULL);
1033 }
1034 
/*
 * Per-descriptor callback: close fd unless it is the descriptor whose
 * number is stored at *slavefd.  Always returns 0.
 * NOTE(review): the signature suggests use as an fdwalk(3C) callback; the
 * caller is not visible here -- confirm.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep = *(int *)slavefd;

	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
1042 
1043 static void
1044 set_cmdchar(char *cmdcharstr)
1045 {
1046 	char c;
1047 	long lc;
1048 
1049 	if ((c = *cmdcharstr) != '\\') {
1050 		cmdchar = c;
1051 		return;
1052 	}
1053 
1054 	c = cmdcharstr[1];
1055 	if (c == '\0' || c == '\\') {
1056 		cmdchar = '\\';
1057 		return;
1058 	}
1059 
1060 	if (c < '0' || c > '7') {
1061 		zerror(gettext("Unrecognized escape character option %s"),
1062 		    cmdcharstr);
1063 		usage();
1064 	}
1065 
1066 	lc = strtol(cmdcharstr + 1, NULL, 8);
1067 	if (lc < 0 || lc > 255) {
1068 		zerror(gettext("Octal escape character '%s' too large"),
1069 		    cmdcharstr);
1070 		usage();
1071 	}
1072 	cmdchar = (char)lc;
1073 }
1074 
1075 static int
1076 setup_utmpx(char *slavename)
1077 {
1078 	struct utmpx ut;
1079 
1080 	bzero(&ut, sizeof (ut));
1081 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1082 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1083 	ut.ut_pid = getpid();
1084 	ut.ut_id[0] = 'z';
1085 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1086 	ut.ut_type = LOGIN_PROCESS;
1087 	(void) time(&ut.ut_tv.tv_sec);
1088 
1089 	if (makeutx(&ut) == NULL) {
1090 		zerror(gettext("makeutx failed"));
1091 		return (-1);
1092 	}
1093 	return (0);
1094 }
1095 
/*
 * Counterpart of grab_lock_file(): closes the lock file descriptor
 * 'lockfd'.  Errors from close() are deliberately ignored.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1101 
1102 static int
1103 grab_lock_file(const char *zone_name, int *lockfd)
1104 {
1105 	char pathbuf[PATH_MAX];
1106 	struct flock flock;
1107 
1108 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1109 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1110 		    strerror(errno));
1111 		return (-1);
1112 	}
1113 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1114 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1115 	    ZONES_TMPDIR, zone_name);
1116 
1117 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1118 		zerror(gettext("could not open %s: %s"), pathbuf,
1119 		    strerror(errno));
1120 		return (-1);
1121 	}
1122 	/*
1123 	 * Lock the file to synchronize with other zoneadmds
1124 	 */
1125 	flock.l_type = F_WRLCK;
1126 	flock.l_whence = SEEK_SET;
1127 	flock.l_start = (off_t)0;
1128 	flock.l_len = (off_t)0;
1129 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1130 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1131 		    strerror(errno));
1132 		release_lock_file(*lockfd);
1133 		return (-1);
1134 	}
1135 	return (Z_OK);
1136 }
1137 
1138 static int
1139 start_zoneadmd(const char *zone_name)
1140 {
1141 	pid_t retval;
1142 	int pstatus = 0, error = -1, lockfd, doorfd;
1143 	struct door_info info;
1144 	char doorpath[MAXPATHLEN];
1145 
1146 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1147 
1148 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1149 		return (-1);
1150 	/*
1151 	 * We must do the door check with the lock held.  Otherwise, we
1152 	 * might race against another zoneadm/zlogin process and wind
1153 	 * up with two processes trying to start zoneadmd at the same
1154 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1155 	 * to be as seamless as is practical, from a user perspective.
1156 	 */
1157 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1158 		if (errno != ENOENT) {
1159 			zerror("failed to open %s: %s", doorpath,
1160 			    strerror(errno));
1161 			goto out;
1162 		}
1163 	} else {
1164 		/*
1165 		 * Seems to be working ok.
1166 		 */
1167 		if (door_info(doorfd, &info) == 0 &&
1168 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1169 			error = 0;
1170 			goto out;
1171 		}
1172 	}
1173 
1174 	if ((child_pid = fork()) == -1) {
1175 		zperror(gettext("could not fork"));
1176 		goto out;
1177 	} else if (child_pid == 0) {
1178 		/* child process */
1179 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1180 		    zone_name, NULL);
1181 		zperror(gettext("could not exec zoneadmd"));
1182 		_exit(1);
1183 	}
1184 
1185 	/* parent process */
1186 	do {
1187 		retval = waitpid(child_pid, &pstatus, 0);
1188 	} while (retval != child_pid);
1189 	if (WIFSIGNALED(pstatus) ||
1190 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1191 		zerror(gettext("could not start %s"), "zoneadmd");
1192 		goto out;
1193 	}
1194 	error = 0;
1195 out:
1196 	release_lock_file(lockfd);
1197 	(void) close(doorfd);
1198 	return (error);
1199 }
1200 
1201 static int
1202 init_template(void)
1203 {
1204 	int fd;
1205 	int err = 0;
1206 
1207 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1208 	if (fd == -1)
1209 		return (-1);
1210 
1211 	/*
1212 	 * zlogin doesn't do anything with the contract.
1213 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1214 	 */
1215 	err |= ct_tmpl_set_critical(fd, 0);
1216 	err |= ct_tmpl_set_informative(fd, 0);
1217 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1218 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1219 	if (err || ct_tmpl_activate(fd)) {
1220 		(void) close(fd);
1221 		return (-1);
1222 	}
1223 
1224 	return (fd);
1225 }
1226 
1227 static int
1228 noninteractive_login(char *zonename, zoneid_t zoneid, char *login,
1229     char **new_args, char **new_env)
1230 {
1231 	pid_t retval;
1232 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2];
1233 	int child_status;
1234 	int tmpl_fd;
1235 	sigset_t block_cld;
1236 
1237 	if ((tmpl_fd = init_template()) == -1) {
1238 		reset_tty();
1239 		zperror(gettext("could not create contract"));
1240 		return (1);
1241 	}
1242 
1243 	if (pipe(stdin_pipe) != 0) {
1244 		zperror(gettext("could not create STDIN pipe"));
1245 		return (1);
1246 	}
1247 	/*
1248 	 * When the user types ^D, we get a zero length message on STDIN.
1249 	 * We need to echo that down the pipe to send it to the other side;
1250 	 * but by default, pipes don't propagate zero-length messages.  We
1251 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1252 	 */
1253 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1254 		zperror(gettext("could not configure STDIN pipe"));
1255 		return (1);
1256 
1257 	}
1258 	if (pipe(stdout_pipe) != 0) {
1259 		zperror(gettext("could not create STDOUT pipe"));
1260 		return (1);
1261 	}
1262 	if (pipe(stderr_pipe) != 0) {
1263 		zperror(gettext("could not create STDERR pipe"));
1264 		return (1);
1265 	}
1266 
1267 	/*
1268 	 * If any of the pipe FD's winds up being less than STDERR, then we
1269 	 * have a mess on our hands-- and we are lacking some of the I/O
1270 	 * streams we would expect anyway.  So we bail.
1271 	 */
1272 	if (stdin_pipe[0] <= STDERR_FILENO ||
1273 	    stdin_pipe[1] <= STDERR_FILENO ||
1274 	    stdout_pipe[0] <= STDERR_FILENO ||
1275 	    stdout_pipe[1] <= STDERR_FILENO ||
1276 	    stderr_pipe[0] <= STDERR_FILENO ||
1277 	    stderr_pipe[1] <= STDERR_FILENO) {
1278 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1279 		return (1);
1280 	}
1281 
1282 	if (prefork_dropprivs() != 0) {
1283 		zperror(gettext("could not allocate privilege set"));
1284 		return (1);
1285 	}
1286 
1287 	(void) sigset(SIGCLD, sigcld);
1288 	(void) sigemptyset(&block_cld);
1289 	(void) sigaddset(&block_cld, SIGCLD);
1290 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1291 
1292 	if ((child_pid = fork()) == -1) {
1293 		(void) ct_tmpl_clear(tmpl_fd);
1294 		(void) close(tmpl_fd);
1295 		zperror(gettext("could not fork"));
1296 		return (1);
1297 	} else if (child_pid == 0) { /* child process */
1298 		(void) ct_tmpl_clear(tmpl_fd);
1299 
1300 		/*
1301 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1302 		 */
1303 		(void) close(STDIN_FILENO);
1304 		(void) close(STDOUT_FILENO);
1305 		(void) close(STDERR_FILENO);
1306 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1307 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1308 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1309 		(void) closefrom(STDERR_FILENO + 1);
1310 
1311 		(void) sigset(SIGCLD, SIG_DFL);
1312 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1313 		/*
1314 		 * In case any of stdin, stdout or stderr are streams,
1315 		 * anchor them to prevent malicious I_POPs.
1316 		 */
1317 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1318 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1319 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1320 
1321 		if (zone_enter(zoneid) == -1) {
1322 			zerror(gettext("could not enter zone %s: %s"),
1323 			    zonename, strerror(errno));
1324 			_exit(1);
1325 		}
1326 
1327 		if (!failsafe)
1328 			new_env = prep_env_noninteractive(login, new_env);
1329 
1330 		if (new_env == NULL) {
1331 			_exit(1);
1332 		}
1333 
1334 		/*
1335 		 * Move into a new process group; the zone_enter will have
1336 		 * placed us into zsched's session, and we want to be in
1337 		 * a unique process group.
1338 		 */
1339 		(void) setpgid(getpid(), getpid());
1340 
1341 		(void) execve(new_args[0], new_args, new_env);
1342 		zperror(gettext("exec failure"));
1343 		_exit(1);
1344 	}
1345 	/* parent */
1346 	(void) sigset(SIGINT, sig_forward);
1347 
1348 	postfork_dropprivs();
1349 
1350 	(void) ct_tmpl_clear(tmpl_fd);
1351 	(void) close(tmpl_fd);
1352 
1353 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1354 	doio(stdin_pipe[0], stdout_pipe[0], stderr_pipe[0], B_TRUE);
1355 	do {
1356 		retval = waitpid(child_pid, &child_status, 0);
1357 		if (retval == -1) {
1358 			child_status = 0;
1359 		}
1360 	} while (retval != child_pid && errno != ECHILD);
1361 
1362 	return (WEXITSTATUS(child_status));
1363 }
1364 
1365 int
1366 main(int argc, char **argv)
1367 {
1368 	int arg, console = 0;
1369 	zoneid_t zoneid;
1370 	zone_state_t st;
1371 	char *login = "root";
1372 	int lflag = 0;
1373 	char *zonename = NULL;
1374 	char **proc_args = NULL;
1375 	char **new_args, **new_env;
1376 	sigset_t block_cld;
1377 	char devroot[MAXPATHLEN];
1378 	char *slavename, slaveshortname[MAXPATHLEN];
1379 	priv_set_t *privset;
1380 	int tmpl_fd;
1381 	char zonebrand[MAXNAMELEN];
1382 	struct stat sb;
1383 	char kernzone[ZONENAME_MAX];
1384 	brand_handle_t bh;
1385 
1386 	(void) setlocale(LC_ALL, "");
1387 	(void) textdomain(TEXT_DOMAIN);
1388 
1389 	(void) getpname(argv[0]);
1390 
1391 	while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
1392 		switch (arg) {
1393 		case 'C':
1394 			console = 1;
1395 			break;
1396 		case 'E':
1397 			nocmdchar = 1;
1398 			break;
1399 		case 'R':	/* undocumented */
1400 			if (*optarg != '/') {
1401 				zerror(gettext("root path must be absolute."));
1402 				exit(2);
1403 			}
1404 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1405 				zerror(
1406 				    gettext("root path must be a directory."));
1407 				exit(2);
1408 			}
1409 			zonecfg_set_root(optarg);
1410 			break;
1411 		case 'S':
1412 			failsafe = 1;
1413 			break;
1414 		case 'e':
1415 			set_cmdchar(optarg);
1416 			break;
1417 		case 'l':
1418 			login = optarg;
1419 			lflag = 1;
1420 			break;
1421 		default:
1422 			usage();
1423 		}
1424 	}
1425 
1426 	if (console != 0 && lflag != 0) {
1427 		zerror(gettext("-l may not be specified for console login"));
1428 		usage();
1429 	}
1430 
1431 	if (console != 0 && failsafe != 0) {
1432 		zerror(gettext("-S may not be specified for console login"));
1433 		usage();
1434 	}
1435 
1436 	if (console != 0 && zonecfg_in_alt_root()) {
1437 		zerror(gettext("-R may not be specified for console login"));
1438 		exit(2);
1439 	}
1440 
1441 	if (failsafe != 0 && lflag != 0) {
1442 		zerror(gettext("-l may not be specified for failsafe login"));
1443 		usage();
1444 	}
1445 
1446 	if (optind == (argc - 1)) {
1447 		/*
1448 		 * zone name, no process name; this should be an interactive
1449 		 * as long as STDIN is really a tty.
1450 		 */
1451 		if (isatty(STDIN_FILENO))
1452 			interactive = 1;
1453 		zonename = argv[optind];
1454 	} else if (optind < (argc - 1)) {
1455 		if (console) {
1456 			zerror(gettext("Commands may not be specified for "
1457 			    "console login."));
1458 			usage();
1459 		}
1460 		/* zone name and process name, and possibly some args */
1461 		zonename = argv[optind];
1462 		proc_args = &argv[optind + 1];
1463 		interactive = 0;
1464 	} else {
1465 		usage();
1466 	}
1467 
1468 	if (getzoneid() != GLOBAL_ZONEID) {
1469 		zerror(gettext("'%s' may only be used from the global zone"),
1470 		    pname);
1471 		return (1);
1472 	}
1473 
1474 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1475 		zerror(gettext("'%s' not applicable to the global zone"),
1476 		    pname);
1477 		return (1);
1478 	}
1479 
1480 	if (zone_get_state(zonename, &st) != Z_OK) {
1481 		zerror(gettext("zone '%s' unknown"), zonename);
1482 		return (1);
1483 	}
1484 
1485 	if (st < ZONE_STATE_INSTALLED) {
1486 		zerror(gettext("cannot login to a zone which is '%s'"),
1487 		    zone_state_str(st));
1488 		return (1);
1489 	}
1490 
1491 	/*
1492 	 * In both console and non-console cases, we require all privs.
1493 	 * In the console case, because we may need to startup zoneadmd.
1494 	 * In the non-console case in order to do zone_enter(2), zonept()
1495 	 * and other tasks.
1496 	 *
1497 	 * Future work: this solution is temporary.  Ultimately, we need to
1498 	 * move to a flexible system which allows the global admin to
1499 	 * designate that a particular user can zlogin (and probably zlogin
1500 	 * -C) to a particular zone.  This all-root business we have now is
1501 	 * quite sketchy.
1502 	 */
1503 	if ((privset = priv_allocset()) == NULL) {
1504 		zperror(gettext("priv_allocset failed"));
1505 		return (1);
1506 	}
1507 
1508 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1509 		zperror(gettext("getppriv failed"));
1510 		priv_freeset(privset);
1511 		return (1);
1512 	}
1513 
1514 	if (priv_isfullset(privset) == B_FALSE) {
1515 		zerror(gettext("You lack sufficient privilege to run "
1516 		    "this command (all privs required)"));
1517 		priv_freeset(privset);
1518 		return (1);
1519 	}
1520 	priv_freeset(privset);
1521 
1522 	/*
1523 	 * The console is a separate case from the rest of the code; handle
1524 	 * it first.
1525 	 */
1526 	if (console) {
1527 
1528 		/*
1529 		 * Ensure that zoneadmd for this zone is running.
1530 		 */
1531 		if (start_zoneadmd(zonename) == -1)
1532 			return (1);
1533 
1534 		/*
1535 		 * Make contact with zoneadmd.
1536 		 */
1537 		if (get_console_master(zonename) == -1)
1538 			return (1);
1539 
1540 		(void) printf(gettext("[Connected to zone '%s' console]\n"),
1541 		    zonename);
1542 
1543 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1544 			reset_tty();
1545 			zperror(gettext("failed to set stdin pty to raw mode"));
1546 			return (1);
1547 		}
1548 
1549 		(void) sigset(SIGWINCH, sigwinch);
1550 		(void) sigwinch(0);
1551 
1552 		/*
1553 		 * Run the I/O loop until we get disconnected.
1554 		 */
1555 		doio(masterfd, masterfd, -1, B_FALSE);
1556 		reset_tty();
1557 		(void) printf(gettext("\n[Connection to zone '%s' console "
1558 		    "closed]\n"), zonename);
1559 
1560 		return (0);
1561 	}
1562 
1563 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1564 		zerror(gettext("login allowed only to running zones "
1565 		    "(%s is '%s')."), zonename, zone_state_str(st));
1566 		return (1);
1567 	}
1568 
1569 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1570 	if (zonecfg_in_alt_root()) {
1571 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
1572 
1573 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1574 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1575 			zerror(gettext("cannot find scratch zone %s"),
1576 			    zonename);
1577 			if (fp != NULL)
1578 				zonecfg_close_scratch(fp);
1579 			return (1);
1580 		}
1581 		zonecfg_close_scratch(fp);
1582 	}
1583 
1584 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1585 		zerror(gettext("failed to get zoneid for zone '%s'"),
1586 		    zonename);
1587 		return (1);
1588 	}
1589 
1590 	/*
1591 	 * We need the zone root path only if we are setting up a pty.
1592 	 */
1593 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1594 		zerror(gettext("could not get dev path for zone %s"),
1595 		    zonename);
1596 		return (1);
1597 	}
1598 
1599 	/* Get a handle to the brand info for this zone */
1600 	if ((zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) ||
1601 	    ((bh = brand_open(zonebrand)) == NULL)) {
1602 		zerror(gettext("could not get brand for zone %s"), zonename);
1603 		return (1);
1604 	}
1605 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
1606 		zperror(gettext("could not assemble new arguments"));
1607 		brand_close(bh);
1608 		return (1);
1609 	}
1610 	brand_close(bh);
1611 
1612 	if ((new_env = prep_env()) == NULL) {
1613 		zperror(gettext("could not assemble new environment"));
1614 		return (1);
1615 	}
1616 
1617 	if (!interactive)
1618 		return (noninteractive_login(zonename, zoneid, login, new_args,
1619 		    new_env));
1620 
1621 	if (zonecfg_in_alt_root()) {
1622 		zerror(gettext("cannot use interactive login with scratch "
1623 		    "zone"));
1624 		return (1);
1625 	}
1626 
1627 	/*
1628 	 * Things are more complex in interactive mode; we get the
1629 	 * master side of the pty, then place the user's terminal into
1630 	 * raw mode.
1631 	 */
1632 	if (get_master_pty() == -1) {
1633 		zerror(gettext("could not setup master pty device"));
1634 		return (1);
1635 	}
1636 
1637 	/*
1638 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
1639 	 */
1640 	if ((slavename = ptsname(masterfd)) == NULL) {
1641 		zperror(gettext("failed to get name for pseudo-tty"));
1642 		return (1);
1643 	}
1644 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
1645 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
1646 		    sizeof (slaveshortname));
1647 	else
1648 		(void) strlcpy(slaveshortname, slavename,
1649 		    sizeof (slaveshortname));
1650 
1651 	(void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
1652 	    slaveshortname);
1653 
1654 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
1655 		reset_tty();
1656 		zperror(gettext("failed to set stdin pty to raw mode"));
1657 		return (1);
1658 	}
1659 
1660 	if (prefork_dropprivs() != 0) {
1661 		reset_tty();
1662 		zperror(gettext("could not allocate privilege set"));
1663 		return (1);
1664 	}
1665 
1666 	/*
1667 	 * We must mask SIGCLD until after we have coped with the fork
1668 	 * sufficiently to deal with it; otherwise we can race and receive the
1669 	 * signal before child_pid has been initialized (yes, this really
1670 	 * happens).
1671 	 */
1672 	(void) sigset(SIGCLD, sigcld);
1673 	(void) sigemptyset(&block_cld);
1674 	(void) sigaddset(&block_cld, SIGCLD);
1675 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1676 
1677 	/*
1678 	 * We activate the contract template at the last minute to
1679 	 * avoid intermediate functions that could be using fork(2)
1680 	 * internally.
1681 	 */
1682 	if ((tmpl_fd = init_template()) == -1) {
1683 		reset_tty();
1684 		zperror(gettext("could not create contract"));
1685 		return (1);
1686 	}
1687 
1688 	if ((child_pid = fork()) == -1) {
1689 		(void) ct_tmpl_clear(tmpl_fd);
1690 		reset_tty();
1691 		zperror(gettext("could not fork"));
1692 		return (1);
1693 	} else if (child_pid == 0) { /* child process */
1694 		int slavefd, newslave;
1695 
1696 		(void) ct_tmpl_clear(tmpl_fd);
1697 		(void) close(tmpl_fd);
1698 
1699 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1700 
1701 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
1702 			return (1);
1703 
1704 		/*
1705 		 * Close all fds except for the slave pty.
1706 		 */
1707 		(void) fdwalk(close_func, &slavefd);
1708 
1709 		/*
1710 		 * Temporarily dup slavefd to stderr; that way if we have
1711 		 * to print out that zone_enter failed, the output will
1712 		 * have somewhere to go.
1713 		 */
1714 		if (slavefd != STDERR_FILENO)
1715 			(void) dup2(slavefd, STDERR_FILENO);
1716 
1717 		if (zone_enter(zoneid) == -1) {
1718 			zerror(gettext("could not enter zone %s: %s"),
1719 			    zonename, strerror(errno));
1720 			return (1);
1721 		}
1722 
1723 		if (slavefd != STDERR_FILENO)
1724 			(void) close(STDERR_FILENO);
1725 
1726 		/*
1727 		 * We take pains to get this process into a new process
1728 		 * group, and subsequently a new session.  In this way,
1729 		 * we'll have a session which doesn't yet have a controlling
1730 		 * terminal.  When we open the slave, it will become the
1731 		 * controlling terminal; no PIDs concerning pgrps or sids
1732 		 * will leak inappropriately into the zone.
1733 		 */
1734 		(void) setpgrp();
1735 
1736 		/*
1737 		 * We need the slave pty to be referenced from the zone's
1738 		 * /dev in order to ensure that the devt's, etc are all
1739 		 * correct.  Otherwise we break ttyname and the like.
1740 		 */
1741 		if ((newslave = open(slavename, O_RDWR)) == -1) {
1742 			(void) close(slavefd);
1743 			return (1);
1744 		}
1745 		(void) close(slavefd);
1746 		slavefd = newslave;
1747 
1748 		/*
1749 		 * dup the slave to the various FDs, so that when the
1750 		 * spawned process does a write/read it maps to the slave
1751 		 * pty.
1752 		 */
1753 		(void) dup2(slavefd, STDIN_FILENO);
1754 		(void) dup2(slavefd, STDOUT_FILENO);
1755 		(void) dup2(slavefd, STDERR_FILENO);
1756 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
1757 		    slavefd != STDERR_FILENO) {
1758 			(void) close(slavefd);
1759 		}
1760 
1761 		/*
1762 		 * In failsafe mode, we don't use login(1), so don't try
1763 		 * setting up a utmpx entry.
1764 		 *
1765 		 * A branded zone may have very different utmpx semantics.
1766 		 * At the moment, we only have two brand types:
1767 		 * Solaris-like (native, sn1) and Linux.  In the Solaris
1768 		 * case, we know exactly how to do the necessary utmpx
1769 		 * setup.  Fortunately for us, the Linux /bin/login is
1770 		 * prepared to deal with a non-initialized utmpx entry, so
1771 		 * we can simply skip it.  If future brands don't fall into
1772 		 * either category, we'll have to add a per-brand utmpx
1773 		 * setup hook.
1774 		 */
1775 		if (!failsafe && (strcmp(zonebrand, "lx") != 0))
1776 			if (setup_utmpx(slaveshortname) == -1)
1777 				return (1);
1778 
1779 		(void) execve(new_args[0], new_args, new_env);
1780 		zperror(gettext("exec failure"));
1781 		return (1);
1782 	}
1783 	(void) ct_tmpl_clear(tmpl_fd);
1784 	(void) close(tmpl_fd);
1785 
1786 	/*
1787 	 * The rest is only for the parent process.
1788 	 */
1789 	(void) sigset(SIGWINCH, sigwinch);
1790 
1791 	postfork_dropprivs();
1792 
1793 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1794 	doio(masterfd, masterfd, -1, B_FALSE);
1795 
1796 	reset_tty();
1797 	(void) fprintf(stderr,
1798 	    gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
1799 	    slaveshortname);
1800 
1801 	if (pollerr != 0) {
1802 		(void) fprintf(stderr, gettext("Error: connection closed due "
1803 		    "to unexpected pollevents=0x%x.\n"), pollerr);
1804 		return (1);
1805 	}
1806 
1807 	return (0);
1808 }
1809