xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision ad69a33458cf73ee14857d57799cf686946e0b88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 DEY Storage Systems, Inc.
24  * Copyright (c) 2014 Gary Mills
25  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26  */
27 
28 /*
29  * zlogin provides three types of login which allow users in the global
30  * zone to access non-global zones.
31  *
32  * - "interactive login" is similar to rlogin(1); for example, the user could
33  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
34  *   granted a new pty (which is then shoved into the zone), and an I/O
35  *   loop between parent and child processes takes care of the interactive
36  *   session.  In this mode, login(1) (and its -c option, which means
37  *   "already authenticated") is employed to take care of the initialization
38  *   of the user's session.
39  *
40  * - "non-interactive login" is similar to su(1M); the user could issue
41  *   'zlogin my-zone ls -l' and the command would be run as specified.
42  *   In this mode, zlogin sets up pipes as the communication channel, and
43  *   'su' is used to do the login setup work.
44  *
45  * - "console login" is the equivalent to accessing the tip line for a
46  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
47  *   In this mode, zlogin contacts the zoneadmd process via unix domain
48  *   socket.  If zoneadmd is not running, it starts it.  This allows the
49  *   console to be available anytime the zone is installed, regardless of
50  *   whether it is running.
51  */
52 
53 #include <sys/socket.h>
54 #include <sys/termios.h>
55 #include <sys/utsname.h>
56 #include <sys/stat.h>
57 #include <sys/types.h>
58 #include <sys/contract/process.h>
59 #include <sys/ctfs.h>
60 #include <sys/brand.h>
61 #include <sys/wait.h>
62 #include <alloca.h>
63 #include <assert.h>
64 #include <ctype.h>
65 #include <paths.h>
66 #include <door.h>
67 #include <errno.h>
68 #include <nss_dbdefs.h>
69 #include <poll.h>
70 #include <priv.h>
71 #include <pwd.h>
72 #include <unistd.h>
73 #include <utmpx.h>
74 #include <sac.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <strings.h>
81 #include <stropts.h>
82 #include <wait.h>
83 #include <zone.h>
84 #include <fcntl.h>
85 #include <libdevinfo.h>
86 #include <libintl.h>
87 #include <locale.h>
88 #include <libzonecfg.h>
89 #include <libcontract.h>
90 #include <libbrand.h>
91 #include <auth_list.h>
92 #include <auth_attr.h>
93 #include <secdb.h>
94 
95 static int masterfd;
96 static struct termios save_termios;
97 static struct termios effective_termios;
98 static int save_fd;
99 static struct winsize winsize;
100 static volatile int dead;
101 static volatile pid_t child_pid = -1;
102 static int interactive = 0;
103 static priv_set_t *dropprivs;
104 
105 static int nocmdchar = 0;
106 static int failsafe = 0;
107 static char cmdchar = '~';
108 static int quiet = 0;
109 
110 static int pollerr = 0;
111 
112 static const char *pname;
113 static char *username;
114 
115 /*
116  * When forced_login is true, the user is not prompted
117  * for an authentication password in the target zone.
118  */
119 static boolean_t forced_login = B_FALSE;
120 
#if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
#define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
#endif

#define	SUPATH	"/usr/bin/su"
#define	FAILSAFESHELL	"/sbin/sh"
#define	DEFAULTSHELL	"/sbin/sh"
#define	DEF_PATH	"/usr/sbin:/usr/bin"

#define	CLUSTER_BRAND_NAME	"cluster"

/*
 * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
 * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
 * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
 * also chosen in conjunction with the HI_WATER setting to make sure we
 * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
 * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
 * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
 * is less than HI_WATER data already in the pipe.
 */
#define	ZLOGIN_BUFSIZ	8192
#define	ZLOGIN_RDBUFSIZ	1024
#define	HI_WATER	8192

/* Enforce the size relationship described above at compile time. */
#if ZLOGIN_RDBUFSIZ >= ZLOGIN_BUFSIZ
#error "ZLOGIN_RDBUFSIZ must be smaller than ZLOGIN_BUFSIZ"
#endif

/*
 * See canonify() below.  CANONIFY_LEN is the maximum length that a
 * "canonical" sequence will expand to (backslash, three octal digits, NUL).
 */
#define	CANONIFY_LEN 5
151 
152 static void
153 usage(void)
154 {
155 	(void) fprintf(stderr, gettext("usage: %s [ -nQCES ] [ -e cmdchar ] "
156 	    "[-l user] zonename [command [args ...] ]\n"), pname);
157 	exit(2);
158 }
159 
160 static const char *
161 getpname(const char *arg0)
162 {
163 	const char *p = strrchr(arg0, '/');
164 
165 	if (p == NULL)
166 		p = arg0;
167 	else
168 		p++;
169 
170 	pname = p;
171 	return (p);
172 }
173 
174 static void
175 zerror(const char *fmt, ...)
176 {
177 	va_list alist;
178 
179 	(void) fprintf(stderr, "%s: ", pname);
180 	va_start(alist, fmt);
181 	(void) vfprintf(stderr, fmt, alist);
182 	va_end(alist);
183 	(void) fprintf(stderr, "\n");
184 }
185 
186 static void
187 zperror(const char *str)
188 {
189 	const char *estr;
190 
191 	if ((estr = strerror(errno)) != NULL)
192 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
193 	else
194 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
195 }
196 
197 /*
198  * The first part of our privilege dropping scheme needs to be called before
199  * fork(), since we must have it for security; we don't want to be surprised
200  * later that we couldn't allocate the privset.
201  */
202 static int
203 prefork_dropprivs()
204 {
205 	if ((dropprivs = priv_allocset()) == NULL)
206 		return (1);
207 
208 	priv_basicset(dropprivs);
209 	(void) priv_delset(dropprivs, PRIV_PROC_INFO);
210 	(void) priv_delset(dropprivs, PRIV_PROC_FORK);
211 	(void) priv_delset(dropprivs, PRIV_PROC_EXEC);
212 	(void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
213 
214 	/*
215 	 * We need to keep the basic privilege PROC_SESSION and all unknown
216 	 * basic privileges as well as the privileges PROC_ZONE and
217 	 * PROC_OWNER in order to query session information and
218 	 * send signals.
219 	 */
220 	if (interactive == 0) {
221 		(void) priv_addset(dropprivs, PRIV_PROC_ZONE);
222 		(void) priv_addset(dropprivs, PRIV_PROC_OWNER);
223 	} else {
224 		(void) priv_delset(dropprivs, PRIV_PROC_SESSION);
225 	}
226 
227 	return (0);
228 }
229 
230 /*
231  * The second part of the privilege drop.  We are paranoid about being attacked
232  * by the zone, so we drop all privileges.  This should prevent a compromise
233  * which gets us to fork(), exec(), symlink(), etc.
234  */
235 static void
236 postfork_dropprivs()
237 {
238 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
239 		zperror(gettext("Warning: could not set permitted privileges"));
240 	}
241 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
242 		zperror(gettext("Warning: could not set limit privileges"));
243 	}
244 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
245 		zperror(gettext("Warning: could not set inheritable "
246 		    "privileges"));
247 	}
248 }
249 
250 /*
251  * Create the unix domain socket and call the zoneadmd server; handshake
252  * with it to determine whether it will allow us to connect.
253  */
254 static int
255 get_console_master(const char *zname)
256 {
257 	int sockfd = -1;
258 	struct sockaddr_un servaddr;
259 	char clientid[MAXPATHLEN];
260 	char handshake[MAXPATHLEN], c;
261 	int msglen;
262 	int i = 0, err = 0;
263 
264 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
265 		zperror(gettext("could not create socket"));
266 		return (-1);
267 	}
268 
269 	bzero(&servaddr, sizeof (servaddr));
270 	servaddr.sun_family = AF_UNIX;
271 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
272 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
273 
274 	if (connect(sockfd, (struct sockaddr *)&servaddr,
275 	    sizeof (servaddr)) == -1) {
276 		zperror(gettext("Could not connect to zone console"));
277 		goto bad;
278 	}
279 	masterfd = sockfd;
280 
281 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
282 	    getpid(), setlocale(LC_MESSAGES, NULL));
283 
284 	if (msglen >= sizeof (clientid) || msglen < 0) {
285 		zerror("protocol error");
286 		goto bad;
287 	}
288 
289 	if (write(masterfd, clientid, msglen) != msglen) {
290 		zerror("protocol error");
291 		goto bad;
292 	}
293 
294 	bzero(handshake, sizeof (handshake));
295 
296 	/*
297 	 * Take care not to accumulate more than our fill, and leave room for
298 	 * the NUL at the end.
299 	 */
300 	while ((err = read(masterfd, &c, 1)) == 1) {
301 		if (i >= (sizeof (handshake) - 1))
302 			break;
303 		if (c == '\n')
304 			break;
305 		handshake[i] = c;
306 		i++;
307 	}
308 
309 	/*
310 	 * If something went wrong during the handshake we bail; perhaps
311 	 * the server died off.
312 	 */
313 	if (err == -1) {
314 		zperror(gettext("Could not connect to zone console"));
315 		goto bad;
316 	}
317 
318 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
319 		return (0);
320 
321 	zerror(gettext("Console is already in use by process ID %s."),
322 	    handshake);
323 bad:
324 	(void) close(sockfd);
325 	masterfd = -1;
326 	return (-1);
327 }
328 
329 
330 /*
331  * Routines to handle pty creation upon zone entry and to shuttle I/O back
332  * and forth between the two terminals.  We also compute and store the
333  * name of the slave terminal associated with the master side.
334  */
335 static int
336 get_master_pty()
337 {
338 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
339 		zperror(gettext("failed to obtain a pseudo-tty"));
340 		return (-1);
341 	}
342 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
343 		zperror(gettext("failed to get terminal settings from stdin"));
344 		return (-1);
345 	}
346 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
347 
348 	return (0);
349 }
350 
351 /*
352  * This is a bit tricky; normally a pts device will belong to the zone it
353  * is granted to.  But in the case of "entering" a zone, we need to establish
354  * the pty before entering the zone so that we can vector I/O to and from it
355  * from the global zone.
356  *
357  * We use the zonept() call to let the ptm driver know what we are up to;
358  * the only other hairy bit is the setting of zoneslavename (which happens
359  * above, in get_master_pty()).
360  */
361 static int
362 init_slave_pty(zoneid_t zoneid, char *devroot)
363 {
364 	int slavefd = -1;
365 	char *slavename, zoneslavename[MAXPATHLEN];
366 
367 	/*
368 	 * Set slave permissions, zone the pts, then unlock it.
369 	 */
370 	if (grantpt(masterfd) != 0) {
371 		zperror(gettext("grantpt failed"));
372 		return (-1);
373 	}
374 
375 	if (unlockpt(masterfd) != 0) {
376 		zperror(gettext("unlockpt failed"));
377 		return (-1);
378 	}
379 
380 	/*
381 	 * We must open the slave side before zoning this pty; otherwise
382 	 * the kernel would refuse us the open-- zoning a pty makes it
383 	 * inaccessible to the global zone.  Note we are trying to open
384 	 * the device node via the $ZONEROOT/dev path for this pty.
385 	 *
386 	 * Later we'll close the slave out when once we've opened it again
387 	 * from within the target zone.  Blarg.
388 	 */
389 	if ((slavename = ptsname(masterfd)) == NULL) {
390 		zperror(gettext("failed to get name for pseudo-tty"));
391 		return (-1);
392 	}
393 
394 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
395 	    devroot, slavename);
396 
397 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
398 		zerror(gettext("failed to open %s: %s"), zoneslavename,
399 		    strerror(errno));
400 		return (-1);
401 	}
402 
403 	/*
404 	 * Push hardware emulation (ptem), line discipline (ldterm),
405 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
406 	 */
407 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
408 		zperror(gettext("failed to push ptem module"));
409 		if (!failsafe)
410 			goto bad;
411 	}
412 
413 	/*
414 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
415 	 * this prior to entering the zone so that we can detect any errors
416 	 * early, and so that we can set the anchor from the global zone.
417 	 */
418 	if (ioctl(slavefd, I_ANCHOR) == -1) {
419 		zperror(gettext("failed to set stream anchor"));
420 		if (!failsafe)
421 			goto bad;
422 	}
423 
424 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
425 		zperror(gettext("failed to push ldterm module"));
426 		if (!failsafe)
427 			goto bad;
428 	}
429 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
430 		zperror(gettext("failed to push ttcompat module"));
431 		if (!failsafe)
432 			goto bad;
433 	}
434 
435 	/*
436 	 * Propagate terminal settings from the external term to the new one.
437 	 */
438 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
439 		zperror(gettext("failed to set terminal settings"));
440 		if (!failsafe)
441 			goto bad;
442 	}
443 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
444 
445 	if (zonept(masterfd, zoneid) != 0) {
446 		zperror(gettext("could not set zoneid of pty"));
447 		goto bad;
448 	}
449 
450 	return (slavefd);
451 
452 bad:
453 	(void) close(slavefd);
454 	return (-1);
455 }
456 
457 /*
458  * Place terminal into raw mode.
459  */
460 static int
461 set_tty_rawmode(int fd)
462 {
463 	struct termios term;
464 	if (tcgetattr(fd, &term) < 0) {
465 		zperror(gettext("failed to get user terminal settings"));
466 		return (-1);
467 	}
468 
469 	/* Stash for later, so we can revert back to previous mode */
470 	save_termios = term;
471 	save_fd = fd;
472 
473 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
474 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
475 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
476 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
477 	/* disable output post-processing */
478 	term.c_oflag &= ~OPOST;
479 	/* disable canonical mode, signal chars, echo & extended functions */
480 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
481 
482 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
483 	term.c_cc[VTIME] = 0;
484 
485 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
486 		zperror(gettext("failed to set user terminal to raw mode"));
487 		return (-1);
488 	}
489 
490 	/*
491 	 * We need to know the value of VEOF so that we can properly process for
492 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
493 	 * because VMIN overloads the same array slot in non-canonical mode.
494 	 * Stupid @&^%!
495 	 *
496 	 * So here we construct the "effective" termios from the current
497 	 * terminal settings, and the corrected VEOF and VEOL settings.
498 	 */
499 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
500 		zperror(gettext("failed to get user terminal settings"));
501 		return (-1);
502 	}
503 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
504 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
505 
506 	return (0);
507 }
508 
509 /*
510  * Copy terminal window size from our terminal to the pts.
511  */
512 /*ARGSUSED*/
513 static void
514 sigwinch(int s)
515 {
516 	struct winsize ws;
517 
518 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
519 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
520 }
521 
522 static volatile int close_on_sig = -1;
523 
524 static void
525 /*ARGSUSED*/
526 sigcld(int s)
527 {
528 	int status;
529 	pid_t pid;
530 
531 	/*
532 	 * Peek at the exit status.  If this isn't the process we cared
533 	 * about, then just reap it.
534 	 */
535 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
536 		if (pid == child_pid &&
537 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
538 			dead = 1;
539 			if (close_on_sig != -1) {
540 				(void) write(close_on_sig, "a", 1);
541 				(void) close(close_on_sig);
542 				close_on_sig = -1;
543 			}
544 		} else {
545 			(void) waitpid(pid, &status, WNOHANG);
546 		}
547 	}
548 }
549 
550 /*
551  * Some signals (currently, SIGINT) must be forwarded on to the process
552  * group of the child process.
553  */
554 static void
555 sig_forward(int s)
556 {
557 	if (child_pid != -1) {
558 		(void) sigsend(P_PGID, child_pid, s);
559 	}
560 }
561 
562 /*
563  * reset terminal settings for global environment
564  */
565 static void
566 reset_tty()
567 {
568 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
569 }
570 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *  - printable characters are copied unchanged;
 *  - control characters 0 through 31 become ^@ through ^_;
 *  - everything else becomes a backslash followed by three octal digits.
 *
 * cc must have room for at least five bytes, including the terminating
 * NUL.
 *
 * The character is handled as an unsigned char throughout: passing a
 * negative value to isprint() is undefined, and shifting a negative char
 * would produce the wrong octal digits for bytes with the high bit set.
 */
static void
canonify(char c, char *cc)
{
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
593 
594 /*
595  * process_user_input watches the input stream for the escape sequence for
596  * 'quit' (by default, tilde-period).  Because we might be fed just one
597  * keystroke at a time, state associated with the user input (are we at the
598  * beginning of the line?  are we locally echoing the next character?) is
599  * maintained by beginning_of_line and local_echo across calls to the routine.
600  * If the write to outfd fails, we'll try to read from infd in an attempt
601  * to prevent deadlock between the two processes.
602  *
603  * This routine returns -1 when the 'quit' escape sequence has been issued,
604  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
605  */
606 static int
607 process_user_input(int outfd, int infd)
608 {
609 	static boolean_t beginning_of_line = B_TRUE;
610 	static boolean_t local_echo = B_FALSE;
611 	char ibuf[ZLOGIN_BUFSIZ];
612 	int nbytes;
613 	char *buf = ibuf;
614 	char c = *buf;
615 
616 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
617 	if (nbytes == -1 && (errno != EINTR || dead))
618 		return (-1);
619 
620 	if (nbytes == -1)	/* The read was interrupted. */
621 		return (0);
622 
623 	/* 0 read means EOF, close the pipe to the child */
624 	if (nbytes == 0)
625 		return (1);
626 
627 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
628 		buf++;
629 		if (beginning_of_line && !nocmdchar) {
630 			beginning_of_line = B_FALSE;
631 			if (c == cmdchar) {
632 				local_echo = B_TRUE;
633 				continue;
634 			}
635 		} else if (local_echo) {
636 			local_echo = B_FALSE;
637 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
638 				char cc[CANONIFY_LEN];
639 
640 				canonify(c, cc);
641 				(void) write(STDOUT_FILENO, &cmdchar, 1);
642 				(void) write(STDOUT_FILENO, cc, strlen(cc));
643 				return (-1);
644 			}
645 		}
646 retry:
647 		if (write(outfd, &c, 1) <= 0) {
648 			/*
649 			 * Since the fd we are writing to is opened with
650 			 * O_NONBLOCK it is possible to get EAGAIN if the
651 			 * pipe is full.  One way this could happen is if we
652 			 * are writing a lot of data into the pipe in this loop
653 			 * and the application on the other end is echoing that
654 			 * data back out to its stdout.  The output pipe can
655 			 * fill up since we are stuck here in this loop and not
656 			 * draining the other pipe.  We can try to read some of
657 			 * the data to see if we can drain the pipe so that the
658 			 * application can continue to make progress.  The read
659 			 * is non-blocking so we won't hang here.  We also wait
660 			 * a bit before retrying since there could be other
661 			 * reasons why the pipe is full and we don't want to
662 			 * continuously retry.
663 			 */
664 			if (errno == EAGAIN) {
665 				struct timespec rqtp;
666 				int ln;
667 				char obuf[ZLOGIN_BUFSIZ];
668 
669 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
670 					(void) write(STDOUT_FILENO, obuf, ln);
671 
672 				/* sleep for 10 milliseconds */
673 				rqtp.tv_sec = 0;
674 				rqtp.tv_nsec = MSEC2NSEC(10);
675 				(void) nanosleep(&rqtp, NULL);
676 				if (!dead)
677 					goto retry;
678 			}
679 
680 			return (-1);
681 		}
682 		beginning_of_line = (c == '\r' || c == '\n' ||
683 		    c == effective_termios.c_cc[VKILL] ||
684 		    c == effective_termios.c_cc[VEOL] ||
685 		    c == effective_termios.c_cc[VSUSP] ||
686 		    c == effective_termios.c_cc[VINTR]);
687 	}
688 	return (0);
689 }
690 
691 /*
692  * This function prevents deadlock between zlogin and the application in the
693  * zone that it is talking to.  This can happen when we read from zlogin's
694  * stdin and write the data down the pipe to the application.  If the pipe
695  * is full, we'll block in the write.  Because zlogin could be blocked in
696  * the write, it would never read the application's stdout/stderr so the
697  * application can then block on those writes (when the pipe fills up).  If the
698  * the application gets blocked this way, it can never get around to reading
699  * its stdin so that zlogin can unblock from its write.  Once in this state,
700  * the two processes are deadlocked.
701  *
702  * To prevent this, we want to verify that we can write into the pipe before we
703  * read from our stdin.  If the pipe already is pretty full, we bypass the read
704  * for now.  We'll circle back here again after the poll() so that we can
705  * try again.  When this function is called, we already know there is data
706  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
707  * stdin is EOF, and 0 if everything is ok (even though we might not have
708  * read/written any data into the pipe on this iteration).
709  */
710 static int
711 process_raw_input(int stdin_fd, int appin_fd)
712 {
713 	int cc;
714 	struct stat64 sb;
715 	char ibuf[ZLOGIN_RDBUFSIZ];
716 
717 	/* Check how much data is already in the pipe */
718 	if (fstat64(appin_fd, &sb) == -1) {
719 		perror("stat failed");
720 		return (-1);
721 	}
722 
723 	if (dead)
724 		return (-1);
725 
726 	/*
727 	 * The pipe already has a lot of data in it,  don't write any more
728 	 * right now.
729 	 */
730 	if (sb.st_size >= HI_WATER)
731 		return (0);
732 
733 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
734 	if (cc == -1 && (errno != EINTR || dead))
735 		return (-1);
736 
737 	if (cc == -1)	/* The read was interrupted. */
738 		return (0);
739 
740 	/* 0 read means EOF, close the pipe to the child */
741 	if (cc == 0)
742 		return (1);
743 
744 	/*
745 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
746 	 * data *to*.
747 	 */
748 	if (write(stdin_fd, ibuf, cc) == -1)
749 		return (-1);
750 
751 	return (0);
752 }
753 
754 /*
755  * Write the output from the application running in the zone.  We can get
756  * a signal during the write (usually it would be SIGCHLD when the application
757  * has exited) so we loop to make sure we have written all of the data we read.
758  */
759 static int
760 process_output(int in_fd, int out_fd)
761 {
762 	int wrote = 0;
763 	int cc;
764 	char ibuf[ZLOGIN_BUFSIZ];
765 
766 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
767 	if (cc == -1 && (errno != EINTR || dead))
768 		return (-1);
769 	if (cc == 0)	/* EOF */
770 		return (-1);
771 	if (cc == -1)	/* The read was interrupted. */
772 		return (0);
773 
774 	do {
775 		int len;
776 
777 		len = write(out_fd, ibuf + wrote, cc - wrote);
778 		if (len == -1 && errno != EINTR)
779 			return (-1);
780 		if (len != -1)
781 			wrote += len;
782 	} while (wrote < cc);
783 
784 	return (0);
785 }
786 
787 /*
788  * This is the main I/O loop, and is shared across all zlogin modes.
789  * Parameters:
790  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
791  *		   the zone will be written here.
792  *
793  * 	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
794  *		   we're running non-interactive); used in process_raw_input
795  *		   to ensure we don't fill up the application's stdin pipe.
796  *
797  *	stdout_fd: The fd representing 'stdout' for the slave side; output
798  *		   from the zone will arrive here.
799  *
800  *	stderr_fd: The fd representing 'stderr' for the slave side; output
801  *		   from the zone will arrive here.
802  *
803  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
804  *		   be performed on the input coming from STDIN.
805  *
806  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
807  * mode supplies a stderr).
808  *
809  */
810 static void
811 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
812     boolean_t raw_mode)
813 {
814 	struct pollfd pollfds[4];
815 	char ibuf[ZLOGIN_BUFSIZ];
816 	int cc, ret;
817 
818 	/* read from stdout of zone and write to stdout of global zone */
819 	pollfds[0].fd = stdout_fd;
820 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
821 
822 	/* read from stderr of zone and write to stderr of global zone */
823 	pollfds[1].fd = stderr_fd;
824 	pollfds[1].events = pollfds[0].events;
825 
826 	/* read from stdin of global zone and write to stdin of zone */
827 	pollfds[2].fd = STDIN_FILENO;
828 	pollfds[2].events = pollfds[0].events;
829 
830 	/* read from signalling pipe so we know when child dies */
831 	pollfds[3].fd = sig_fd;
832 	pollfds[3].events = pollfds[0].events;
833 
834 	for (;;) {
835 		pollfds[0].revents = pollfds[1].revents =
836 		    pollfds[2].revents = pollfds[3].revents = 0;
837 
838 		if (dead)
839 			break;
840 
841 		/*
842 		 * There is a race condition here where we can receive the
843 		 * child death signal, set the dead flag, but since we have
844 		 * passed the test above, we would go into poll and hang.
845 		 * To avoid this we use the sig_fd as an additional poll fd.
846 		 * The signal handler writes into the other end of this pipe
847 		 * when the child dies so that the poll will always see that
848 		 * input and proceed.  We just loop around at that point and
849 		 * then notice the dead flag.
850 		 */
851 
852 		ret = poll(pollfds,
853 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
854 
855 		if (ret == -1 && errno != EINTR) {
856 			perror("poll failed");
857 			break;
858 		}
859 
860 		if (errno == EINTR && dead) {
861 			break;
862 		}
863 
864 		/* event from master side stdout */
865 		if (pollfds[0].revents) {
866 			if (pollfds[0].revents &
867 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
868 				if (process_output(stdout_fd, STDOUT_FILENO)
869 				    != 0)
870 					break;
871 			} else {
872 				pollerr = pollfds[0].revents;
873 				break;
874 			}
875 		}
876 
877 		/* event from master side stderr */
878 		if (pollfds[1].revents) {
879 			if (pollfds[1].revents &
880 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
881 				if (process_output(stderr_fd, STDERR_FILENO)
882 				    != 0)
883 					break;
884 			} else {
885 				pollerr = pollfds[1].revents;
886 				break;
887 			}
888 		}
889 
890 		/* event from user STDIN side */
891 		if (pollfds[2].revents) {
892 			if (pollfds[2].revents &
893 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
894 				/*
895 				 * stdin fd is stdin of the target; so,
896 				 * the thing we'll write the user data *to*.
897 				 *
898 				 * Also, unlike on the output side, we
899 				 * close the pipe on a zero-length message.
900 				 */
901 				int res;
902 
903 				if (raw_mode)
904 					res = process_raw_input(stdin_fd,
905 					    appin_fd);
906 				else
907 					res = process_user_input(stdin_fd,
908 					    stdout_fd);
909 
910 				if (res < 0)
911 					break;
912 				if (res > 0) {
913 					/* EOF (close) child's stdin_fd */
914 					pollfds[2].fd = -1;
915 					while ((res = close(stdin_fd)) != 0 &&
916 					    errno == EINTR)
917 						;
918 					if (res != 0)
919 						break;
920 				}
921 
922 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
923 				/*
924 				 * It's OK to get a POLLHUP on STDIN-- it
925 				 * always happens if you do:
926 				 *
927 				 * echo foo | zlogin <zone> <command>
928 				 *
929 				 * We reset fd to -1 in this case to clear
930 				 * the condition and close the pipe (EOF) to
931 				 * the other side in order to wrap things up.
932 				 */
933 				int res;
934 
935 				pollfds[2].fd = -1;
936 				while ((res = close(stdin_fd)) != 0 &&
937 				    errno == EINTR)
938 					;
939 				if (res != 0)
940 					break;
941 			} else {
942 				pollerr = pollfds[2].revents;
943 				break;
944 			}
945 		}
946 	}
947 
948 	/*
949 	 * We are in the midst of dying, but try to poll with a short
950 	 * timeout to see if we can catch the last bit of I/O from the
951 	 * children.
952 	 */
953 retry:
954 	pollfds[0].revents = pollfds[1].revents = 0;
955 	(void) poll(pollfds, 2, 100);
956 	if (pollfds[0].revents &
957 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
958 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
959 			(void) write(STDOUT_FILENO, ibuf, cc);
960 			goto retry;
961 		}
962 	}
963 	if (pollfds[1].revents &
964 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
965 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
966 			(void) write(STDERR_FILENO, ibuf, cc);
967 			goto retry;
968 		}
969 	}
970 }
971 
972 /*
973  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
974  */
975 static const char *
976 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
977     size_t len)
978 {
979 	bzero(user_cmd, sizeof (user_cmd));
980 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
981 		return (NULL);
982 
983 	return (user_cmd);
984 }
985 
986 /* From libc */
987 extern int str2passwd(const char *, int, void *, char *, int);
988 
989 /*
990  * exec() the user_cmd brand hook, and convert the output string to a
991  * struct passwd.  This is to be called after zone_enter().
992  *
993  */
994 static struct passwd *
995 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
996     int pwbuflen)
997 {
998 	char pwline[NSS_BUFLEN_PASSWD];
999 	char *cin = NULL;
1000 	FILE *fin;
1001 	int status;
1002 
1003 	assert(getzoneid() != GLOBAL_ZONEID);
1004 
1005 	if ((fin = popen(user_cmd, "r")) == NULL)
1006 		return (NULL);
1007 
1008 	while (cin == NULL && !feof(fin))
1009 		cin = fgets(pwline, sizeof (pwline), fin);
1010 
1011 	if (cin == NULL) {
1012 		(void) pclose(fin);
1013 		return (NULL);
1014 	}
1015 
1016 	status = pclose(fin);
1017 	if (!WIFEXITED(status))
1018 		return (NULL);
1019 	if (WEXITSTATUS(status) != 0)
1020 		return (NULL);
1021 
1022 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1023 		return (pwent);
1024 	else
1025 		return (NULL);
1026 }
1027 
1028 static char **
1029 zone_login_cmd(brand_handle_t bh, const char *login)
1030 {
1031 	static char result_buf[ARG_MAX];
1032 	char **new_argv, *ptr, *lasts;
1033 	int n, a;
1034 
1035 	/* Get the login command for the target zone. */
1036 	bzero(result_buf, sizeof (result_buf));
1037 
1038 	if (forced_login) {
1039 		if (brand_get_forcedlogin_cmd(bh, login,
1040 		    result_buf, sizeof (result_buf)) != 0)
1041 			return (NULL);
1042 	} else {
1043 		if (brand_get_login_cmd(bh, login,
1044 		    result_buf, sizeof (result_buf)) != 0)
1045 			return (NULL);
1046 	}
1047 
1048 	/*
1049 	 * We got back a string that we'd like to execute.  But since
1050 	 * we're not doing the execution via a shell we'll need to convert
1051 	 * the exec string to an array of strings.  We'll do that here
1052 	 * but we're going to be very simplistic about it and break stuff
1053 	 * up based on spaces.  We're not even going to support any kind
1054 	 * of quoting or escape characters.  It's truly amazing that
1055 	 * there is no library function in OpenSolaris to do this for us.
1056 	 */
1057 
1058 	/*
1059 	 * Be paranoid.  Since we're deliniating based on spaces make
1060 	 * sure there are no adjacent spaces.
1061 	 */
1062 	if (strstr(result_buf, "  ") != NULL)
1063 		return (NULL);
1064 
1065 	/* Remove any trailing whitespace.  */
1066 	n = strlen(result_buf);
1067 	if (result_buf[n - 1] == ' ')
1068 		result_buf[n - 1] = '\0';
1069 
1070 	/* Count how many elements there are in the exec string. */
1071 	ptr = result_buf;
1072 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1073 		;
1074 
1075 	/* Allocate the argv array that we're going to return. */
1076 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1077 		return (NULL);
1078 
1079 	/* Tokenize the exec string and return. */
1080 	a = 0;
1081 	new_argv[a++] = result_buf;
1082 	if (n > 2) {
1083 		(void) strtok_r(result_buf, " ", &lasts);
1084 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1085 			;
1086 	} else {
1087 		new_argv[a++] = NULL;
1088 	}
1089 	assert(n == a);
1090 	return (new_argv);
1091 }
1092 
1093 /*
1094  * Prepare argv array for exec'd process; if we're passing commands to the
1095  * new process, then use su(1M) to do the invocation.  Otherwise, use
1096  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1097  * login that we're coming from another zone, and to disregard its CONSOLE
1098  * checks).
1099  */
1100 static char **
1101 prep_args(brand_handle_t bh, const char *login, char **argv)
1102 {
1103 	int argc = 0, a = 0, i, n = -1;
1104 	char **new_argv;
1105 
1106 	if (argv != NULL) {
1107 		size_t subshell_len = 1;
1108 		char *subshell;
1109 
1110 		while (argv[argc] != NULL)
1111 			argc++;
1112 
1113 		for (i = 0; i < argc; i++) {
1114 			subshell_len += strlen(argv[i]) + 1;
1115 		}
1116 		if ((subshell = calloc(1, subshell_len)) == NULL)
1117 			return (NULL);
1118 
1119 		for (i = 0; i < argc; i++) {
1120 			(void) strcat(subshell, argv[i]);
1121 			(void) strcat(subshell, " ");
1122 		}
1123 
1124 		if (failsafe) {
1125 			n = 4;
1126 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1127 				return (NULL);
1128 
1129 			new_argv[a++] = FAILSAFESHELL;
1130 		} else {
1131 			n = 5;
1132 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1133 				return (NULL);
1134 
1135 			new_argv[a++] = SUPATH;
1136 			if (strcmp(login, "root") != 0) {
1137 				new_argv[a++] = "-";
1138 				n++;
1139 			}
1140 			new_argv[a++] = (char *)login;
1141 		}
1142 		new_argv[a++] = "-c";
1143 		new_argv[a++] = subshell;
1144 		new_argv[a++] = NULL;
1145 		assert(a == n);
1146 	} else {
1147 		if (failsafe) {
1148 			n = 2;
1149 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1150 				return (NULL);
1151 			new_argv[a++] = FAILSAFESHELL;
1152 			new_argv[a++] = NULL;
1153 			assert(n == a);
1154 		} else {
1155 			new_argv = zone_login_cmd(bh, login);
1156 		}
1157 	}
1158 
1159 	return (new_argv);
1160 }
1161 
/*
 * Helper for the environment builders: return a freshly allocated
 * "name=value" string, or NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
	size_t name_len = strlen(name);
	size_t value_len = strlen(value);
	char *entry;

	/* Room for the name, the '=', the value and the trailing NUL. */
	entry = malloc(name_len + value_len + 2);
	if (entry == NULL)
		return (NULL);

	(void) memcpy(entry, name, name_len);
	entry[name_len] = '=';
	/* Copying value_len + 1 bytes brings the terminating NUL along. */
	(void) memcpy(entry + name_len + 1, value, value_len + 1);

	return (entry);
}
1177 
1178 /*
1179  * Prepare envp array for exec'd process.
1180  */
1181 static char **
1182 prep_env()
1183 {
1184 	int e = 0, size = 1;
1185 	char **new_env, *estr;
1186 	char *term = getenv("TERM");
1187 
1188 	size++;	/* for $PATH */
1189 	if (term != NULL)
1190 		size++;
1191 
1192 	/*
1193 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1194 	 * We also set $SHELL, since neither login nor su will be around to do
1195 	 * it.
1196 	 */
1197 	if (failsafe)
1198 		size += 2;
1199 
1200 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1201 		return (NULL);
1202 
1203 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1204 		return (NULL);
1205 	new_env[e++] = estr;
1206 
1207 	if (term != NULL) {
1208 		if ((estr = add_env("TERM", term)) == NULL)
1209 			return (NULL);
1210 		new_env[e++] = estr;
1211 	}
1212 
1213 	if (failsafe) {
1214 		if ((estr = add_env("HOME", "/")) == NULL)
1215 			return (NULL);
1216 		new_env[e++] = estr;
1217 
1218 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1219 			return (NULL);
1220 		new_env[e++] = estr;
1221 	}
1222 
1223 	new_env[e++] = NULL;
1224 
1225 	assert(e == size);
1226 
1227 	return (new_env);
1228 }
1229 
1230 /*
1231  * Finish the preparation of the envp array for exec'd non-interactive
1232  * zlogins.  This is called in the child process *after* we zone_enter(), since
1233  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1234  * etc.  We need only do this in the non-interactive, mode, since otherwise
1235  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1236  * additional ways in which the command could fail, and we'd prefer to avoid
1237  * that.
1238  */
1239 static char **
1240 prep_env_noninteractive(const char *user_cmd, char **env)
1241 {
1242 	size_t size;
1243 	char **new_env;
1244 	int e, i;
1245 	char *estr;
1246 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1247 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
1248 	struct passwd pwent;
1249 	struct passwd *pw = NULL;
1250 
1251 	assert(env != NULL);
1252 	assert(failsafe == 0);
1253 
1254 	/*
1255 	 * Exec the "user_cmd" brand hook to get a pwent for the
1256 	 * login user.  If this fails, HOME will be set to "/", SHELL
1257 	 * will be set to $DEFAULTSHELL, and we will continue to exec
1258 	 * SUPATH <login> -c <cmd>.
1259 	 */
1260 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1261 
1262 	/*
1263 	 * Get existing envp size.
1264 	 */
1265 	for (size = 0; env[size] != NULL; size++)
1266 		;
1267 
1268 	e = size;
1269 
1270 	/*
1271 	 * Finish filling out the environment; we duplicate the environment
1272 	 * setup described in login(1), for lack of a better precedent.
1273 	 */
1274 	if (pw != NULL)
1275 		size += 3;	/* LOGNAME, HOME, MAIL */
1276 	else
1277 		size += 1;	/* HOME */
1278 
1279 	size++;	/* always fill in SHELL */
1280 	size++; /* terminating NULL */
1281 
1282 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1283 		goto malloc_fail;
1284 
1285 	/*
1286 	 * Copy existing elements of env into new_env.
1287 	 */
1288 	for (i = 0; env[i] != NULL; i++) {
1289 		if ((new_env[i] = strdup(env[i])) == NULL)
1290 			goto malloc_fail;
1291 	}
1292 	assert(e == i);
1293 
1294 	if (pw != NULL) {
1295 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1296 			goto malloc_fail;
1297 		new_env[e++] = estr;
1298 
1299 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1300 			goto malloc_fail;
1301 		new_env[e++] = estr;
1302 
1303 		if (chdir(pw->pw_dir) != 0)
1304 			zerror(gettext("Could not chdir to home directory "
1305 			    "%s: %s"), pw->pw_dir, strerror(errno));
1306 
1307 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1308 		    pw->pw_name);
1309 		if ((estr = add_env("MAIL", varmail)) == NULL)
1310 			goto malloc_fail;
1311 		new_env[e++] = estr;
1312 	} else {
1313 		if ((estr = add_env("HOME", "/")) == NULL)
1314 			goto malloc_fail;
1315 		new_env[e++] = estr;
1316 	}
1317 
1318 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1319 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1320 			goto malloc_fail;
1321 		new_env[e++] = estr;
1322 	} else {
1323 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1324 			goto malloc_fail;
1325 		new_env[e++] = estr;
1326 	}
1327 
1328 	new_env[e++] = NULL;	/* add terminating NULL */
1329 
1330 	assert(e == size);
1331 	return (new_env);
1332 
1333 malloc_fail:
1334 	zperror(gettext("failed to allocate memory for process environment"));
1335 	return (NULL);
1336 }
1337 
/*
 * Per-descriptor callback (see its use with fdwalk() in main): close 'fd'
 * unless it is the descriptor whose number 'slavefd' points at.  Always
 * returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	int keep = *(int *)slavefd;

	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
1345 
1346 static void
1347 set_cmdchar(char *cmdcharstr)
1348 {
1349 	char c;
1350 	long lc;
1351 
1352 	if ((c = *cmdcharstr) != '\\') {
1353 		cmdchar = c;
1354 		return;
1355 	}
1356 
1357 	c = cmdcharstr[1];
1358 	if (c == '\0' || c == '\\') {
1359 		cmdchar = '\\';
1360 		return;
1361 	}
1362 
1363 	if (c < '0' || c > '7') {
1364 		zerror(gettext("Unrecognized escape character option %s"),
1365 		    cmdcharstr);
1366 		usage();
1367 	}
1368 
1369 	lc = strtol(cmdcharstr + 1, NULL, 8);
1370 	if (lc < 0 || lc > 255) {
1371 		zerror(gettext("Octal escape character '%s' too large"),
1372 		    cmdcharstr);
1373 		usage();
1374 	}
1375 	cmdchar = (char)lc;
1376 }
1377 
1378 static int
1379 setup_utmpx(char *slavename)
1380 {
1381 	struct utmpx ut;
1382 
1383 	bzero(&ut, sizeof (ut));
1384 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1385 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1386 	ut.ut_pid = getpid();
1387 	ut.ut_id[0] = 'z';
1388 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1389 	ut.ut_type = LOGIN_PROCESS;
1390 	(void) time(&ut.ut_tv.tv_sec);
1391 
1392 	if (makeutx(&ut) == NULL) {
1393 		zerror(gettext("makeutx failed"));
1394 		return (-1);
1395 	}
1396 	return (0);
1397 }
1398 
/*
 * Give up the lock file descriptor obtained from grab_lock_file() by
 * closing it.  The result of close() is deliberately ignored.
 */
static void
release_lock_file(int lockfd)
{
	int rc = close(lockfd);

	(void) rc;
}
1404 
1405 static int
1406 grab_lock_file(const char *zone_name, int *lockfd)
1407 {
1408 	char pathbuf[PATH_MAX];
1409 	struct flock flock;
1410 
1411 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1412 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1413 		    strerror(errno));
1414 		return (-1);
1415 	}
1416 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1417 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1418 	    ZONES_TMPDIR, zone_name);
1419 
1420 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1421 		zerror(gettext("could not open %s: %s"), pathbuf,
1422 		    strerror(errno));
1423 		return (-1);
1424 	}
1425 	/*
1426 	 * Lock the file to synchronize with other zoneadmds
1427 	 */
1428 	flock.l_type = F_WRLCK;
1429 	flock.l_whence = SEEK_SET;
1430 	flock.l_start = (off_t)0;
1431 	flock.l_len = (off_t)0;
1432 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1433 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1434 		    strerror(errno));
1435 		release_lock_file(*lockfd);
1436 		return (-1);
1437 	}
1438 	return (Z_OK);
1439 }
1440 
1441 static int
1442 start_zoneadmd(const char *zone_name)
1443 {
1444 	pid_t retval;
1445 	int pstatus = 0, error = -1, lockfd, doorfd;
1446 	struct door_info info;
1447 	char doorpath[MAXPATHLEN];
1448 
1449 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1450 
1451 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1452 		return (-1);
1453 	/*
1454 	 * We must do the door check with the lock held.  Otherwise, we
1455 	 * might race against another zoneadm/zlogin process and wind
1456 	 * up with two processes trying to start zoneadmd at the same
1457 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1458 	 * to be as seamless as is practical, from a user perspective.
1459 	 */
1460 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1461 		if (errno != ENOENT) {
1462 			zerror("failed to open %s: %s", doorpath,
1463 			    strerror(errno));
1464 			goto out;
1465 		}
1466 	} else {
1467 		/*
1468 		 * Seems to be working ok.
1469 		 */
1470 		if (door_info(doorfd, &info) == 0 &&
1471 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1472 			error = 0;
1473 			goto out;
1474 		}
1475 	}
1476 
1477 	if ((child_pid = fork()) == -1) {
1478 		zperror(gettext("could not fork"));
1479 		goto out;
1480 	} else if (child_pid == 0) {
1481 		/* child process */
1482 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1483 		    zone_name, NULL);
1484 		zperror(gettext("could not exec zoneadmd"));
1485 		_exit(1);
1486 	}
1487 
1488 	/* parent process */
1489 	do {
1490 		retval = waitpid(child_pid, &pstatus, 0);
1491 	} while (retval != child_pid);
1492 	if (WIFSIGNALED(pstatus) ||
1493 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1494 		zerror(gettext("could not start %s"), "zoneadmd");
1495 		goto out;
1496 	}
1497 	error = 0;
1498 out:
1499 	release_lock_file(lockfd);
1500 	(void) close(doorfd);
1501 	return (error);
1502 }
1503 
1504 static int
1505 init_template(void)
1506 {
1507 	int fd;
1508 	int err = 0;
1509 
1510 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1511 	if (fd == -1)
1512 		return (-1);
1513 
1514 	/*
1515 	 * zlogin doesn't do anything with the contract.
1516 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1517 	 */
1518 	err |= ct_tmpl_set_critical(fd, 0);
1519 	err |= ct_tmpl_set_informative(fd, 0);
1520 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1521 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1522 	if (err || ct_tmpl_activate(fd)) {
1523 		(void) close(fd);
1524 		return (-1);
1525 	}
1526 
1527 	return (fd);
1528 }
1529 
1530 static int
1531 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1532     char **new_args, char **new_env)
1533 {
1534 	pid_t retval;
1535 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1536 	int child_status;
1537 	int tmpl_fd;
1538 	sigset_t block_cld;
1539 
1540 	if ((tmpl_fd = init_template()) == -1) {
1541 		reset_tty();
1542 		zperror(gettext("could not create contract"));
1543 		return (1);
1544 	}
1545 
1546 	if (pipe(stdin_pipe) != 0) {
1547 		zperror(gettext("could not create STDIN pipe"));
1548 		return (1);
1549 	}
1550 	/*
1551 	 * When the user types ^D, we get a zero length message on STDIN.
1552 	 * We need to echo that down the pipe to send it to the other side;
1553 	 * but by default, pipes don't propagate zero-length messages.  We
1554 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1555 	 */
1556 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1557 		zperror(gettext("could not configure STDIN pipe"));
1558 		return (1);
1559 
1560 	}
1561 	if (pipe(stdout_pipe) != 0) {
1562 		zperror(gettext("could not create STDOUT pipe"));
1563 		return (1);
1564 	}
1565 	if (pipe(stderr_pipe) != 0) {
1566 		zperror(gettext("could not create STDERR pipe"));
1567 		return (1);
1568 	}
1569 
1570 	if (pipe(dead_child_pipe) != 0) {
1571 		zperror(gettext("could not create signalling pipe"));
1572 		return (1);
1573 	}
1574 	close_on_sig = dead_child_pipe[0];
1575 
1576 	/*
1577 	 * If any of the pipe FD's winds up being less than STDERR, then we
1578 	 * have a mess on our hands-- and we are lacking some of the I/O
1579 	 * streams we would expect anyway.  So we bail.
1580 	 */
1581 	if (stdin_pipe[0] <= STDERR_FILENO ||
1582 	    stdin_pipe[1] <= STDERR_FILENO ||
1583 	    stdout_pipe[0] <= STDERR_FILENO ||
1584 	    stdout_pipe[1] <= STDERR_FILENO ||
1585 	    stderr_pipe[0] <= STDERR_FILENO ||
1586 	    stderr_pipe[1] <= STDERR_FILENO ||
1587 	    dead_child_pipe[0] <= STDERR_FILENO ||
1588 	    dead_child_pipe[1] <= STDERR_FILENO) {
1589 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1590 		return (1);
1591 	}
1592 
1593 	if (prefork_dropprivs() != 0) {
1594 		zperror(gettext("could not allocate privilege set"));
1595 		return (1);
1596 	}
1597 
1598 	(void) sigset(SIGCLD, sigcld);
1599 	(void) sigemptyset(&block_cld);
1600 	(void) sigaddset(&block_cld, SIGCLD);
1601 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1602 
1603 	if ((child_pid = fork()) == -1) {
1604 		(void) ct_tmpl_clear(tmpl_fd);
1605 		(void) close(tmpl_fd);
1606 		zperror(gettext("could not fork"));
1607 		return (1);
1608 	} else if (child_pid == 0) { /* child process */
1609 		(void) ct_tmpl_clear(tmpl_fd);
1610 
1611 		/*
1612 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1613 		 */
1614 		(void) close(STDIN_FILENO);
1615 		(void) close(STDOUT_FILENO);
1616 		(void) close(STDERR_FILENO);
1617 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1618 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1619 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1620 		(void) closefrom(STDERR_FILENO + 1);
1621 
1622 		(void) sigset(SIGCLD, SIG_DFL);
1623 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1624 		/*
1625 		 * In case any of stdin, stdout or stderr are streams,
1626 		 * anchor them to prevent malicious I_POPs.
1627 		 */
1628 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1629 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1630 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1631 
1632 		if (zone_enter(zoneid) == -1) {
1633 			zerror(gettext("could not enter zone %s: %s"),
1634 			    zonename, strerror(errno));
1635 			_exit(1);
1636 		}
1637 
1638 		/*
1639 		 * For non-native zones, tell libc where it can find locale
1640 		 * specific getttext() messages.
1641 		 */
1642 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1643 			(void) bindtextdomain(TEXT_DOMAIN,
1644 			    "/.SUNWnative/usr/lib/locale");
1645 		else if (access("/native/usr/lib/locale", R_OK) == 0)
1646 			(void) bindtextdomain(TEXT_DOMAIN,
1647 			    "/native/usr/lib/locale");
1648 
1649 		if (!failsafe)
1650 			new_env = prep_env_noninteractive(user_cmd, new_env);
1651 
1652 		if (new_env == NULL) {
1653 			_exit(1);
1654 		}
1655 
1656 		/*
1657 		 * Move into a new process group; the zone_enter will have
1658 		 * placed us into zsched's session, and we want to be in
1659 		 * a unique process group.
1660 		 */
1661 		(void) setpgid(getpid(), getpid());
1662 
1663 		/*
1664 		 * The child needs to run as root to
1665 		 * execute the su program.
1666 		 */
1667 		if (setuid(0) == -1) {
1668 			zperror(gettext("insufficient privilege"));
1669 			return (1);
1670 		}
1671 
1672 		(void) execve(new_args[0], new_args, new_env);
1673 		zperror(gettext("exec failure"));
1674 		_exit(1);
1675 	}
1676 	/* parent */
1677 
1678 	/* close pipe sides written by child */
1679 	(void) close(stdout_pipe[1]);
1680 	(void) close(stderr_pipe[1]);
1681 
1682 	(void) sigset(SIGINT, sig_forward);
1683 
1684 	postfork_dropprivs();
1685 
1686 	(void) ct_tmpl_clear(tmpl_fd);
1687 	(void) close(tmpl_fd);
1688 
1689 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1690 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1691 	    dead_child_pipe[1], B_TRUE);
1692 	do {
1693 		retval = waitpid(child_pid, &child_status, 0);
1694 		if (retval == -1) {
1695 			child_status = 0;
1696 		}
1697 	} while (retval != child_pid && errno != ECHILD);
1698 
1699 	return (WEXITSTATUS(child_status));
1700 }
1701 
/*
 * Return the user name for our real user ID.
 *
 * Authorizations are checked to restrict access based on the requested
 * operation and zone name.  It is assumed that the program is running
 * with all privileges, but that the real user ID is that of the user or
 * role on whose behalf we are operating, so the name looked up here is
 * the one used for subsequent authorization checks.
 *
 * The returned string is the pw_name member of the entry returned by
 * getpwuid().  If the lookup fails, the error is reported and the
 * process exits with status 1.
 */
static char *
get_username()
{
	struct passwd *pwd = getpwuid(getuid());

	if (pwd != NULL)
		return (pwd->pw_name);

	zerror(gettext("could not get user name."));
	_exit(1);
}
1724 
1725 int
1726 main(int argc, char **argv)
1727 {
1728 	int arg, console = 0;
1729 	zoneid_t zoneid;
1730 	zone_state_t st;
1731 	char *login = "root";
1732 	int lflag = 0;
1733 	int nflag = 0;
1734 	char *zonename = NULL;
1735 	char **proc_args = NULL;
1736 	char **new_args, **new_env;
1737 	sigset_t block_cld;
1738 	char devroot[MAXPATHLEN];
1739 	char *slavename, slaveshortname[MAXPATHLEN];
1740 	priv_set_t *privset;
1741 	int tmpl_fd;
1742 	char zonebrand[MAXNAMELEN];
1743 	char default_brand[MAXNAMELEN];
1744 	struct stat sb;
1745 	char kernzone[ZONENAME_MAX];
1746 	brand_handle_t bh;
1747 	char user_cmd[MAXPATHLEN];
1748 	char authname[MAXAUTHS];
1749 
1750 	(void) setlocale(LC_ALL, "");
1751 	(void) textdomain(TEXT_DOMAIN);
1752 
1753 	(void) getpname(argv[0]);
1754 	username = get_username();
1755 
1756 	while ((arg = getopt(argc, argv, "nECR:Se:l:Q")) != EOF) {
1757 		switch (arg) {
1758 		case 'C':
1759 			console = 1;
1760 			break;
1761 		case 'E':
1762 			nocmdchar = 1;
1763 			break;
1764 		case 'R':	/* undocumented */
1765 			if (*optarg != '/') {
1766 				zerror(gettext("root path must be absolute."));
1767 				exit(2);
1768 			}
1769 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1770 				zerror(
1771 				    gettext("root path must be a directory."));
1772 				exit(2);
1773 			}
1774 			zonecfg_set_root(optarg);
1775 			break;
1776 		case 'Q':
1777 			quiet = 1;
1778 			break;
1779 		case 'S':
1780 			failsafe = 1;
1781 			break;
1782 		case 'e':
1783 			set_cmdchar(optarg);
1784 			break;
1785 		case 'l':
1786 			login = optarg;
1787 			lflag = 1;
1788 			break;
1789 		case 'n':
1790 			nflag = 1;
1791 			break;
1792 		default:
1793 			usage();
1794 		}
1795 	}
1796 
1797 	if (console != 0) {
1798 
1799 		if (lflag != 0) {
1800 			zerror(gettext(
1801 			    "-l may not be specified for console login"));
1802 			usage();
1803 		}
1804 
1805 		if (nflag != 0) {
1806 			zerror(gettext(
1807 			    "-n may not be specified for console login"));
1808 			usage();
1809 		}
1810 
1811 		if (failsafe != 0) {
1812 			zerror(gettext(
1813 			    "-S may not be specified for console login"));
1814 			usage();
1815 		}
1816 
1817 		if (zonecfg_in_alt_root()) {
1818 			zerror(gettext(
1819 			    "-R may not be specified for console login"));
1820 			exit(2);
1821 		}
1822 
1823 	}
1824 
1825 	if (failsafe != 0 && lflag != 0) {
1826 		zerror(gettext("-l may not be specified for failsafe login"));
1827 		usage();
1828 	}
1829 
1830 	if (optind == (argc - 1)) {
1831 		/*
1832 		 * zone name, no process name; this should be an interactive
1833 		 * as long as STDIN is really a tty.
1834 		 */
1835 		if (nflag != 0) {
1836 			zerror(gettext(
1837 			    "-n may not be specified for interactive login"));
1838 			usage();
1839 		}
1840 		if (isatty(STDIN_FILENO))
1841 			interactive = 1;
1842 		zonename = argv[optind];
1843 	} else if (optind < (argc - 1)) {
1844 		if (console) {
1845 			zerror(gettext("Commands may not be specified for "
1846 			    "console login."));
1847 			usage();
1848 		}
1849 		/* zone name and process name, and possibly some args */
1850 		zonename = argv[optind];
1851 		proc_args = &argv[optind + 1];
1852 		interactive = 0;
1853 	} else {
1854 		usage();
1855 	}
1856 
1857 	if (getzoneid() != GLOBAL_ZONEID) {
1858 		zerror(gettext("'%s' may only be used from the global zone"),
1859 		    pname);
1860 		return (1);
1861 	}
1862 
1863 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1864 		zerror(gettext("'%s' not applicable to the global zone"),
1865 		    pname);
1866 		return (1);
1867 	}
1868 
1869 	if (zone_get_state(zonename, &st) != Z_OK) {
1870 		zerror(gettext("zone '%s' unknown"), zonename);
1871 		return (1);
1872 	}
1873 
1874 	if (st < ZONE_STATE_INSTALLED) {
1875 		zerror(gettext("cannot login to a zone which is '%s'"),
1876 		    zone_state_str(st));
1877 		return (1);
1878 	}
1879 
1880 	/*
1881 	 * In both console and non-console cases, we require all privs.
1882 	 * In the console case, because we may need to startup zoneadmd.
1883 	 * In the non-console case in order to do zone_enter(2), zonept()
1884 	 * and other tasks.
1885 	 */
1886 
1887 	if ((privset = priv_allocset()) == NULL) {
1888 		zperror(gettext("priv_allocset failed"));
1889 		return (1);
1890 	}
1891 
1892 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1893 		zperror(gettext("getppriv failed"));
1894 		priv_freeset(privset);
1895 		return (1);
1896 	}
1897 
1898 	if (priv_isfullset(privset) == B_FALSE) {
1899 		zerror(gettext("You lack sufficient privilege to run "
1900 		    "this command (all privs required)"));
1901 		priv_freeset(privset);
1902 		return (1);
1903 	}
1904 	priv_freeset(privset);
1905 
1906 	/*
1907 	 * Check if user is authorized for requested usage of the zone
1908 	 */
1909 
1910 	(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1911 	    ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1912 	if (chkauthattr(authname, username) == 0) {
1913 		if (console) {
1914 			zerror(gettext("%s is not authorized for console "
1915 			    "access to  %s zone."),
1916 			    username, zonename);
1917 			return (1);
1918 		} else {
1919 			(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1920 			    ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1921 			if (failsafe || !interactive) {
1922 				zerror(gettext("%s is not authorized for  "
1923 				    "failsafe or non-interactive login "
1924 				    "to  %s zone."), username, zonename);
1925 				return (1);
1926 			} else if (chkauthattr(authname, username) == 0) {
1927 				zerror(gettext("%s is not authorized "
1928 				    " to login to %s zone."),
1929 				    username, zonename);
1930 				return (1);
1931 			}
1932 		}
1933 	} else {
1934 		forced_login = B_TRUE;
1935 	}
1936 
1937 	/*
1938 	 * The console is a separate case from the rest of the code; handle
1939 	 * it first.
1940 	 */
1941 	if (console) {
1942 		/*
1943 		 * Ensure that zoneadmd for this zone is running.
1944 		 */
1945 		if (start_zoneadmd(zonename) == -1)
1946 			return (1);
1947 
1948 		/*
1949 		 * Make contact with zoneadmd.
1950 		 */
1951 		if (get_console_master(zonename) == -1)
1952 			return (1);
1953 
1954 		if (!quiet)
1955 			(void) printf(
1956 			    gettext("[Connected to zone '%s' console]\n"),
1957 			    zonename);
1958 
1959 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1960 			reset_tty();
1961 			zperror(gettext("failed to set stdin pty to raw mode"));
1962 			return (1);
1963 		}
1964 
1965 		(void) sigset(SIGWINCH, sigwinch);
1966 		(void) sigwinch(0);
1967 
1968 		/*
1969 		 * Run the I/O loop until we get disconnected.
1970 		 */
1971 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1972 		reset_tty();
1973 		if (!quiet)
1974 			(void) printf(
1975 			    gettext("\n[Connection to zone '%s' console "
1976 			    "closed]\n"), zonename);
1977 
1978 		return (0);
1979 	}
1980 
1981 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1982 		zerror(gettext("login allowed only to running zones "
1983 		    "(%s is '%s')."), zonename, zone_state_str(st));
1984 		return (1);
1985 	}
1986 
1987 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1988 	if (zonecfg_in_alt_root()) {
1989 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
1990 
1991 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1992 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1993 			zerror(gettext("cannot find scratch zone %s"),
1994 			    zonename);
1995 			if (fp != NULL)
1996 				zonecfg_close_scratch(fp);
1997 			return (1);
1998 		}
1999 		zonecfg_close_scratch(fp);
2000 	}
2001 
2002 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2003 		zerror(gettext("failed to get zoneid for zone '%s'"),
2004 		    zonename);
2005 		return (1);
2006 	}
2007 
2008 	/*
2009 	 * We need the zone root path only if we are setting up a pty.
2010 	 */
2011 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2012 		zerror(gettext("could not get dev path for zone %s"),
2013 		    zonename);
2014 		return (1);
2015 	}
2016 
2017 	if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2018 		zerror(gettext("could not get brand for zone %s"), zonename);
2019 		return (1);
2020 	}
2021 	/*
2022 	 * In the alternate root environment, the only supported
2023 	 * operations are mount and unmount.  In this case, just treat
2024 	 * the zone as native if it is cluster.  Cluster zones can be
2025 	 * native for the purpose of LU or upgrade, and the cluster
2026 	 * brand may not exist in the miniroot (such as in net install
2027 	 * upgrade).
2028 	 */
2029 	if (zonecfg_default_brand(default_brand,
2030 	    sizeof (default_brand)) != Z_OK) {
2031 		zerror(gettext("unable to determine default brand"));
2032 		return (1);
2033 	}
2034 	if (zonecfg_in_alt_root() &&
2035 	    strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2036 		(void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2037 	}
2038 
2039 	if ((bh = brand_open(zonebrand)) == NULL) {
2040 		zerror(gettext("could not open brand for zone %s"), zonename);
2041 		return (1);
2042 	}
2043 
2044 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2045 		zperror(gettext("could not assemble new arguments"));
2046 		brand_close(bh);
2047 		return (1);
2048 	}
2049 	/*
2050 	 * Get the brand specific user_cmd.  This command is used to get
2051 	 * a passwd(4) entry for login.
2052 	 */
2053 	if (!interactive && !failsafe) {
2054 		if (zone_get_user_cmd(bh, login, user_cmd,
2055 		    sizeof (user_cmd)) == NULL) {
2056 			zerror(gettext("could not get user_cmd for zone %s"),
2057 			    zonename);
2058 			brand_close(bh);
2059 			return (1);
2060 		}
2061 	}
2062 	brand_close(bh);
2063 
2064 	if ((new_env = prep_env()) == NULL) {
2065 		zperror(gettext("could not assemble new environment"));
2066 		return (1);
2067 	}
2068 
2069 	if (!interactive) {
2070 		if (nflag) {
2071 			int nfd;
2072 
2073 			if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2074 				zperror(gettext("failed to open null device"));
2075 				return (1);
2076 			}
2077 			if (nfd != STDIN_FILENO) {
2078 				if (dup2(nfd, STDIN_FILENO) < 0) {
2079 					zperror(gettext(
2080 					    "failed to dup2 null device"));
2081 					return (1);
2082 				}
2083 				(void) close(nfd);
2084 			}
2085 			/* /dev/null is now standard input */
2086 		}
2087 		return (noninteractive_login(zonename, user_cmd, zoneid,
2088 		    new_args, new_env));
2089 	}
2090 
2091 	if (zonecfg_in_alt_root()) {
2092 		zerror(gettext("cannot use interactive login with scratch "
2093 		    "zone"));
2094 		return (1);
2095 	}
2096 
2097 	/*
2098 	 * Things are more complex in interactive mode; we get the
2099 	 * master side of the pty, then place the user's terminal into
2100 	 * raw mode.
2101 	 */
2102 	if (get_master_pty() == -1) {
2103 		zerror(gettext("could not setup master pty device"));
2104 		return (1);
2105 	}
2106 
2107 	/*
2108 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2109 	 */
2110 	if ((slavename = ptsname(masterfd)) == NULL) {
2111 		zperror(gettext("failed to get name for pseudo-tty"));
2112 		return (1);
2113 	}
2114 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2115 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2116 		    sizeof (slaveshortname));
2117 	else
2118 		(void) strlcpy(slaveshortname, slavename,
2119 		    sizeof (slaveshortname));
2120 
2121 	if (!quiet)
2122 		(void) printf(gettext("[Connected to zone '%s' %s]\n"),
2123 		    zonename, slaveshortname);
2124 
2125 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
2126 		reset_tty();
2127 		zperror(gettext("failed to set stdin pty to raw mode"));
2128 		return (1);
2129 	}
2130 
2131 	if (prefork_dropprivs() != 0) {
2132 		reset_tty();
2133 		zperror(gettext("could not allocate privilege set"));
2134 		return (1);
2135 	}
2136 
2137 	/*
2138 	 * We must mask SIGCLD until after we have coped with the fork
2139 	 * sufficiently to deal with it; otherwise we can race and receive the
2140 	 * signal before child_pid has been initialized (yes, this really
2141 	 * happens).
2142 	 */
2143 	(void) sigset(SIGCLD, sigcld);
2144 	(void) sigemptyset(&block_cld);
2145 	(void) sigaddset(&block_cld, SIGCLD);
2146 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2147 
2148 	/*
2149 	 * We activate the contract template at the last minute to
2150 	 * avoid intermediate functions that could be using fork(2)
2151 	 * internally.
2152 	 */
2153 	if ((tmpl_fd = init_template()) == -1) {
2154 		reset_tty();
2155 		zperror(gettext("could not create contract"));
2156 		return (1);
2157 	}
2158 
2159 	if ((child_pid = fork()) == -1) {
2160 		(void) ct_tmpl_clear(tmpl_fd);
2161 		reset_tty();
2162 		zperror(gettext("could not fork"));
2163 		return (1);
2164 	} else if (child_pid == 0) { /* child process */
2165 		int slavefd, newslave;
2166 
2167 		(void) ct_tmpl_clear(tmpl_fd);
2168 		(void) close(tmpl_fd);
2169 
2170 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2171 
2172 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2173 			return (1);
2174 
2175 		/*
2176 		 * Close all fds except for the slave pty.
2177 		 */
2178 		(void) fdwalk(close_func, &slavefd);
2179 
2180 		/*
2181 		 * Temporarily dup slavefd to stderr; that way if we have
2182 		 * to print out that zone_enter failed, the output will
2183 		 * have somewhere to go.
2184 		 */
2185 		if (slavefd != STDERR_FILENO)
2186 			(void) dup2(slavefd, STDERR_FILENO);
2187 
2188 		if (zone_enter(zoneid) == -1) {
2189 			zerror(gettext("could not enter zone %s: %s"),
2190 			    zonename, strerror(errno));
2191 			return (1);
2192 		}
2193 
2194 		if (slavefd != STDERR_FILENO)
2195 			(void) close(STDERR_FILENO);
2196 
2197 		/*
2198 		 * We take pains to get this process into a new process
2199 		 * group, and subsequently a new session.  In this way,
2200 		 * we'll have a session which doesn't yet have a controlling
2201 		 * terminal.  When we open the slave, it will become the
2202 		 * controlling terminal; no PIDs concerning pgrps or sids
2203 		 * will leak inappropriately into the zone.
2204 		 */
2205 		(void) setpgrp();
2206 
2207 		/*
2208 		 * We need the slave pty to be referenced from the zone's
2209 		 * /dev in order to ensure that the devt's, etc are all
2210 		 * correct.  Otherwise we break ttyname and the like.
2211 		 */
2212 		if ((newslave = open(slavename, O_RDWR)) == -1) {
2213 			(void) close(slavefd);
2214 			return (1);
2215 		}
2216 		(void) close(slavefd);
2217 		slavefd = newslave;
2218 
2219 		/*
2220 		 * dup the slave to the various FDs, so that when the
2221 		 * spawned process does a write/read it maps to the slave
2222 		 * pty.
2223 		 */
2224 		(void) dup2(slavefd, STDIN_FILENO);
2225 		(void) dup2(slavefd, STDOUT_FILENO);
2226 		(void) dup2(slavefd, STDERR_FILENO);
2227 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2228 		    slavefd != STDERR_FILENO) {
2229 			(void) close(slavefd);
2230 		}
2231 
2232 		/*
2233 		 * In failsafe mode, we don't use login(1), so don't try
2234 		 * setting up a utmpx entry.
2235 		 */
2236 		if (!failsafe)
2237 			if (setup_utmpx(slaveshortname) == -1)
2238 				return (1);
2239 
2240 		/*
2241 		 * The child needs to run as root to
2242 		 * execute the brand's login program.
2243 		 */
2244 		if (setuid(0) == -1) {
2245 			zperror(gettext("insufficient privilege"));
2246 			return (1);
2247 		}
2248 
2249 		(void) execve(new_args[0], new_args, new_env);
2250 		zperror(gettext("exec failure"));
2251 		return (1);
2252 	}
2253 
2254 	(void) ct_tmpl_clear(tmpl_fd);
2255 	(void) close(tmpl_fd);
2256 
2257 	/*
2258 	 * The rest is only for the parent process.
2259 	 */
2260 	(void) sigset(SIGWINCH, sigwinch);
2261 
2262 	postfork_dropprivs();
2263 
2264 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2265 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2266 
2267 	reset_tty();
2268 	if (!quiet)
2269 		(void) fprintf(stderr,
2270 		    gettext("\n[Connection to zone '%s' %s closed]\n"),
2271 		    zonename, slaveshortname);
2272 
2273 	if (pollerr != 0) {
2274 		(void) fprintf(stderr, gettext("Error: connection closed due "
2275 		    "to unexpected pollevents=0x%x.\n"), pollerr);
2276 		return (1);
2277 	}
2278 
2279 	return (0);
2280 }
2281