xref: /titanic_41/usr/src/cmd/zlogin/zlogin.c (revision bbaa8b60dd95d714741fc474adad3cf710ef4efd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * zlogin provides three types of login which allow users in the global
27  * zone to access non-global zones.
28  *
29  * - "interactive login" is similar to rlogin(1); for example, the user could
30  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
31  *   granted a new pty (which is then shoved into the zone), and an I/O
32  *   loop between parent and child processes takes care of the interactive
33  *   session.  In this mode, login(1) (and its -c option, which means
34  *   "already authenticated") is employed to take care of the initialization
35  *   of the user's session.
36  *
37  * - "non-interactive login" is similar to su(1M); the user could issue
38  *   'zlogin my-zone ls -l' and the command would be run as specified.
39  *   In this mode, zlogin sets up pipes as the communication channel, and
40  *   'su' is used to do the login setup work.
41  *
42  * - "console login" is the equivalent to accessing the tip line for a
43  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
44  *   In this mode, zlogin contacts the zoneadmd process via unix domain
45  *   socket.  If zoneadmd is not running, it starts it.  This allows the
46  *   console to be available anytime the zone is installed, regardless of
47  *   whether it is running.
48  */
49 
50 #include <sys/socket.h>
51 #include <sys/termios.h>
52 #include <sys/utsname.h>
53 #include <sys/stat.h>
54 #include <sys/types.h>
55 #include <sys/contract/process.h>
56 #include <sys/ctfs.h>
57 #include <sys/brand.h>
58 #include <sys/wait.h>
59 #include <alloca.h>
60 #include <assert.h>
61 #include <ctype.h>
62 #include <door.h>
63 #include <errno.h>
64 #include <nss_dbdefs.h>
65 #include <poll.h>
66 #include <priv.h>
67 #include <pwd.h>
68 #include <unistd.h>
69 #include <utmpx.h>
70 #include <sac.h>
71 #include <signal.h>
72 #include <stdarg.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include <strings.h>
77 #include <stropts.h>
78 #include <wait.h>
79 #include <zone.h>
80 #include <fcntl.h>
81 #include <libdevinfo.h>
82 #include <libintl.h>
83 #include <locale.h>
84 #include <libzonecfg.h>
85 #include <libcontract.h>
86 #include <libbrand.h>
87 #include <auth_list.h>
88 #include <auth_attr.h>
89 #include <secdb.h>
90 
91 static int masterfd;			/* console socket or pty master fd */
92 static struct termios save_termios;	/* user's settings, for reset_tty() */
93 static struct termios effective_termios; /* raw settings + saved VEOF/VEOL */
94 static int save_fd;			/* fd that save_termios belongs to */
95 static struct winsize winsize;		/* window size read from stdin */
96 static volatile int dead;		/* set by sigcld() once child is gone */
97 static volatile pid_t child_pid = -1;	/* child watched by sigcld(); -1: none */
98 static int interactive = 0;		/* selects privset in prefork_dropprivs */
99 static priv_set_t *dropprivs;		/* built pre-fork, applied post-fork */
100 
101 static int nocmdchar = 0;		/* nonzero: no escape-char processing */
102 static int failsafe = 0;		/* nonzero: tolerate pty setup failures */
103 static char cmdchar = '~';		/* escape character at start of line */
104 
105 static int pollerr = 0;			/* unexpected revents seen by doio() */
106 
107 static const char *pname;		/* program name, set by getpname() */
108 static char *username;			/* NOTE(review): not used in the code shown here */
109 
110 /*
111  * When forced_login is true, the user is not prompted
112  * for an authentication password in the target zone.
113  */
114 static boolean_t forced_login = B_FALSE;
115 
116 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
117 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
118 #endif
119 
120 #define	SUPATH	"/usr/bin/su"
121 #define	FAILSAFESHELL	"/sbin/sh"
122 #define	DEFAULTSHELL	"/sbin/sh"
123 #define	DEF_PATH	"/usr/sbin:/usr/bin"
124 
125 #define	CLUSTER_BRAND_NAME	"cluster"
126 
127 /*
128  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
129  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
130  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
131  * also chosen in conjunction with the HI_WATER setting to make sure we
132  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
133  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
134  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
135  * is less than HI_WATER data already in the pipe.
136  */
137 #define	ZLOGIN_BUFSIZ	8192
138 #define	ZLOGIN_RDBUFSIZ	1024
139 #define	HI_WATER	8192
140 
141 /*
142  * See canonify() below.  CANONIFY_LEN is the maximum length that a
143  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
144  */
145 #define	CANONIFY_LEN 5
146 
147 static void
148 usage(void)
149 {
150 	(void) fprintf(stderr, gettext("usage: %s [ -CES ] [ -e cmdchar ] "
151 	    "[-l user] zonename [command [args ...] ]\n"), pname);
152 	exit(2);
153 }
154 
155 static const char *
156 getpname(const char *arg0)
157 {
158 	const char *p = strrchr(arg0, '/');
159 
160 	if (p == NULL)
161 		p = arg0;
162 	else
163 		p++;
164 
165 	pname = p;
166 	return (p);
167 }
168 
169 static void
170 zerror(const char *fmt, ...)
171 {
172 	va_list alist;
173 
174 	(void) fprintf(stderr, "%s: ", pname);
175 	va_start(alist, fmt);
176 	(void) vfprintf(stderr, fmt, alist);
177 	va_end(alist);
178 	(void) fprintf(stderr, "\n");
179 }
180 
181 static void
182 zperror(const char *str)
183 {
184 	const char *estr;
185 
186 	if ((estr = strerror(errno)) != NULL)
187 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
188 	else
189 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
190 }
191 
192 /*
193  * The first part of our privilege dropping scheme needs to be called before
194  * fork(), since we must have it for security; we don't want to be surprised
195  * later that we couldn't allocate the privset.
196  */
197 static int
198 prefork_dropprivs()
199 {
200 	if ((dropprivs = priv_allocset()) == NULL)
201 		return (1);
202 
203 	priv_basicset(dropprivs);
204 	(void) priv_delset(dropprivs, PRIV_PROC_INFO);
205 	(void) priv_delset(dropprivs, PRIV_PROC_FORK);
206 	(void) priv_delset(dropprivs, PRIV_PROC_EXEC);
207 	(void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
208 
209 	/*
210 	 * We need to keep the basic privilege PROC_SESSION and all unknown
211 	 * basic privileges as well as the privileges PROC_ZONE and
212 	 * PROC_OWNER in order to query session information and
213 	 * send signals.
214 	 */
215 	if (interactive == 0) {
216 		(void) priv_addset(dropprivs, PRIV_PROC_ZONE);
217 		(void) priv_addset(dropprivs, PRIV_PROC_OWNER);
218 	} else {
219 		(void) priv_delset(dropprivs, PRIV_PROC_SESSION);
220 	}
221 
222 	return (0);
223 }
224 
225 /*
226  * The second part of the privilege drop.  We are paranoid about being attacked
227  * by the zone, so we drop all privileges.  This should prevent a compromise
228  * which gets us to fork(), exec(), symlink(), etc.
229  */
230 static void
231 postfork_dropprivs()
232 {
233 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
234 		zperror(gettext("Warning: could not set permitted privileges"));
235 	}
236 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
237 		zperror(gettext("Warning: could not set limit privileges"));
238 	}
239 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
240 		zperror(gettext("Warning: could not set inheritable "
241 		    "privileges"));
242 	}
243 }
244 
245 /*
246  * Create the unix domain socket and call the zoneadmd server; handshake
247  * with it to determine whether it will allow us to connect.
248  */
249 static int
250 get_console_master(const char *zname)
251 {
252 	int sockfd = -1;
253 	struct sockaddr_un servaddr;
254 	char clientid[MAXPATHLEN];
255 	char handshake[MAXPATHLEN], c;
256 	int msglen;
257 	int i = 0, err = 0;
258 
259 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
260 		zperror(gettext("could not create socket"));
261 		return (-1);
262 	}
263 
264 	bzero(&servaddr, sizeof (servaddr));
265 	servaddr.sun_family = AF_UNIX;
266 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
267 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
268 
269 	if (connect(sockfd, (struct sockaddr *)&servaddr,
270 	    sizeof (servaddr)) == -1) {
271 		zperror(gettext("Could not connect to zone console"));
272 		goto bad;
273 	}
274 	masterfd = sockfd;
275 
276 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s\n",
277 	    getpid(), setlocale(LC_MESSAGES, NULL));
278 
279 	if (msglen >= sizeof (clientid) || msglen < 0) {
280 		zerror("protocol error");
281 		goto bad;
282 	}
283 
284 	if (write(masterfd, clientid, msglen) != msglen) {
285 		zerror("protocol error");
286 		goto bad;
287 	}
288 
289 	bzero(handshake, sizeof (handshake));
290 
291 	/*
292 	 * Take care not to accumulate more than our fill, and leave room for
293 	 * the NUL at the end.
294 	 */
295 	while ((err = read(masterfd, &c, 1)) == 1) {
296 		if (i >= (sizeof (handshake) - 1))
297 			break;
298 		if (c == '\n')
299 			break;
300 		handshake[i] = c;
301 		i++;
302 	}
303 
304 	/*
305 	 * If something went wrong during the handshake we bail; perhaps
306 	 * the server died off.
307 	 */
308 	if (err == -1) {
309 		zperror(gettext("Could not connect to zone console"));
310 		goto bad;
311 	}
312 
313 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
314 		return (0);
315 
316 	zerror(gettext("Console is already in use by process ID %s."),
317 	    handshake);
318 bad:
319 	(void) close(sockfd);
320 	masterfd = -1;
321 	return (-1);
322 }
323 
324 
325 /*
326  * Routines to handle pty creation upon zone entry and to shuttle I/O back
327  * and forth between the two terminals.  We also compute and store the
328  * name of the slave terminal associated with the master side.
329  */
330 static int
331 get_master_pty()
332 {
333 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
334 		zperror(gettext("failed to obtain a pseudo-tty"));
335 		return (-1);
336 	}
337 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
338 		zperror(gettext("failed to get terminal settings from stdin"));
339 		return (-1);
340 	}
341 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
342 
343 	return (0);
344 }
345 
346 /*
347  * This is a bit tricky; normally a pts device will belong to the zone it
348  * is granted to.  But in the case of "entering" a zone, we need to establish
349  * the pty before entering the zone so that we can vector I/O to and from it
350  * from the global zone.
351  *
352  * We use the zonept() call to let the ptm driver know what we are up to;
353  * the only other hairy bit is the setting of zoneslavename (which happens
354  * above, in get_master_pty()).
355  */
356 static int
357 init_slave_pty(zoneid_t zoneid, char *devroot)
358 {
359 	int slavefd = -1;
360 	char *slavename, zoneslavename[MAXPATHLEN];
361 
362 	/*
363 	 * Set slave permissions, zone the pts, then unlock it.
364 	 */
365 	if (grantpt(masterfd) != 0) {
366 		zperror(gettext("grantpt failed"));
367 		return (-1);
368 	}
369 
370 	if (unlockpt(masterfd) != 0) {
371 		zperror(gettext("unlockpt failed"));
372 		return (-1);
373 	}
374 
375 	/*
376 	 * We must open the slave side before zoning this pty; otherwise
377 	 * the kernel would refuse us the open-- zoning a pty makes it
378 	 * inaccessible to the global zone.  Note we are trying to open
379 	 * the device node via the $ZONEROOT/dev path for this pty.
380 	 *
381 	 * Later we'll close the slave out when once we've opened it again
382 	 * from within the target zone.  Blarg.
383 	 */
384 	if ((slavename = ptsname(masterfd)) == NULL) {
385 		zperror(gettext("failed to get name for pseudo-tty"));
386 		return (-1);
387 	}
388 
389 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
390 	    devroot, slavename);
391 
392 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
393 		zerror(gettext("failed to open %s: %s"), zoneslavename,
394 		    strerror(errno));
395 		return (-1);
396 	}
397 
398 	/*
399 	 * Push hardware emulation (ptem), line discipline (ldterm),
400 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
401 	 */
402 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
403 		zperror(gettext("failed to push ptem module"));
404 		if (!failsafe)
405 			goto bad;
406 	}
407 
408 	/*
409 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
410 	 * this prior to entering the zone so that we can detect any errors
411 	 * early, and so that we can set the anchor from the global zone.
412 	 */
413 	if (ioctl(slavefd, I_ANCHOR) == -1) {
414 		zperror(gettext("failed to set stream anchor"));
415 		if (!failsafe)
416 			goto bad;
417 	}
418 
419 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
420 		zperror(gettext("failed to push ldterm module"));
421 		if (!failsafe)
422 			goto bad;
423 	}
424 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
425 		zperror(gettext("failed to push ttcompat module"));
426 		if (!failsafe)
427 			goto bad;
428 	}
429 
430 	/*
431 	 * Propagate terminal settings from the external term to the new one.
432 	 */
433 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
434 		zperror(gettext("failed to set terminal settings"));
435 		if (!failsafe)
436 			goto bad;
437 	}
438 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
439 
440 	if (zonept(masterfd, zoneid) != 0) {
441 		zperror(gettext("could not set zoneid of pty"));
442 		goto bad;
443 	}
444 
445 	return (slavefd);
446 
447 bad:
448 	(void) close(slavefd);
449 	return (-1);
450 }
451 
452 /*
453  * Place terminal into raw mode.
454  */
455 static int
456 set_tty_rawmode(int fd)
457 {
458 	struct termios term;
459 	if (tcgetattr(fd, &term) < 0) {
460 		zperror(gettext("failed to get user terminal settings"));
461 		return (-1);
462 	}
463 
464 	/* Stash for later, so we can revert back to previous mode */
465 	save_termios = term;
466 	save_fd = fd;
467 
468 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
469 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
470 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
471 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
472 	/* disable output post-processing */
473 	term.c_oflag &= ~OPOST;
474 	/* disable canonical mode, signal chars, echo & extended functions */
475 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
476 
477 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
478 	term.c_cc[VTIME] = 0;
479 
480 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
481 		zperror(gettext("failed to set user terminal to raw mode"));
482 		return (-1);
483 	}
484 
485 	/*
486 	 * We need to know the value of VEOF so that we can properly process for
487 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
488 	 * because VMIN overloads the same array slot in non-canonical mode.
489 	 * Stupid @&^%!
490 	 *
491 	 * So here we construct the "effective" termios from the current
492 	 * terminal settings, and the corrected VEOF and VEOL settings.
493 	 */
494 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
495 		zperror(gettext("failed to get user terminal settings"));
496 		return (-1);
497 	}
498 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
499 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
500 
501 	return (0);
502 }
503 
504 /*
505  * Copy terminal window size from our terminal to the pts.
506  */
507 /*ARGSUSED*/
508 static void
509 sigwinch(int s)
510 {
511 	struct winsize ws;
512 
513 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
514 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
515 }
516 
517 static volatile int close_on_sig = -1;
518 
519 static void
520 /*ARGSUSED*/
521 sigcld(int s)
522 {
523 	int status;
524 	pid_t pid;
525 
526 	/*
527 	 * Peek at the exit status.  If this isn't the process we cared
528 	 * about, then just reap it.
529 	 */
530 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
531 		if (pid == child_pid &&
532 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
533 			dead = 1;
534 			if (close_on_sig != -1) {
535 				(void) write(close_on_sig, "a", 1);
536 				(void) close(close_on_sig);
537 				close_on_sig = -1;
538 			}
539 		} else {
540 			(void) waitpid(pid, &status, WNOHANG);
541 		}
542 	}
543 }
544 
545 /*
546  * Some signals (currently, SIGINT) must be forwarded on to the process
547  * group of the child process.
548  */
549 static void
550 sig_forward(int s)
551 {
552 	if (child_pid != -1) {
553 		pid_t pgid = getpgid(child_pid);
554 		if (pgid != -1)
555 			(void) sigsend(P_PGID, pgid, s);
556 	}
557 }
558 
559 /*
560  * reset terminal settings for global environment
561  */
562 static void
563 reset_tty()
564 {
565 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
566 }
567 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 * c is the character to convert; cc receives the NUL-terminated result and
 * must have room for at least 5 bytes (see CANONIFY_LEN):
 *   - printable characters are copied as-is,
 *   - control characters 0..31 become '^' followed by the matching letter,
 *   - everything else becomes a backslash and three octal digits.
 */
static void
canonify(char c, char *cc)
{
	/*
	 * Work on the unsigned value: <ctype.h> functions are undefined for
	 * negative arguments other than EOF, and the octal digits below must
	 * be derived from the byte value, not a sign-extended one.
	 */
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
590 
591 /*
592  * process_user_input watches the input stream for the escape sequence for
593  * 'quit' (by default, tilde-period).  Because we might be fed just one
594  * keystroke at a time, state associated with the user input (are we at the
595  * beginning of the line?  are we locally echoing the next character?) is
596  * maintained by beginning_of_line and local_echo across calls to the routine.
597  * If the write to outfd fails, we'll try to read from infd in an attempt
598  * to prevent deadlock between the two processes.
599  *
600  * This routine returns -1 when the 'quit' escape sequence has been issued,
601  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
602  */
603 static int
604 process_user_input(int outfd, int infd)
605 {
606 	static boolean_t beginning_of_line = B_TRUE;
607 	static boolean_t local_echo = B_FALSE;
608 	char ibuf[ZLOGIN_BUFSIZ];
609 	int nbytes;
610 	char *buf = ibuf;
611 	char c = *buf;
612 
613 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
614 	if (nbytes == -1 && (errno != EINTR || dead))
615 		return (-1);
616 
617 	if (nbytes == -1)	/* The read was interrupted. */
618 		return (0);
619 
620 	/* 0 read means EOF, close the pipe to the child */
621 	if (nbytes == 0)
622 		return (1);
623 
624 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
625 		buf++;
626 		if (beginning_of_line && !nocmdchar) {
627 			beginning_of_line = B_FALSE;
628 			if (c == cmdchar) {
629 				local_echo = B_TRUE;
630 				continue;
631 			}
632 		} else if (local_echo) {
633 			local_echo = B_FALSE;
634 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
635 				char cc[CANONIFY_LEN];
636 
637 				canonify(c, cc);
638 				(void) write(STDOUT_FILENO, &cmdchar, 1);
639 				(void) write(STDOUT_FILENO, cc, strlen(cc));
640 				return (-1);
641 			}
642 		}
643 retry:
644 		if (write(outfd, &c, 1) <= 0) {
645 			/*
646 			 * Since the fd we are writing to is opened with
647 			 * O_NONBLOCK it is possible to get EAGAIN if the
648 			 * pipe is full.  One way this could happen is if we
649 			 * are writing a lot of data into the pipe in this loop
650 			 * and the application on the other end is echoing that
651 			 * data back out to its stdout.  The output pipe can
652 			 * fill up since we are stuck here in this loop and not
653 			 * draining the other pipe.  We can try to read some of
654 			 * the data to see if we can drain the pipe so that the
655 			 * application can continue to make progress.  The read
656 			 * is non-blocking so we won't hang here.  We also wait
657 			 * a bit before retrying since there could be other
658 			 * reasons why the pipe is full and we don't want to
659 			 * continuously retry.
660 			 */
661 			if (errno == EAGAIN) {
662 				struct timespec rqtp;
663 				int ln;
664 				char obuf[ZLOGIN_BUFSIZ];
665 
666 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
667 					(void) write(STDOUT_FILENO, obuf, ln);
668 
669 				/* sleep for 10 milliseconds */
670 				rqtp.tv_sec = 0;
671 				rqtp.tv_nsec = 10 * (NANOSEC / MILLISEC);
672 				(void) nanosleep(&rqtp, NULL);
673 				if (!dead)
674 					goto retry;
675 			}
676 
677 			return (-1);
678 		}
679 		beginning_of_line = (c == '\r' || c == '\n' ||
680 		    c == effective_termios.c_cc[VKILL] ||
681 		    c == effective_termios.c_cc[VEOL] ||
682 		    c == effective_termios.c_cc[VSUSP] ||
683 		    c == effective_termios.c_cc[VINTR]);
684 	}
685 	return (0);
686 }
687 
688 /*
689  * This function prevents deadlock between zlogin and the application in the
690  * zone that it is talking to.  This can happen when we read from zlogin's
691  * stdin and write the data down the pipe to the application.  If the pipe
692  * is full, we'll block in the write.  Because zlogin could be blocked in
693  * the write, it would never read the application's stdout/stderr so the
694  * application can then block on those writes (when the pipe fills up).  If the
695  * the application gets blocked this way, it can never get around to reading
696  * its stdin so that zlogin can unblock from its write.  Once in this state,
697  * the two processes are deadlocked.
698  *
699  * To prevent this, we want to verify that we can write into the pipe before we
700  * read from our stdin.  If the pipe already is pretty full, we bypass the read
701  * for now.  We'll circle back here again after the poll() so that we can
702  * try again.  When this function is called, we already know there is data
703  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
704  * stdin is EOF, and 0 if everything is ok (even though we might not have
705  * read/written any data into the pipe on this iteration).
706  */
707 static int
708 process_raw_input(int stdin_fd, int appin_fd)
709 {
710 	int cc;
711 	struct stat64 sb;
712 	char ibuf[ZLOGIN_RDBUFSIZ];
713 
714 	/* Check how much data is already in the pipe */
715 	if (fstat64(appin_fd, &sb) == -1) {
716 		perror("stat failed");
717 		return (-1);
718 	}
719 
720 	if (dead)
721 		return (-1);
722 
723 	/*
724 	 * The pipe already has a lot of data in it,  don't write any more
725 	 * right now.
726 	 */
727 	if (sb.st_size >= HI_WATER)
728 		return (0);
729 
730 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
731 	if (cc == -1 && (errno != EINTR || dead))
732 		return (-1);
733 
734 	if (cc == -1)	/* The read was interrupted. */
735 		return (0);
736 
737 	/* 0 read means EOF, close the pipe to the child */
738 	if (cc == 0)
739 		return (1);
740 
741 	/*
742 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
743 	 * data *to*.
744 	 */
745 	if (write(stdin_fd, ibuf, cc) == -1)
746 		return (-1);
747 
748 	return (0);
749 }
750 
751 /*
752  * Write the output from the application running in the zone.  We can get
753  * a signal during the write (usually it would be SIGCHLD when the application
754  * has exited) so we loop to make sure we have written all of the data we read.
755  */
756 static int
757 process_output(int in_fd, int out_fd)
758 {
759 	int wrote = 0;
760 	int cc;
761 	char ibuf[ZLOGIN_BUFSIZ];
762 
763 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
764 	if (cc == -1 && (errno != EINTR || dead))
765 		return (-1);
766 	if (cc == 0)	/* EOF */
767 		return (-1);
768 	if (cc == -1)	/* The read was interrupted. */
769 		return (0);
770 
771 	do {
772 		int len;
773 
774 		len = write(out_fd, ibuf + wrote, cc - wrote);
775 		if (len == -1 && errno != EINTR)
776 			return (-1);
777 		if (len != -1)
778 			wrote += len;
779 	} while (wrote < cc);
780 
781 	return (0);
782 }
783 
784 /*
785  * This is the main I/O loop, and is shared across all zlogin modes.
786  * Parameters:
787  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
788  *		   the zone will be written here.
789  *
790  * 	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
791  *		   we're running non-interactive); used in process_raw_input
792  *		   to ensure we don't fill up the application's stdin pipe.
793  *
794  *	stdout_fd: The fd representing 'stdout' for the slave side; output
795  *		   from the zone will arrive here.
796  *
797  *	stderr_fd: The fd representing 'stderr' for the slave side; output
798  *		   from the zone will arrive here.
799  *
800  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
801  *		   be performed on the input coming from STDIN.
802  *
803  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
804  * mode supplies a stderr).
805  *
806  */
807 static void
808 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
809     boolean_t raw_mode)
810 {
811 	struct pollfd pollfds[4];
812 	char ibuf[ZLOGIN_BUFSIZ];
813 	int cc, ret;
814 
815 	/* read from stdout of zone and write to stdout of global zone */
816 	pollfds[0].fd = stdout_fd;
817 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
818 
819 	/* read from stderr of zone and write to stderr of global zone */
820 	pollfds[1].fd = stderr_fd;
821 	pollfds[1].events = pollfds[0].events;
822 
823 	/* read from stdin of global zone and write to stdin of zone */
824 	pollfds[2].fd = STDIN_FILENO;
825 	pollfds[2].events = pollfds[0].events;
826 
827 	/* read from signalling pipe so we know when child dies */
828 	pollfds[3].fd = sig_fd;
829 	pollfds[3].events = pollfds[0].events;
830 
831 	for (;;) {
832 		pollfds[0].revents = pollfds[1].revents =
833 		    pollfds[2].revents = pollfds[3].revents = 0;
834 
835 		if (dead)
836 			break;
837 
838 		/*
839 		 * There is a race condition here where we can receive the
840 		 * child death signal, set the dead flag, but since we have
841 		 * passed the test above, we would go into poll and hang.
842 		 * To avoid this we use the sig_fd as an additional poll fd.
843 		 * The signal handler writes into the other end of this pipe
844 		 * when the child dies so that the poll will always see that
845 		 * input and proceed.  We just loop around at that point and
846 		 * then notice the dead flag.
847 		 */
848 
849 		ret = poll(pollfds,
850 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
851 
852 		if (ret == -1 && errno != EINTR) {
853 			perror("poll failed");
854 			break;
855 		}
856 
857 		if (errno == EINTR && dead) {
858 			break;
859 		}
860 
861 		/* event from master side stdout */
862 		if (pollfds[0].revents) {
863 			if (pollfds[0].revents &
864 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
865 				if (process_output(stdout_fd, STDOUT_FILENO)
866 				    != 0)
867 					break;
868 			} else {
869 				pollerr = pollfds[0].revents;
870 				break;
871 			}
872 		}
873 
874 		/* event from master side stderr */
875 		if (pollfds[1].revents) {
876 			if (pollfds[1].revents &
877 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
878 				if (process_output(stderr_fd, STDERR_FILENO)
879 				    != 0)
880 					break;
881 			} else {
882 				pollerr = pollfds[1].revents;
883 				break;
884 			}
885 		}
886 
887 		/* event from user STDIN side */
888 		if (pollfds[2].revents) {
889 			if (pollfds[2].revents &
890 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
891 				/*
892 				 * stdin fd is stdin of the target; so,
893 				 * the thing we'll write the user data *to*.
894 				 *
895 				 * Also, unlike on the output side, we
896 				 * close the pipe on a zero-length message.
897 				 */
898 				int res;
899 
900 				if (raw_mode)
901 					res = process_raw_input(stdin_fd,
902 					    appin_fd);
903 				else
904 					res = process_user_input(stdin_fd,
905 					    stdout_fd);
906 
907 				if (res < 0)
908 					break;
909 				if (res > 0) {
910 					/* EOF (close) child's stdin_fd */
911 					pollfds[2].fd = -1;
912 					while ((res = close(stdin_fd)) != 0 &&
913 					    errno == EINTR)
914 						;
915 					if (res != 0)
916 						break;
917 				}
918 
919 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
920 				/*
921 				 * It's OK to get a POLLHUP on STDIN-- it
922 				 * always happens if you do:
923 				 *
924 				 * echo foo | zlogin <zone> <command>
925 				 *
926 				 * We reset fd to -1 in this case to clear
927 				 * the condition and close the pipe (EOF) to
928 				 * the other side in order to wrap things up.
929 				 */
930 				int res;
931 
932 				pollfds[2].fd = -1;
933 				while ((res = close(stdin_fd)) != 0 &&
934 				    errno == EINTR)
935 					;
936 				if (res != 0)
937 					break;
938 			} else {
939 				pollerr = pollfds[2].revents;
940 				break;
941 			}
942 		}
943 	}
944 
945 	/*
946 	 * We are in the midst of dying, but try to poll with a short
947 	 * timeout to see if we can catch the last bit of I/O from the
948 	 * children.
949 	 */
950 retry:
951 	pollfds[0].revents = pollfds[1].revents = 0;
952 	(void) poll(pollfds, 2, 100);
953 	if (pollfds[0].revents &
954 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
955 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
956 			(void) write(STDOUT_FILENO, ibuf, cc);
957 			goto retry;
958 		}
959 	}
960 	if (pollfds[1].revents &
961 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
962 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
963 			(void) write(STDERR_FILENO, ibuf, cc);
964 			goto retry;
965 		}
966 	}
967 }
968 
969 /*
970  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
971  */
972 static const char *
973 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
974     size_t len)
975 {
976 	bzero(user_cmd, sizeof (user_cmd));
977 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
978 		return (NULL);
979 
980 	return (user_cmd);
981 }
982 
983 /* From libc */
984 extern int str2passwd(const char *, int, void *, char *, int);
985 
986 /*
987  * exec() the user_cmd brand hook, and convert the output string to a
988  * struct passwd.  This is to be called after zone_enter().
989  *
990  */
991 static struct passwd *
992 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
993     int pwbuflen)
994 {
995 	char pwline[NSS_BUFLEN_PASSWD];
996 	char *cin = NULL;
997 	FILE *fin;
998 	int status;
999 
1000 	assert(getzoneid() != GLOBAL_ZONEID);
1001 
1002 	if ((fin = popen(user_cmd, "r")) == NULL)
1003 		return (NULL);
1004 
1005 	while (cin == NULL && !feof(fin))
1006 		cin = fgets(pwline, sizeof (pwline), fin);
1007 
1008 	if (cin == NULL) {
1009 		(void) pclose(fin);
1010 		return (NULL);
1011 	}
1012 
1013 	status = pclose(fin);
1014 	if (!WIFEXITED(status))
1015 		return (NULL);
1016 	if (WEXITSTATUS(status) != 0)
1017 		return (NULL);
1018 
1019 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1020 		return (pwent);
1021 	else
1022 		return (NULL);
1023 }
1024 
1025 static char **
1026 zone_login_cmd(brand_handle_t bh, const char *login)
1027 {
1028 	static char result_buf[ARG_MAX];
1029 	char **new_argv, *ptr, *lasts;
1030 	int n, a;
1031 
1032 	/* Get the login command for the target zone. */
1033 	bzero(result_buf, sizeof (result_buf));
1034 
1035 	if (forced_login) {
1036 		if (brand_get_forcedlogin_cmd(bh, login,
1037 		    result_buf, sizeof (result_buf)) != 0)
1038 			return (NULL);
1039 	} else {
1040 		if (brand_get_login_cmd(bh, login,
1041 		    result_buf, sizeof (result_buf)) != 0)
1042 			return (NULL);
1043 	}
1044 
1045 	/*
1046 	 * We got back a string that we'd like to execute.  But since
1047 	 * we're not doing the execution via a shell we'll need to convert
1048 	 * the exec string to an array of strings.  We'll do that here
1049 	 * but we're going to be very simplistic about it and break stuff
1050 	 * up based on spaces.  We're not even going to support any kind
1051 	 * of quoting or escape characters.  It's truly amazing that
1052 	 * there is no library function in OpenSolaris to do this for us.
1053 	 */
1054 
1055 	/*
1056 	 * Be paranoid.  Since we're deliniating based on spaces make
1057 	 * sure there are no adjacent spaces.
1058 	 */
1059 	if (strstr(result_buf, "  ") != NULL)
1060 		return (NULL);
1061 
1062 	/* Remove any trailing whitespace.  */
1063 	n = strlen(result_buf);
1064 	if (result_buf[n - 1] == ' ')
1065 		result_buf[n - 1] = '\0';
1066 
1067 	/* Count how many elements there are in the exec string. */
1068 	ptr = result_buf;
1069 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1070 		;
1071 
1072 	/* Allocate the argv array that we're going to return. */
1073 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1074 		return (NULL);
1075 
1076 	/* Tokenize the exec string and return. */
1077 	a = 0;
1078 	new_argv[a++] = result_buf;
1079 	if (n > 2) {
1080 		(void) strtok_r(result_buf, " ", &lasts);
1081 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1082 			;
1083 	} else {
1084 		new_argv[a++] = NULL;
1085 	}
1086 	assert(n == a);
1087 	return (new_argv);
1088 }
1089 
1090 /*
1091  * Prepare argv array for exec'd process; if we're passing commands to the
1092  * new process, then use su(1M) to do the invocation.  Otherwise, use
1093  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1094  * login that we're coming from another zone, and to disregard its CONSOLE
1095  * checks).
1096  */
1097 static char **
1098 prep_args(brand_handle_t bh, const char *login, char **argv)
1099 {
1100 	int argc = 0, a = 0, i, n = -1;
1101 	char **new_argv;
1102 
1103 	if (argv != NULL) {
1104 		size_t subshell_len = 1;
1105 		char *subshell;
1106 
1107 		while (argv[argc] != NULL)
1108 			argc++;
1109 
1110 		for (i = 0; i < argc; i++) {
1111 			subshell_len += strlen(argv[i]) + 1;
1112 		}
1113 		if ((subshell = calloc(1, subshell_len)) == NULL)
1114 			return (NULL);
1115 
1116 		for (i = 0; i < argc; i++) {
1117 			(void) strcat(subshell, argv[i]);
1118 			(void) strcat(subshell, " ");
1119 		}
1120 
1121 		if (failsafe) {
1122 			n = 4;
1123 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1124 				return (NULL);
1125 
1126 			new_argv[a++] = FAILSAFESHELL;
1127 		} else {
1128 			n = 5;
1129 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1130 				return (NULL);
1131 
1132 			new_argv[a++] = SUPATH;
1133 			if (strcmp(login, "root") != 0) {
1134 				new_argv[a++] = "-";
1135 				n++;
1136 			}
1137 			new_argv[a++] = (char *)login;
1138 		}
1139 		new_argv[a++] = "-c";
1140 		new_argv[a++] = subshell;
1141 		new_argv[a++] = NULL;
1142 		assert(a == n);
1143 	} else {
1144 		if (failsafe) {
1145 			n = 2;
1146 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1147 				return (NULL);
1148 			new_argv[a++] = FAILSAFESHELL;
1149 			new_argv[a++] = NULL;
1150 			assert(n == a);
1151 		} else {
1152 			new_argv = zone_login_cmd(bh, login);
1153 		}
1154 	}
1155 
1156 	return (new_argv);
1157 }
1158 
/*
 * Helper routine for the environment builders: returns a freshly
 * malloc()ed "name=value" string, or NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
	size_t namelen = strlen(name);
	size_t vallen = strlen(value);
	char *entry;

	/* name, '=', value, NUL */
	entry = malloc(namelen + 1 + vallen + 1);
	if (entry != NULL) {
		(void) memcpy(entry, name, namelen);
		entry[namelen] = '=';
		/* vallen + 1 also copies value's terminating NUL */
		(void) memcpy(entry + namelen + 1, value, vallen + 1);
	}

	return (entry);
}
1174 
1175 /*
1176  * Prepare envp array for exec'd process.
1177  */
1178 static char **
1179 prep_env()
1180 {
1181 	int e = 0, size = 1;
1182 	char **new_env, *estr;
1183 	char *term = getenv("TERM");
1184 
1185 	size++;	/* for $PATH */
1186 	if (term != NULL)
1187 		size++;
1188 
1189 	/*
1190 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1191 	 * We also set $SHELL, since neither login nor su will be around to do
1192 	 * it.
1193 	 */
1194 	if (failsafe)
1195 		size += 2;
1196 
1197 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1198 		return (NULL);
1199 
1200 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1201 		return (NULL);
1202 	new_env[e++] = estr;
1203 
1204 	if (term != NULL) {
1205 		if ((estr = add_env("TERM", term)) == NULL)
1206 			return (NULL);
1207 		new_env[e++] = estr;
1208 	}
1209 
1210 	if (failsafe) {
1211 		if ((estr = add_env("HOME", "/")) == NULL)
1212 			return (NULL);
1213 		new_env[e++] = estr;
1214 
1215 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1216 			return (NULL);
1217 		new_env[e++] = estr;
1218 	}
1219 
1220 	new_env[e++] = NULL;
1221 
1222 	assert(e == size);
1223 
1224 	return (new_env);
1225 }
1226 
1227 /*
1228  * Finish the preparation of the envp array for exec'd non-interactive
1229  * zlogins.  This is called in the child process *after* we zone_enter(), since
1230  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1231  * etc.  We need only do this in the non-interactive, mode, since otherwise
1232  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1233  * additional ways in which the command could fail, and we'd prefer to avoid
1234  * that.
1235  */
1236 static char **
1237 prep_env_noninteractive(const char *user_cmd, char **env)
1238 {
1239 	size_t size;
1240 	char **new_env;
1241 	int e, i;
1242 	char *estr;
1243 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1244 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
1245 	struct passwd pwent;
1246 	struct passwd *pw = NULL;
1247 
1248 	assert(env != NULL);
1249 	assert(failsafe == 0);
1250 
1251 	/*
1252 	 * Exec the "user_cmd" brand hook to get a pwent for the
1253 	 * login user.  If this fails, HOME will be set to "/", SHELL
1254 	 * will be set to $DEFAULTSHELL, and we will continue to exec
1255 	 * SUPATH <login> -c <cmd>.
1256 	 */
1257 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1258 
1259 	/*
1260 	 * Get existing envp size.
1261 	 */
1262 	for (size = 0; env[size] != NULL; size++)
1263 		;
1264 
1265 	e = size;
1266 
1267 	/*
1268 	 * Finish filling out the environment; we duplicate the environment
1269 	 * setup described in login(1), for lack of a better precedent.
1270 	 */
1271 	if (pw != NULL)
1272 		size += 3;	/* LOGNAME, HOME, MAIL */
1273 	else
1274 		size += 1;	/* HOME */
1275 
1276 	size++;	/* always fill in SHELL */
1277 	size++; /* terminating NULL */
1278 
1279 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1280 		goto malloc_fail;
1281 
1282 	/*
1283 	 * Copy existing elements of env into new_env.
1284 	 */
1285 	for (i = 0; env[i] != NULL; i++) {
1286 		if ((new_env[i] = strdup(env[i])) == NULL)
1287 			goto malloc_fail;
1288 	}
1289 	assert(e == i);
1290 
1291 	if (pw != NULL) {
1292 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1293 			goto malloc_fail;
1294 		new_env[e++] = estr;
1295 
1296 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1297 			goto malloc_fail;
1298 		new_env[e++] = estr;
1299 
1300 		if (chdir(pw->pw_dir) != 0)
1301 			zerror(gettext("Could not chdir to home directory "
1302 			    "%s: %s"), pw->pw_dir, strerror(errno));
1303 
1304 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1305 		    pw->pw_name);
1306 		if ((estr = add_env("MAIL", varmail)) == NULL)
1307 			goto malloc_fail;
1308 		new_env[e++] = estr;
1309 	} else {
1310 		if ((estr = add_env("HOME", "/")) == NULL)
1311 			goto malloc_fail;
1312 		new_env[e++] = estr;
1313 	}
1314 
1315 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1316 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1317 			goto malloc_fail;
1318 		new_env[e++] = estr;
1319 	} else {
1320 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1321 			goto malloc_fail;
1322 		new_env[e++] = estr;
1323 	}
1324 
1325 	new_env[e++] = NULL;	/* add terminating NULL */
1326 
1327 	assert(e == size);
1328 	return (new_env);
1329 
1330 malloc_fail:
1331 	zperror(gettext("failed to allocate memory for process environment"));
1332 	return (NULL);
1333 }
1334 
/*
 * Descriptor-walk callback: close 'fd' unless it is the descriptor whose
 * number 'slavefd' points at.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep = *(int *)slavefd;

	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
1342 
1343 static void
1344 set_cmdchar(char *cmdcharstr)
1345 {
1346 	char c;
1347 	long lc;
1348 
1349 	if ((c = *cmdcharstr) != '\\') {
1350 		cmdchar = c;
1351 		return;
1352 	}
1353 
1354 	c = cmdcharstr[1];
1355 	if (c == '\0' || c == '\\') {
1356 		cmdchar = '\\';
1357 		return;
1358 	}
1359 
1360 	if (c < '0' || c > '7') {
1361 		zerror(gettext("Unrecognized escape character option %s"),
1362 		    cmdcharstr);
1363 		usage();
1364 	}
1365 
1366 	lc = strtol(cmdcharstr + 1, NULL, 8);
1367 	if (lc < 0 || lc > 255) {
1368 		zerror(gettext("Octal escape character '%s' too large"),
1369 		    cmdcharstr);
1370 		usage();
1371 	}
1372 	cmdchar = (char)lc;
1373 }
1374 
1375 static int
1376 setup_utmpx(char *slavename)
1377 {
1378 	struct utmpx ut;
1379 
1380 	bzero(&ut, sizeof (ut));
1381 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1382 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1383 	ut.ut_pid = getpid();
1384 	ut.ut_id[0] = 'z';
1385 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1386 	ut.ut_type = LOGIN_PROCESS;
1387 	(void) time(&ut.ut_tv.tv_sec);
1388 
1389 	if (makeutx(&ut) == NULL) {
1390 		zerror(gettext("makeutx failed"));
1391 		return (-1);
1392 	}
1393 	return (0);
1394 }
1395 
/*
 * Counterpart to grab_lock_file(): give up the lock file by closing the
 * descriptor that grab_lock_file() handed back.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1401 
1402 static int
1403 grab_lock_file(const char *zone_name, int *lockfd)
1404 {
1405 	char pathbuf[PATH_MAX];
1406 	struct flock flock;
1407 
1408 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1409 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1410 		    strerror(errno));
1411 		return (-1);
1412 	}
1413 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1414 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1415 	    ZONES_TMPDIR, zone_name);
1416 
1417 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1418 		zerror(gettext("could not open %s: %s"), pathbuf,
1419 		    strerror(errno));
1420 		return (-1);
1421 	}
1422 	/*
1423 	 * Lock the file to synchronize with other zoneadmds
1424 	 */
1425 	flock.l_type = F_WRLCK;
1426 	flock.l_whence = SEEK_SET;
1427 	flock.l_start = (off_t)0;
1428 	flock.l_len = (off_t)0;
1429 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1430 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1431 		    strerror(errno));
1432 		release_lock_file(*lockfd);
1433 		return (-1);
1434 	}
1435 	return (Z_OK);
1436 }
1437 
1438 static int
1439 start_zoneadmd(const char *zone_name)
1440 {
1441 	pid_t retval;
1442 	int pstatus = 0, error = -1, lockfd, doorfd;
1443 	struct door_info info;
1444 	char doorpath[MAXPATHLEN];
1445 
1446 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1447 
1448 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1449 		return (-1);
1450 	/*
1451 	 * We must do the door check with the lock held.  Otherwise, we
1452 	 * might race against another zoneadm/zlogin process and wind
1453 	 * up with two processes trying to start zoneadmd at the same
1454 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1455 	 * to be as seamless as is practical, from a user perspective.
1456 	 */
1457 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1458 		if (errno != ENOENT) {
1459 			zerror("failed to open %s: %s", doorpath,
1460 			    strerror(errno));
1461 			goto out;
1462 		}
1463 	} else {
1464 		/*
1465 		 * Seems to be working ok.
1466 		 */
1467 		if (door_info(doorfd, &info) == 0 &&
1468 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1469 			error = 0;
1470 			goto out;
1471 		}
1472 	}
1473 
1474 	if ((child_pid = fork()) == -1) {
1475 		zperror(gettext("could not fork"));
1476 		goto out;
1477 	} else if (child_pid == 0) {
1478 		/* child process */
1479 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1480 		    zone_name, NULL);
1481 		zperror(gettext("could not exec zoneadmd"));
1482 		_exit(1);
1483 	}
1484 
1485 	/* parent process */
1486 	do {
1487 		retval = waitpid(child_pid, &pstatus, 0);
1488 	} while (retval != child_pid);
1489 	if (WIFSIGNALED(pstatus) ||
1490 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1491 		zerror(gettext("could not start %s"), "zoneadmd");
1492 		goto out;
1493 	}
1494 	error = 0;
1495 out:
1496 	release_lock_file(lockfd);
1497 	(void) close(doorfd);
1498 	return (error);
1499 }
1500 
1501 static int
1502 init_template(void)
1503 {
1504 	int fd;
1505 	int err = 0;
1506 
1507 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1508 	if (fd == -1)
1509 		return (-1);
1510 
1511 	/*
1512 	 * zlogin doesn't do anything with the contract.
1513 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1514 	 */
1515 	err |= ct_tmpl_set_critical(fd, 0);
1516 	err |= ct_tmpl_set_informative(fd, 0);
1517 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1518 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1519 	if (err || ct_tmpl_activate(fd)) {
1520 		(void) close(fd);
1521 		return (-1);
1522 	}
1523 
1524 	return (fd);
1525 }
1526 
1527 static int
1528 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1529     char **new_args, char **new_env)
1530 {
1531 	pid_t retval;
1532 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1533 	int child_status;
1534 	int tmpl_fd;
1535 	sigset_t block_cld;
1536 
1537 	if ((tmpl_fd = init_template()) == -1) {
1538 		reset_tty();
1539 		zperror(gettext("could not create contract"));
1540 		return (1);
1541 	}
1542 
1543 	if (pipe(stdin_pipe) != 0) {
1544 		zperror(gettext("could not create STDIN pipe"));
1545 		return (1);
1546 	}
1547 	/*
1548 	 * When the user types ^D, we get a zero length message on STDIN.
1549 	 * We need to echo that down the pipe to send it to the other side;
1550 	 * but by default, pipes don't propagate zero-length messages.  We
1551 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1552 	 */
1553 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1554 		zperror(gettext("could not configure STDIN pipe"));
1555 		return (1);
1556 
1557 	}
1558 	if (pipe(stdout_pipe) != 0) {
1559 		zperror(gettext("could not create STDOUT pipe"));
1560 		return (1);
1561 	}
1562 	if (pipe(stderr_pipe) != 0) {
1563 		zperror(gettext("could not create STDERR pipe"));
1564 		return (1);
1565 	}
1566 
1567 	if (pipe(dead_child_pipe) != 0) {
1568 		zperror(gettext("could not create signalling pipe"));
1569 		return (1);
1570 	}
1571 	close_on_sig = dead_child_pipe[0];
1572 
1573 	/*
1574 	 * If any of the pipe FD's winds up being less than STDERR, then we
1575 	 * have a mess on our hands-- and we are lacking some of the I/O
1576 	 * streams we would expect anyway.  So we bail.
1577 	 */
1578 	if (stdin_pipe[0] <= STDERR_FILENO ||
1579 	    stdin_pipe[1] <= STDERR_FILENO ||
1580 	    stdout_pipe[0] <= STDERR_FILENO ||
1581 	    stdout_pipe[1] <= STDERR_FILENO ||
1582 	    stderr_pipe[0] <= STDERR_FILENO ||
1583 	    stderr_pipe[1] <= STDERR_FILENO ||
1584 	    dead_child_pipe[0] <= STDERR_FILENO ||
1585 	    dead_child_pipe[1] <= STDERR_FILENO) {
1586 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1587 		return (1);
1588 	}
1589 
1590 	if (prefork_dropprivs() != 0) {
1591 		zperror(gettext("could not allocate privilege set"));
1592 		return (1);
1593 	}
1594 
1595 	(void) sigset(SIGCLD, sigcld);
1596 	(void) sigemptyset(&block_cld);
1597 	(void) sigaddset(&block_cld, SIGCLD);
1598 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1599 
1600 	if ((child_pid = fork()) == -1) {
1601 		(void) ct_tmpl_clear(tmpl_fd);
1602 		(void) close(tmpl_fd);
1603 		zperror(gettext("could not fork"));
1604 		return (1);
1605 	} else if (child_pid == 0) { /* child process */
1606 		(void) ct_tmpl_clear(tmpl_fd);
1607 
1608 		/*
1609 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1610 		 */
1611 		(void) close(STDIN_FILENO);
1612 		(void) close(STDOUT_FILENO);
1613 		(void) close(STDERR_FILENO);
1614 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1615 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1616 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1617 		(void) closefrom(STDERR_FILENO + 1);
1618 
1619 		(void) sigset(SIGCLD, SIG_DFL);
1620 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1621 		/*
1622 		 * In case any of stdin, stdout or stderr are streams,
1623 		 * anchor them to prevent malicious I_POPs.
1624 		 */
1625 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1626 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1627 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1628 
1629 		if (zone_enter(zoneid) == -1) {
1630 			zerror(gettext("could not enter zone %s: %s"),
1631 			    zonename, strerror(errno));
1632 			_exit(1);
1633 		}
1634 
1635 		/*
1636 		 * For non-native zones, tell libc where it can find locale
1637 		 * specific getttext() messages.
1638 		 */
1639 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1640 			(void) bindtextdomain(TEXT_DOMAIN,
1641 			    "/.SUNWnative/usr/lib/locale");
1642 		else if (access("/native/usr/lib/locale", R_OK) == 0)
1643 			(void) bindtextdomain(TEXT_DOMAIN,
1644 			    "/native/usr/lib/locale");
1645 
1646 		if (!failsafe)
1647 			new_env = prep_env_noninteractive(user_cmd, new_env);
1648 
1649 		if (new_env == NULL) {
1650 			_exit(1);
1651 		}
1652 
1653 		/*
1654 		 * Move into a new process group; the zone_enter will have
1655 		 * placed us into zsched's session, and we want to be in
1656 		 * a unique process group.
1657 		 */
1658 		(void) setpgid(getpid(), getpid());
1659 
1660 		/*
1661 		 * The child needs to run as root to
1662 		 * execute the su program.
1663 		 */
1664 		if (setuid(0) == -1) {
1665 			zperror(gettext("insufficient privilege"));
1666 			return (1);
1667 		}
1668 
1669 		(void) execve(new_args[0], new_args, new_env);
1670 		zperror(gettext("exec failure"));
1671 		_exit(1);
1672 	}
1673 	/* parent */
1674 
1675 	/* close pipe sides written by child */
1676 	(void) close(stdout_pipe[1]);
1677 	(void) close(stderr_pipe[1]);
1678 
1679 	(void) sigset(SIGINT, sig_forward);
1680 
1681 	postfork_dropprivs();
1682 
1683 	(void) ct_tmpl_clear(tmpl_fd);
1684 	(void) close(tmpl_fd);
1685 
1686 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1687 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1688 	    dead_child_pipe[1], B_TRUE);
1689 	do {
1690 		retval = waitpid(child_pid, &child_status, 0);
1691 		if (retval == -1) {
1692 			child_status = 0;
1693 		}
1694 	} while (retval != child_pid && errno != ECHILD);
1695 
1696 	return (WEXITSTATUS(child_status));
1697 }
1698 
/*
 * Return the user name that belongs to our real user ID.  The string is
 * the pw_name member of the entry returned by getpwuid().  If no entry
 * can be found, an error is reported and the process exits with status 1.
 */
static char *
get_username()
{
	struct passwd *pwent;

	/*
	 * Authorizations are checked to restrict access based on the
	 * requested operation and zone name.  It is assumed that the
	 * program is running with all privileges, but that the real
	 * user ID is that of the user or role on whose behalf we are
	 * operating.  So we start by getting the username that will be
	 * used for subsequent authorization checks.
	 */
	pwent = getpwuid(getuid());
	if (pwent == NULL) {
		zerror(gettext("could not get user name."));
		_exit(1);
	}

	return (pwent->pw_name);
}
1721 
1722 int
1723 main(int argc, char **argv)
1724 {
1725 	int arg, console = 0;
1726 	zoneid_t zoneid;
1727 	zone_state_t st;
1728 	char *login = "root";
1729 	int lflag = 0;
1730 	char *zonename = NULL;
1731 	char **proc_args = NULL;
1732 	char **new_args, **new_env;
1733 	sigset_t block_cld;
1734 	char devroot[MAXPATHLEN];
1735 	char *slavename, slaveshortname[MAXPATHLEN];
1736 	priv_set_t *privset;
1737 	int tmpl_fd;
1738 	char zonebrand[MAXNAMELEN];
1739 	char default_brand[MAXNAMELEN];
1740 	struct stat sb;
1741 	char kernzone[ZONENAME_MAX];
1742 	brand_handle_t bh;
1743 	char user_cmd[MAXPATHLEN];
1744 	char authname[MAXAUTHS];
1745 
1746 	(void) setlocale(LC_ALL, "");
1747 	(void) textdomain(TEXT_DOMAIN);
1748 
1749 	(void) getpname(argv[0]);
1750 	username = get_username();
1751 
1752 	while ((arg = getopt(argc, argv, "ECR:Se:l:")) != EOF) {
1753 		switch (arg) {
1754 		case 'C':
1755 			console = 1;
1756 			break;
1757 		case 'E':
1758 			nocmdchar = 1;
1759 			break;
1760 		case 'R':	/* undocumented */
1761 			if (*optarg != '/') {
1762 				zerror(gettext("root path must be absolute."));
1763 				exit(2);
1764 			}
1765 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1766 				zerror(
1767 				    gettext("root path must be a directory."));
1768 				exit(2);
1769 			}
1770 			zonecfg_set_root(optarg);
1771 			break;
1772 		case 'S':
1773 			failsafe = 1;
1774 			break;
1775 		case 'e':
1776 			set_cmdchar(optarg);
1777 			break;
1778 		case 'l':
1779 			login = optarg;
1780 			lflag = 1;
1781 			break;
1782 		default:
1783 			usage();
1784 		}
1785 	}
1786 
1787 	if (console != 0 && lflag != 0) {
1788 		zerror(gettext("-l may not be specified for console login"));
1789 		usage();
1790 	}
1791 
1792 	if (console != 0 && failsafe != 0) {
1793 		zerror(gettext("-S may not be specified for console login"));
1794 		usage();
1795 	}
1796 
1797 	if (console != 0 && zonecfg_in_alt_root()) {
1798 		zerror(gettext("-R may not be specified for console login"));
1799 		exit(2);
1800 	}
1801 
1802 	if (failsafe != 0 && lflag != 0) {
1803 		zerror(gettext("-l may not be specified for failsafe login"));
1804 		usage();
1805 	}
1806 
1807 	if (optind == (argc - 1)) {
1808 		/*
1809 		 * zone name, no process name; this should be an interactive
1810 		 * as long as STDIN is really a tty.
1811 		 */
1812 		if (isatty(STDIN_FILENO))
1813 			interactive = 1;
1814 		zonename = argv[optind];
1815 	} else if (optind < (argc - 1)) {
1816 		if (console) {
1817 			zerror(gettext("Commands may not be specified for "
1818 			    "console login."));
1819 			usage();
1820 		}
1821 		/* zone name and process name, and possibly some args */
1822 		zonename = argv[optind];
1823 		proc_args = &argv[optind + 1];
1824 		interactive = 0;
1825 	} else {
1826 		usage();
1827 	}
1828 
1829 	if (getzoneid() != GLOBAL_ZONEID) {
1830 		zerror(gettext("'%s' may only be used from the global zone"),
1831 		    pname);
1832 		return (1);
1833 	}
1834 
1835 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1836 		zerror(gettext("'%s' not applicable to the global zone"),
1837 		    pname);
1838 		return (1);
1839 	}
1840 
1841 	if (zone_get_state(zonename, &st) != Z_OK) {
1842 		zerror(gettext("zone '%s' unknown"), zonename);
1843 		return (1);
1844 	}
1845 
1846 	if (st < ZONE_STATE_INSTALLED) {
1847 		zerror(gettext("cannot login to a zone which is '%s'"),
1848 		    zone_state_str(st));
1849 		return (1);
1850 	}
1851 
1852 	/*
1853 	 * In both console and non-console cases, we require all privs.
1854 	 * In the console case, because we may need to startup zoneadmd.
1855 	 * In the non-console case in order to do zone_enter(2), zonept()
1856 	 * and other tasks.
1857 	 */
1858 
1859 	if ((privset = priv_allocset()) == NULL) {
1860 		zperror(gettext("priv_allocset failed"));
1861 		return (1);
1862 	}
1863 
1864 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1865 		zperror(gettext("getppriv failed"));
1866 		priv_freeset(privset);
1867 		return (1);
1868 	}
1869 
1870 	if (priv_isfullset(privset) == B_FALSE) {
1871 		zerror(gettext("You lack sufficient privilege to run "
1872 		    "this command (all privs required)"));
1873 		priv_freeset(privset);
1874 		return (1);
1875 	}
1876 	priv_freeset(privset);
1877 
1878 	/*
1879 	 * Check if user is authorized for requested usage of the zone
1880 	 */
1881 
1882 	(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1883 	    ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1884 	if (chkauthattr(authname, username) == 0) {
1885 		if (console) {
1886 			zerror(gettext("%s is not authorized for console "
1887 			    "access to  %s zone."),
1888 			    username, zonename);
1889 			return (1);
1890 		} else {
1891 			(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1892 			    ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1893 			if (failsafe || !interactive) {
1894 				zerror(gettext("%s is not authorized for  "
1895 				    "failsafe or non-interactive login "
1896 				    "to  %s zone."), username, zonename);
1897 				return (1);
1898 			} else if (chkauthattr(authname, username) == 0) {
1899 				zerror(gettext("%s is not authorized "
1900 				    " to login to %s zone."),
1901 				    username, zonename);
1902 				return (1);
1903 			}
1904 		}
1905 	} else {
1906 		forced_login = B_TRUE;
1907 	}
1908 
1909 	/*
1910 	 * The console is a separate case from the rest of the code; handle
1911 	 * it first.
1912 	 */
1913 	if (console) {
1914 		/*
1915 		 * Ensure that zoneadmd for this zone is running.
1916 		 */
1917 		if (start_zoneadmd(zonename) == -1)
1918 			return (1);
1919 
1920 		/*
1921 		 * Make contact with zoneadmd.
1922 		 */
1923 		if (get_console_master(zonename) == -1)
1924 			return (1);
1925 
1926 		(void) printf(gettext("[Connected to zone '%s' console]\n"),
1927 		    zonename);
1928 
1929 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1930 			reset_tty();
1931 			zperror(gettext("failed to set stdin pty to raw mode"));
1932 			return (1);
1933 		}
1934 
1935 		(void) sigset(SIGWINCH, sigwinch);
1936 		(void) sigwinch(0);
1937 
1938 		/*
1939 		 * Run the I/O loop until we get disconnected.
1940 		 */
1941 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1942 		reset_tty();
1943 		(void) printf(gettext("\n[Connection to zone '%s' console "
1944 		    "closed]\n"), zonename);
1945 
1946 		return (0);
1947 	}
1948 
1949 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1950 		zerror(gettext("login allowed only to running zones "
1951 		    "(%s is '%s')."), zonename, zone_state_str(st));
1952 		return (1);
1953 	}
1954 
1955 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1956 	if (zonecfg_in_alt_root()) {
1957 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
1958 
1959 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
1960 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
1961 			zerror(gettext("cannot find scratch zone %s"),
1962 			    zonename);
1963 			if (fp != NULL)
1964 				zonecfg_close_scratch(fp);
1965 			return (1);
1966 		}
1967 		zonecfg_close_scratch(fp);
1968 	}
1969 
1970 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
1971 		zerror(gettext("failed to get zoneid for zone '%s'"),
1972 		    zonename);
1973 		return (1);
1974 	}
1975 
1976 	/*
1977 	 * We need the zone root path only if we are setting up a pty.
1978 	 */
1979 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
1980 		zerror(gettext("could not get dev path for zone %s"),
1981 		    zonename);
1982 		return (1);
1983 	}
1984 
1985 	if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
1986 		zerror(gettext("could not get brand for zone %s"), zonename);
1987 		return (1);
1988 	}
1989 	/*
1990 	 * In the alternate root environment, the only supported
1991 	 * operations are mount and unmount.  In this case, just treat
1992 	 * the zone as native if it is cluster.  Cluster zones can be
1993 	 * native for the purpose of LU or upgrade, and the cluster
1994 	 * brand may not exist in the miniroot (such as in net install
1995 	 * upgrade).
1996 	 */
1997 	if (zonecfg_default_brand(default_brand,
1998 	    sizeof (default_brand)) != Z_OK) {
1999 		zerror(gettext("unable to determine default brand"));
2000 		return (1);
2001 	}
2002 	if (zonecfg_in_alt_root() &&
2003 	    strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2004 		(void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2005 	}
2006 
2007 	if ((bh = brand_open(zonebrand)) == NULL) {
2008 		zerror(gettext("could not open brand for zone %s"), zonename);
2009 		return (1);
2010 	}
2011 
2012 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2013 		zperror(gettext("could not assemble new arguments"));
2014 		brand_close(bh);
2015 		return (1);
2016 	}
2017 	/*
2018 	 * Get the brand specific user_cmd.  This command is used to get
2019 	 * a passwd(4) entry for login.
2020 	 */
2021 	if (!interactive && !failsafe) {
2022 		if (zone_get_user_cmd(bh, login, user_cmd,
2023 		    sizeof (user_cmd)) == NULL) {
2024 			zerror(gettext("could not get user_cmd for zone %s"),
2025 			    zonename);
2026 			brand_close(bh);
2027 			return (1);
2028 		}
2029 	}
2030 	brand_close(bh);
2031 
2032 	if ((new_env = prep_env()) == NULL) {
2033 		zperror(gettext("could not assemble new environment"));
2034 		return (1);
2035 	}
2036 
2037 	if (!interactive)
2038 		return (noninteractive_login(zonename, user_cmd, zoneid,
2039 		    new_args, new_env));
2040 
2041 	if (zonecfg_in_alt_root()) {
2042 		zerror(gettext("cannot use interactive login with scratch "
2043 		    "zone"));
2044 		return (1);
2045 	}
2046 
2047 	/*
2048 	 * Things are more complex in interactive mode; we get the
2049 	 * master side of the pty, then place the user's terminal into
2050 	 * raw mode.
2051 	 */
2052 	if (get_master_pty() == -1) {
2053 		zerror(gettext("could not setup master pty device"));
2054 		return (1);
2055 	}
2056 
2057 	/*
2058 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2059 	 */
2060 	if ((slavename = ptsname(masterfd)) == NULL) {
2061 		zperror(gettext("failed to get name for pseudo-tty"));
2062 		return (1);
2063 	}
2064 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2065 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2066 		    sizeof (slaveshortname));
2067 	else
2068 		(void) strlcpy(slaveshortname, slavename,
2069 		    sizeof (slaveshortname));
2070 
2071 	(void) printf(gettext("[Connected to zone '%s' %s]\n"), zonename,
2072 	    slaveshortname);
2073 
2074 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
2075 		reset_tty();
2076 		zperror(gettext("failed to set stdin pty to raw mode"));
2077 		return (1);
2078 	}
2079 
2080 	if (prefork_dropprivs() != 0) {
2081 		reset_tty();
2082 		zperror(gettext("could not allocate privilege set"));
2083 		return (1);
2084 	}
2085 
2086 	/*
2087 	 * We must mask SIGCLD until after we have coped with the fork
2088 	 * sufficiently to deal with it; otherwise we can race and receive the
2089 	 * signal before child_pid has been initialized (yes, this really
2090 	 * happens).
2091 	 */
2092 	(void) sigset(SIGCLD, sigcld);
2093 	(void) sigemptyset(&block_cld);
2094 	(void) sigaddset(&block_cld, SIGCLD);
2095 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2096 
2097 	/*
2098 	 * We activate the contract template at the last minute to
2099 	 * avoid intermediate functions that could be using fork(2)
2100 	 * internally.
2101 	 */
2102 	if ((tmpl_fd = init_template()) == -1) {
2103 		reset_tty();
2104 		zperror(gettext("could not create contract"));
2105 		return (1);
2106 	}
2107 
2108 	if ((child_pid = fork()) == -1) {
2109 		(void) ct_tmpl_clear(tmpl_fd);
2110 		reset_tty();
2111 		zperror(gettext("could not fork"));
2112 		return (1);
2113 	} else if (child_pid == 0) { /* child process */
2114 		int slavefd, newslave;
2115 
2116 		(void) ct_tmpl_clear(tmpl_fd);
2117 		(void) close(tmpl_fd);
2118 
2119 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2120 
2121 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2122 			return (1);
2123 
2124 		/*
2125 		 * Close all fds except for the slave pty.
2126 		 */
2127 		(void) fdwalk(close_func, &slavefd);
2128 
2129 		/*
2130 		 * Temporarily dup slavefd to stderr; that way if we have
2131 		 * to print out that zone_enter failed, the output will
2132 		 * have somewhere to go.
2133 		 */
2134 		if (slavefd != STDERR_FILENO)
2135 			(void) dup2(slavefd, STDERR_FILENO);
2136 
2137 		if (zone_enter(zoneid) == -1) {
2138 			zerror(gettext("could not enter zone %s: %s"),
2139 			    zonename, strerror(errno));
2140 			return (1);
2141 		}
2142 
2143 		if (slavefd != STDERR_FILENO)
2144 			(void) close(STDERR_FILENO);
2145 
2146 		/*
2147 		 * We take pains to get this process into a new process
2148 		 * group, and subsequently a new session.  In this way,
2149 		 * we'll have a session which doesn't yet have a controlling
2150 		 * terminal.  When we open the slave, it will become the
2151 		 * controlling terminal; no PIDs concerning pgrps or sids
2152 		 * will leak inappropriately into the zone.
2153 		 */
2154 		(void) setpgrp();
2155 
2156 		/*
2157 		 * We need the slave pty to be referenced from the zone's
2158 		 * /dev in order to ensure that the devt's, etc are all
2159 		 * correct.  Otherwise we break ttyname and the like.
2160 		 */
2161 		if ((newslave = open(slavename, O_RDWR)) == -1) {
2162 			(void) close(slavefd);
2163 			return (1);
2164 		}
2165 		(void) close(slavefd);
2166 		slavefd = newslave;
2167 
2168 		/*
2169 		 * dup the slave to the various FDs, so that when the
2170 		 * spawned process does a write/read it maps to the slave
2171 		 * pty.
2172 		 */
2173 		(void) dup2(slavefd, STDIN_FILENO);
2174 		(void) dup2(slavefd, STDOUT_FILENO);
2175 		(void) dup2(slavefd, STDERR_FILENO);
2176 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2177 		    slavefd != STDERR_FILENO) {
2178 			(void) close(slavefd);
2179 		}
2180 
2181 		/*
2182 		 * In failsafe mode, we don't use login(1), so don't try
2183 		 * setting up a utmpx entry.
2184 		 */
2185 		if (!failsafe)
2186 			if (setup_utmpx(slaveshortname) == -1)
2187 				return (1);
2188 
2189 		/*
2190 		 * The child needs to run as root to
2191 		 * execute the brand's login program.
2192 		 */
2193 		if (setuid(0) == -1) {
2194 			zperror(gettext("insufficient privilege"));
2195 			return (1);
2196 		}
2197 
2198 		(void) execve(new_args[0], new_args, new_env);
2199 		zperror(gettext("exec failure"));
2200 		return (1);
2201 	}
2202 
2203 	(void) ct_tmpl_clear(tmpl_fd);
2204 	(void) close(tmpl_fd);
2205 
2206 	/*
2207 	 * The rest is only for the parent process.
2208 	 */
2209 	(void) sigset(SIGWINCH, sigwinch);
2210 
2211 	postfork_dropprivs();
2212 
2213 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2214 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2215 
2216 	reset_tty();
2217 	(void) fprintf(stderr,
2218 	    gettext("\n[Connection to zone '%s' %s closed]\n"), zonename,
2219 	    slaveshortname);
2220 
2221 	if (pollerr != 0) {
2222 		(void) fprintf(stderr, gettext("Error: connection closed due "
2223 		    "to unexpected pollevents=0x%x.\n"), pollerr);
2224 		return (1);
2225 	}
2226 
2227 	return (0);
2228 }
2229