xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision 4e5ef1cee66fbfbbd2b3e56b81e2bb5700f4a59e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 DEY Storage Systems, Inc.
24  * Copyright (c) 2014 Gary Mills
25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
28  */
29 
30 /*
31  * zlogin provides three types of login which allow users in the global
32  * zone to access non-global zones.
33  *
34  * - "interactive login" is similar to rlogin(1); for example, the user could
35  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
36  *   granted a new pty (which is then shoved into the zone), and an I/O
37  *   loop between parent and child processes takes care of the interactive
38  *   session.  In this mode, login(1) (and its -c option, which means
39  *   "already authenticated") is employed to take care of the initialization
40  *   of the user's session.
41  *
42  * - "non-interactive login" is similar to su(1M); the user could issue
43  *   'zlogin my-zone ls -l' and the command would be run as specified.
44  *   In this mode, zlogin sets up pipes as the communication channel, and
45  *   'su' is used to do the login setup work.
46  *
47  * - "console login" is the equivalent to accessing the tip line for a
48  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
49  *   In this mode, zlogin contacts the zoneadmd process via unix domain
50  *   socket.  If zoneadmd is not running, it starts it.  This allows the
51  *   console to be available anytime the zone is installed, regardless of
52  *   whether it is running.
53  */
54 
55 #include <sys/socket.h>
56 #include <sys/termios.h>
57 #include <sys/utsname.h>
58 #include <sys/stat.h>
59 #include <sys/types.h>
60 #include <sys/contract/process.h>
61 #include <sys/ctfs.h>
62 #include <sys/brand.h>
63 #include <sys/wait.h>
64 #include <alloca.h>
65 #include <assert.h>
66 #include <ctype.h>
67 #include <paths.h>
68 #include <door.h>
69 #include <errno.h>
70 #include <nss_dbdefs.h>
71 #include <poll.h>
72 #include <priv.h>
73 #include <pwd.h>
74 #include <unistd.h>
75 #include <utmpx.h>
76 #include <sac.h>
77 #include <signal.h>
78 #include <stdarg.h>
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <string.h>
82 #include <strings.h>
83 #include <stropts.h>
84 #include <wait.h>
85 #include <zone.h>
86 #include <fcntl.h>
87 #include <libdevinfo.h>
88 #include <libintl.h>
89 #include <locale.h>
90 #include <libzonecfg.h>
91 #include <libcontract.h>
92 #include <libbrand.h>
93 #include <auth_list.h>
94 #include <auth_attr.h>
95 #include <secdb.h>
96 
97 static int masterfd;
98 static struct termios save_termios;
99 static struct termios effective_termios;
100 static int save_fd;
101 static struct winsize winsize;
102 static volatile int dead;
103 static volatile pid_t child_pid = -1;
104 static int interactive = 0;
105 static priv_set_t *dropprivs;
106 
107 static int nocmdchar = 0;
108 static int failsafe = 0;
109 static int disconnect = 0;
110 static char cmdchar = '~';
111 static int quiet = 0;
112 
113 static int pollerr = 0;
114 
115 static const char *pname;
116 static char *username;
117 
118 extern int __xpg4;	/* 0 if not an xpg4/6-compiled program */
119 
120 /*
121  * When forced_login is true, the user is not prompted
122  * for an authentication password in the target zone.
123  */
124 static boolean_t forced_login = B_FALSE;
125 
126 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
127 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
128 #endif
129 
130 #define	SUPATH	"/usr/bin/su"
131 #define	FAILSAFESHELL	"/sbin/sh"
132 #define	DEFAULTSHELL	"/sbin/sh"
133 #define	DEF_PATH	"/usr/sbin:/usr/bin"
134 
135 #define	CLUSTER_BRAND_NAME	"cluster"
136 
137 /*
138  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
139  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
140  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
141  * also chosen in conjunction with the HI_WATER setting to make sure we
142  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
143  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
144  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
145  * is less than HI_WATER data already in the pipe.
146  */
147 #define	ZLOGIN_BUFSIZ	8192
148 #define	ZLOGIN_RDBUFSIZ	1024
149 #define	HI_WATER	8192
150 
151 /*
152  * See canonify() below.  CANONIFY_LEN is the maximum length that a
153  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
154  */
155 #define	CANONIFY_LEN 5
156 
157 static void
158 usage(void)
159 {
160 	(void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
161 	    "[-l user] zonename [command [args ...] ]\n"), pname);
162 	exit(2);
163 }
164 
165 static const char *
166 getpname(const char *arg0)
167 {
168 	const char *p = strrchr(arg0, '/');
169 
170 	if (p == NULL)
171 		p = arg0;
172 	else
173 		p++;
174 
175 	pname = p;
176 	return (p);
177 }
178 
179 static void
180 zerror(const char *fmt, ...)
181 {
182 	va_list alist;
183 
184 	(void) fprintf(stderr, "%s: ", pname);
185 	va_start(alist, fmt);
186 	(void) vfprintf(stderr, fmt, alist);
187 	va_end(alist);
188 	(void) fprintf(stderr, "\n");
189 }
190 
191 static void
192 zperror(const char *str)
193 {
194 	const char *estr;
195 
196 	if ((estr = strerror(errno)) != NULL)
197 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
198 	else
199 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
200 }
201 
202 /*
203  * The first part of our privilege dropping scheme needs to be called before
204  * fork(), since we must have it for security; we don't want to be surprised
205  * later that we couldn't allocate the privset.
206  */
207 static int
208 prefork_dropprivs()
209 {
210 	if ((dropprivs = priv_allocset()) == NULL)
211 		return (1);
212 
213 	priv_basicset(dropprivs);
214 	(void) priv_delset(dropprivs, PRIV_PROC_INFO);
215 	(void) priv_delset(dropprivs, PRIV_PROC_FORK);
216 	(void) priv_delset(dropprivs, PRIV_PROC_EXEC);
217 	(void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
218 
219 	/*
220 	 * We need to keep the basic privilege PROC_SESSION and all unknown
221 	 * basic privileges as well as the privileges PROC_ZONE and
222 	 * PROC_OWNER in order to query session information and
223 	 * send signals.
224 	 */
225 	if (interactive == 0) {
226 		(void) priv_addset(dropprivs, PRIV_PROC_ZONE);
227 		(void) priv_addset(dropprivs, PRIV_PROC_OWNER);
228 	} else {
229 		(void) priv_delset(dropprivs, PRIV_PROC_SESSION);
230 	}
231 
232 	return (0);
233 }
234 
235 /*
236  * The second part of the privilege drop.  We are paranoid about being attacked
237  * by the zone, so we drop all privileges.  This should prevent a compromise
238  * which gets us to fork(), exec(), symlink(), etc.
239  */
240 static void
241 postfork_dropprivs()
242 {
243 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
244 		zperror(gettext("Warning: could not set permitted privileges"));
245 	}
246 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
247 		zperror(gettext("Warning: could not set limit privileges"));
248 	}
249 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
250 		zperror(gettext("Warning: could not set inheritable "
251 		    "privileges"));
252 	}
253 }
254 
255 /*
256  * Create the unix domain socket and call the zoneadmd server; handshake
257  * with it to determine whether it will allow us to connect.
258  */
259 static int
260 get_console_master(const char *zname)
261 {
262 	int sockfd = -1;
263 	struct sockaddr_un servaddr;
264 	char clientid[MAXPATHLEN];
265 	char handshake[MAXPATHLEN], c;
266 	int msglen;
267 	int i = 0, err = 0;
268 
269 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
270 		zperror(gettext("could not create socket"));
271 		return (-1);
272 	}
273 
274 	bzero(&servaddr, sizeof (servaddr));
275 	servaddr.sun_family = AF_UNIX;
276 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
277 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
278 
279 	if (connect(sockfd, (struct sockaddr *)&servaddr,
280 	    sizeof (servaddr)) == -1) {
281 		zperror(gettext("Could not connect to zone console"));
282 		goto bad;
283 	}
284 	masterfd = sockfd;
285 
286 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
287 	    getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
288 
289 	if (msglen >= sizeof (clientid) || msglen < 0) {
290 		zerror("protocol error");
291 		goto bad;
292 	}
293 
294 	if (write(masterfd, clientid, msglen) != msglen) {
295 		zerror("protocol error");
296 		goto bad;
297 	}
298 
299 	bzero(handshake, sizeof (handshake));
300 
301 	/*
302 	 * Take care not to accumulate more than our fill, and leave room for
303 	 * the NUL at the end.
304 	 */
305 	while ((err = read(masterfd, &c, 1)) == 1) {
306 		if (i >= (sizeof (handshake) - 1))
307 			break;
308 		if (c == '\n')
309 			break;
310 		handshake[i] = c;
311 		i++;
312 	}
313 
314 	/*
315 	 * If something went wrong during the handshake we bail; perhaps
316 	 * the server died off.
317 	 */
318 	if (err == -1) {
319 		zperror(gettext("Could not connect to zone console"));
320 		goto bad;
321 	}
322 
323 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
324 		return (0);
325 
326 	zerror(gettext("Console is already in use by process ID %s."),
327 	    handshake);
328 bad:
329 	(void) close(sockfd);
330 	masterfd = -1;
331 	return (-1);
332 }
333 
334 
335 /*
336  * Routines to handle pty creation upon zone entry and to shuttle I/O back
337  * and forth between the two terminals.  We also compute and store the
338  * name of the slave terminal associated with the master side.
339  */
340 static int
341 get_master_pty()
342 {
343 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
344 		zperror(gettext("failed to obtain a pseudo-tty"));
345 		return (-1);
346 	}
347 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
348 		zperror(gettext("failed to get terminal settings from stdin"));
349 		return (-1);
350 	}
351 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
352 
353 	return (0);
354 }
355 
356 /*
357  * This is a bit tricky; normally a pts device will belong to the zone it
358  * is granted to.  But in the case of "entering" a zone, we need to establish
359  * the pty before entering the zone so that we can vector I/O to and from it
360  * from the global zone.
361  *
362  * We use the zonept() call to let the ptm driver know what we are up to;
363  * the only other hairy bit is the setting of zoneslavename (which happens
364  * above, in get_master_pty()).
365  */
366 static int
367 init_slave_pty(zoneid_t zoneid, char *devroot)
368 {
369 	int slavefd = -1;
370 	char *slavename, zoneslavename[MAXPATHLEN];
371 
372 	/*
373 	 * Set slave permissions, zone the pts, then unlock it.
374 	 */
375 	if (grantpt(masterfd) != 0) {
376 		zperror(gettext("grantpt failed"));
377 		return (-1);
378 	}
379 
380 	if (unlockpt(masterfd) != 0) {
381 		zperror(gettext("unlockpt failed"));
382 		return (-1);
383 	}
384 
385 	/*
386 	 * We must open the slave side before zoning this pty; otherwise
387 	 * the kernel would refuse us the open-- zoning a pty makes it
388 	 * inaccessible to the global zone.  Note we are trying to open
389 	 * the device node via the $ZONEROOT/dev path for this pty.
390 	 *
391 	 * Later we'll close the slave out when once we've opened it again
392 	 * from within the target zone.  Blarg.
393 	 */
394 	if ((slavename = ptsname(masterfd)) == NULL) {
395 		zperror(gettext("failed to get name for pseudo-tty"));
396 		return (-1);
397 	}
398 
399 	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
400 	    devroot, slavename);
401 
402 	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
403 		zerror(gettext("failed to open %s: %s"), zoneslavename,
404 		    strerror(errno));
405 		return (-1);
406 	}
407 
408 	/*
409 	 * Push hardware emulation (ptem), line discipline (ldterm),
410 	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
411 	 */
412 	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
413 		zperror(gettext("failed to push ptem module"));
414 		if (!failsafe)
415 			goto bad;
416 	}
417 
418 	/*
419 	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
420 	 * this prior to entering the zone so that we can detect any errors
421 	 * early, and so that we can set the anchor from the global zone.
422 	 */
423 	if (ioctl(slavefd, I_ANCHOR) == -1) {
424 		zperror(gettext("failed to set stream anchor"));
425 		if (!failsafe)
426 			goto bad;
427 	}
428 
429 	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
430 		zperror(gettext("failed to push ldterm module"));
431 		if (!failsafe)
432 			goto bad;
433 	}
434 	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
435 		zperror(gettext("failed to push ttcompat module"));
436 		if (!failsafe)
437 			goto bad;
438 	}
439 
440 	/*
441 	 * Propagate terminal settings from the external term to the new one.
442 	 */
443 	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
444 		zperror(gettext("failed to set terminal settings"));
445 		if (!failsafe)
446 			goto bad;
447 	}
448 	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);
449 
450 	if (zonept(masterfd, zoneid) != 0) {
451 		zperror(gettext("could not set zoneid of pty"));
452 		goto bad;
453 	}
454 
455 	return (slavefd);
456 
457 bad:
458 	(void) close(slavefd);
459 	return (-1);
460 }
461 
462 /*
463  * Place terminal into raw mode.
464  */
465 static int
466 set_tty_rawmode(int fd)
467 {
468 	struct termios term;
469 	if (tcgetattr(fd, &term) < 0) {
470 		zperror(gettext("failed to get user terminal settings"));
471 		return (-1);
472 	}
473 
474 	/* Stash for later, so we can revert back to previous mode */
475 	save_termios = term;
476 	save_fd = fd;
477 
478 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
479 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
480 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
481 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
482 	/* disable output post-processing */
483 	term.c_oflag &= ~OPOST;
484 	/* disable canonical mode, signal chars, echo & extended functions */
485 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
486 
487 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
488 	term.c_cc[VTIME] = 0;
489 
490 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
491 		zperror(gettext("failed to set user terminal to raw mode"));
492 		return (-1);
493 	}
494 
495 	/*
496 	 * We need to know the value of VEOF so that we can properly process for
497 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
498 	 * because VMIN overloads the same array slot in non-canonical mode.
499 	 * Stupid @&^%!
500 	 *
501 	 * So here we construct the "effective" termios from the current
502 	 * terminal settings, and the corrected VEOF and VEOL settings.
503 	 */
504 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
505 		zperror(gettext("failed to get user terminal settings"));
506 		return (-1);
507 	}
508 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
509 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
510 
511 	return (0);
512 }
513 
514 /*
515  * Copy terminal window size from our terminal to the pts.
516  */
517 /*ARGSUSED*/
518 static void
519 sigwinch(int s)
520 {
521 	struct winsize ws;
522 
523 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
524 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
525 }
526 
527 static volatile int close_on_sig = -1;
528 
529 static void
530 /*ARGSUSED*/
531 sigcld(int s)
532 {
533 	int status;
534 	pid_t pid;
535 
536 	/*
537 	 * Peek at the exit status.  If this isn't the process we cared
538 	 * about, then just reap it.
539 	 */
540 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
541 		if (pid == child_pid &&
542 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
543 			dead = 1;
544 			if (close_on_sig != -1) {
545 				(void) write(close_on_sig, "a", 1);
546 				(void) close(close_on_sig);
547 				close_on_sig = -1;
548 			}
549 		} else {
550 			(void) waitpid(pid, &status, WNOHANG);
551 		}
552 	}
553 }
554 
555 /*
556  * Some signals (currently, SIGINT) must be forwarded on to the process
557  * group of the child process.
558  */
559 static void
560 sig_forward(int s)
561 {
562 	if (child_pid != -1) {
563 		(void) sigsend(P_PGID, child_pid, s);
564 	}
565 }
566 
567 /*
568  * reset terminal settings for global environment
569  */
570 static void
571 reset_tty()
572 {
573 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
574 }
575 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *	c:  the character to represent.
 *	cc: output buffer of at least CANONIFY_LEN bytes; receives a
 *	    NUL-terminated string.
 *
 * Printable characters are copied as is, control characters 0 through 31
 * become caret notation (^@ through ^_), and everything else becomes a
 * backslash followed by three octal digits.
 *
 * The character is handled as an unsigned char throughout: passing a
 * negative value to isprint() is undefined, and shifting a sign-extended
 * value would yield the wrong leading octal digit for bytes >= 0x80.
 */
static void
canonify(char c, char *cc)
{
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
598 
599 /*
600  * process_user_input watches the input stream for the escape sequence for
601  * 'quit' (by default, tilde-period).  Because we might be fed just one
602  * keystroke at a time, state associated with the user input (are we at the
603  * beginning of the line?  are we locally echoing the next character?) is
604  * maintained by beginning_of_line and local_echo across calls to the routine.
605  * If the write to outfd fails, we'll try to read from infd in an attempt
606  * to prevent deadlock between the two processes.
607  *
608  * This routine returns -1 when the 'quit' escape sequence has been issued,
609  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
610  */
611 static int
612 process_user_input(int outfd, int infd)
613 {
614 	static boolean_t beginning_of_line = B_TRUE;
615 	static boolean_t local_echo = B_FALSE;
616 	char ibuf[ZLOGIN_BUFSIZ];
617 	int nbytes;
618 	char *buf = ibuf;
619 
620 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
621 	if (nbytes == -1 && (errno != EINTR || dead))
622 		return (-1);
623 
624 	if (nbytes == -1)	/* The read was interrupted. */
625 		return (0);
626 
627 	/* 0 read means EOF, close the pipe to the child */
628 	if (nbytes == 0)
629 		return (1);
630 
631 	for (char c = *buf; nbytes > 0; c = *buf, --nbytes) {
632 		buf++;
633 		if (beginning_of_line && !nocmdchar) {
634 			beginning_of_line = B_FALSE;
635 			if (c == cmdchar) {
636 				local_echo = B_TRUE;
637 				continue;
638 			}
639 		} else if (local_echo) {
640 			local_echo = B_FALSE;
641 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
642 				char cc[CANONIFY_LEN];
643 
644 				canonify(c, cc);
645 				(void) write(STDOUT_FILENO, &cmdchar, 1);
646 				(void) write(STDOUT_FILENO, cc, strlen(cc));
647 				return (-1);
648 			}
649 		}
650 retry:
651 		if (write(outfd, &c, 1) <= 0) {
652 			/*
653 			 * Since the fd we are writing to is opened with
654 			 * O_NONBLOCK it is possible to get EAGAIN if the
655 			 * pipe is full.  One way this could happen is if we
656 			 * are writing a lot of data into the pipe in this loop
657 			 * and the application on the other end is echoing that
658 			 * data back out to its stdout.  The output pipe can
659 			 * fill up since we are stuck here in this loop and not
660 			 * draining the other pipe.  We can try to read some of
661 			 * the data to see if we can drain the pipe so that the
662 			 * application can continue to make progress.  The read
663 			 * is non-blocking so we won't hang here.  We also wait
664 			 * a bit before retrying since there could be other
665 			 * reasons why the pipe is full and we don't want to
666 			 * continuously retry.
667 			 */
668 			if (errno == EAGAIN) {
669 				struct timespec rqtp;
670 				int ln;
671 				char obuf[ZLOGIN_BUFSIZ];
672 
673 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
674 					(void) write(STDOUT_FILENO, obuf, ln);
675 
676 				/* sleep for 10 milliseconds */
677 				rqtp.tv_sec = 0;
678 				rqtp.tv_nsec = MSEC2NSEC(10);
679 				(void) nanosleep(&rqtp, NULL);
680 				if (!dead)
681 					goto retry;
682 			}
683 
684 			return (-1);
685 		}
686 		beginning_of_line = (c == '\r' || c == '\n' ||
687 		    c == effective_termios.c_cc[VKILL] ||
688 		    c == effective_termios.c_cc[VEOL] ||
689 		    c == effective_termios.c_cc[VSUSP] ||
690 		    c == effective_termios.c_cc[VINTR]);
691 	}
692 	return (0);
693 }
694 
695 /*
696  * This function prevents deadlock between zlogin and the application in the
697  * zone that it is talking to.  This can happen when we read from zlogin's
698  * stdin and write the data down the pipe to the application.  If the pipe
699  * is full, we'll block in the write.  Because zlogin could be blocked in
700  * the write, it would never read the application's stdout/stderr so the
701  * application can then block on those writes (when the pipe fills up).  If the
702  * the application gets blocked this way, it can never get around to reading
703  * its stdin so that zlogin can unblock from its write.  Once in this state,
704  * the two processes are deadlocked.
705  *
706  * To prevent this, we want to verify that we can write into the pipe before we
707  * read from our stdin.  If the pipe already is pretty full, we bypass the read
708  * for now.  We'll circle back here again after the poll() so that we can
709  * try again.  When this function is called, we already know there is data
710  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
711  * stdin is EOF, and 0 if everything is ok (even though we might not have
712  * read/written any data into the pipe on this iteration).
713  */
714 static int
715 process_raw_input(int stdin_fd, int appin_fd)
716 {
717 	int cc;
718 	struct stat64 sb;
719 	char ibuf[ZLOGIN_RDBUFSIZ];
720 
721 	/* Check how much data is already in the pipe */
722 	if (fstat64(appin_fd, &sb) == -1) {
723 		perror("stat failed");
724 		return (-1);
725 	}
726 
727 	if (dead)
728 		return (-1);
729 
730 	/*
731 	 * The pipe already has a lot of data in it,  don't write any more
732 	 * right now.
733 	 */
734 	if (sb.st_size >= HI_WATER)
735 		return (0);
736 
737 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
738 	if (cc == -1 && (errno != EINTR || dead))
739 		return (-1);
740 
741 	if (cc == -1)	/* The read was interrupted. */
742 		return (0);
743 
744 	/* 0 read means EOF, close the pipe to the child */
745 	if (cc == 0)
746 		return (1);
747 
748 	/*
749 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
750 	 * data *to*.
751 	 */
752 	if (write(stdin_fd, ibuf, cc) == -1)
753 		return (-1);
754 
755 	return (0);
756 }
757 
758 /*
759  * Write the output from the application running in the zone.  We can get
760  * a signal during the write (usually it would be SIGCHLD when the application
761  * has exited) so we loop to make sure we have written all of the data we read.
762  */
763 static int
764 process_output(int in_fd, int out_fd)
765 {
766 	int wrote = 0;
767 	int cc;
768 	char ibuf[ZLOGIN_BUFSIZ];
769 
770 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
771 	if (cc == -1 && (errno != EINTR || dead))
772 		return (-1);
773 	if (cc == 0) {
774 		/*
775 		 * A return value of 0 when calling read() on a terminal
776 		 * indicates end-of-file pre-XPG4 and no data available
777 		 * for XPG4 and above.
778 		 */
779 		if (__xpg4 == 0)
780 			return (-1);
781 		return (0);
782 	}
783 	if (cc == -1)	/* The read was interrupted. */
784 		return (0);
785 
786 	do {
787 		int len;
788 
789 		len = write(out_fd, ibuf + wrote, cc - wrote);
790 		if (len == -1 && errno != EINTR)
791 			return (-1);
792 		if (len != -1)
793 			wrote += len;
794 	} while (wrote < cc);
795 
796 	return (0);
797 }
798 
799 /*
800  * This is the main I/O loop, and is shared across all zlogin modes.
801  * Parameters:
802  *	stdin_fd:  The fd representing 'stdin' for the slave side; input to
803  *		   the zone will be written here.
804  *
805  *	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
806  *		   we're running non-interactive); used in process_raw_input
807  *		   to ensure we don't fill up the application's stdin pipe.
808  *
809  *	stdout_fd: The fd representing 'stdout' for the slave side; output
810  *		   from the zone will arrive here.
811  *
812  *	stderr_fd: The fd representing 'stderr' for the slave side; output
813  *		   from the zone will arrive here.
814  *
815  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
816  *		   be performed on the input coming from STDIN.
817  *
818  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
819  * mode supplies a stderr).
820  *
821  */
822 static void
823 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
824     boolean_t raw_mode)
825 {
826 	struct pollfd pollfds[4];
827 	char ibuf[ZLOGIN_BUFSIZ];
828 	int cc, ret;
829 
830 	/* read from stdout of zone and write to stdout of global zone */
831 	pollfds[0].fd = stdout_fd;
832 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
833 
834 	/* read from stderr of zone and write to stderr of global zone */
835 	pollfds[1].fd = stderr_fd;
836 	pollfds[1].events = pollfds[0].events;
837 
838 	/* read from stdin of global zone and write to stdin of zone */
839 	pollfds[2].fd = STDIN_FILENO;
840 	pollfds[2].events = pollfds[0].events;
841 
842 	/* read from signalling pipe so we know when child dies */
843 	pollfds[3].fd = sig_fd;
844 	pollfds[3].events = pollfds[0].events;
845 
846 	for (;;) {
847 		pollfds[0].revents = pollfds[1].revents =
848 		    pollfds[2].revents = pollfds[3].revents = 0;
849 
850 		if (dead)
851 			break;
852 
853 		/*
854 		 * There is a race condition here where we can receive the
855 		 * child death signal, set the dead flag, but since we have
856 		 * passed the test above, we would go into poll and hang.
857 		 * To avoid this we use the sig_fd as an additional poll fd.
858 		 * The signal handler writes into the other end of this pipe
859 		 * when the child dies so that the poll will always see that
860 		 * input and proceed.  We just loop around at that point and
861 		 * then notice the dead flag.
862 		 */
863 
864 		ret = poll(pollfds,
865 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
866 
867 		if (ret == -1 && errno != EINTR) {
868 			perror("poll failed");
869 			break;
870 		}
871 
872 		if (errno == EINTR && dead) {
873 			break;
874 		}
875 
876 		/* event from master side stdout */
877 		if (pollfds[0].revents) {
878 			if (pollfds[0].revents &
879 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
880 				if (process_output(stdout_fd, STDOUT_FILENO)
881 				    != 0)
882 					break;
883 			} else {
884 				pollerr = pollfds[0].revents;
885 				break;
886 			}
887 		}
888 
889 		/* event from master side stderr */
890 		if (pollfds[1].revents) {
891 			if (pollfds[1].revents &
892 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
893 				if (process_output(stderr_fd, STDERR_FILENO)
894 				    != 0)
895 					break;
896 			} else {
897 				pollerr = pollfds[1].revents;
898 				break;
899 			}
900 		}
901 
902 		/* event from user STDIN side */
903 		if (pollfds[2].revents) {
904 			if (pollfds[2].revents &
905 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
906 				/*
907 				 * stdin fd is stdin of the target; so,
908 				 * the thing we'll write the user data *to*.
909 				 *
910 				 * Also, unlike on the output side, we
911 				 * close the pipe on a zero-length message.
912 				 */
913 				int res;
914 
915 				if (raw_mode)
916 					res = process_raw_input(stdin_fd,
917 					    appin_fd);
918 				else
919 					res = process_user_input(stdin_fd,
920 					    stdout_fd);
921 
922 				if (res < 0)
923 					break;
924 				if (res > 0) {
925 					/* EOF (close) child's stdin_fd */
926 					pollfds[2].fd = -1;
927 					while ((res = close(stdin_fd)) != 0 &&
928 					    errno == EINTR)
929 						;
930 					if (res != 0)
931 						break;
932 				}
933 
934 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
935 				/*
936 				 * It's OK to get a POLLHUP on STDIN-- it
937 				 * always happens if you do:
938 				 *
939 				 * echo foo | zlogin <zone> <command>
940 				 *
941 				 * We reset fd to -1 in this case to clear
942 				 * the condition and close the pipe (EOF) to
943 				 * the other side in order to wrap things up.
944 				 */
945 				int res;
946 
947 				pollfds[2].fd = -1;
948 				while ((res = close(stdin_fd)) != 0 &&
949 				    errno == EINTR)
950 					;
951 				if (res != 0)
952 					break;
953 			} else {
954 				pollerr = pollfds[2].revents;
955 				break;
956 			}
957 		}
958 	}
959 
960 	/*
961 	 * We are in the midst of dying, but try to poll with a short
962 	 * timeout to see if we can catch the last bit of I/O from the
963 	 * children.
964 	 */
965 retry:
966 	pollfds[0].revents = pollfds[1].revents = 0;
967 	(void) poll(pollfds, 2, 100);
968 	if (pollfds[0].revents &
969 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
970 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
971 			(void) write(STDOUT_FILENO, ibuf, cc);
972 			goto retry;
973 		}
974 	}
975 	if (pollfds[1].revents &
976 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
977 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
978 			(void) write(STDERR_FILENO, ibuf, cc);
979 			goto retry;
980 		}
981 	}
982 }
983 
984 /*
985  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
986  */
987 static const char *
988 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
989     size_t len)
990 {
991 	bzero(user_cmd, sizeof (user_cmd));
992 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
993 		return (NULL);
994 
995 	return (user_cmd);
996 }
997 
998 /* From libc */
999 extern int str2passwd(const char *, int, void *, char *, int);
1000 
1001 /*
1002  * exec() the user_cmd brand hook, and convert the output string to a
1003  * struct passwd.  This is to be called after zone_enter().
1004  *
1005  */
1006 static struct passwd *
1007 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
1008     int pwbuflen)
1009 {
1010 	char pwline[NSS_BUFLEN_PASSWD];
1011 	char *cin = NULL;
1012 	FILE *fin;
1013 	int status;
1014 
1015 	assert(getzoneid() != GLOBAL_ZONEID);
1016 
1017 	if ((fin = popen(user_cmd, "r")) == NULL)
1018 		return (NULL);
1019 
1020 	while (cin == NULL && !feof(fin))
1021 		cin = fgets(pwline, sizeof (pwline), fin);
1022 
1023 	if (cin == NULL) {
1024 		(void) pclose(fin);
1025 		return (NULL);
1026 	}
1027 
1028 	status = pclose(fin);
1029 	if (!WIFEXITED(status))
1030 		return (NULL);
1031 	if (WEXITSTATUS(status) != 0)
1032 		return (NULL);
1033 
1034 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1035 		return (pwent);
1036 	else
1037 		return (NULL);
1038 }
1039 
1040 static char **
1041 zone_login_cmd(brand_handle_t bh, const char *login)
1042 {
1043 	static char result_buf[ARG_MAX];
1044 	char **new_argv, *ptr, *lasts;
1045 	int n, a;
1046 
1047 	/* Get the login command for the target zone. */
1048 	bzero(result_buf, sizeof (result_buf));
1049 
1050 	if (forced_login) {
1051 		if (brand_get_forcedlogin_cmd(bh, login,
1052 		    result_buf, sizeof (result_buf)) != 0)
1053 			return (NULL);
1054 	} else {
1055 		if (brand_get_login_cmd(bh, login,
1056 		    result_buf, sizeof (result_buf)) != 0)
1057 			return (NULL);
1058 	}
1059 
1060 	/*
1061 	 * We got back a string that we'd like to execute.  But since
1062 	 * we're not doing the execution via a shell we'll need to convert
1063 	 * the exec string to an array of strings.  We'll do that here
1064 	 * but we're going to be very simplistic about it and break stuff
1065 	 * up based on spaces.  We're not even going to support any kind
1066 	 * of quoting or escape characters.  It's truly amazing that
1067 	 * there is no library function in OpenSolaris to do this for us.
1068 	 */
1069 
1070 	/*
1071 	 * Be paranoid.  Since we're deliniating based on spaces make
1072 	 * sure there are no adjacent spaces.
1073 	 */
1074 	if (strstr(result_buf, "  ") != NULL)
1075 		return (NULL);
1076 
1077 	/* Remove any trailing whitespace.  */
1078 	n = strlen(result_buf);
1079 	if (result_buf[n - 1] == ' ')
1080 		result_buf[n - 1] = '\0';
1081 
1082 	/* Count how many elements there are in the exec string. */
1083 	ptr = result_buf;
1084 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1085 		;
1086 
1087 	/* Allocate the argv array that we're going to return. */
1088 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1089 		return (NULL);
1090 
1091 	/* Tokenize the exec string and return. */
1092 	a = 0;
1093 	new_argv[a++] = result_buf;
1094 	if (n > 2) {
1095 		(void) strtok_r(result_buf, " ", &lasts);
1096 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1097 			;
1098 	} else {
1099 		new_argv[a++] = NULL;
1100 	}
1101 	assert(n == a);
1102 	return (new_argv);
1103 }
1104 
1105 /*
1106  * Prepare argv array for exec'd process; if we're passing commands to the
1107  * new process, then use su(1M) to do the invocation.  Otherwise, use
1108  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1109  * login that we're coming from another zone, and to disregard its CONSOLE
1110  * checks).
1111  */
1112 static char **
1113 prep_args(brand_handle_t bh, const char *login, char **argv)
1114 {
1115 	int argc = 0, a = 0, i, n = -1;
1116 	char **new_argv;
1117 
1118 	if (argv != NULL) {
1119 		size_t subshell_len = 1;
1120 		char *subshell;
1121 
1122 		while (argv[argc] != NULL)
1123 			argc++;
1124 
1125 		for (i = 0; i < argc; i++) {
1126 			subshell_len += strlen(argv[i]) + 1;
1127 		}
1128 		if ((subshell = calloc(1, subshell_len)) == NULL)
1129 			return (NULL);
1130 
1131 		for (i = 0; i < argc; i++) {
1132 			(void) strcat(subshell, argv[i]);
1133 			(void) strcat(subshell, " ");
1134 		}
1135 
1136 		if (failsafe) {
1137 			n = 4;
1138 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1139 				return (NULL);
1140 
1141 			new_argv[a++] = FAILSAFESHELL;
1142 		} else {
1143 			n = 5;
1144 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1145 				return (NULL);
1146 
1147 			new_argv[a++] = SUPATH;
1148 			if (strcmp(login, "root") != 0) {
1149 				new_argv[a++] = "-";
1150 				n++;
1151 			}
1152 			new_argv[a++] = (char *)login;
1153 		}
1154 		new_argv[a++] = "-c";
1155 		new_argv[a++] = subshell;
1156 		new_argv[a++] = NULL;
1157 		assert(a == n);
1158 	} else {
1159 		if (failsafe) {
1160 			n = 2;
1161 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1162 				return (NULL);
1163 			new_argv[a++] = FAILSAFESHELL;
1164 			new_argv[a++] = NULL;
1165 			assert(n == a);
1166 		} else {
1167 			new_argv = zone_login_cmd(bh, login);
1168 		}
1169 	}
1170 
1171 	return (new_argv);
1172 }
1173 
/*
 * Helper routine for prep_env below.
 *
 * Returns a newly malloc'd "name=value" string, or NULL if the allocation
 * fails.
 */
static char *
add_env(char *name, char *value)
{
	/* Room for the name, the '=', the value and the terminating NUL. */
	size_t need = strlen(name) + 1 + strlen(value) + 1;
	char *entry = malloc(need);

	if (entry != NULL)
		(void) snprintf(entry, need, "%s=%s", name, value);

	return (entry);
}
1189 
1190 /*
1191  * Prepare envp array for exec'd process.
1192  */
1193 static char **
1194 prep_env()
1195 {
1196 	int e = 0, size = 1;
1197 	char **new_env, *estr;
1198 	char *term = getenv("TERM");
1199 
1200 	size++;	/* for $PATH */
1201 	if (term != NULL)
1202 		size++;
1203 
1204 	/*
1205 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1206 	 * We also set $SHELL, since neither login nor su will be around to do
1207 	 * it.
1208 	 */
1209 	if (failsafe)
1210 		size += 2;
1211 
1212 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1213 		return (NULL);
1214 
1215 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1216 		return (NULL);
1217 	new_env[e++] = estr;
1218 
1219 	if (term != NULL) {
1220 		if ((estr = add_env("TERM", term)) == NULL)
1221 			return (NULL);
1222 		new_env[e++] = estr;
1223 	}
1224 
1225 	if (failsafe) {
1226 		if ((estr = add_env("HOME", "/")) == NULL)
1227 			return (NULL);
1228 		new_env[e++] = estr;
1229 
1230 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1231 			return (NULL);
1232 		new_env[e++] = estr;
1233 	}
1234 
1235 	new_env[e++] = NULL;
1236 
1237 	assert(e == size);
1238 
1239 	return (new_env);
1240 }
1241 
1242 /*
1243  * Finish the preparation of the envp array for exec'd non-interactive
1244  * zlogins.  This is called in the child process *after* we zone_enter(), since
1245  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1246  * etc.  We need only do this in the non-interactive, mode, since otherwise
1247  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1248  * additional ways in which the command could fail, and we'd prefer to avoid
1249  * that.
1250  */
1251 static char **
1252 prep_env_noninteractive(const char *user_cmd, char **env)
1253 {
1254 	size_t size;
1255 	char **new_env;
1256 	int e, i;
1257 	char *estr;
1258 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1259 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
1260 	struct passwd pwent;
1261 	struct passwd *pw = NULL;
1262 
1263 	assert(env != NULL);
1264 	assert(failsafe == 0);
1265 
1266 	/*
1267 	 * Exec the "user_cmd" brand hook to get a pwent for the
1268 	 * login user.  If this fails, HOME will be set to "/", SHELL
1269 	 * will be set to $DEFAULTSHELL, and we will continue to exec
1270 	 * SUPATH <login> -c <cmd>.
1271 	 */
1272 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1273 
1274 	/*
1275 	 * Get existing envp size.
1276 	 */
1277 	for (size = 0; env[size] != NULL; size++)
1278 		;
1279 
1280 	e = size;
1281 
1282 	/*
1283 	 * Finish filling out the environment; we duplicate the environment
1284 	 * setup described in login(1), for lack of a better precedent.
1285 	 */
1286 	if (pw != NULL)
1287 		size += 3;	/* LOGNAME, HOME, MAIL */
1288 	else
1289 		size += 1;	/* HOME */
1290 
1291 	size++;	/* always fill in SHELL */
1292 	size++; /* terminating NULL */
1293 
1294 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1295 		goto malloc_fail;
1296 
1297 	/*
1298 	 * Copy existing elements of env into new_env.
1299 	 */
1300 	for (i = 0; env[i] != NULL; i++) {
1301 		if ((new_env[i] = strdup(env[i])) == NULL)
1302 			goto malloc_fail;
1303 	}
1304 	assert(e == i);
1305 
1306 	if (pw != NULL) {
1307 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1308 			goto malloc_fail;
1309 		new_env[e++] = estr;
1310 
1311 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1312 			goto malloc_fail;
1313 		new_env[e++] = estr;
1314 
1315 		if (chdir(pw->pw_dir) != 0)
1316 			zerror(gettext("Could not chdir to home directory "
1317 			    "%s: %s"), pw->pw_dir, strerror(errno));
1318 
1319 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1320 		    pw->pw_name);
1321 		if ((estr = add_env("MAIL", varmail)) == NULL)
1322 			goto malloc_fail;
1323 		new_env[e++] = estr;
1324 	} else {
1325 		if ((estr = add_env("HOME", "/")) == NULL)
1326 			goto malloc_fail;
1327 		new_env[e++] = estr;
1328 	}
1329 
1330 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1331 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1332 			goto malloc_fail;
1333 		new_env[e++] = estr;
1334 	} else {
1335 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1336 			goto malloc_fail;
1337 		new_env[e++] = estr;
1338 	}
1339 
1340 	new_env[e++] = NULL;	/* add terminating NULL */
1341 
1342 	assert(e == size);
1343 	return (new_env);
1344 
1345 malloc_fail:
1346 	zperror(gettext("failed to allocate memory for process environment"));
1347 	return (NULL);
1348 }
1349 
/*
 * Per-descriptor callback: close 'fd' unless it is the descriptor that
 * 'slavefd' points to.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep_fd = *(int *)slavefd;

	if (fd == keep_fd)
		return (0);

	(void) close(fd);
	return (0);
}
1357 
1358 static void
1359 set_cmdchar(char *cmdcharstr)
1360 {
1361 	char c;
1362 	long lc;
1363 
1364 	if ((c = *cmdcharstr) != '\\') {
1365 		cmdchar = c;
1366 		return;
1367 	}
1368 
1369 	c = cmdcharstr[1];
1370 	if (c == '\0' || c == '\\') {
1371 		cmdchar = '\\';
1372 		return;
1373 	}
1374 
1375 	if (c < '0' || c > '7') {
1376 		zerror(gettext("Unrecognized escape character option %s"),
1377 		    cmdcharstr);
1378 		usage();
1379 	}
1380 
1381 	lc = strtol(cmdcharstr + 1, NULL, 8);
1382 	if (lc < 0 || lc > 255) {
1383 		zerror(gettext("Octal escape character '%s' too large"),
1384 		    cmdcharstr);
1385 		usage();
1386 	}
1387 	cmdchar = (char)lc;
1388 }
1389 
1390 static int
1391 setup_utmpx(char *slavename)
1392 {
1393 	struct utmpx ut;
1394 
1395 	bzero(&ut, sizeof (ut));
1396 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1397 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1398 	ut.ut_pid = getpid();
1399 	ut.ut_id[0] = 'z';
1400 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1401 	ut.ut_type = LOGIN_PROCESS;
1402 	(void) time(&ut.ut_tv.tv_sec);
1403 
1404 	if (makeutx(&ut) == NULL) {
1405 		zerror(gettext("makeutx failed"));
1406 		return (-1);
1407 	}
1408 	return (0);
1409 }
1410 
/*
 * Give up the lock file obtained through grab_lock_file() by closing its
 * descriptor.  The result of close() is deliberately ignored.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1416 
1417 static int
1418 grab_lock_file(const char *zone_name, int *lockfd)
1419 {
1420 	char pathbuf[PATH_MAX];
1421 	struct flock flock;
1422 
1423 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1424 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1425 		    strerror(errno));
1426 		return (-1);
1427 	}
1428 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1429 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1430 	    ZONES_TMPDIR, zone_name);
1431 
1432 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1433 		zerror(gettext("could not open %s: %s"), pathbuf,
1434 		    strerror(errno));
1435 		return (-1);
1436 	}
1437 	/*
1438 	 * Lock the file to synchronize with other zoneadmds
1439 	 */
1440 	flock.l_type = F_WRLCK;
1441 	flock.l_whence = SEEK_SET;
1442 	flock.l_start = (off_t)0;
1443 	flock.l_len = (off_t)0;
1444 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1445 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1446 		    strerror(errno));
1447 		release_lock_file(*lockfd);
1448 		return (-1);
1449 	}
1450 	return (Z_OK);
1451 }
1452 
1453 static int
1454 start_zoneadmd(const char *zone_name)
1455 {
1456 	pid_t retval;
1457 	int pstatus = 0, error = -1, lockfd, doorfd;
1458 	struct door_info info;
1459 	char doorpath[MAXPATHLEN];
1460 
1461 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1462 
1463 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1464 		return (-1);
1465 	/*
1466 	 * We must do the door check with the lock held.  Otherwise, we
1467 	 * might race against another zoneadm/zlogin process and wind
1468 	 * up with two processes trying to start zoneadmd at the same
1469 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1470 	 * to be as seamless as is practical, from a user perspective.
1471 	 */
1472 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1473 		if (errno != ENOENT) {
1474 			zerror("failed to open %s: %s", doorpath,
1475 			    strerror(errno));
1476 			goto out;
1477 		}
1478 	} else {
1479 		/*
1480 		 * Seems to be working ok.
1481 		 */
1482 		if (door_info(doorfd, &info) == 0 &&
1483 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1484 			error = 0;
1485 			goto out;
1486 		}
1487 	}
1488 
1489 	if ((child_pid = fork()) == -1) {
1490 		zperror(gettext("could not fork"));
1491 		goto out;
1492 	} else if (child_pid == 0) {
1493 		/* child process */
1494 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1495 		    zone_name, NULL);
1496 		zperror(gettext("could not exec zoneadmd"));
1497 		_exit(1);
1498 	}
1499 
1500 	/* parent process */
1501 	do {
1502 		retval = waitpid(child_pid, &pstatus, 0);
1503 	} while (retval != child_pid);
1504 	if (WIFSIGNALED(pstatus) ||
1505 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1506 		zerror(gettext("could not start %s"), "zoneadmd");
1507 		goto out;
1508 	}
1509 	error = 0;
1510 out:
1511 	release_lock_file(lockfd);
1512 	(void) close(doorfd);
1513 	return (error);
1514 }
1515 
1516 static int
1517 init_template(void)
1518 {
1519 	int fd;
1520 	int err = 0;
1521 
1522 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1523 	if (fd == -1)
1524 		return (-1);
1525 
1526 	/*
1527 	 * zlogin doesn't do anything with the contract.
1528 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1529 	 */
1530 	err |= ct_tmpl_set_critical(fd, 0);
1531 	err |= ct_tmpl_set_informative(fd, 0);
1532 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1533 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1534 	if (err || ct_tmpl_activate(fd)) {
1535 		(void) close(fd);
1536 		return (-1);
1537 	}
1538 
1539 	return (fd);
1540 }
1541 
1542 static int
1543 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1544     char **new_args, char **new_env)
1545 {
1546 	pid_t retval;
1547 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1548 	int child_status;
1549 	int tmpl_fd;
1550 	sigset_t block_cld;
1551 
1552 	if ((tmpl_fd = init_template()) == -1) {
1553 		reset_tty();
1554 		zperror(gettext("could not create contract"));
1555 		return (1);
1556 	}
1557 
1558 	if (pipe(stdin_pipe) != 0) {
1559 		zperror(gettext("could not create STDIN pipe"));
1560 		return (1);
1561 	}
1562 	/*
1563 	 * When the user types ^D, we get a zero length message on STDIN.
1564 	 * We need to echo that down the pipe to send it to the other side;
1565 	 * but by default, pipes don't propagate zero-length messages.  We
1566 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1567 	 */
1568 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1569 		zperror(gettext("could not configure STDIN pipe"));
1570 		return (1);
1571 
1572 	}
1573 	if (pipe(stdout_pipe) != 0) {
1574 		zperror(gettext("could not create STDOUT pipe"));
1575 		return (1);
1576 	}
1577 	if (pipe(stderr_pipe) != 0) {
1578 		zperror(gettext("could not create STDERR pipe"));
1579 		return (1);
1580 	}
1581 
1582 	if (pipe(dead_child_pipe) != 0) {
1583 		zperror(gettext("could not create signalling pipe"));
1584 		return (1);
1585 	}
1586 	close_on_sig = dead_child_pipe[0];
1587 
1588 	/*
1589 	 * If any of the pipe FD's winds up being less than STDERR, then we
1590 	 * have a mess on our hands-- and we are lacking some of the I/O
1591 	 * streams we would expect anyway.  So we bail.
1592 	 */
1593 	if (stdin_pipe[0] <= STDERR_FILENO ||
1594 	    stdin_pipe[1] <= STDERR_FILENO ||
1595 	    stdout_pipe[0] <= STDERR_FILENO ||
1596 	    stdout_pipe[1] <= STDERR_FILENO ||
1597 	    stderr_pipe[0] <= STDERR_FILENO ||
1598 	    stderr_pipe[1] <= STDERR_FILENO ||
1599 	    dead_child_pipe[0] <= STDERR_FILENO ||
1600 	    dead_child_pipe[1] <= STDERR_FILENO) {
1601 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1602 		return (1);
1603 	}
1604 
1605 	if (prefork_dropprivs() != 0) {
1606 		zperror(gettext("could not allocate privilege set"));
1607 		return (1);
1608 	}
1609 
1610 	(void) sigset(SIGCLD, sigcld);
1611 	(void) sigemptyset(&block_cld);
1612 	(void) sigaddset(&block_cld, SIGCLD);
1613 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1614 
1615 	if ((child_pid = fork()) == -1) {
1616 		(void) ct_tmpl_clear(tmpl_fd);
1617 		(void) close(tmpl_fd);
1618 		zperror(gettext("could not fork"));
1619 		return (1);
1620 	} else if (child_pid == 0) { /* child process */
1621 		(void) ct_tmpl_clear(tmpl_fd);
1622 
1623 		/*
1624 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1625 		 */
1626 		(void) close(STDIN_FILENO);
1627 		(void) close(STDOUT_FILENO);
1628 		(void) close(STDERR_FILENO);
1629 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1630 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1631 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1632 		(void) closefrom(STDERR_FILENO + 1);
1633 
1634 		(void) sigset(SIGCLD, SIG_DFL);
1635 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1636 		/*
1637 		 * In case any of stdin, stdout or stderr are streams,
1638 		 * anchor them to prevent malicious I_POPs.
1639 		 */
1640 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1641 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1642 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1643 
1644 		if (zone_enter(zoneid) == -1) {
1645 			zerror(gettext("could not enter zone %s: %s"),
1646 			    zonename, strerror(errno));
1647 			_exit(1);
1648 		}
1649 
1650 		/*
1651 		 * For non-native zones, tell libc where it can find locale
1652 		 * specific getttext() messages.
1653 		 */
1654 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1655 			(void) bindtextdomain(TEXT_DOMAIN,
1656 			    "/.SUNWnative/usr/lib/locale");
1657 		else if (access("/native/usr/lib/locale", R_OK) == 0)
1658 			(void) bindtextdomain(TEXT_DOMAIN,
1659 			    "/native/usr/lib/locale");
1660 
1661 		if (!failsafe)
1662 			new_env = prep_env_noninteractive(user_cmd, new_env);
1663 
1664 		if (new_env == NULL) {
1665 			_exit(1);
1666 		}
1667 
1668 		/*
1669 		 * Move into a new process group; the zone_enter will have
1670 		 * placed us into zsched's session, and we want to be in
1671 		 * a unique process group.
1672 		 */
1673 		(void) setpgid(getpid(), getpid());
1674 
1675 		/*
1676 		 * The child needs to run as root to
1677 		 * execute the su program.
1678 		 */
1679 		if (setuid(0) == -1) {
1680 			zperror(gettext("insufficient privilege"));
1681 			return (1);
1682 		}
1683 
1684 		(void) execve(new_args[0], new_args, new_env);
1685 		zperror(gettext("exec failure"));
1686 		_exit(1);
1687 	}
1688 	/* parent */
1689 
1690 	/* close pipe sides written by child */
1691 	(void) close(stdout_pipe[1]);
1692 	(void) close(stderr_pipe[1]);
1693 
1694 	(void) sigset(SIGINT, sig_forward);
1695 
1696 	postfork_dropprivs();
1697 
1698 	(void) ct_tmpl_clear(tmpl_fd);
1699 	(void) close(tmpl_fd);
1700 
1701 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1702 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1703 	    dead_child_pipe[1], B_TRUE);
1704 	do {
1705 		retval = waitpid(child_pid, &child_status, 0);
1706 		if (retval == -1) {
1707 			child_status = 0;
1708 		}
1709 	} while (retval != child_pid && errno != ECHILD);
1710 
1711 	return (WEXITSTATUS(child_status));
1712 }
1713 
/*
 * Return the user name belonging to the real user ID of this process.
 *
 * The name is returned as a private, malloc'd copy rather than a pointer
 * into the record returned by getpwuid(), because that record may live in
 * static storage that a later passwd lookup overwrites.  The copy is meant
 * to live for the rest of the process and is never freed.
 *
 * If the name cannot be determined the error is reported and the process
 * exits with status 1.
 */
static char *
get_username()
{
	uid_t	uid;
	struct passwd *nptr;
	char *name;

	/*
	 * Authorizations are checked to restrict access based on the
	 * requested operation and zone name.  It is assumed that the
	 * program is running with all privileges, but that the real
	 * user ID is that of the user or role on whose behalf we are
	 * operating.  So we start by getting the username that will be
	 * used for subsequent authorization checks.
	 */

	uid = getuid();
	if ((nptr = getpwuid(uid)) == NULL ||
	    (name = strdup(nptr->pw_name)) == NULL) {
		zerror(gettext("could not get user name."));
		_exit(1);
	}
	return (name);
}
1737 int
1738 main(int argc, char **argv)
1739 {
1740 	int arg, console = 0;
1741 	zoneid_t zoneid;
1742 	zone_state_t st;
1743 	char *login = "root";
1744 	int lflag = 0;
1745 	int nflag = 0;
1746 	char *zonename = NULL;
1747 	char **proc_args = NULL;
1748 	char **new_args, **new_env;
1749 	sigset_t block_cld;
1750 	char devroot[MAXPATHLEN];
1751 	char *slavename, slaveshortname[MAXPATHLEN];
1752 	priv_set_t *privset;
1753 	int tmpl_fd;
1754 	char zonebrand[MAXNAMELEN];
1755 	char default_brand[MAXNAMELEN];
1756 	struct stat sb;
1757 	char kernzone[ZONENAME_MAX];
1758 	brand_handle_t bh;
1759 	char user_cmd[MAXPATHLEN];
1760 	char authname[MAXAUTHS];
1761 
1762 	(void) setlocale(LC_ALL, "");
1763 	(void) textdomain(TEXT_DOMAIN);
1764 
1765 	(void) getpname(argv[0]);
1766 	username = get_username();
1767 
1768 	while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
1769 		switch (arg) {
1770 		case 'C':
1771 			console = 1;
1772 			break;
1773 		case 'E':
1774 			nocmdchar = 1;
1775 			break;
1776 		case 'R':	/* undocumented */
1777 			if (*optarg != '/') {
1778 				zerror(gettext("root path must be absolute."));
1779 				exit(2);
1780 			}
1781 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1782 				zerror(
1783 				    gettext("root path must be a directory."));
1784 				exit(2);
1785 			}
1786 			zonecfg_set_root(optarg);
1787 			break;
1788 		case 'Q':
1789 			quiet = 1;
1790 			break;
1791 		case 'S':
1792 			failsafe = 1;
1793 			break;
1794 		case 'd':
1795 			disconnect = 1;
1796 			break;
1797 		case 'e':
1798 			set_cmdchar(optarg);
1799 			break;
1800 		case 'l':
1801 			login = optarg;
1802 			lflag = 1;
1803 			break;
1804 		case 'n':
1805 			nflag = 1;
1806 			break;
1807 		default:
1808 			usage();
1809 		}
1810 	}
1811 
1812 	if (console != 0) {
1813 
1814 		if (lflag != 0) {
1815 			zerror(gettext(
1816 			    "-l may not be specified for console login"));
1817 			usage();
1818 		}
1819 
1820 		if (nflag != 0) {
1821 			zerror(gettext(
1822 			    "-n may not be specified for console login"));
1823 			usage();
1824 		}
1825 
1826 		if (failsafe != 0) {
1827 			zerror(gettext(
1828 			    "-S may not be specified for console login"));
1829 			usage();
1830 		}
1831 
1832 		if (zonecfg_in_alt_root()) {
1833 			zerror(gettext(
1834 			    "-R may not be specified for console login"));
1835 			exit(2);
1836 		}
1837 
1838 	}
1839 
1840 	if (failsafe != 0 && lflag != 0) {
1841 		zerror(gettext("-l may not be specified for failsafe login"));
1842 		usage();
1843 	}
1844 
1845 	if (!console && disconnect != 0) {
1846 		zerror(gettext(
1847 		    "-d may only be specified with console login"));
1848 		usage();
1849 	}
1850 
1851 	if (optind == (argc - 1)) {
1852 		/*
1853 		 * zone name, no process name; this should be an interactive
1854 		 * as long as STDIN is really a tty.
1855 		 */
1856 		if (nflag != 0) {
1857 			zerror(gettext(
1858 			    "-n may not be specified for interactive login"));
1859 			usage();
1860 		}
1861 		if (isatty(STDIN_FILENO))
1862 			interactive = 1;
1863 		zonename = argv[optind];
1864 	} else if (optind < (argc - 1)) {
1865 		if (console) {
1866 			zerror(gettext("Commands may not be specified for "
1867 			    "console login."));
1868 			usage();
1869 		}
1870 		/* zone name and process name, and possibly some args */
1871 		zonename = argv[optind];
1872 		proc_args = &argv[optind + 1];
1873 		interactive = 0;
1874 	} else {
1875 		usage();
1876 	}
1877 
1878 	if (getzoneid() != GLOBAL_ZONEID) {
1879 		zerror(gettext("'%s' may only be used from the global zone"),
1880 		    pname);
1881 		return (1);
1882 	}
1883 
1884 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1885 		zerror(gettext("'%s' not applicable to the global zone"),
1886 		    pname);
1887 		return (1);
1888 	}
1889 
1890 	if (zone_get_state(zonename, &st) != Z_OK) {
1891 		zerror(gettext("zone '%s' unknown"), zonename);
1892 		return (1);
1893 	}
1894 
1895 	if (st < ZONE_STATE_INSTALLED) {
1896 		zerror(gettext("cannot login to a zone which is '%s'"),
1897 		    zone_state_str(st));
1898 		return (1);
1899 	}
1900 
1901 	/*
1902 	 * In both console and non-console cases, we require all privs.
1903 	 * In the console case, because we may need to startup zoneadmd.
1904 	 * In the non-console case in order to do zone_enter(2), zonept()
1905 	 * and other tasks.
1906 	 */
1907 
1908 	if ((privset = priv_allocset()) == NULL) {
1909 		zperror(gettext("priv_allocset failed"));
1910 		return (1);
1911 	}
1912 
1913 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1914 		zperror(gettext("getppriv failed"));
1915 		priv_freeset(privset);
1916 		return (1);
1917 	}
1918 
1919 	if (priv_isfullset(privset) == B_FALSE) {
1920 		zerror(gettext("You lack sufficient privilege to run "
1921 		    "this command (all privs required)"));
1922 		priv_freeset(privset);
1923 		return (1);
1924 	}
1925 	priv_freeset(privset);
1926 
1927 	/*
1928 	 * Check if user is authorized for requested usage of the zone
1929 	 */
1930 
1931 	(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1932 	    ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1933 	if (chkauthattr(authname, username) == 0) {
1934 		if (console) {
1935 			zerror(gettext("%s is not authorized for console "
1936 			    "access to  %s zone."),
1937 			    username, zonename);
1938 			return (1);
1939 		} else {
1940 			(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1941 			    ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1942 			if (failsafe || !interactive) {
1943 				zerror(gettext("%s is not authorized for  "
1944 				    "failsafe or non-interactive login "
1945 				    "to  %s zone."), username, zonename);
1946 				return (1);
1947 			} else if (chkauthattr(authname, username) == 0) {
1948 				zerror(gettext("%s is not authorized "
1949 				    " to login to %s zone."),
1950 				    username, zonename);
1951 				return (1);
1952 			}
1953 		}
1954 	} else {
1955 		forced_login = B_TRUE;
1956 	}
1957 
1958 	/*
1959 	 * The console is a separate case from the rest of the code; handle
1960 	 * it first.
1961 	 */
1962 	if (console) {
1963 		/*
1964 		 * Ensure that zoneadmd for this zone is running.
1965 		 */
1966 		if (start_zoneadmd(zonename) == -1)
1967 			return (1);
1968 
1969 		/*
1970 		 * Make contact with zoneadmd.
1971 		 */
1972 		if (get_console_master(zonename) == -1)
1973 			return (1);
1974 
1975 		if (!quiet)
1976 			(void) printf(
1977 			    gettext("[Connected to zone '%s' console]\n"),
1978 			    zonename);
1979 
1980 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1981 			reset_tty();
1982 			zperror(gettext("failed to set stdin pty to raw mode"));
1983 			return (1);
1984 		}
1985 
1986 		(void) sigset(SIGWINCH, sigwinch);
1987 		(void) sigwinch(0);
1988 
1989 		/*
1990 		 * Run the I/O loop until we get disconnected.
1991 		 */
1992 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1993 		reset_tty();
1994 		if (!quiet)
1995 			(void) printf(
1996 			    gettext("\n[Connection to zone '%s' console "
1997 			    "closed]\n"), zonename);
1998 
1999 		return (0);
2000 	}
2001 
2002 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
2003 		zerror(gettext("login allowed only to running zones "
2004 		    "(%s is '%s')."), zonename, zone_state_str(st));
2005 		return (1);
2006 	}
2007 
2008 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
2009 	if (zonecfg_in_alt_root()) {
2010 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
2011 
2012 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2013 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2014 			zerror(gettext("cannot find scratch zone %s"),
2015 			    zonename);
2016 			if (fp != NULL)
2017 				zonecfg_close_scratch(fp);
2018 			return (1);
2019 		}
2020 		zonecfg_close_scratch(fp);
2021 	}
2022 
2023 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2024 		zerror(gettext("failed to get zoneid for zone '%s'"),
2025 		    zonename);
2026 		return (1);
2027 	}
2028 
2029 	/*
2030 	 * We need the zone root path only if we are setting up a pty.
2031 	 */
2032 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2033 		zerror(gettext("could not get dev path for zone %s"),
2034 		    zonename);
2035 		return (1);
2036 	}
2037 
2038 	if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2039 		zerror(gettext("could not get brand for zone %s"), zonename);
2040 		return (1);
2041 	}
2042 	/*
2043 	 * In the alternate root environment, the only supported
2044 	 * operations are mount and unmount.  In this case, just treat
2045 	 * the zone as native if it is cluster.  Cluster zones can be
2046 	 * native for the purpose of LU or upgrade, and the cluster
2047 	 * brand may not exist in the miniroot (such as in net install
2048 	 * upgrade).
2049 	 */
2050 	if (zonecfg_default_brand(default_brand,
2051 	    sizeof (default_brand)) != Z_OK) {
2052 		zerror(gettext("unable to determine default brand"));
2053 		return (1);
2054 	}
2055 	if (zonecfg_in_alt_root() &&
2056 	    strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2057 		(void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2058 	}
2059 
2060 	if ((bh = brand_open(zonebrand)) == NULL) {
2061 		zerror(gettext("could not open brand for zone %s"), zonename);
2062 		return (1);
2063 	}
2064 
2065 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2066 		zperror(gettext("could not assemble new arguments"));
2067 		brand_close(bh);
2068 		return (1);
2069 	}
2070 	/*
2071 	 * Get the brand specific user_cmd.  This command is used to get
2072 	 * a passwd(4) entry for login.
2073 	 */
2074 	if (!interactive && !failsafe) {
2075 		if (zone_get_user_cmd(bh, login, user_cmd,
2076 		    sizeof (user_cmd)) == NULL) {
2077 			zerror(gettext("could not get user_cmd for zone %s"),
2078 			    zonename);
2079 			brand_close(bh);
2080 			return (1);
2081 		}
2082 	}
2083 	brand_close(bh);
2084 
2085 	if ((new_env = prep_env()) == NULL) {
2086 		zperror(gettext("could not assemble new environment"));
2087 		return (1);
2088 	}
2089 
2090 	if (!interactive) {
2091 		if (nflag) {
2092 			int nfd;
2093 
2094 			if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2095 				zperror(gettext("failed to open null device"));
2096 				return (1);
2097 			}
2098 			if (nfd != STDIN_FILENO) {
2099 				if (dup2(nfd, STDIN_FILENO) < 0) {
2100 					zperror(gettext(
2101 					    "failed to dup2 null device"));
2102 					return (1);
2103 				}
2104 				(void) close(nfd);
2105 			}
2106 			/* /dev/null is now standard input */
2107 		}
2108 		return (noninteractive_login(zonename, user_cmd, zoneid,
2109 		    new_args, new_env));
2110 	}
2111 
2112 	if (zonecfg_in_alt_root()) {
2113 		zerror(gettext("cannot use interactive login with scratch "
2114 		    "zone"));
2115 		return (1);
2116 	}
2117 
2118 	/*
2119 	 * Things are more complex in interactive mode; we get the
2120 	 * master side of the pty, then place the user's terminal into
2121 	 * raw mode.
2122 	 */
2123 	if (get_master_pty() == -1) {
2124 		zerror(gettext("could not setup master pty device"));
2125 		return (1);
2126 	}
2127 
2128 	/*
2129 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2130 	 */
2131 	if ((slavename = ptsname(masterfd)) == NULL) {
2132 		zperror(gettext("failed to get name for pseudo-tty"));
2133 		return (1);
2134 	}
2135 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2136 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2137 		    sizeof (slaveshortname));
2138 	else
2139 		(void) strlcpy(slaveshortname, slavename,
2140 		    sizeof (slaveshortname));
2141 
2142 	if (!quiet)
2143 		(void) printf(gettext("[Connected to zone '%s' %s]\n"),
2144 		    zonename, slaveshortname);
2145 
2146 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
2147 		reset_tty();
2148 		zperror(gettext("failed to set stdin pty to raw mode"));
2149 		return (1);
2150 	}
2151 
2152 	if (prefork_dropprivs() != 0) {
2153 		reset_tty();
2154 		zperror(gettext("could not allocate privilege set"));
2155 		return (1);
2156 	}
2157 
2158 	/*
2159 	 * We must mask SIGCLD until after we have coped with the fork
2160 	 * sufficiently to deal with it; otherwise we can race and receive the
2161 	 * signal before child_pid has been initialized (yes, this really
2162 	 * happens).
2163 	 */
2164 	(void) sigset(SIGCLD, sigcld);
2165 	(void) sigemptyset(&block_cld);
2166 	(void) sigaddset(&block_cld, SIGCLD);
2167 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2168 
2169 	/*
2170 	 * We activate the contract template at the last minute to
2171 	 * avoid intermediate functions that could be using fork(2)
2172 	 * internally.
2173 	 */
2174 	if ((tmpl_fd = init_template()) == -1) {
2175 		reset_tty();
2176 		zperror(gettext("could not create contract"));
2177 		return (1);
2178 	}
2179 
2180 	if ((child_pid = fork()) == -1) {
2181 		(void) ct_tmpl_clear(tmpl_fd);
2182 		reset_tty();
2183 		zperror(gettext("could not fork"));
2184 		return (1);
2185 	} else if (child_pid == 0) { /* child process */
2186 		int slavefd, newslave;
2187 
2188 		(void) ct_tmpl_clear(tmpl_fd);
2189 		(void) close(tmpl_fd);
2190 
2191 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2192 
2193 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2194 			return (1);
2195 
2196 		/*
2197 		 * Close all fds except for the slave pty.
2198 		 */
2199 		(void) fdwalk(close_func, &slavefd);
2200 
2201 		/*
2202 		 * Temporarily dup slavefd to stderr; that way if we have
2203 		 * to print out that zone_enter failed, the output will
2204 		 * have somewhere to go.
2205 		 */
2206 		if (slavefd != STDERR_FILENO)
2207 			(void) dup2(slavefd, STDERR_FILENO);
2208 
2209 		if (zone_enter(zoneid) == -1) {
2210 			zerror(gettext("could not enter zone %s: %s"),
2211 			    zonename, strerror(errno));
2212 			return (1);
2213 		}
2214 
2215 		if (slavefd != STDERR_FILENO)
2216 			(void) close(STDERR_FILENO);
2217 
2218 		/*
2219 		 * We take pains to get this process into a new process
2220 		 * group, and subsequently a new session.  In this way,
2221 		 * we'll have a session which doesn't yet have a controlling
2222 		 * terminal.  When we open the slave, it will become the
2223 		 * controlling terminal; no PIDs concerning pgrps or sids
2224 		 * will leak inappropriately into the zone.
2225 		 */
2226 		(void) setpgrp();
2227 
2228 		/*
2229 		 * We need the slave pty to be referenced from the zone's
2230 		 * /dev in order to ensure that the devt's, etc are all
2231 		 * correct.  Otherwise we break ttyname and the like.
2232 		 */
2233 		if ((newslave = open(slavename, O_RDWR)) == -1) {
2234 			(void) close(slavefd);
2235 			return (1);
2236 		}
2237 		(void) close(slavefd);
2238 		slavefd = newslave;
2239 
2240 		/*
2241 		 * dup the slave to the various FDs, so that when the
2242 		 * spawned process does a write/read it maps to the slave
2243 		 * pty.
2244 		 */
2245 		(void) dup2(slavefd, STDIN_FILENO);
2246 		(void) dup2(slavefd, STDOUT_FILENO);
2247 		(void) dup2(slavefd, STDERR_FILENO);
2248 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2249 		    slavefd != STDERR_FILENO) {
2250 			(void) close(slavefd);
2251 		}
2252 
2253 		/*
2254 		 * In failsafe mode, we don't use login(1), so don't try
2255 		 * setting up a utmpx entry.
2256 		 */
2257 		if (!failsafe)
2258 			if (setup_utmpx(slaveshortname) == -1)
2259 				return (1);
2260 
2261 		/*
2262 		 * The child needs to run as root to
2263 		 * execute the brand's login program.
2264 		 */
2265 		if (setuid(0) == -1) {
2266 			zperror(gettext("insufficient privilege"));
2267 			return (1);
2268 		}
2269 
2270 		(void) execve(new_args[0], new_args, new_env);
2271 		zperror(gettext("exec failure"));
2272 		return (1);
2273 	}
2274 
2275 	(void) ct_tmpl_clear(tmpl_fd);
2276 	(void) close(tmpl_fd);
2277 
2278 	/*
2279 	 * The rest is only for the parent process.
2280 	 */
2281 	(void) sigset(SIGWINCH, sigwinch);
2282 
2283 	postfork_dropprivs();
2284 
2285 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2286 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2287 
2288 	reset_tty();
2289 	if (!quiet)
2290 		(void) fprintf(stderr,
2291 		    gettext("\n[Connection to zone '%s' %s closed]\n"),
2292 		    zonename, slaveshortname);
2293 
2294 	if (pollerr != 0) {
2295 		(void) fprintf(stderr, gettext("Error: connection closed due "
2296 		    "to unexpected pollevents=0x%x.\n"), pollerr);
2297 		return (1);
2298 	}
2299 
2300 	return (0);
2301 }
2302