xref: /illumos-gate/usr/src/cmd/zlogin/zlogin.c (revision 458f44a49dc56cd17a39815122214e7a1b4793e3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 DEY Storage Systems, Inc.
24  * Copyright (c) 2014 Gary Mills
25  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
26  */
27 
28 /*
29  * zlogin provides three types of login which allow users in the global
30  * zone to access non-global zones.
31  *
32  * - "interactive login" is similar to rlogin(1); for example, the user could
33  *   issue 'zlogin my-zone' or 'zlogin -e ^ -l me my-zone'.   The user is
34  *   granted a new pty (which is then shoved into the zone), and an I/O
35  *   loop between parent and child processes takes care of the interactive
36  *   session.  In this mode, login(1) (and its -c option, which means
37  *   "already authenticated") is employed to take care of the initialization
38  *   of the user's session.
39  *
40  * - "non-interactive login" is similar to su(1M); the user could issue
41  *   'zlogin my-zone ls -l' and the command would be run as specified.
42  *   In this mode, zlogin sets up pipes as the communication channel, and
43  *   'su' is used to do the login setup work.
44  *
45  * - "console login" is the equivalent to accessing the tip line for a
46  *   zone.  For example, the user can issue 'zlogin -C my-zone'.
47  *   In this mode, zlogin contacts the zoneadmd process via unix domain
48  *   socket.  If zoneadmd is not running, it starts it.  This allows the
49  *   console to be available anytime the zone is installed, regardless of
50  *   whether it is running.
51  */
52 
53 #include <sys/socket.h>
54 #include <sys/termios.h>
55 #include <sys/utsname.h>
56 #include <sys/stat.h>
57 #include <sys/types.h>
58 #include <sys/contract/process.h>
59 #include <sys/ctfs.h>
60 #include <sys/brand.h>
61 #include <sys/wait.h>
62 #include <alloca.h>
63 #include <assert.h>
64 #include <ctype.h>
65 #include <paths.h>
66 #include <door.h>
67 #include <errno.h>
68 #include <nss_dbdefs.h>
69 #include <poll.h>
70 #include <priv.h>
71 #include <pwd.h>
72 #include <unistd.h>
73 #include <utmpx.h>
74 #include <sac.h>
75 #include <signal.h>
76 #include <stdarg.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <strings.h>
81 #include <stropts.h>
82 #include <wait.h>
83 #include <zone.h>
84 #include <fcntl.h>
85 #include <libdevinfo.h>
86 #include <libintl.h>
87 #include <locale.h>
88 #include <libzonecfg.h>
89 #include <libcontract.h>
90 #include <libbrand.h>
91 #include <auth_list.h>
92 #include <auth_attr.h>
93 #include <secdb.h>
94 
/*
 * File-scope state shared by the routines below.
 */

/* Master side of the session: pty master or zone console socket. */
static int masterfd;
/* Terminal settings captured before we change them; see reset_tty(). */
static struct termios save_termios;
/* Raw-mode settings with the original VEOF/VEOL characters patched in. */
static struct termios effective_termios;
/* The fd whose settings were stashed in save_termios. */
static int save_fd;
/* Window size read from stdin, propagated to the slave pty. */
static struct winsize winsize;
/* Set to 1 by sigcld() once the child we care about has terminated. */
static volatile int dead;
/* Pid watched by sigcld() and signalled by sig_forward(); -1 if none. */
static volatile pid_t child_pid = -1;
/* Nonzero selects the interactive variant of the dropped privilege set. */
static int interactive = 0;
/* Privilege set built by prefork_dropprivs(), applied after fork(). */
static priv_set_t *dropprivs;

/* Nonzero disables escape-sequence detection in process_user_input(). */
static int nocmdchar = 0;
/* Nonzero makes init_slave_pty() tolerate module/termios failures. */
static int failsafe = 0;
/* Passed to zoneadmd in the console IDENT handshake. */
static int disconnect = 0;
/* Escape character recognized at the beginning of a line. */
static char cmdchar = '~';
/* NOTE(review): not used in this part of the file; presumably -Q. */
static int quiet = 0;

/* Unexpected poll revents recorded by doio() when it bails out. */
static int pollerr = 0;

/* Program name used as the prefix for diagnostics; set by getpname(). */
static const char *pname;
/* NOTE(review): not used in this part of the file; presumably -l user. */
static char *username;
115 
116 /*
117  * When forced_login is true, the user is not prompted
118  * for an authentication password in the target zone.
119  */
120 static boolean_t forced_login = B_FALSE;
121 
122 #if !defined(TEXT_DOMAIN)		/* should be defined by cc -D */
123 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it wasn't */
124 #endif
125 
126 #define	SUPATH	"/usr/bin/su"
127 #define	FAILSAFESHELL	"/sbin/sh"
128 #define	DEFAULTSHELL	"/sbin/sh"
129 #define	DEF_PATH	"/usr/sbin:/usr/bin"
130 
131 #define	CLUSTER_BRAND_NAME	"cluster"
132 
133 /*
134  * The ZLOGIN_BUFSIZ is larger than PIPE_BUF so we can be sure we're clearing
135  * out the pipe when the child is exiting.  The ZLOGIN_RDBUFSIZ must be less
136  * than ZLOGIN_BUFSIZ (because we share the buffer in doio).  This value is
137  * also chosen in conjunction with the HI_WATER setting to make sure we
138  * don't fill up the pipe.  We can write FIFOHIWAT (16k) into the pipe before
139  * blocking.  By having ZLOGIN_RDBUFSIZ set to 1k and HI_WATER set to 8k, we
140  * know we can always write a ZLOGIN_RDBUFSIZ chunk into the pipe when there
141  * is less than HI_WATER data already in the pipe.
142  */
143 #define	ZLOGIN_BUFSIZ	8192
144 #define	ZLOGIN_RDBUFSIZ	1024
145 #define	HI_WATER	8192
146 
147 /*
148  * See canonify() below.  CANONIFY_LEN is the maximum length that a
149  * "canonical" sequence will expand to (backslash, three octal digits, NUL).
150  */
151 #define	CANONIFY_LEN 5
152 
153 static void
154 usage(void)
155 {
156 	(void) fprintf(stderr, gettext("usage: %s [ -dnQCES ] [ -e cmdchar ] "
157 	    "[-l user] zonename [command [args ...] ]\n"), pname);
158 	exit(2);
159 }
160 
161 static const char *
162 getpname(const char *arg0)
163 {
164 	const char *p = strrchr(arg0, '/');
165 
166 	if (p == NULL)
167 		p = arg0;
168 	else
169 		p++;
170 
171 	pname = p;
172 	return (p);
173 }
174 
175 static void
176 zerror(const char *fmt, ...)
177 {
178 	va_list alist;
179 
180 	(void) fprintf(stderr, "%s: ", pname);
181 	va_start(alist, fmt);
182 	(void) vfprintf(stderr, fmt, alist);
183 	va_end(alist);
184 	(void) fprintf(stderr, "\n");
185 }
186 
187 static void
188 zperror(const char *str)
189 {
190 	const char *estr;
191 
192 	if ((estr = strerror(errno)) != NULL)
193 		(void) fprintf(stderr, "%s: %s: %s\n", pname, str, estr);
194 	else
195 		(void) fprintf(stderr, "%s: %s: errno %d\n", pname, str, errno);
196 }
197 
198 /*
199  * The first part of our privilege dropping scheme needs to be called before
200  * fork(), since we must have it for security; we don't want to be surprised
201  * later that we couldn't allocate the privset.
202  */
203 static int
204 prefork_dropprivs()
205 {
206 	if ((dropprivs = priv_allocset()) == NULL)
207 		return (1);
208 
209 	priv_basicset(dropprivs);
210 	(void) priv_delset(dropprivs, PRIV_PROC_INFO);
211 	(void) priv_delset(dropprivs, PRIV_PROC_FORK);
212 	(void) priv_delset(dropprivs, PRIV_PROC_EXEC);
213 	(void) priv_delset(dropprivs, PRIV_FILE_LINK_ANY);
214 
215 	/*
216 	 * We need to keep the basic privilege PROC_SESSION and all unknown
217 	 * basic privileges as well as the privileges PROC_ZONE and
218 	 * PROC_OWNER in order to query session information and
219 	 * send signals.
220 	 */
221 	if (interactive == 0) {
222 		(void) priv_addset(dropprivs, PRIV_PROC_ZONE);
223 		(void) priv_addset(dropprivs, PRIV_PROC_OWNER);
224 	} else {
225 		(void) priv_delset(dropprivs, PRIV_PROC_SESSION);
226 	}
227 
228 	return (0);
229 }
230 
231 /*
232  * The second part of the privilege drop.  We are paranoid about being attacked
233  * by the zone, so we drop all privileges.  This should prevent a compromise
234  * which gets us to fork(), exec(), symlink(), etc.
235  */
236 static void
237 postfork_dropprivs()
238 {
239 	if ((setppriv(PRIV_SET, PRIV_PERMITTED, dropprivs)) == -1) {
240 		zperror(gettext("Warning: could not set permitted privileges"));
241 	}
242 	if ((setppriv(PRIV_SET, PRIV_LIMIT, dropprivs)) == -1) {
243 		zperror(gettext("Warning: could not set limit privileges"));
244 	}
245 	if ((setppriv(PRIV_SET, PRIV_INHERITABLE, dropprivs)) == -1) {
246 		zperror(gettext("Warning: could not set inheritable "
247 		    "privileges"));
248 	}
249 }
250 
251 /*
252  * Create the unix domain socket and call the zoneadmd server; handshake
253  * with it to determine whether it will allow us to connect.
254  */
255 static int
256 get_console_master(const char *zname)
257 {
258 	int sockfd = -1;
259 	struct sockaddr_un servaddr;
260 	char clientid[MAXPATHLEN];
261 	char handshake[MAXPATHLEN], c;
262 	int msglen;
263 	int i = 0, err = 0;
264 
265 	if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
266 		zperror(gettext("could not create socket"));
267 		return (-1);
268 	}
269 
270 	bzero(&servaddr, sizeof (servaddr));
271 	servaddr.sun_family = AF_UNIX;
272 	(void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
273 	    "%s/%s.console_sock", ZONES_TMPDIR, zname);
274 
275 	if (connect(sockfd, (struct sockaddr *)&servaddr,
276 	    sizeof (servaddr)) == -1) {
277 		zperror(gettext("Could not connect to zone console"));
278 		goto bad;
279 	}
280 	masterfd = sockfd;
281 
282 	msglen = snprintf(clientid, sizeof (clientid), "IDENT %lu %s %d\n",
283 	    getpid(), setlocale(LC_MESSAGES, NULL), disconnect);
284 
285 	if (msglen >= sizeof (clientid) || msglen < 0) {
286 		zerror("protocol error");
287 		goto bad;
288 	}
289 
290 	if (write(masterfd, clientid, msglen) != msglen) {
291 		zerror("protocol error");
292 		goto bad;
293 	}
294 
295 	bzero(handshake, sizeof (handshake));
296 
297 	/*
298 	 * Take care not to accumulate more than our fill, and leave room for
299 	 * the NUL at the end.
300 	 */
301 	while ((err = read(masterfd, &c, 1)) == 1) {
302 		if (i >= (sizeof (handshake) - 1))
303 			break;
304 		if (c == '\n')
305 			break;
306 		handshake[i] = c;
307 		i++;
308 	}
309 
310 	/*
311 	 * If something went wrong during the handshake we bail; perhaps
312 	 * the server died off.
313 	 */
314 	if (err == -1) {
315 		zperror(gettext("Could not connect to zone console"));
316 		goto bad;
317 	}
318 
319 	if (strncmp(handshake, "OK", sizeof (handshake)) == 0)
320 		return (0);
321 
322 	zerror(gettext("Console is already in use by process ID %s."),
323 	    handshake);
324 bad:
325 	(void) close(sockfd);
326 	masterfd = -1;
327 	return (-1);
328 }
329 
330 
331 /*
332  * Routines to handle pty creation upon zone entry and to shuttle I/O back
333  * and forth between the two terminals.  We also compute and store the
334  * name of the slave terminal associated with the master side.
335  */
336 static int
337 get_master_pty()
338 {
339 	if ((masterfd = open("/dev/ptmx", O_RDWR|O_NONBLOCK)) < 0) {
340 		zperror(gettext("failed to obtain a pseudo-tty"));
341 		return (-1);
342 	}
343 	if (tcgetattr(STDIN_FILENO, &save_termios) == -1) {
344 		zperror(gettext("failed to get terminal settings from stdin"));
345 		return (-1);
346 	}
347 	(void) ioctl(STDIN_FILENO, TIOCGWINSZ, (char *)&winsize);
348 
349 	return (0);
350 }
351 
/*
 * This is a bit tricky; normally a pts device will belong to the zone it
 * is granted to.  But in the case of "entering" a zone, we need to establish
 * the pty before entering the zone so that we can vector I/O to and from it
 * from the global zone.
 *
 * We use the zonept() call to let the ptm driver know what we are up to;
 * the only other hairy bit is the construction of zoneslavename, which is
 * the slave device name returned by ptsname() prefixed with devroot.
 *
 * Parameters:
 *	zoneid:  the zone the pty is handed to via zonept().
 *	devroot: path prefix prepended to the slave device name to form the
 *		 path that is opened from the global zone.
 *
 * Returns the open slave fd on success, or -1 on failure.  When failsafe
 * is set, failures to push STREAMS modules, to set the stream anchor or
 * to apply the terminal settings are reported but do not abort the setup.
 */
static int
init_slave_pty(zoneid_t zoneid, char *devroot)
{
	int slavefd = -1;
	char *slavename, zoneslavename[MAXPATHLEN];

	/*
	 * Set slave permissions and unlock the pts.  The pts is zoned
	 * further below, after the slave side has been opened.
	 */
	if (grantpt(masterfd) != 0) {
		zperror(gettext("grantpt failed"));
		return (-1);
	}

	if (unlockpt(masterfd) != 0) {
		zperror(gettext("unlockpt failed"));
		return (-1);
	}

	/*
	 * We must open the slave side before zoning this pty; otherwise
	 * the kernel would refuse us the open-- zoning a pty makes it
	 * inaccessible to the global zone.  Note we are trying to open
	 * the device node via the $ZONEROOT/dev path for this pty.
	 *
	 * Later we'll close the slave out once we've opened it again
	 * from within the target zone.  Blarg.
	 */
	if ((slavename = ptsname(masterfd)) == NULL) {
		zperror(gettext("failed to get name for pseudo-tty"));
		return (-1);
	}

	(void) snprintf(zoneslavename, sizeof (zoneslavename), "%s%s",
	    devroot, slavename);

	if ((slavefd = open(zoneslavename, O_RDWR)) < 0) {
		zerror(gettext("failed to open %s: %s"), zoneslavename,
		    strerror(errno));
		return (-1);
	}

	/*
	 * Push hardware emulation (ptem), line discipline (ldterm),
	 * and V7/4BSD/Xenix compatibility (ttcompat) modules.
	 */
	if (ioctl(slavefd, I_PUSH, "ptem") == -1) {
		zperror(gettext("failed to push ptem module"));
		if (!failsafe)
			goto bad;
	}

	/*
	 * Anchor the stream to prevent malicious I_POPs; we prefer to do
	 * this prior to entering the zone so that we can detect any errors
	 * early, and so that we can set the anchor from the global zone.
	 */
	if (ioctl(slavefd, I_ANCHOR) == -1) {
		zperror(gettext("failed to set stream anchor"));
		if (!failsafe)
			goto bad;
	}

	if (ioctl(slavefd, I_PUSH, "ldterm") == -1) {
		zperror(gettext("failed to push ldterm module"));
		if (!failsafe)
			goto bad;
	}
	if (ioctl(slavefd, I_PUSH, "ttcompat") == -1) {
		zperror(gettext("failed to push ttcompat module"));
		if (!failsafe)
			goto bad;
	}

	/*
	 * Propagate terminal settings from the external term to the new one.
	 */
	if (tcsetattr(slavefd, TCSAFLUSH, &save_termios) == -1) {
		zperror(gettext("failed to set terminal settings"));
		if (!failsafe)
			goto bad;
	}
	/* Window size is best effort; a failure is not reported. */
	(void) ioctl(slavefd, TIOCSWINSZ, (char *)&winsize);

	/* Zoning the pty is mandatory, even in failsafe mode. */
	if (zonept(masterfd, zoneid) != 0) {
		zperror(gettext("could not set zoneid of pty"));
		goto bad;
	}

	return (slavefd);

bad:
	/* Only reached once the slave has been opened. */
	(void) close(slavefd);
	return (-1);
}
457 
458 /*
459  * Place terminal into raw mode.
460  */
461 static int
462 set_tty_rawmode(int fd)
463 {
464 	struct termios term;
465 	if (tcgetattr(fd, &term) < 0) {
466 		zperror(gettext("failed to get user terminal settings"));
467 		return (-1);
468 	}
469 
470 	/* Stash for later, so we can revert back to previous mode */
471 	save_termios = term;
472 	save_fd = fd;
473 
474 	/* disable 8->7 bit strip, start/stop, enable any char to restart */
475 	term.c_iflag &= ~(ISTRIP|IXON|IXANY);
476 	/* disable NL->CR, CR->NL, ignore CR, UPPER->lower */
477 	term.c_iflag &= ~(INLCR|ICRNL|IGNCR|IUCLC);
478 	/* disable output post-processing */
479 	term.c_oflag &= ~OPOST;
480 	/* disable canonical mode, signal chars, echo & extended functions */
481 	term.c_lflag &= ~(ICANON|ISIG|ECHO|IEXTEN);
482 
483 	term.c_cc[VMIN] = 1;    /* byte-at-a-time */
484 	term.c_cc[VTIME] = 0;
485 
486 	if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &term)) {
487 		zperror(gettext("failed to set user terminal to raw mode"));
488 		return (-1);
489 	}
490 
491 	/*
492 	 * We need to know the value of VEOF so that we can properly process for
493 	 * client-side ~<EOF>.  But we have obliterated VEOF in term,
494 	 * because VMIN overloads the same array slot in non-canonical mode.
495 	 * Stupid @&^%!
496 	 *
497 	 * So here we construct the "effective" termios from the current
498 	 * terminal settings, and the corrected VEOF and VEOL settings.
499 	 */
500 	if (tcgetattr(STDIN_FILENO, &effective_termios) < 0) {
501 		zperror(gettext("failed to get user terminal settings"));
502 		return (-1);
503 	}
504 	effective_termios.c_cc[VEOF] = save_termios.c_cc[VEOF];
505 	effective_termios.c_cc[VEOL] = save_termios.c_cc[VEOL];
506 
507 	return (0);
508 }
509 
510 /*
511  * Copy terminal window size from our terminal to the pts.
512  */
513 /*ARGSUSED*/
514 static void
515 sigwinch(int s)
516 {
517 	struct winsize ws;
518 
519 	if (ioctl(0, TIOCGWINSZ, &ws) == 0)
520 		(void) ioctl(masterfd, TIOCSWINSZ, &ws);
521 }
522 
523 static volatile int close_on_sig = -1;
524 
525 static void
526 /*ARGSUSED*/
527 sigcld(int s)
528 {
529 	int status;
530 	pid_t pid;
531 
532 	/*
533 	 * Peek at the exit status.  If this isn't the process we cared
534 	 * about, then just reap it.
535 	 */
536 	if ((pid = waitpid(child_pid, &status, WNOHANG|WNOWAIT)) != -1) {
537 		if (pid == child_pid &&
538 		    (WIFEXITED(status) || WIFSIGNALED(status))) {
539 			dead = 1;
540 			if (close_on_sig != -1) {
541 				(void) write(close_on_sig, "a", 1);
542 				(void) close(close_on_sig);
543 				close_on_sig = -1;
544 			}
545 		} else {
546 			(void) waitpid(pid, &status, WNOHANG);
547 		}
548 	}
549 }
550 
551 /*
552  * Some signals (currently, SIGINT) must be forwarded on to the process
553  * group of the child process.
554  */
555 static void
556 sig_forward(int s)
557 {
558 	if (child_pid != -1) {
559 		(void) sigsend(P_PGID, child_pid, s);
560 	}
561 }
562 
563 /*
564  * reset terminal settings for global environment
565  */
566 static void
567 reset_tty()
568 {
569 	(void) tcsetattr(save_fd, TCSADRAIN, &save_termios);
570 }
571 
/*
 * Convert character to printable representation, for display with locally
 * echoed command characters (like when we need to display ~^D).
 *
 *	printable characters	-> the character itself
 *	0 through 31		-> caret notation, ^@ through ^_
 *	anything else		-> backslash and three octal digits
 *
 * cc must have room for CANONIFY_LEN bytes; the result is NUL-terminated.
 *
 * The character is handled as an unsigned char throughout: isprint() is
 * only defined for values representable as unsigned char (or EOF), and the
 * octal form of a byte with the high bit set must not be derived from a
 * sign-extended value.
 */
static void
canonify(char c, char *cc)
{
	unsigned char uc = (unsigned char)c;

	if (isprint(uc)) {
		cc[0] = c;
		cc[1] = '\0';
	} else if (uc <= 31) {	/* ^@ through ^_ */
		cc[0] = '^';
		cc[1] = uc + '@';
		cc[2] = '\0';
	} else {
		cc[0] = '\\';
		cc[1] = ((uc >> 6) & 7) + '0';
		cc[2] = ((uc >> 3) & 7) + '0';
		cc[3] = (uc & 7) + '0';
		cc[4] = '\0';
	}
}
594 
595 /*
596  * process_user_input watches the input stream for the escape sequence for
597  * 'quit' (by default, tilde-period).  Because we might be fed just one
598  * keystroke at a time, state associated with the user input (are we at the
599  * beginning of the line?  are we locally echoing the next character?) is
600  * maintained by beginning_of_line and local_echo across calls to the routine.
601  * If the write to outfd fails, we'll try to read from infd in an attempt
602  * to prevent deadlock between the two processes.
603  *
604  * This routine returns -1 when the 'quit' escape sequence has been issued,
605  * or an error is encountered, 1 if stdin is EOF, and 0 otherwise.
606  */
607 static int
608 process_user_input(int outfd, int infd)
609 {
610 	static boolean_t beginning_of_line = B_TRUE;
611 	static boolean_t local_echo = B_FALSE;
612 	char ibuf[ZLOGIN_BUFSIZ];
613 	int nbytes;
614 	char *buf = ibuf;
615 	char c = *buf;
616 
617 	nbytes = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
618 	if (nbytes == -1 && (errno != EINTR || dead))
619 		return (-1);
620 
621 	if (nbytes == -1)	/* The read was interrupted. */
622 		return (0);
623 
624 	/* 0 read means EOF, close the pipe to the child */
625 	if (nbytes == 0)
626 		return (1);
627 
628 	for (c = *buf; nbytes > 0; c = *buf, --nbytes) {
629 		buf++;
630 		if (beginning_of_line && !nocmdchar) {
631 			beginning_of_line = B_FALSE;
632 			if (c == cmdchar) {
633 				local_echo = B_TRUE;
634 				continue;
635 			}
636 		} else if (local_echo) {
637 			local_echo = B_FALSE;
638 			if (c == '.' || c == effective_termios.c_cc[VEOF]) {
639 				char cc[CANONIFY_LEN];
640 
641 				canonify(c, cc);
642 				(void) write(STDOUT_FILENO, &cmdchar, 1);
643 				(void) write(STDOUT_FILENO, cc, strlen(cc));
644 				return (-1);
645 			}
646 		}
647 retry:
648 		if (write(outfd, &c, 1) <= 0) {
649 			/*
650 			 * Since the fd we are writing to is opened with
651 			 * O_NONBLOCK it is possible to get EAGAIN if the
652 			 * pipe is full.  One way this could happen is if we
653 			 * are writing a lot of data into the pipe in this loop
654 			 * and the application on the other end is echoing that
655 			 * data back out to its stdout.  The output pipe can
656 			 * fill up since we are stuck here in this loop and not
657 			 * draining the other pipe.  We can try to read some of
658 			 * the data to see if we can drain the pipe so that the
659 			 * application can continue to make progress.  The read
660 			 * is non-blocking so we won't hang here.  We also wait
661 			 * a bit before retrying since there could be other
662 			 * reasons why the pipe is full and we don't want to
663 			 * continuously retry.
664 			 */
665 			if (errno == EAGAIN) {
666 				struct timespec rqtp;
667 				int ln;
668 				char obuf[ZLOGIN_BUFSIZ];
669 
670 				if ((ln = read(infd, obuf, ZLOGIN_BUFSIZ)) > 0)
671 					(void) write(STDOUT_FILENO, obuf, ln);
672 
673 				/* sleep for 10 milliseconds */
674 				rqtp.tv_sec = 0;
675 				rqtp.tv_nsec = MSEC2NSEC(10);
676 				(void) nanosleep(&rqtp, NULL);
677 				if (!dead)
678 					goto retry;
679 			}
680 
681 			return (-1);
682 		}
683 		beginning_of_line = (c == '\r' || c == '\n' ||
684 		    c == effective_termios.c_cc[VKILL] ||
685 		    c == effective_termios.c_cc[VEOL] ||
686 		    c == effective_termios.c_cc[VSUSP] ||
687 		    c == effective_termios.c_cc[VINTR]);
688 	}
689 	return (0);
690 }
691 
692 /*
693  * This function prevents deadlock between zlogin and the application in the
694  * zone that it is talking to.  This can happen when we read from zlogin's
695  * stdin and write the data down the pipe to the application.  If the pipe
696  * is full, we'll block in the write.  Because zlogin could be blocked in
697  * the write, it would never read the application's stdout/stderr so the
698  * application can then block on those writes (when the pipe fills up).  If the
699  * the application gets blocked this way, it can never get around to reading
700  * its stdin so that zlogin can unblock from its write.  Once in this state,
701  * the two processes are deadlocked.
702  *
703  * To prevent this, we want to verify that we can write into the pipe before we
704  * read from our stdin.  If the pipe already is pretty full, we bypass the read
705  * for now.  We'll circle back here again after the poll() so that we can
706  * try again.  When this function is called, we already know there is data
707  * ready to read on STDIN_FILENO.  We return -1 if there is a problem, 1 if
708  * stdin is EOF, and 0 if everything is ok (even though we might not have
709  * read/written any data into the pipe on this iteration).
710  */
711 static int
712 process_raw_input(int stdin_fd, int appin_fd)
713 {
714 	int cc;
715 	struct stat64 sb;
716 	char ibuf[ZLOGIN_RDBUFSIZ];
717 
718 	/* Check how much data is already in the pipe */
719 	if (fstat64(appin_fd, &sb) == -1) {
720 		perror("stat failed");
721 		return (-1);
722 	}
723 
724 	if (dead)
725 		return (-1);
726 
727 	/*
728 	 * The pipe already has a lot of data in it,  don't write any more
729 	 * right now.
730 	 */
731 	if (sb.st_size >= HI_WATER)
732 		return (0);
733 
734 	cc = read(STDIN_FILENO, ibuf, ZLOGIN_RDBUFSIZ);
735 	if (cc == -1 && (errno != EINTR || dead))
736 		return (-1);
737 
738 	if (cc == -1)	/* The read was interrupted. */
739 		return (0);
740 
741 	/* 0 read means EOF, close the pipe to the child */
742 	if (cc == 0)
743 		return (1);
744 
745 	/*
746 	 * stdin_fd is stdin of the target; so, the thing we'll write the user
747 	 * data *to*.
748 	 */
749 	if (write(stdin_fd, ibuf, cc) == -1)
750 		return (-1);
751 
752 	return (0);
753 }
754 
755 /*
756  * Write the output from the application running in the zone.  We can get
757  * a signal during the write (usually it would be SIGCHLD when the application
758  * has exited) so we loop to make sure we have written all of the data we read.
759  */
760 static int
761 process_output(int in_fd, int out_fd)
762 {
763 	int wrote = 0;
764 	int cc;
765 	char ibuf[ZLOGIN_BUFSIZ];
766 
767 	cc = read(in_fd, ibuf, ZLOGIN_BUFSIZ);
768 	if (cc == -1 && (errno != EINTR || dead))
769 		return (-1);
770 	if (cc == 0)	/* EOF */
771 		return (-1);
772 	if (cc == -1)	/* The read was interrupted. */
773 		return (0);
774 
775 	do {
776 		int len;
777 
778 		len = write(out_fd, ibuf + wrote, cc - wrote);
779 		if (len == -1 && errno != EINTR)
780 			return (-1);
781 		if (len != -1)
782 			wrote += len;
783 	} while (wrote < cc);
784 
785 	return (0);
786 }
787 
788 /*
789  * This is the main I/O loop, and is shared across all zlogin modes.
790  * Parameters:
791  * 	stdin_fd:  The fd representing 'stdin' for the slave side; input to
792  *		   the zone will be written here.
793  *
794  * 	appin_fd:  The fd representing the other end of the 'stdin' pipe (when
795  *		   we're running non-interactive); used in process_raw_input
796  *		   to ensure we don't fill up the application's stdin pipe.
797  *
798  *	stdout_fd: The fd representing 'stdout' for the slave side; output
799  *		   from the zone will arrive here.
800  *
801  *	stderr_fd: The fd representing 'stderr' for the slave side; output
802  *		   from the zone will arrive here.
803  *
804  *	raw_mode:  If TRUE, then no processing (for example, for '~.') will
805  *		   be performed on the input coming from STDIN.
806  *
807  * stderr_fd may be specified as -1 if there is no stderr (only non-interactive
808  * mode supplies a stderr).
809  *
810  */
811 static void
812 doio(int stdin_fd, int appin_fd, int stdout_fd, int stderr_fd, int sig_fd,
813     boolean_t raw_mode)
814 {
815 	struct pollfd pollfds[4];
816 	char ibuf[ZLOGIN_BUFSIZ];
817 	int cc, ret;
818 
819 	/* read from stdout of zone and write to stdout of global zone */
820 	pollfds[0].fd = stdout_fd;
821 	pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
822 
823 	/* read from stderr of zone and write to stderr of global zone */
824 	pollfds[1].fd = stderr_fd;
825 	pollfds[1].events = pollfds[0].events;
826 
827 	/* read from stdin of global zone and write to stdin of zone */
828 	pollfds[2].fd = STDIN_FILENO;
829 	pollfds[2].events = pollfds[0].events;
830 
831 	/* read from signalling pipe so we know when child dies */
832 	pollfds[3].fd = sig_fd;
833 	pollfds[3].events = pollfds[0].events;
834 
835 	for (;;) {
836 		pollfds[0].revents = pollfds[1].revents =
837 		    pollfds[2].revents = pollfds[3].revents = 0;
838 
839 		if (dead)
840 			break;
841 
842 		/*
843 		 * There is a race condition here where we can receive the
844 		 * child death signal, set the dead flag, but since we have
845 		 * passed the test above, we would go into poll and hang.
846 		 * To avoid this we use the sig_fd as an additional poll fd.
847 		 * The signal handler writes into the other end of this pipe
848 		 * when the child dies so that the poll will always see that
849 		 * input and proceed.  We just loop around at that point and
850 		 * then notice the dead flag.
851 		 */
852 
853 		ret = poll(pollfds,
854 		    sizeof (pollfds) / sizeof (struct pollfd), -1);
855 
856 		if (ret == -1 && errno != EINTR) {
857 			perror("poll failed");
858 			break;
859 		}
860 
861 		if (errno == EINTR && dead) {
862 			break;
863 		}
864 
865 		/* event from master side stdout */
866 		if (pollfds[0].revents) {
867 			if (pollfds[0].revents &
868 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
869 				if (process_output(stdout_fd, STDOUT_FILENO)
870 				    != 0)
871 					break;
872 			} else {
873 				pollerr = pollfds[0].revents;
874 				break;
875 			}
876 		}
877 
878 		/* event from master side stderr */
879 		if (pollfds[1].revents) {
880 			if (pollfds[1].revents &
881 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
882 				if (process_output(stderr_fd, STDERR_FILENO)
883 				    != 0)
884 					break;
885 			} else {
886 				pollerr = pollfds[1].revents;
887 				break;
888 			}
889 		}
890 
891 		/* event from user STDIN side */
892 		if (pollfds[2].revents) {
893 			if (pollfds[2].revents &
894 			    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
895 				/*
896 				 * stdin fd is stdin of the target; so,
897 				 * the thing we'll write the user data *to*.
898 				 *
899 				 * Also, unlike on the output side, we
900 				 * close the pipe on a zero-length message.
901 				 */
902 				int res;
903 
904 				if (raw_mode)
905 					res = process_raw_input(stdin_fd,
906 					    appin_fd);
907 				else
908 					res = process_user_input(stdin_fd,
909 					    stdout_fd);
910 
911 				if (res < 0)
912 					break;
913 				if (res > 0) {
914 					/* EOF (close) child's stdin_fd */
915 					pollfds[2].fd = -1;
916 					while ((res = close(stdin_fd)) != 0 &&
917 					    errno == EINTR)
918 						;
919 					if (res != 0)
920 						break;
921 				}
922 
923 			} else if (raw_mode && pollfds[2].revents & POLLHUP) {
924 				/*
925 				 * It's OK to get a POLLHUP on STDIN-- it
926 				 * always happens if you do:
927 				 *
928 				 * echo foo | zlogin <zone> <command>
929 				 *
930 				 * We reset fd to -1 in this case to clear
931 				 * the condition and close the pipe (EOF) to
932 				 * the other side in order to wrap things up.
933 				 */
934 				int res;
935 
936 				pollfds[2].fd = -1;
937 				while ((res = close(stdin_fd)) != 0 &&
938 				    errno == EINTR)
939 					;
940 				if (res != 0)
941 					break;
942 			} else {
943 				pollerr = pollfds[2].revents;
944 				break;
945 			}
946 		}
947 	}
948 
949 	/*
950 	 * We are in the midst of dying, but try to poll with a short
951 	 * timeout to see if we can catch the last bit of I/O from the
952 	 * children.
953 	 */
954 retry:
955 	pollfds[0].revents = pollfds[1].revents = 0;
956 	(void) poll(pollfds, 2, 100);
957 	if (pollfds[0].revents &
958 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
959 		if ((cc = read(stdout_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
960 			(void) write(STDOUT_FILENO, ibuf, cc);
961 			goto retry;
962 		}
963 	}
964 	if (pollfds[1].revents &
965 	    (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
966 		if ((cc = read(stderr_fd, ibuf, ZLOGIN_BUFSIZ)) > 0) {
967 			(void) write(STDERR_FILENO, ibuf, cc);
968 			goto retry;
969 		}
970 	}
971 }
972 
973 /*
974  * Fetch the user_cmd brand hook for getting a user's passwd(4) entry.
975  */
976 static const char *
977 zone_get_user_cmd(brand_handle_t bh, const char *login, char *user_cmd,
978     size_t len)
979 {
980 	bzero(user_cmd, sizeof (user_cmd));
981 	if (brand_get_user_cmd(bh, login, user_cmd, len) != 0)
982 		return (NULL);
983 
984 	return (user_cmd);
985 }
986 
987 /* From libc */
988 extern int str2passwd(const char *, int, void *, char *, int);
989 
990 /*
991  * exec() the user_cmd brand hook, and convert the output string to a
992  * struct passwd.  This is to be called after zone_enter().
993  *
994  */
995 static struct passwd *
996 zone_get_user_pw(const char *user_cmd, struct passwd *pwent, char *pwbuf,
997     int pwbuflen)
998 {
999 	char pwline[NSS_BUFLEN_PASSWD];
1000 	char *cin = NULL;
1001 	FILE *fin;
1002 	int status;
1003 
1004 	assert(getzoneid() != GLOBAL_ZONEID);
1005 
1006 	if ((fin = popen(user_cmd, "r")) == NULL)
1007 		return (NULL);
1008 
1009 	while (cin == NULL && !feof(fin))
1010 		cin = fgets(pwline, sizeof (pwline), fin);
1011 
1012 	if (cin == NULL) {
1013 		(void) pclose(fin);
1014 		return (NULL);
1015 	}
1016 
1017 	status = pclose(fin);
1018 	if (!WIFEXITED(status))
1019 		return (NULL);
1020 	if (WEXITSTATUS(status) != 0)
1021 		return (NULL);
1022 
1023 	if (str2passwd(pwline, sizeof (pwline), pwent, pwbuf, pwbuflen) == 0)
1024 		return (pwent);
1025 	else
1026 		return (NULL);
1027 }
1028 
1029 static char **
1030 zone_login_cmd(brand_handle_t bh, const char *login)
1031 {
1032 	static char result_buf[ARG_MAX];
1033 	char **new_argv, *ptr, *lasts;
1034 	int n, a;
1035 
1036 	/* Get the login command for the target zone. */
1037 	bzero(result_buf, sizeof (result_buf));
1038 
1039 	if (forced_login) {
1040 		if (brand_get_forcedlogin_cmd(bh, login,
1041 		    result_buf, sizeof (result_buf)) != 0)
1042 			return (NULL);
1043 	} else {
1044 		if (brand_get_login_cmd(bh, login,
1045 		    result_buf, sizeof (result_buf)) != 0)
1046 			return (NULL);
1047 	}
1048 
1049 	/*
1050 	 * We got back a string that we'd like to execute.  But since
1051 	 * we're not doing the execution via a shell we'll need to convert
1052 	 * the exec string to an array of strings.  We'll do that here
1053 	 * but we're going to be very simplistic about it and break stuff
1054 	 * up based on spaces.  We're not even going to support any kind
1055 	 * of quoting or escape characters.  It's truly amazing that
1056 	 * there is no library function in OpenSolaris to do this for us.
1057 	 */
1058 
1059 	/*
1060 	 * Be paranoid.  Since we're deliniating based on spaces make
1061 	 * sure there are no adjacent spaces.
1062 	 */
1063 	if (strstr(result_buf, "  ") != NULL)
1064 		return (NULL);
1065 
1066 	/* Remove any trailing whitespace.  */
1067 	n = strlen(result_buf);
1068 	if (result_buf[n - 1] == ' ')
1069 		result_buf[n - 1] = '\0';
1070 
1071 	/* Count how many elements there are in the exec string. */
1072 	ptr = result_buf;
1073 	for (n = 2; ((ptr = strchr(ptr + 1, (int)' ')) != NULL); n++)
1074 		;
1075 
1076 	/* Allocate the argv array that we're going to return. */
1077 	if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1078 		return (NULL);
1079 
1080 	/* Tokenize the exec string and return. */
1081 	a = 0;
1082 	new_argv[a++] = result_buf;
1083 	if (n > 2) {
1084 		(void) strtok_r(result_buf, " ", &lasts);
1085 		while ((new_argv[a++] = strtok_r(NULL, " ", &lasts)) != NULL)
1086 			;
1087 	} else {
1088 		new_argv[a++] = NULL;
1089 	}
1090 	assert(n == a);
1091 	return (new_argv);
1092 }
1093 
1094 /*
1095  * Prepare argv array for exec'd process; if we're passing commands to the
1096  * new process, then use su(1M) to do the invocation.  Otherwise, use
1097  * 'login -z <from_zonename> -f' (-z is an undocumented option which tells
1098  * login that we're coming from another zone, and to disregard its CONSOLE
1099  * checks).
1100  */
1101 static char **
1102 prep_args(brand_handle_t bh, const char *login, char **argv)
1103 {
1104 	int argc = 0, a = 0, i, n = -1;
1105 	char **new_argv;
1106 
1107 	if (argv != NULL) {
1108 		size_t subshell_len = 1;
1109 		char *subshell;
1110 
1111 		while (argv[argc] != NULL)
1112 			argc++;
1113 
1114 		for (i = 0; i < argc; i++) {
1115 			subshell_len += strlen(argv[i]) + 1;
1116 		}
1117 		if ((subshell = calloc(1, subshell_len)) == NULL)
1118 			return (NULL);
1119 
1120 		for (i = 0; i < argc; i++) {
1121 			(void) strcat(subshell, argv[i]);
1122 			(void) strcat(subshell, " ");
1123 		}
1124 
1125 		if (failsafe) {
1126 			n = 4;
1127 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1128 				return (NULL);
1129 
1130 			new_argv[a++] = FAILSAFESHELL;
1131 		} else {
1132 			n = 5;
1133 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1134 				return (NULL);
1135 
1136 			new_argv[a++] = SUPATH;
1137 			if (strcmp(login, "root") != 0) {
1138 				new_argv[a++] = "-";
1139 				n++;
1140 			}
1141 			new_argv[a++] = (char *)login;
1142 		}
1143 		new_argv[a++] = "-c";
1144 		new_argv[a++] = subshell;
1145 		new_argv[a++] = NULL;
1146 		assert(a == n);
1147 	} else {
1148 		if (failsafe) {
1149 			n = 2;
1150 			if ((new_argv = malloc(sizeof (char *) * n)) == NULL)
1151 				return (NULL);
1152 			new_argv[a++] = FAILSAFESHELL;
1153 			new_argv[a++] = NULL;
1154 			assert(n == a);
1155 		} else {
1156 			new_argv = zone_login_cmd(bh, login);
1157 		}
1158 	}
1159 
1160 	return (new_argv);
1161 }
1162 
/*
 * Helper routine for prep_env below: build a freshly allocated
 * "name=value" string.  Returns NULL if the allocation fails.
 */
static char *
add_env(char *name, char *value)
{
	size_t namelen = strlen(name);
	size_t vallen = strlen(value);
	char *entry;

	/* Room for name, '=', value and the terminating NUL. */
	entry = malloc(namelen + vallen + 2);
	if (entry == NULL)
		return (NULL);

	(void) memcpy(entry, name, namelen);
	entry[namelen] = '=';
	/* Copying vallen + 1 bytes brings value's NUL along with it. */
	(void) memcpy(entry + namelen + 1, value, vallen + 1);
	return (entry);
}
1178 
1179 /*
1180  * Prepare envp array for exec'd process.
1181  */
1182 static char **
1183 prep_env()
1184 {
1185 	int e = 0, size = 1;
1186 	char **new_env, *estr;
1187 	char *term = getenv("TERM");
1188 
1189 	size++;	/* for $PATH */
1190 	if (term != NULL)
1191 		size++;
1192 
1193 	/*
1194 	 * In failsafe mode we set $HOME, since '-l' isn't valid in this mode.
1195 	 * We also set $SHELL, since neither login nor su will be around to do
1196 	 * it.
1197 	 */
1198 	if (failsafe)
1199 		size += 2;
1200 
1201 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1202 		return (NULL);
1203 
1204 	if ((estr = add_env("PATH", DEF_PATH)) == NULL)
1205 		return (NULL);
1206 	new_env[e++] = estr;
1207 
1208 	if (term != NULL) {
1209 		if ((estr = add_env("TERM", term)) == NULL)
1210 			return (NULL);
1211 		new_env[e++] = estr;
1212 	}
1213 
1214 	if (failsafe) {
1215 		if ((estr = add_env("HOME", "/")) == NULL)
1216 			return (NULL);
1217 		new_env[e++] = estr;
1218 
1219 		if ((estr = add_env("SHELL", FAILSAFESHELL)) == NULL)
1220 			return (NULL);
1221 		new_env[e++] = estr;
1222 	}
1223 
1224 	new_env[e++] = NULL;
1225 
1226 	assert(e == size);
1227 
1228 	return (new_env);
1229 }
1230 
1231 /*
1232  * Finish the preparation of the envp array for exec'd non-interactive
1233  * zlogins.  This is called in the child process *after* we zone_enter(), since
1234  * it derives things we can only know within the zone, such as $HOME, $SHELL,
1235  * etc.  We need only do this in the non-interactive, mode, since otherwise
1236  * login(1) will do it.  We don't do this in failsafe mode, since it presents
1237  * additional ways in which the command could fail, and we'd prefer to avoid
1238  * that.
1239  */
1240 static char **
1241 prep_env_noninteractive(const char *user_cmd, char **env)
1242 {
1243 	size_t size;
1244 	char **new_env;
1245 	int e, i;
1246 	char *estr;
1247 	char varmail[LOGNAME_MAX + 11]; /* strlen(/var/mail/) = 10, NUL */
1248 	char pwbuf[NSS_BUFLEN_PASSWD + 1];
1249 	struct passwd pwent;
1250 	struct passwd *pw = NULL;
1251 
1252 	assert(env != NULL);
1253 	assert(failsafe == 0);
1254 
1255 	/*
1256 	 * Exec the "user_cmd" brand hook to get a pwent for the
1257 	 * login user.  If this fails, HOME will be set to "/", SHELL
1258 	 * will be set to $DEFAULTSHELL, and we will continue to exec
1259 	 * SUPATH <login> -c <cmd>.
1260 	 */
1261 	pw = zone_get_user_pw(user_cmd, &pwent, pwbuf, sizeof (pwbuf));
1262 
1263 	/*
1264 	 * Get existing envp size.
1265 	 */
1266 	for (size = 0; env[size] != NULL; size++)
1267 		;
1268 
1269 	e = size;
1270 
1271 	/*
1272 	 * Finish filling out the environment; we duplicate the environment
1273 	 * setup described in login(1), for lack of a better precedent.
1274 	 */
1275 	if (pw != NULL)
1276 		size += 3;	/* LOGNAME, HOME, MAIL */
1277 	else
1278 		size += 1;	/* HOME */
1279 
1280 	size++;	/* always fill in SHELL */
1281 	size++; /* terminating NULL */
1282 
1283 	if ((new_env = malloc(sizeof (char *) * size)) == NULL)
1284 		goto malloc_fail;
1285 
1286 	/*
1287 	 * Copy existing elements of env into new_env.
1288 	 */
1289 	for (i = 0; env[i] != NULL; i++) {
1290 		if ((new_env[i] = strdup(env[i])) == NULL)
1291 			goto malloc_fail;
1292 	}
1293 	assert(e == i);
1294 
1295 	if (pw != NULL) {
1296 		if ((estr = add_env("LOGNAME", pw->pw_name)) == NULL)
1297 			goto malloc_fail;
1298 		new_env[e++] = estr;
1299 
1300 		if ((estr = add_env("HOME", pw->pw_dir)) == NULL)
1301 			goto malloc_fail;
1302 		new_env[e++] = estr;
1303 
1304 		if (chdir(pw->pw_dir) != 0)
1305 			zerror(gettext("Could not chdir to home directory "
1306 			    "%s: %s"), pw->pw_dir, strerror(errno));
1307 
1308 		(void) snprintf(varmail, sizeof (varmail), "/var/mail/%s",
1309 		    pw->pw_name);
1310 		if ((estr = add_env("MAIL", varmail)) == NULL)
1311 			goto malloc_fail;
1312 		new_env[e++] = estr;
1313 	} else {
1314 		if ((estr = add_env("HOME", "/")) == NULL)
1315 			goto malloc_fail;
1316 		new_env[e++] = estr;
1317 	}
1318 
1319 	if (pw != NULL && strlen(pw->pw_shell) > 0) {
1320 		if ((estr = add_env("SHELL", pw->pw_shell)) == NULL)
1321 			goto malloc_fail;
1322 		new_env[e++] = estr;
1323 	} else {
1324 		if ((estr = add_env("SHELL", DEFAULTSHELL)) == NULL)
1325 			goto malloc_fail;
1326 		new_env[e++] = estr;
1327 	}
1328 
1329 	new_env[e++] = NULL;	/* add terminating NULL */
1330 
1331 	assert(e == size);
1332 	return (new_env);
1333 
1334 malloc_fail:
1335 	zperror(gettext("failed to allocate memory for process environment"));
1336 	return (NULL);
1337 }
1338 
/*
 * Descriptor callback (handed to fdwalk() by main): close fd unless it is
 * the descriptor whose number slavefd points at.  Always returns 0.
 */
static int
close_func(void *slavefd, int fd)
{
	const int keep = *(int *)slavefd;

	if (fd == keep)
		return (0);

	(void) close(fd);
	return (0);
}
1346 
1347 static void
1348 set_cmdchar(char *cmdcharstr)
1349 {
1350 	char c;
1351 	long lc;
1352 
1353 	if ((c = *cmdcharstr) != '\\') {
1354 		cmdchar = c;
1355 		return;
1356 	}
1357 
1358 	c = cmdcharstr[1];
1359 	if (c == '\0' || c == '\\') {
1360 		cmdchar = '\\';
1361 		return;
1362 	}
1363 
1364 	if (c < '0' || c > '7') {
1365 		zerror(gettext("Unrecognized escape character option %s"),
1366 		    cmdcharstr);
1367 		usage();
1368 	}
1369 
1370 	lc = strtol(cmdcharstr + 1, NULL, 8);
1371 	if (lc < 0 || lc > 255) {
1372 		zerror(gettext("Octal escape character '%s' too large"),
1373 		    cmdcharstr);
1374 		usage();
1375 	}
1376 	cmdchar = (char)lc;
1377 }
1378 
1379 static int
1380 setup_utmpx(char *slavename)
1381 {
1382 	struct utmpx ut;
1383 
1384 	bzero(&ut, sizeof (ut));
1385 	(void) strncpy(ut.ut_user, ".zlogin", sizeof (ut.ut_user));
1386 	(void) strncpy(ut.ut_line, slavename, sizeof (ut.ut_line));
1387 	ut.ut_pid = getpid();
1388 	ut.ut_id[0] = 'z';
1389 	ut.ut_id[1] = ut.ut_id[2] = ut.ut_id[3] = (char)SC_WILDC;
1390 	ut.ut_type = LOGIN_PROCESS;
1391 	(void) time(&ut.ut_tv.tv_sec);
1392 
1393 	if (makeutx(&ut) == NULL) {
1394 		zerror(gettext("makeutx failed"));
1395 		return (-1);
1396 	}
1397 	return (0);
1398 }
1399 
/*
 * Give up the lock file descriptor obtained from grab_lock_file() by
 * closing it; the result of close() is deliberately ignored.
 * NOTE(review): presumably closing the descriptor is what drops the
 * fcntl() lock taken in grab_lock_file() -- confirm.
 */
static void
release_lock_file(int lockfd)
{
	(void) close(lockfd);
}
1405 
1406 static int
1407 grab_lock_file(const char *zone_name, int *lockfd)
1408 {
1409 	char pathbuf[PATH_MAX];
1410 	struct flock flock;
1411 
1412 	if (mkdir(ZONES_TMPDIR, S_IRWXU) < 0 && errno != EEXIST) {
1413 		zerror(gettext("could not mkdir %s: %s"), ZONES_TMPDIR,
1414 		    strerror(errno));
1415 		return (-1);
1416 	}
1417 	(void) chmod(ZONES_TMPDIR, S_IRWXU);
1418 	(void) snprintf(pathbuf, sizeof (pathbuf), "%s/%s.zoneadm.lock",
1419 	    ZONES_TMPDIR, zone_name);
1420 
1421 	if ((*lockfd = open(pathbuf, O_RDWR|O_CREAT, S_IRUSR|S_IWUSR)) < 0) {
1422 		zerror(gettext("could not open %s: %s"), pathbuf,
1423 		    strerror(errno));
1424 		return (-1);
1425 	}
1426 	/*
1427 	 * Lock the file to synchronize with other zoneadmds
1428 	 */
1429 	flock.l_type = F_WRLCK;
1430 	flock.l_whence = SEEK_SET;
1431 	flock.l_start = (off_t)0;
1432 	flock.l_len = (off_t)0;
1433 	if (fcntl(*lockfd, F_SETLKW, &flock) < 0) {
1434 		zerror(gettext("unable to lock %s: %s"), pathbuf,
1435 		    strerror(errno));
1436 		release_lock_file(*lockfd);
1437 		return (-1);
1438 	}
1439 	return (Z_OK);
1440 }
1441 
1442 static int
1443 start_zoneadmd(const char *zone_name)
1444 {
1445 	pid_t retval;
1446 	int pstatus = 0, error = -1, lockfd, doorfd;
1447 	struct door_info info;
1448 	char doorpath[MAXPATHLEN];
1449 
1450 	(void) snprintf(doorpath, sizeof (doorpath), ZONE_DOOR_PATH, zone_name);
1451 
1452 	if (grab_lock_file(zone_name, &lockfd) != Z_OK)
1453 		return (-1);
1454 	/*
1455 	 * We must do the door check with the lock held.  Otherwise, we
1456 	 * might race against another zoneadm/zlogin process and wind
1457 	 * up with two processes trying to start zoneadmd at the same
1458 	 * time.  zoneadmd will detect this, and fail, but we prefer this
1459 	 * to be as seamless as is practical, from a user perspective.
1460 	 */
1461 	if ((doorfd = open(doorpath, O_RDONLY)) < 0) {
1462 		if (errno != ENOENT) {
1463 			zerror("failed to open %s: %s", doorpath,
1464 			    strerror(errno));
1465 			goto out;
1466 		}
1467 	} else {
1468 		/*
1469 		 * Seems to be working ok.
1470 		 */
1471 		if (door_info(doorfd, &info) == 0 &&
1472 		    ((info.di_attributes & DOOR_REVOKED) == 0)) {
1473 			error = 0;
1474 			goto out;
1475 		}
1476 	}
1477 
1478 	if ((child_pid = fork()) == -1) {
1479 		zperror(gettext("could not fork"));
1480 		goto out;
1481 	} else if (child_pid == 0) {
1482 		/* child process */
1483 		(void) execl("/usr/lib/zones/zoneadmd", "zoneadmd", "-z",
1484 		    zone_name, NULL);
1485 		zperror(gettext("could not exec zoneadmd"));
1486 		_exit(1);
1487 	}
1488 
1489 	/* parent process */
1490 	do {
1491 		retval = waitpid(child_pid, &pstatus, 0);
1492 	} while (retval != child_pid);
1493 	if (WIFSIGNALED(pstatus) ||
1494 	    (WIFEXITED(pstatus) && WEXITSTATUS(pstatus) != 0)) {
1495 		zerror(gettext("could not start %s"), "zoneadmd");
1496 		goto out;
1497 	}
1498 	error = 0;
1499 out:
1500 	release_lock_file(lockfd);
1501 	(void) close(doorfd);
1502 	return (error);
1503 }
1504 
1505 static int
1506 init_template(void)
1507 {
1508 	int fd;
1509 	int err = 0;
1510 
1511 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
1512 	if (fd == -1)
1513 		return (-1);
1514 
1515 	/*
1516 	 * zlogin doesn't do anything with the contract.
1517 	 * Deliver no events, don't inherit, and allow it to be orphaned.
1518 	 */
1519 	err |= ct_tmpl_set_critical(fd, 0);
1520 	err |= ct_tmpl_set_informative(fd, 0);
1521 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
1522 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
1523 	if (err || ct_tmpl_activate(fd)) {
1524 		(void) close(fd);
1525 		return (-1);
1526 	}
1527 
1528 	return (fd);
1529 }
1530 
1531 static int
1532 noninteractive_login(char *zonename, const char *user_cmd, zoneid_t zoneid,
1533     char **new_args, char **new_env)
1534 {
1535 	pid_t retval;
1536 	int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2], dead_child_pipe[2];
1537 	int child_status;
1538 	int tmpl_fd;
1539 	sigset_t block_cld;
1540 
1541 	if ((tmpl_fd = init_template()) == -1) {
1542 		reset_tty();
1543 		zperror(gettext("could not create contract"));
1544 		return (1);
1545 	}
1546 
1547 	if (pipe(stdin_pipe) != 0) {
1548 		zperror(gettext("could not create STDIN pipe"));
1549 		return (1);
1550 	}
1551 	/*
1552 	 * When the user types ^D, we get a zero length message on STDIN.
1553 	 * We need to echo that down the pipe to send it to the other side;
1554 	 * but by default, pipes don't propagate zero-length messages.  We
1555 	 * toggle that behavior off using I_SWROPT.  See streamio(7i).
1556 	 */
1557 	if (ioctl(stdin_pipe[0], I_SWROPT, SNDZERO) != 0) {
1558 		zperror(gettext("could not configure STDIN pipe"));
1559 		return (1);
1560 
1561 	}
1562 	if (pipe(stdout_pipe) != 0) {
1563 		zperror(gettext("could not create STDOUT pipe"));
1564 		return (1);
1565 	}
1566 	if (pipe(stderr_pipe) != 0) {
1567 		zperror(gettext("could not create STDERR pipe"));
1568 		return (1);
1569 	}
1570 
1571 	if (pipe(dead_child_pipe) != 0) {
1572 		zperror(gettext("could not create signalling pipe"));
1573 		return (1);
1574 	}
1575 	close_on_sig = dead_child_pipe[0];
1576 
1577 	/*
1578 	 * If any of the pipe FD's winds up being less than STDERR, then we
1579 	 * have a mess on our hands-- and we are lacking some of the I/O
1580 	 * streams we would expect anyway.  So we bail.
1581 	 */
1582 	if (stdin_pipe[0] <= STDERR_FILENO ||
1583 	    stdin_pipe[1] <= STDERR_FILENO ||
1584 	    stdout_pipe[0] <= STDERR_FILENO ||
1585 	    stdout_pipe[1] <= STDERR_FILENO ||
1586 	    stderr_pipe[0] <= STDERR_FILENO ||
1587 	    stderr_pipe[1] <= STDERR_FILENO ||
1588 	    dead_child_pipe[0] <= STDERR_FILENO ||
1589 	    dead_child_pipe[1] <= STDERR_FILENO) {
1590 		zperror(gettext("process lacks valid STDIN, STDOUT, STDERR"));
1591 		return (1);
1592 	}
1593 
1594 	if (prefork_dropprivs() != 0) {
1595 		zperror(gettext("could not allocate privilege set"));
1596 		return (1);
1597 	}
1598 
1599 	(void) sigset(SIGCLD, sigcld);
1600 	(void) sigemptyset(&block_cld);
1601 	(void) sigaddset(&block_cld, SIGCLD);
1602 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
1603 
1604 	if ((child_pid = fork()) == -1) {
1605 		(void) ct_tmpl_clear(tmpl_fd);
1606 		(void) close(tmpl_fd);
1607 		zperror(gettext("could not fork"));
1608 		return (1);
1609 	} else if (child_pid == 0) { /* child process */
1610 		(void) ct_tmpl_clear(tmpl_fd);
1611 
1612 		/*
1613 		 * Do a dance to get the pipes hooked up as FD's 0, 1 and 2.
1614 		 */
1615 		(void) close(STDIN_FILENO);
1616 		(void) close(STDOUT_FILENO);
1617 		(void) close(STDERR_FILENO);
1618 		(void) dup2(stdin_pipe[1], STDIN_FILENO);
1619 		(void) dup2(stdout_pipe[1], STDOUT_FILENO);
1620 		(void) dup2(stderr_pipe[1], STDERR_FILENO);
1621 		(void) closefrom(STDERR_FILENO + 1);
1622 
1623 		(void) sigset(SIGCLD, SIG_DFL);
1624 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1625 		/*
1626 		 * In case any of stdin, stdout or stderr are streams,
1627 		 * anchor them to prevent malicious I_POPs.
1628 		 */
1629 		(void) ioctl(STDIN_FILENO, I_ANCHOR);
1630 		(void) ioctl(STDOUT_FILENO, I_ANCHOR);
1631 		(void) ioctl(STDERR_FILENO, I_ANCHOR);
1632 
1633 		if (zone_enter(zoneid) == -1) {
1634 			zerror(gettext("could not enter zone %s: %s"),
1635 			    zonename, strerror(errno));
1636 			_exit(1);
1637 		}
1638 
1639 		/*
1640 		 * For non-native zones, tell libc where it can find locale
1641 		 * specific getttext() messages.
1642 		 */
1643 		if (access("/.SUNWnative/usr/lib/locale", R_OK) == 0)
1644 			(void) bindtextdomain(TEXT_DOMAIN,
1645 			    "/.SUNWnative/usr/lib/locale");
1646 		else if (access("/native/usr/lib/locale", R_OK) == 0)
1647 			(void) bindtextdomain(TEXT_DOMAIN,
1648 			    "/native/usr/lib/locale");
1649 
1650 		if (!failsafe)
1651 			new_env = prep_env_noninteractive(user_cmd, new_env);
1652 
1653 		if (new_env == NULL) {
1654 			_exit(1);
1655 		}
1656 
1657 		/*
1658 		 * Move into a new process group; the zone_enter will have
1659 		 * placed us into zsched's session, and we want to be in
1660 		 * a unique process group.
1661 		 */
1662 		(void) setpgid(getpid(), getpid());
1663 
1664 		/*
1665 		 * The child needs to run as root to
1666 		 * execute the su program.
1667 		 */
1668 		if (setuid(0) == -1) {
1669 			zperror(gettext("insufficient privilege"));
1670 			return (1);
1671 		}
1672 
1673 		(void) execve(new_args[0], new_args, new_env);
1674 		zperror(gettext("exec failure"));
1675 		_exit(1);
1676 	}
1677 	/* parent */
1678 
1679 	/* close pipe sides written by child */
1680 	(void) close(stdout_pipe[1]);
1681 	(void) close(stderr_pipe[1]);
1682 
1683 	(void) sigset(SIGINT, sig_forward);
1684 
1685 	postfork_dropprivs();
1686 
1687 	(void) ct_tmpl_clear(tmpl_fd);
1688 	(void) close(tmpl_fd);
1689 
1690 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
1691 	doio(stdin_pipe[0], stdin_pipe[1], stdout_pipe[0], stderr_pipe[0],
1692 	    dead_child_pipe[1], B_TRUE);
1693 	do {
1694 		retval = waitpid(child_pid, &child_status, 0);
1695 		if (retval == -1) {
1696 			child_status = 0;
1697 		}
1698 	} while (retval != child_pid && errno != ECHILD);
1699 
1700 	return (WEXITSTATUS(child_status));
1701 }
1702 
/*
 * Return the user name that belongs to our real user ID.
 *
 * The name is returned in freshly allocated memory, so it stays valid no
 * matter what later passwd lookups do; it is meant to live for the rest of
 * the process and is never freed.  If the name cannot be determined, the
 * error is reported and the process exits.
 */
static char *
get_username()
{
	uid_t	uid;
	struct passwd *nptr;
	char *name;

	/*
	 * Authorizations are checked to restrict access based on the
	 * requested operation and zone name, It is assumed that the
	 * program is running with all privileges, but that the real
	 * user ID is that of the user or role on whose behalf we are
	 * operating. So we start by getting the username that will be
	 * used for subsequent authorization checks.
	 */

	uid = getuid();

	/*
	 * getpwuid() may hand back storage that a later getpw*() call
	 * overwrites, so keep a private copy of the name.
	 */
	if ((nptr = getpwuid(uid)) == NULL ||
	    (name = strdup(nptr->pw_name)) == NULL) {
		zerror(gettext("could not get user name."));
		_exit(1);
	}
	return (name);
}
1726 int
1727 main(int argc, char **argv)
1728 {
1729 	int arg, console = 0;
1730 	zoneid_t zoneid;
1731 	zone_state_t st;
1732 	char *login = "root";
1733 	int lflag = 0;
1734 	int nflag = 0;
1735 	char *zonename = NULL;
1736 	char **proc_args = NULL;
1737 	char **new_args, **new_env;
1738 	sigset_t block_cld;
1739 	char devroot[MAXPATHLEN];
1740 	char *slavename, slaveshortname[MAXPATHLEN];
1741 	priv_set_t *privset;
1742 	int tmpl_fd;
1743 	char zonebrand[MAXNAMELEN];
1744 	char default_brand[MAXNAMELEN];
1745 	struct stat sb;
1746 	char kernzone[ZONENAME_MAX];
1747 	brand_handle_t bh;
1748 	char user_cmd[MAXPATHLEN];
1749 	char authname[MAXAUTHS];
1750 
1751 	(void) setlocale(LC_ALL, "");
1752 	(void) textdomain(TEXT_DOMAIN);
1753 
1754 	(void) getpname(argv[0]);
1755 	username = get_username();
1756 
1757 	while ((arg = getopt(argc, argv, "dnECR:Se:l:Q")) != EOF) {
1758 		switch (arg) {
1759 		case 'C':
1760 			console = 1;
1761 			break;
1762 		case 'E':
1763 			nocmdchar = 1;
1764 			break;
1765 		case 'R':	/* undocumented */
1766 			if (*optarg != '/') {
1767 				zerror(gettext("root path must be absolute."));
1768 				exit(2);
1769 			}
1770 			if (stat(optarg, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
1771 				zerror(
1772 				    gettext("root path must be a directory."));
1773 				exit(2);
1774 			}
1775 			zonecfg_set_root(optarg);
1776 			break;
1777 		case 'Q':
1778 			quiet = 1;
1779 			break;
1780 		case 'S':
1781 			failsafe = 1;
1782 			break;
1783 		case 'd':
1784 			disconnect = 1;
1785 			break;
1786 		case 'e':
1787 			set_cmdchar(optarg);
1788 			break;
1789 		case 'l':
1790 			login = optarg;
1791 			lflag = 1;
1792 			break;
1793 		case 'n':
1794 			nflag = 1;
1795 			break;
1796 		default:
1797 			usage();
1798 		}
1799 	}
1800 
1801 	if (console != 0) {
1802 
1803 		if (lflag != 0) {
1804 			zerror(gettext(
1805 			    "-l may not be specified for console login"));
1806 			usage();
1807 		}
1808 
1809 		if (nflag != 0) {
1810 			zerror(gettext(
1811 			    "-n may not be specified for console login"));
1812 			usage();
1813 		}
1814 
1815 		if (failsafe != 0) {
1816 			zerror(gettext(
1817 			    "-S may not be specified for console login"));
1818 			usage();
1819 		}
1820 
1821 		if (zonecfg_in_alt_root()) {
1822 			zerror(gettext(
1823 			    "-R may not be specified for console login"));
1824 			exit(2);
1825 		}
1826 
1827 	}
1828 
1829 	if (failsafe != 0 && lflag != 0) {
1830 		zerror(gettext("-l may not be specified for failsafe login"));
1831 		usage();
1832 	}
1833 
1834 	if (!console && disconnect != 0) {
1835 		zerror(gettext(
1836 		    "-d may only be specified with console login"));
1837 		usage();
1838 	}
1839 
1840 	if (optind == (argc - 1)) {
1841 		/*
1842 		 * zone name, no process name; this should be an interactive
1843 		 * as long as STDIN is really a tty.
1844 		 */
1845 		if (nflag != 0) {
1846 			zerror(gettext(
1847 			    "-n may not be specified for interactive login"));
1848 			usage();
1849 		}
1850 		if (isatty(STDIN_FILENO))
1851 			interactive = 1;
1852 		zonename = argv[optind];
1853 	} else if (optind < (argc - 1)) {
1854 		if (console) {
1855 			zerror(gettext("Commands may not be specified for "
1856 			    "console login."));
1857 			usage();
1858 		}
1859 		/* zone name and process name, and possibly some args */
1860 		zonename = argv[optind];
1861 		proc_args = &argv[optind + 1];
1862 		interactive = 0;
1863 	} else {
1864 		usage();
1865 	}
1866 
1867 	if (getzoneid() != GLOBAL_ZONEID) {
1868 		zerror(gettext("'%s' may only be used from the global zone"),
1869 		    pname);
1870 		return (1);
1871 	}
1872 
1873 	if (strcmp(zonename, GLOBAL_ZONENAME) == 0) {
1874 		zerror(gettext("'%s' not applicable to the global zone"),
1875 		    pname);
1876 		return (1);
1877 	}
1878 
1879 	if (zone_get_state(zonename, &st) != Z_OK) {
1880 		zerror(gettext("zone '%s' unknown"), zonename);
1881 		return (1);
1882 	}
1883 
1884 	if (st < ZONE_STATE_INSTALLED) {
1885 		zerror(gettext("cannot login to a zone which is '%s'"),
1886 		    zone_state_str(st));
1887 		return (1);
1888 	}
1889 
1890 	/*
1891 	 * In both console and non-console cases, we require all privs.
1892 	 * In the console case, because we may need to startup zoneadmd.
1893 	 * In the non-console case in order to do zone_enter(2), zonept()
1894 	 * and other tasks.
1895 	 */
1896 
1897 	if ((privset = priv_allocset()) == NULL) {
1898 		zperror(gettext("priv_allocset failed"));
1899 		return (1);
1900 	}
1901 
1902 	if (getppriv(PRIV_EFFECTIVE, privset) != 0) {
1903 		zperror(gettext("getppriv failed"));
1904 		priv_freeset(privset);
1905 		return (1);
1906 	}
1907 
1908 	if (priv_isfullset(privset) == B_FALSE) {
1909 		zerror(gettext("You lack sufficient privilege to run "
1910 		    "this command (all privs required)"));
1911 		priv_freeset(privset);
1912 		return (1);
1913 	}
1914 	priv_freeset(privset);
1915 
1916 	/*
1917 	 * Check if user is authorized for requested usage of the zone
1918 	 */
1919 
1920 	(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1921 	    ZONE_MANAGE_AUTH, KV_OBJECT, zonename);
1922 	if (chkauthattr(authname, username) == 0) {
1923 		if (console) {
1924 			zerror(gettext("%s is not authorized for console "
1925 			    "access to  %s zone."),
1926 			    username, zonename);
1927 			return (1);
1928 		} else {
1929 			(void) snprintf(authname, MAXAUTHS, "%s%s%s",
1930 			    ZONE_LOGIN_AUTH, KV_OBJECT, zonename);
1931 			if (failsafe || !interactive) {
1932 				zerror(gettext("%s is not authorized for  "
1933 				    "failsafe or non-interactive login "
1934 				    "to  %s zone."), username, zonename);
1935 				return (1);
1936 			} else if (chkauthattr(authname, username) == 0) {
1937 				zerror(gettext("%s is not authorized "
1938 				    " to login to %s zone."),
1939 				    username, zonename);
1940 				return (1);
1941 			}
1942 		}
1943 	} else {
1944 		forced_login = B_TRUE;
1945 	}
1946 
1947 	/*
1948 	 * The console is a separate case from the rest of the code; handle
1949 	 * it first.
1950 	 */
1951 	if (console) {
1952 		/*
1953 		 * Ensure that zoneadmd for this zone is running.
1954 		 */
1955 		if (start_zoneadmd(zonename) == -1)
1956 			return (1);
1957 
1958 		/*
1959 		 * Make contact with zoneadmd.
1960 		 */
1961 		if (get_console_master(zonename) == -1)
1962 			return (1);
1963 
1964 		if (!quiet)
1965 			(void) printf(
1966 			    gettext("[Connected to zone '%s' console]\n"),
1967 			    zonename);
1968 
1969 		if (set_tty_rawmode(STDIN_FILENO) == -1) {
1970 			reset_tty();
1971 			zperror(gettext("failed to set stdin pty to raw mode"));
1972 			return (1);
1973 		}
1974 
1975 		(void) sigset(SIGWINCH, sigwinch);
1976 		(void) sigwinch(0);
1977 
1978 		/*
1979 		 * Run the I/O loop until we get disconnected.
1980 		 */
1981 		doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
1982 		reset_tty();
1983 		if (!quiet)
1984 			(void) printf(
1985 			    gettext("\n[Connection to zone '%s' console "
1986 			    "closed]\n"), zonename);
1987 
1988 		return (0);
1989 	}
1990 
1991 	if (st != ZONE_STATE_RUNNING && st != ZONE_STATE_MOUNTED) {
1992 		zerror(gettext("login allowed only to running zones "
1993 		    "(%s is '%s')."), zonename, zone_state_str(st));
1994 		return (1);
1995 	}
1996 
1997 	(void) strlcpy(kernzone, zonename, sizeof (kernzone));
1998 	if (zonecfg_in_alt_root()) {
1999 		FILE *fp = zonecfg_open_scratch("", B_FALSE);
2000 
2001 		if (fp == NULL || zonecfg_find_scratch(fp, zonename,
2002 		    zonecfg_get_root(), kernzone, sizeof (kernzone)) == -1) {
2003 			zerror(gettext("cannot find scratch zone %s"),
2004 			    zonename);
2005 			if (fp != NULL)
2006 				zonecfg_close_scratch(fp);
2007 			return (1);
2008 		}
2009 		zonecfg_close_scratch(fp);
2010 	}
2011 
2012 	if ((zoneid = getzoneidbyname(kernzone)) == -1) {
2013 		zerror(gettext("failed to get zoneid for zone '%s'"),
2014 		    zonename);
2015 		return (1);
2016 	}
2017 
2018 	/*
2019 	 * We need the zone root path only if we are setting up a pty.
2020 	 */
2021 	if (zone_get_devroot(zonename, devroot, sizeof (devroot)) == -1) {
2022 		zerror(gettext("could not get dev path for zone %s"),
2023 		    zonename);
2024 		return (1);
2025 	}
2026 
2027 	if (zone_get_brand(zonename, zonebrand, sizeof (zonebrand)) != Z_OK) {
2028 		zerror(gettext("could not get brand for zone %s"), zonename);
2029 		return (1);
2030 	}
2031 	/*
2032 	 * In the alternate root environment, the only supported
2033 	 * operations are mount and unmount.  In this case, just treat
2034 	 * the zone as native if it is cluster.  Cluster zones can be
2035 	 * native for the purpose of LU or upgrade, and the cluster
2036 	 * brand may not exist in the miniroot (such as in net install
2037 	 * upgrade).
2038 	 */
2039 	if (zonecfg_default_brand(default_brand,
2040 	    sizeof (default_brand)) != Z_OK) {
2041 		zerror(gettext("unable to determine default brand"));
2042 		return (1);
2043 	}
2044 	if (zonecfg_in_alt_root() &&
2045 	    strcmp(zonebrand, CLUSTER_BRAND_NAME) == 0) {
2046 		(void) strlcpy(zonebrand, default_brand, sizeof (zonebrand));
2047 	}
2048 
2049 	if ((bh = brand_open(zonebrand)) == NULL) {
2050 		zerror(gettext("could not open brand for zone %s"), zonename);
2051 		return (1);
2052 	}
2053 
2054 	if ((new_args = prep_args(bh, login, proc_args)) == NULL) {
2055 		zperror(gettext("could not assemble new arguments"));
2056 		brand_close(bh);
2057 		return (1);
2058 	}
2059 	/*
2060 	 * Get the brand specific user_cmd.  This command is used to get
2061 	 * a passwd(4) entry for login.
2062 	 */
2063 	if (!interactive && !failsafe) {
2064 		if (zone_get_user_cmd(bh, login, user_cmd,
2065 		    sizeof (user_cmd)) == NULL) {
2066 			zerror(gettext("could not get user_cmd for zone %s"),
2067 			    zonename);
2068 			brand_close(bh);
2069 			return (1);
2070 		}
2071 	}
2072 	brand_close(bh);
2073 
2074 	if ((new_env = prep_env()) == NULL) {
2075 		zperror(gettext("could not assemble new environment"));
2076 		return (1);
2077 	}
2078 
2079 	if (!interactive) {
2080 		if (nflag) {
2081 			int nfd;
2082 
2083 			if ((nfd = open(_PATH_DEVNULL, O_RDONLY)) < 0) {
2084 				zperror(gettext("failed to open null device"));
2085 				return (1);
2086 			}
2087 			if (nfd != STDIN_FILENO) {
2088 				if (dup2(nfd, STDIN_FILENO) < 0) {
2089 					zperror(gettext(
2090 					    "failed to dup2 null device"));
2091 					return (1);
2092 				}
2093 				(void) close(nfd);
2094 			}
2095 			/* /dev/null is now standard input */
2096 		}
2097 		return (noninteractive_login(zonename, user_cmd, zoneid,
2098 		    new_args, new_env));
2099 	}
2100 
2101 	if (zonecfg_in_alt_root()) {
2102 		zerror(gettext("cannot use interactive login with scratch "
2103 		    "zone"));
2104 		return (1);
2105 	}
2106 
2107 	/*
2108 	 * Things are more complex in interactive mode; we get the
2109 	 * master side of the pty, then place the user's terminal into
2110 	 * raw mode.
2111 	 */
2112 	if (get_master_pty() == -1) {
2113 		zerror(gettext("could not setup master pty device"));
2114 		return (1);
2115 	}
2116 
2117 	/*
2118 	 * Compute the "short name" of the pts.  /dev/pts/2 --> pts/2
2119 	 */
2120 	if ((slavename = ptsname(masterfd)) == NULL) {
2121 		zperror(gettext("failed to get name for pseudo-tty"));
2122 		return (1);
2123 	}
2124 	if (strncmp(slavename, "/dev/", strlen("/dev/")) == 0)
2125 		(void) strlcpy(slaveshortname, slavename + strlen("/dev/"),
2126 		    sizeof (slaveshortname));
2127 	else
2128 		(void) strlcpy(slaveshortname, slavename,
2129 		    sizeof (slaveshortname));
2130 
2131 	if (!quiet)
2132 		(void) printf(gettext("[Connected to zone '%s' %s]\n"),
2133 		    zonename, slaveshortname);
2134 
2135 	if (set_tty_rawmode(STDIN_FILENO) == -1) {
2136 		reset_tty();
2137 		zperror(gettext("failed to set stdin pty to raw mode"));
2138 		return (1);
2139 	}
2140 
2141 	if (prefork_dropprivs() != 0) {
2142 		reset_tty();
2143 		zperror(gettext("could not allocate privilege set"));
2144 		return (1);
2145 	}
2146 
2147 	/*
2148 	 * We must mask SIGCLD until after we have coped with the fork
2149 	 * sufficiently to deal with it; otherwise we can race and receive the
2150 	 * signal before child_pid has been initialized (yes, this really
2151 	 * happens).
2152 	 */
2153 	(void) sigset(SIGCLD, sigcld);
2154 	(void) sigemptyset(&block_cld);
2155 	(void) sigaddset(&block_cld, SIGCLD);
2156 	(void) sigprocmask(SIG_BLOCK, &block_cld, NULL);
2157 
2158 	/*
2159 	 * We activate the contract template at the last minute to
2160 	 * avoid intermediate functions that could be using fork(2)
2161 	 * internally.
2162 	 */
2163 	if ((tmpl_fd = init_template()) == -1) {
2164 		reset_tty();
2165 		zperror(gettext("could not create contract"));
2166 		return (1);
2167 	}
2168 
2169 	if ((child_pid = fork()) == -1) {
2170 		(void) ct_tmpl_clear(tmpl_fd);
2171 		reset_tty();
2172 		zperror(gettext("could not fork"));
2173 		return (1);
2174 	} else if (child_pid == 0) { /* child process */
2175 		int slavefd, newslave;
2176 
2177 		(void) ct_tmpl_clear(tmpl_fd);
2178 		(void) close(tmpl_fd);
2179 
2180 		(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2181 
2182 		if ((slavefd = init_slave_pty(zoneid, devroot)) == -1)
2183 			return (1);
2184 
2185 		/*
2186 		 * Close all fds except for the slave pty.
2187 		 */
2188 		(void) fdwalk(close_func, &slavefd);
2189 
2190 		/*
2191 		 * Temporarily dup slavefd to stderr; that way if we have
2192 		 * to print out that zone_enter failed, the output will
2193 		 * have somewhere to go.
2194 		 */
2195 		if (slavefd != STDERR_FILENO)
2196 			(void) dup2(slavefd, STDERR_FILENO);
2197 
2198 		if (zone_enter(zoneid) == -1) {
2199 			zerror(gettext("could not enter zone %s: %s"),
2200 			    zonename, strerror(errno));
2201 			return (1);
2202 		}
2203 
2204 		if (slavefd != STDERR_FILENO)
2205 			(void) close(STDERR_FILENO);
2206 
2207 		/*
2208 		 * We take pains to get this process into a new process
2209 		 * group, and subsequently a new session.  In this way,
2210 		 * we'll have a session which doesn't yet have a controlling
2211 		 * terminal.  When we open the slave, it will become the
2212 		 * controlling terminal; no PIDs concerning pgrps or sids
2213 		 * will leak inappropriately into the zone.
2214 		 */
2215 		(void) setpgrp();
2216 
2217 		/*
2218 		 * We need the slave pty to be referenced from the zone's
2219 		 * /dev in order to ensure that the devt's, etc are all
2220 		 * correct.  Otherwise we break ttyname and the like.
2221 		 */
2222 		if ((newslave = open(slavename, O_RDWR)) == -1) {
2223 			(void) close(slavefd);
2224 			return (1);
2225 		}
2226 		(void) close(slavefd);
2227 		slavefd = newslave;
2228 
2229 		/*
2230 		 * dup the slave to the various FDs, so that when the
2231 		 * spawned process does a write/read it maps to the slave
2232 		 * pty.
2233 		 */
2234 		(void) dup2(slavefd, STDIN_FILENO);
2235 		(void) dup2(slavefd, STDOUT_FILENO);
2236 		(void) dup2(slavefd, STDERR_FILENO);
2237 		if (slavefd != STDIN_FILENO && slavefd != STDOUT_FILENO &&
2238 		    slavefd != STDERR_FILENO) {
2239 			(void) close(slavefd);
2240 		}
2241 
2242 		/*
2243 		 * In failsafe mode, we don't use login(1), so don't try
2244 		 * setting up a utmpx entry.
2245 		 */
2246 		if (!failsafe)
2247 			if (setup_utmpx(slaveshortname) == -1)
2248 				return (1);
2249 
2250 		/*
2251 		 * The child needs to run as root to
2252 		 * execute the brand's login program.
2253 		 */
2254 		if (setuid(0) == -1) {
2255 			zperror(gettext("insufficient privilege"));
2256 			return (1);
2257 		}
2258 
2259 		(void) execve(new_args[0], new_args, new_env);
2260 		zperror(gettext("exec failure"));
2261 		return (1);
2262 	}
2263 
2264 	(void) ct_tmpl_clear(tmpl_fd);
2265 	(void) close(tmpl_fd);
2266 
2267 	/*
2268 	 * The rest is only for the parent process.
2269 	 */
2270 	(void) sigset(SIGWINCH, sigwinch);
2271 
2272 	postfork_dropprivs();
2273 
2274 	(void) sigprocmask(SIG_UNBLOCK, &block_cld, NULL);
2275 	doio(masterfd, -1, masterfd, -1, -1, B_FALSE);
2276 
2277 	reset_tty();
2278 	if (!quiet)
2279 		(void) fprintf(stderr,
2280 		    gettext("\n[Connection to zone '%s' %s closed]\n"),
2281 		    zonename, slaveshortname);
2282 
2283 	if (pollerr != 0) {
2284 		(void) fprintf(stderr, gettext("Error: connection closed due "
2285 		    "to unexpected pollevents=0x%x.\n"), pollerr);
2286 		return (1);
2287 	}
2288 
2289 	return (0);
2290 }
2291