1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * wait.c - asynchronous monitoring of "wait registered" start methods 31 * 32 * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo 33 * files. If one of these fds returns an event, then we inform the restarter 34 * that it has stopped. 35 * 36 * The wait_info_list holds the series of processes currently being monitored 37 * for exit. The wi_fd member, which contains the file descriptor of the psinfo 38 * file being polled upon ("event ported upon"), will be set to -1 if the file 39 * descriptor is inactive (already closed or not yet opened). 
*/

/*
 * NOTE(review): _FILE_OFFSET_BITS is forced off before any header is pulled
 * in, presumably because <procfs.h> cannot be used from the large-file
 * compilation environment -- confirm before removing.
 */
#ifdef _FILE_OFFSET_BITS
#undef _FILE_OFFSET_BITS
#endif /* _FILE_OFFSET_BITS */

#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/wait.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libuutil.h>
#include <poll.h>
#include <port.h>
#include <pthread.h>
#include <procfs.h>
#include <string.h>
#include <stropts.h>
#include <unistd.h>

#include "startd.h"

/*
 * Value wait_init() installs as both the soft and the hard RLIMIT_NOFILE, so
 * that one psinfo descriptor can be held open per monitored process.
 */
#define	WAIT_FILES	262144		/* reasonably high maximum */

/* Event port created by wait_init(); every psinfo fd is associated with it. */
static int port_fd;

/* Repository handle used by wait_remove(); bound lazily on first use. */
static scf_handle_t *wait_hndl;

/*
 * RLIMIT_NOFILE as it was found by wait_init(), before being raised.
 * wait_postfork() reinstates it in the child.
 */
static struct rlimit init_fd_rlimit;

/* Pool and list holding one wait_info_t per process being monitored. */
static uu_list_pool_t *wait_info_pool;
static uu_list_t *wait_info_list;

/*
 * Serialises insertion into and removal from wait_info_list.  It is also
 * taken by wait_prefork() and dropped by wait_postfork().
 */
static pthread_mutex_t wait_info_lock;

/*
 * void wait_remove(wait_info_t *, int)
 *   Remove the given wait_info structure from our list, performing various
 *   cleanup operations along the way.  If the direct flag is false (as it is
 *   when wait_thread() calls us), then notify the restarter that the
 *   associated instance has exited.
 *
 *   NOTE(review): this comment used to read "meaning that we are being called
 *   with from restarter instance list context"; confirm which calling context
 *   a false direct flag is meant to denote.
 *
 *   Since we may no longer be the startd that started this process, we only are
 *   concerned with a waitpid(3C) failure if the wi_parent field is non-zero.
86 */ 87 static void 88 wait_remove(wait_info_t *wi, int direct) 89 { 90 int status; 91 92 if (waitpid(wi->wi_pid, &status, 0) == -1) { 93 if (wi->wi_parent) 94 log_framework(LOG_INFO, 95 "instance %s waitpid failure: %s\n", wi->wi_fmri, 96 strerror(errno)); 97 } else { 98 if (WEXITSTATUS(status) != 0) { 99 log_framework(LOG_NOTICE, 100 "instance %s exited with status %d\n", wi->wi_fmri, 101 WEXITSTATUS(status)); 102 } 103 } 104 105 MUTEX_LOCK(&wait_info_lock); 106 if (wi->wi_fd != -1) { 107 startd_close(wi->wi_fd); 108 wi->wi_fd = -1; 109 } 110 uu_list_remove(wait_info_list, wi); 111 MUTEX_UNLOCK(&wait_info_lock); 112 113 /* 114 * Make an attempt to clear out any utmpx record associated with this 115 * PID. 116 */ 117 utmpx_mark_dead(wi->wi_pid, status, B_FALSE); 118 119 if (!direct) { 120 /* 121 * Bind wait_hndl lazily. 122 */ 123 if (wait_hndl == NULL) { 124 for (wait_hndl = 125 libscf_handle_create_bound(SCF_VERSION); 126 wait_hndl == NULL; 127 wait_hndl = 128 libscf_handle_create_bound(SCF_VERSION)) { 129 log_error(LOG_INFO, "[wait_remove] Unable to " 130 "bind a new repository handle: %s\n", 131 scf_strerror(scf_error())); 132 (void) sleep(2); 133 } 134 } 135 136 log_framework(LOG_DEBUG, 137 "wait_remove requesting stop of %s\n", wi->wi_fmri); 138 (void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT); 139 } 140 141 uu_list_node_fini(wi, &wi->wi_link, wait_info_pool); 142 startd_free(wi, sizeof (wait_info_t)); 143 } 144 145 /* 146 * int wait_register(pid_t, char *, int, int) 147 * wait_register is called after we have called fork(2), and know which pid we 148 * wish to monitor. However, since the child may have already exited by the 149 * time we are called, we must handle the error cases from open(2) 150 * appropriately. The am_parent flag is recorded to handle waitpid(2) 151 * behaviour on removal; similarly, the direct flag is passed through to a 152 * potential call to wait_remove() to govern its behaviour in different 153 * contexts. 
154 * 155 * Returns 0 if registration successful, 1 if child pid did not exist, and -1 156 * if a different error occurred. 157 */ 158 int 159 wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct) 160 { 161 char *fname = uu_msprintf("/proc/%ld/psinfo", pid); 162 int fd; 163 wait_info_t *wi; 164 165 assert(pid != 0); 166 167 if (fname == NULL) 168 return (-1); 169 170 wi = startd_alloc(sizeof (wait_info_t)); 171 172 uu_list_node_init(wi, &wi->wi_link, wait_info_pool); 173 174 wi->wi_fd = -1; 175 wi->wi_pid = pid; 176 wi->wi_fmri = inst_fmri; 177 wi->wi_parent = am_parent; 178 179 MUTEX_LOCK(&wait_info_lock); 180 (void) uu_list_insert_before(wait_info_list, NULL, wi); 181 MUTEX_UNLOCK(&wait_info_lock); 182 183 if ((fd = open(fname, O_RDONLY)) == -1) { 184 if (errno == ENOENT) { 185 /* 186 * Child has already exited. 187 */ 188 wait_remove(wi, direct); 189 uu_free(fname); 190 return (1); 191 } else { 192 log_error(LOG_WARNING, 193 "open %s failed; not monitoring %s: %s\n", fname, 194 inst_fmri, strerror(errno)); 195 uu_free(fname); 196 return (-1); 197 } 198 } 199 200 uu_free(fname); 201 202 wi->wi_fd = fd; 203 204 if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) { 205 log_error(LOG_WARNING, 206 "initial port_association of %d / %s failed: %s\n", fd, 207 inst_fmri, strerror(errno)); 208 return (-1); 209 } 210 211 log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd, 212 inst_fmri); 213 214 return (0); 215 } 216 217 /*ARGSUSED*/ 218 void * 219 wait_thread(void *args) 220 { 221 for (;;) { 222 port_event_t pe; 223 int fd; 224 wait_info_t *wi; 225 226 if (port_get(port_fd, &pe, NULL) != 0) { 227 if (errno == EINTR) 228 continue; 229 else { 230 log_error(LOG_WARNING, 231 "port_get() failed with %s\n", 232 strerror(errno)); 233 bad_error("port_get", errno); 234 } 235 } 236 237 fd = pe.portev_object; 238 wi = pe.portev_user; 239 assert(wi != NULL); 240 assert(fd == wi->wi_fd); 241 242 if ((pe.portev_events & POLLHUP) == POLLHUP) 
{ 243 psinfo_t psi; 244 245 if (lseek(fd, 0, SEEK_SET) != 0 || 246 read(fd, &psi, sizeof (psinfo_t)) != 247 sizeof (psinfo_t)) { 248 log_framework(LOG_WARNING, 249 "couldn't get psinfo data for %s (%s); " 250 "assuming failed\n", wi->wi_fmri, 251 strerror(errno)); 252 goto err_remove; 253 } 254 255 if (psi.pr_nlwp != 0 || 256 psi.pr_nzomb != 0 || 257 psi.pr_lwp.pr_lwpid != 0) { 258 /* 259 * We have determined, in accordance with the 260 * definition in proc(4), this process is not a 261 * zombie. Reassociate. 262 */ 263 if (port_associate(port_fd, PORT_SOURCE_FD, fd, 264 0, wi)) 265 log_error(LOG_WARNING, 266 "port_association of %d / %s " 267 "failed\n", fd, wi->wi_fmri); 268 continue; 269 } 270 } else if ( 271 (pe.portev_events & POLLERR) == 0) { 272 if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) 273 log_error(LOG_WARNING, 274 "port_association of %d / %s " 275 "failed\n", fd, wi->wi_fmri); 276 continue; 277 } 278 279 err_remove: 280 wait_remove(wi, 0); 281 } 282 283 /*LINTED E_FUNC_HAS_NO_RETURN_STMT*/ 284 } 285 286 void 287 wait_prefork() 288 { 289 MUTEX_LOCK(&wait_info_lock); 290 } 291 292 void 293 wait_postfork(pid_t pid) 294 { 295 wait_info_t *wi; 296 297 MUTEX_UNLOCK(&wait_info_lock); 298 299 if (pid != 0) 300 return; 301 302 /* 303 * Close all of the child's wait-related fds. The wait_thread() is 304 * gone, so no need to worry about returning events. We always exec(2) 305 * after a fork request, so we needn't free the list elements 306 * themselves. 
307 */ 308 309 for (wi = uu_list_first(wait_info_list); 310 wi != NULL; 311 wi = uu_list_next(wait_info_list, wi)) { 312 if (wi->wi_fd != -1) 313 startd_close(wi->wi_fd); 314 } 315 316 startd_close(port_fd); 317 318 (void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 319 } 320 321 void 322 wait_init() 323 { 324 struct rlimit fd_new; 325 326 (void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 327 (void) getrlimit(RLIMIT_NOFILE, &fd_new); 328 329 fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES; 330 331 (void) setrlimit(RLIMIT_NOFILE, &fd_new); 332 333 if ((port_fd = port_create()) == -1) 334 uu_die("wait_init couldn't port_create"); 335 336 wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t), 337 offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG); 338 if (wait_info_pool == NULL) 339 uu_die("wait_init couldn't create wait_info_pool"); 340 341 wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0); 342 if (wait_info_list == NULL) 343 uu_die("wait_init couldn't create wait_info_list"); 344 345 (void) pthread_mutex_init(&wait_info_lock, &mutex_attrs); 346 } 347