1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * wait.c - asynchronous monitoring of "wait registered" start methods 31 * 32 * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo 33 * files. If one of these fds returns an event, then we inform the restarter 34 * that it has stopped. 35 * 36 * The wait_info_list holds the series of processes currently being monitored 37 * for exit. The wi_fd member, which contains the file descriptor of the psinfo 38 * file being polled upon ("event ported upon"), will be set to -1 if the file 39 * descriptor is inactive (already closed or not yet opened). 40 */ 41 42 #ifdef _FILE_OFFSET_BITS 43 #undef _FILE_OFFSET_BITS 44 #endif /* _FILE_OFFSET_BITS */ 45 46 #include <sys/resource.h> 47 #include <sys/stat.h> 48 #include <sys/types.h> 49 #include <sys/uio.h> 50 #include <sys/wait.h> 51 52 #include <assert.h> 53 #include <errno.h> 54 #include <fcntl.h> 55 #include <libuutil.h> 56 #include <poll.h> 57 #include <port.h> 58 #include <pthread.h> 59 #include <procfs.h> 60 #include <string.h> 61 #include <stropts.h> 62 #include <unistd.h> 63 64 #include "startd.h" 65 66 #define WAIT_FILES 262144 /* reasonably high maximum */ 67 68 static int port_fd; 69 static scf_handle_t *wait_hndl; 70 static struct rlimit init_fd_rlimit; 71 72 static uu_list_pool_t *wait_info_pool; 73 static uu_list_t *wait_info_list; 74 75 static pthread_mutex_t wait_info_lock; 76 77 /* 78 * void wait_remove(wait_info_t *, int) 79 * Remove the given wait_info structure from our list, performing various 80 * cleanup operations along the way. If the direct flag is false (meaning 81 * that we are being called with from restarter instance list context), then 82 * notify the restarter that the associated instance has exited. 83 * 84 * Since we may no longer be the startd that started this process, we only are 85 * concerned with a waitpid(3C) failure if the wi_parent field is non-zero. 86 */ 87 static void 88 wait_remove(wait_info_t *wi, int direct) 89 { 90 int status; 91 92 if (waitpid(wi->wi_pid, &status, 0) == -1) { 93 if (wi->wi_parent) 94 log_framework(LOG_INFO, 95 "instance %s waitpid failure: %s\n", wi->wi_fmri, 96 strerror(errno)); 97 } else { 98 if (WEXITSTATUS(status) != 0) { 99 log_framework(LOG_NOTICE, 100 "instance %s exited with status %d\n", wi->wi_fmri, 101 WEXITSTATUS(status)); 102 } 103 } 104 105 MUTEX_LOCK(&wait_info_lock); 106 uu_list_remove(wait_info_list, wi); 107 MUTEX_UNLOCK(&wait_info_lock); 108 109 /* 110 * Make an attempt to clear out any utmpx record associated with this 111 * PID. 112 */ 113 utmpx_mark_dead(wi->wi_pid, status, B_FALSE); 114 115 if (!direct) { 116 /* 117 * Bind wait_hndl lazily. 118 */ 119 if (wait_hndl == NULL) { 120 for (wait_hndl = 121 libscf_handle_create_bound(SCF_VERSION); 122 wait_hndl == NULL; 123 wait_hndl = 124 libscf_handle_create_bound(SCF_VERSION)) { 125 log_error(LOG_INFO, "[wait_remove] Unable to " 126 "bind a new repository handle: %s\n", 127 scf_strerror(scf_error())); 128 (void) sleep(2); 129 } 130 } 131 132 log_framework(LOG_DEBUG, 133 "wait_remove requesting stop of %s\n", wi->wi_fmri); 134 (void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT); 135 } 136 137 uu_list_node_fini(wi, &wi->wi_link, wait_info_pool); 138 startd_free(wi, sizeof (wait_info_t)); 139 } 140 141 /* 142 * int wait_register(pid_t, char *, int, int) 143 * wait_register is called after we have called fork(2), and know which pid we 144 * wish to monitor. However, since the child may have already exited by the 145 * time we are called, we must handle the error cases from open(2) 146 * appropriately. The am_parent flag is recorded to handle waitpid(2) 147 * behaviour on removal; similarly, the direct flag is passed through to a 148 * potential call to wait_remove() to govern its behaviour in different 149 * contexts. 150 * 151 * Returns 0 if registration successful, 1 if child pid did not exist, and -1 152 * if a different error occurred. 153 */ 154 int 155 wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct) 156 { 157 char *fname = uu_msprintf("/proc/%ld/psinfo", pid); 158 int fd; 159 wait_info_t *wi; 160 161 assert(pid != 0); 162 163 if (fname == NULL) 164 return (-1); 165 166 wi = startd_alloc(sizeof (wait_info_t)); 167 168 uu_list_node_init(wi, &wi->wi_link, wait_info_pool); 169 170 wi->wi_fd = -1; 171 wi->wi_pid = pid; 172 wi->wi_fmri = inst_fmri; 173 wi->wi_parent = am_parent; 174 175 MUTEX_LOCK(&wait_info_lock); 176 (void) uu_list_insert_before(wait_info_list, NULL, wi); 177 MUTEX_UNLOCK(&wait_info_lock); 178 179 if ((fd = open(fname, O_RDONLY)) == -1) { 180 if (errno == ENOENT) { 181 /* 182 * Child has already exited. 183 */ 184 wait_remove(wi, direct); 185 uu_free(fname); 186 return (1); 187 } else { 188 log_error(LOG_WARNING, 189 "open %s failed; not monitoring %s: %s\n", fname, 190 inst_fmri, strerror(errno)); 191 uu_free(fname); 192 return (-1); 193 } 194 } 195 196 uu_free(fname); 197 198 wi->wi_fd = fd; 199 200 if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) { 201 log_error(LOG_WARNING, 202 "initial port_association of %d / %s failed: %s\n", fd, 203 inst_fmri, strerror(errno)); 204 return (-1); 205 } 206 207 log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd, 208 inst_fmri); 209 210 return (0); 211 } 212 213 /*ARGSUSED*/ 214 void * 215 wait_thread(void *args) 216 { 217 for (;;) { 218 port_event_t pe; 219 int fd; 220 wait_info_t *wi; 221 struct timespec ts; 222 223 ts.tv_sec = 1; 224 ts.tv_nsec = 0; 225 226 if (port_get(port_fd, &pe, &ts) == -1) 227 if (errno == EINTR || errno == ETIME) 228 continue; 229 else 230 log_error(LOG_WARNING, 231 "port_get returned %s\n", strerror(errno)); 232 233 fd = pe.portev_object; 234 wi = pe.portev_user; 235 236 if ((pe.portev_events & POLLHUP) == POLLHUP) { 237 psinfo_t psi; 238 239 if (lseek(fd, 0, SEEK_SET) != 0 || 240 read(fd, &psi, sizeof (psinfo_t)) != 241 sizeof (psinfo_t)) { 242 log_framework(LOG_WARNING, 243 "couldn't get psinfo data for %s (%s); " 244 "assuming failed\n", wi->wi_fmri, 245 strerror(errno)); 246 goto err_remove; 247 } 248 249 if (psi.pr_nlwp != 0 || 250 psi.pr_nzomb != 0 || 251 psi.pr_lwp.pr_lwpid != 0) { 252 /* 253 * We have determined, in accordance with the 254 * definition in proc(4), this process is not a 255 * zombie. Reassociate. 256 */ 257 if (port_associate(port_fd, PORT_SOURCE_FD, fd, 258 0, wi)) 259 log_error(LOG_WARNING, 260 "port_association of %d / %s " 261 "failed\n", fd, wi->wi_fmri); 262 continue; 263 } 264 } else if ( 265 (pe.portev_events & POLLERR) == 0) { 266 if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) 267 log_error(LOG_WARNING, 268 "port_association of %d / %s " 269 "failed\n", fd, wi->wi_fmri); 270 continue; 271 } 272 273 err_remove: 274 startd_close(fd); 275 wi->wi_fd = -1; 276 277 wait_remove(wi, 0); 278 } 279 280 /*LINTED E_FUNC_HAS_NO_RETURN_STMT*/ 281 } 282 283 void 284 wait_prefork() 285 { 286 MUTEX_LOCK(&wait_info_lock); 287 } 288 289 void 290 wait_postfork(pid_t pid) 291 { 292 wait_info_t *wi; 293 294 MUTEX_UNLOCK(&wait_info_lock); 295 296 if (pid != 0) 297 return; 298 299 /* 300 * Close all of the child's wait-related fds. The wait_thread() is 301 * gone, so no need to worry about returning events. We always exec(2) 302 * after a fork request, so we needn't free the list elements 303 * themselves. 304 */ 305 306 for (wi = uu_list_first(wait_info_list); 307 wi != NULL; 308 wi = uu_list_next(wait_info_list, wi)) { 309 if (wi->wi_fd != -1) 310 startd_close(wi->wi_fd); 311 } 312 313 startd_close(port_fd); 314 315 (void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 316 } 317 318 void 319 wait_init() 320 { 321 struct rlimit fd_new; 322 323 (void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 324 (void) getrlimit(RLIMIT_NOFILE, &fd_new); 325 326 fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES; 327 328 (void) setrlimit(RLIMIT_NOFILE, &fd_new); 329 330 if ((port_fd = port_create()) == -1) 331 uu_die("wait_init couldn't port_create"); 332 333 wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t), 334 offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG); 335 if (wait_info_pool == NULL) 336 uu_die("wait_init couldn't create wait_info_pool"); 337 338 wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0); 339 if (wait_info_list == NULL) 340 uu_die("wait_init couldn't create wait_info_list"); 341 342 (void) pthread_mutex_init(&wait_info_lock, &mutex_attrs); 343 } 344