1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/utsname.h> 29 #include <sys/param.h> 30 #include <sys/systeminfo.h> 31 #include <sys/fm/util.h> 32 33 #include <smbios.h> 34 #include <limits.h> 35 #include <unistd.h> 36 #include <signal.h> 37 #include <stdlib.h> 38 #include <stdio.h> 39 #include <ctype.h> 40 #include <door.h> 41 42 #include <fmd_conf.h> 43 #include <fmd_dispq.h> 44 #include <fmd_timerq.h> 45 #include <fmd_subr.h> 46 #include <fmd_error.h> 47 #include <fmd_module.h> 48 #include <fmd_thread.h> 49 #include <fmd_alloc.h> 50 #include <fmd_string.h> 51 #include <fmd_builtin.h> 52 #include <fmd_ustat.h> 53 #include <fmd_protocol.h> 54 #include <fmd_scheme.h> 55 #include <fmd_asru.h> 56 #include <fmd_case.h> 57 #include <fmd_log.h> 58 #include <fmd_idspace.h> 59 #include <fmd_rpc.h> 60 #include <fmd_dr.h> 61 #include <fmd_topo.h> 62 #include <fmd_xprt.h> 63 #include <fmd_ctl.h> 64 #include <sys/openpromio.h> 65 #include <libdevinfo.h> 66 67 #include <fmd.h> 68 69 extern const nv_alloc_ops_t fmd_nv_alloc_ops; /* see fmd_nv.c */ 70 71 const char _fmd_version[] = "1.2"; /* daemon version string */ 72 static char _fmd_plat[MAXNAMELEN]; /* native platform string */ 73 static char _fmd_isa[MAXNAMELEN]; /* native instruction set */ 74 static struct utsname _fmd_uts; /* native uname(2) info */ 75 static char _fmd_psn[MAXNAMELEN]; /* product serial number */ 76 static char _fmd_csn[MAXNAMELEN]; /* chassis serial number */ 77 static char _fmd_prod[MAXNAMELEN]; /* product name string */ 78 79 /* 80 * Note: the configuration file path is ordered from most common to most host- 81 * specific because new conf files are merged/override previous ones. The 82 * module paths are in the opposite order, from most specific to most common, 83 * because once a module is loaded fmd will not try to load over the same name. 84 */ 85 86 static const char _fmd_conf_path[] = 87 "%r/usr/lib/fm/fmd:" 88 "%r/usr/platform/%m/lib/fm/fmd:" 89 "%r/usr/platform/%i/lib/fm/fmd:" 90 "%r/etc/fm/fmd"; 91 92 static const char _fmd_agent_path[] = 93 "%r/usr/platform/%i/lib/fm/fmd/agents:" 94 "%r/usr/platform/%m/lib/fm/fmd/agents:" 95 "%r/usr/lib/fm/fmd/agents"; 96 97 static const char _fmd_plugin_path[] = 98 "%r/usr/platform/%i/lib/fm/fmd/plugins:" 99 "%r/usr/platform/%m/lib/fm/fmd/plugins:" 100 "%r/usr/lib/fm/fmd/plugins"; 101 102 static const char _fmd_scheme_path[] = 103 "usr/lib/fm/fmd/schemes"; 104 105 static const fmd_conf_mode_t _fmd_cerror_modes[] = { 106 { "unload", "unload offending client module", FMD_CERROR_UNLOAD }, 107 { "stop", "stop daemon for debugger attach", FMD_CERROR_STOP }, 108 { "abort", "abort daemon and force core dump", FMD_CERROR_ABORT }, 109 { NULL, NULL, 0 } 110 }; 111 112 static const fmd_conf_mode_t _fmd_dbout_modes[] = { 113 { "stderr", "send debug messages to stderr", FMD_DBOUT_STDERR }, 114 { "syslog", "send debug messages to syslog", FMD_DBOUT_SYSLOG }, 115 { NULL, NULL, 0 } 116 }; 117 118 static const fmd_conf_mode_t _fmd_debug_modes[] = { 119 { "help", "display debugging modes and exit", FMD_DBG_HELP }, 120 { "mod", "debug module load/unload/locking", FMD_DBG_MOD }, 121 { "disp", "debug dispatch queue processing", FMD_DBG_DISP }, 122 { "xprt", "debug transport-specific routines", FMD_DBG_XPRT }, 123 { "evt", "debug event subsystem routines", FMD_DBG_EVT }, 124 { "log", "debug log subsystem routines", FMD_DBG_LOG }, 125 { "tmr", "debug timer subsystem routines", FMD_DBG_TMR }, 126 { "fmri", "debug fmri subsystem routines", FMD_DBG_FMRI }, 127 { "asru", "debug asru subsystem routines", FMD_DBG_ASRU }, 128 { "case", "debug case subsystem routines", FMD_DBG_CASE }, 129 { "ckpt", "debug checkpoint routines", FMD_DBG_CKPT }, 130 { "rpc", "debug rpc service routines", FMD_DBG_RPC }, 131 { "trace", "display matching trace calls", FMD_DBG_TRACE }, 132 { "all", "enable all available debug modes", FMD_DBG_ALL }, 133 { NULL, NULL, 0 } 134 }; 135 136 static int 137 fmd_cerror_set(fmd_conf_param_t *pp, const char *value) 138 { 139 return (fmd_conf_mode_set(_fmd_cerror_modes, pp, value)); 140 } 141 142 static int 143 fmd_dbout_set(fmd_conf_param_t *pp, const char *value) 144 { 145 return (fmd_conf_mode_set(_fmd_dbout_modes, pp, value)); 146 } 147 148 static int 149 fmd_debug_set(fmd_conf_param_t *pp, const char *value) 150 { 151 int err = fmd_conf_mode_set(_fmd_debug_modes, pp, value); 152 153 if (err == 0) 154 fmd.d_fmd_debug = pp->cp_value.cpv_num; 155 156 return (err); 157 } 158 159 static int 160 fmd_trmode_set(fmd_conf_param_t *pp, const char *value) 161 { 162 fmd_tracebuf_f *func; 163 164 if (strcasecmp(value, "none") == 0) 165 func = fmd_trace_none; 166 else if (strcasecmp(value, "lite") == 0) 167 func = fmd_trace_lite; 168 else if (strcasecmp(value, "full") == 0) 169 func = fmd_trace_full; 170 else 171 return (fmd_set_errno(EFMD_CONF_INVAL)); 172 173 fmd.d_thr_trace = (void (*)())func; 174 pp->cp_value.cpv_ptr = (void *)func; 175 return (0); 176 } 177 178 static void 179 fmd_trmode_get(const fmd_conf_param_t *pp, void *ptr) 180 { 181 *((void **)ptr) = pp->cp_value.cpv_ptr; 182 } 183 184 static int 185 fmd_clkmode_set(fmd_conf_param_t *pp, const char *value) 186 { 187 const fmd_timeops_t *ops; 188 189 if (strcasecmp(value, "native") == 0) 190 ops = &fmd_timeops_native; 191 else if (strcasecmp(value, "simulated") == 0) 192 ops = &fmd_timeops_simulated; 193 else 194 return (fmd_set_errno(EFMD_CONF_INVAL)); 195 196 fmd.d_clockops = ops; 197 pp->cp_value.cpv_ptr = (void *)ops; 198 return (0); 199 } 200 201 static void 202 fmd_clkmode_get(const fmd_conf_param_t *pp, void *ptr) 203 { 204 *((void **)ptr) = pp->cp_value.cpv_ptr; 205 } 206 207 static const fmd_conf_ops_t fmd_cerror_ops = { 208 fmd_cerror_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop 209 }; 210 211 static const fmd_conf_ops_t fmd_dbout_ops = { 212 fmd_dbout_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop 213 }; 214 215 static const fmd_conf_ops_t fmd_debug_ops = { 216 fmd_debug_set, fmd_conf_mode_get, fmd_conf_notsup, fmd_conf_nop 217 }; 218 219 static const fmd_conf_ops_t fmd_trmode_ops = { 220 fmd_trmode_set, fmd_trmode_get, fmd_conf_notsup, fmd_conf_nop 221 }; 222 223 static const fmd_conf_ops_t fmd_clkmode_ops = { 224 fmd_clkmode_set, fmd_clkmode_get, fmd_conf_notsup, fmd_conf_nop 225 }; 226 227 static const fmd_conf_formal_t _fmd_conf[] = { 228 { "agent.path", &fmd_conf_path, _fmd_agent_path }, /* path for agents */ 229 { "alloc_msecs", &fmd_conf_uint32, "10" }, /* msecs before alloc retry */ 230 { "alloc_tries", &fmd_conf_uint32, "3" }, /* max # of alloc retries */ 231 { "product_sn", &fmd_conf_string, _fmd_psn }, /* product serial number */ 232 { "chassis", &fmd_conf_string, _fmd_csn }, /* chassis serial number */ 233 { "ckpt.dir", &fmd_conf_string, "var/fm/fmd/ckpt" }, /* ckpt directory path */ 234 { "ckpt.dirmode", &fmd_conf_int32, "0755" }, /* ckpt directory perm mode */ 235 { "ckpt.mode", &fmd_conf_int32, "0644" }, /* ckpt file perm mode */ 236 { "ckpt.restore", &fmd_conf_bool, "true" }, /* restore checkpoints? */ 237 { "ckpt.save", &fmd_conf_bool, "true" }, /* save checkpoints? */ 238 { "ckpt.zero", &fmd_conf_bool, "false" }, /* zero checkpoints on start? */ 239 { "client.buflim", &fmd_conf_size, "10m" }, /* client buffer space limit */ 240 { "client.dbout", &fmd_dbout_ops, NULL }, /* client debug output sinks */ 241 { "client.debug", &fmd_conf_bool, NULL }, /* client debug enable */ 242 { "client.error", &fmd_cerror_ops, "unload" }, /* client error policy */ 243 { "client.memlim", &fmd_conf_size, "10m" }, /* client allocation limit */ 244 { "client.evqlim", &fmd_conf_uint32, "256" }, /* client event queue limit */ 245 { "client.thrlim", &fmd_conf_uint32, "20" }, /* client aux thread limit */ 246 { "client.thrsig", &fmd_conf_signal, "SIGUSR1" }, /* fmd_thr_signal() value */ 247 { "client.tmrlim", &fmd_conf_uint32, "1024" }, /* client pending timer limit */ 248 { "client.xprtlim", &fmd_conf_uint32, "256" }, /* client transport limit */ 249 { "client.xprtlog", &fmd_conf_bool, NULL }, /* client transport logging? */ 250 { "client.xprtqlim", &fmd_conf_uint32, "1024" }, /* client transport queue li */ 251 { "clock", &fmd_clkmode_ops, "native" }, /* clock operation mode */ 252 { "conf_path", &fmd_conf_path, _fmd_conf_path }, /* root config file path */ 253 { "conf_file", &fmd_conf_string, "fmd.conf" }, /* root config file name */ 254 { "core", &fmd_conf_bool, "false" }, /* force core dump on quit */ 255 { "dbout", &fmd_dbout_ops, NULL }, /* daemon debug output sinks */ 256 { "debug", &fmd_debug_ops, NULL }, /* daemon debugging flags */ 257 { "dictdir", &fmd_conf_string, "usr/lib/fm/dict" }, /* default diagcode dir */ 258 { "domain", &fmd_conf_string, NULL }, /* domain id for de auth */ 259 { "fakenotpresent", &fmd_conf_uint32, "0" }, /* simulate rsrc not present */ 260 { "fg", &fmd_conf_bool, "false" }, /* run daemon in foreground */ 261 { "gc_interval", &fmd_conf_time, "1d" }, /* garbage collection intvl */ 262 { "ids.avg", &fmd_conf_uint32, "4" }, /* desired idspace chain len */ 263 { "ids.max", &fmd_conf_uint32, "1024" }, /* maximum idspace buckets */ 264 { "isaname", &fmd_conf_string, _fmd_isa }, /* instruction set (uname -p) */ 265 { "log.creator", &fmd_conf_string, "fmd" }, /* exacct log creator string */ 266 { "log.error", &fmd_conf_string, "var/fm/fmd/errlog" }, /* error log path */ 267 { "log.fault", &fmd_conf_string, "var/fm/fmd/fltlog" }, /* fault log path */ 268 { "log.minfree", &fmd_conf_size, "2m" }, /* min log fsys free space */ 269 { "log.rsrc", &fmd_conf_string, "var/fm/fmd/rsrc" }, /* asru log dir path */ 270 { "log.tryrotate", &fmd_conf_uint32, "10" }, /* max log rotation attempts */ 271 { "log.waitrotate", &fmd_conf_time, "200ms" }, /* log rotation retry delay */ 272 { "log.xprt", &fmd_conf_string, "var/fm/fmd/xprt" }, /* transport log dir */ 273 { "machine", &fmd_conf_string, _fmd_uts.machine }, /* machine name (uname -m) */ 274 { "nodiagcode", &fmd_conf_string, "-" }, /* diagcode to use if error */ 275 { "repaircode", &fmd_conf_string, "-" }, /* diagcode for list.repaired */ 276 { "resolvecode", &fmd_conf_string, "-" }, /* diagcode for list.resolved */ 277 { "updatecode", &fmd_conf_string, "-" }, /* diagcode for list.updated */ 278 { "osrelease", &fmd_conf_string, _fmd_uts.release }, /* release (uname -r) */ 279 { "osversion", &fmd_conf_string, _fmd_uts.version }, /* version (uname -v) */ 280 { "platform", &fmd_conf_string, _fmd_plat }, /* platform string (uname -i) */ 281 { "plugin.close", &fmd_conf_bool, "true" }, /* dlclose plugins on fini */ 282 { "plugin.path", &fmd_conf_path, _fmd_plugin_path }, /* path for plugin mods */ 283 { "product", &fmd_conf_string, _fmd_prod }, /* product name string */ 284 { "rootdir", &fmd_conf_string, "" }, /* root directory for paths */ 285 { "rpc.adm.path", &fmd_conf_string, NULL }, /* FMD_ADM rendezvous file */ 286 { "rpc.adm.prog", &fmd_conf_uint32, "100169" }, /* FMD_ADM rpc program num */ 287 { "rpc.api.path", &fmd_conf_string, NULL }, /* FMD_API rendezvous file */ 288 { "rpc.api.prog", &fmd_conf_uint32, "100170" }, /* FMD_API rpc program num */ 289 { "rpc.rcvsize", &fmd_conf_size, "128k" }, /* rpc receive buffer size */ 290 { "rpc.sndsize", &fmd_conf_size, "128k" }, /* rpc send buffer size */ 291 { "rsrc.pollperiod", &fmd_conf_time, "1h" }, /* aged rsrcs poller period */ 292 { "rsrc.age", &fmd_conf_time, "30d" }, /* max age of old rsrc log */ 293 { "rsrc.zero", &fmd_conf_bool, "false" }, /* zero rsrc cache on start? */ 294 { "schemedir", &fmd_conf_string, _fmd_scheme_path }, /* path for scheme mods */ 295 { "self.name", &fmd_conf_string, "fmd-self-diagnosis" }, /* self-diag module */ 296 { "self.dict", &fmd_conf_list, "FMD.dict" }, /* self-diag dictionary list */ 297 { "server", &fmd_conf_string, _fmd_uts.nodename }, /* server id for de auth */ 298 { "strbuckets", &fmd_conf_uint32, "211" }, /* size of string hashes */ 299 #ifdef DEBUG 300 { "trace.mode", &fmd_trmode_ops, "full" }, /* trace mode: none/lite/full */ 301 #else 302 { "trace.mode", &fmd_trmode_ops, "lite" }, /* trace mode: none/lite/full */ 303 #endif 304 { "trace.recs", &fmd_conf_uint32, "128" }, /* trace records per thread */ 305 { "trace.frames", &fmd_conf_uint32, "16" }, /* max trace rec stack frames */ 306 { "uuidlen", &fmd_conf_uint32, "36" }, /* UUID ASCII string length */ 307 { "xprt.ttl", &fmd_conf_uint8, "1" }, /* default event time-to-live */ 308 }; 309 310 /* 311 * Statistics maintained by fmd itself on behalf of various global subsystems. 312 * NOTE: FMD_TYPE_STRING statistics should not be used here. If they are 313 * required in the future, the FMD_ADM_MODGSTAT service routine must change. 314 */ 315 static fmd_statistics_t _fmd_stats = { 316 { "errlog.replayed", FMD_TYPE_UINT64, "total events replayed from errlog" }, 317 { "errlog.partials", FMD_TYPE_UINT64, "events partially committed in errlog" }, 318 { "errlog.enospc", FMD_TYPE_UINT64, "events not appended to errlog (ENOSPC)" }, 319 { "fltlog.enospc", FMD_TYPE_UINT64, "events not appended to fltlog (ENOSPC)" }, 320 { "log.enospc", FMD_TYPE_UINT64, "events not appended to other logs (ENOSPC)" }, 321 { "dr.gen", FMD_TYPE_UINT64, "dynamic reconfiguration generation" }, 322 { "topo.gen", FMD_TYPE_UINT64, "topology snapshot generation" }, 323 { "topo.drgen", FMD_TYPE_UINT64, "current topology DR generation number" }, 324 }; 325 326 /* 327 * SMBIOS serial numbers can contain characters (particularly ':' and ' ') 328 * that are invalid for the authority and can break FMRI parsing. We translate 329 * any invalid characters to a safe '-', as well as trimming any leading or 330 * trailing whitespace. Similarly, '/' can be found in some product names 331 * so we translate that to '-'. 332 */ 333 void 334 fmd_cleanup_auth_str(char *buf, const char *begin) 335 { 336 const char *end, *cp; 337 char c; 338 int i; 339 340 end = begin + strlen(begin); 341 342 while (begin < end && isspace(*begin)) 343 begin++; 344 while (begin < end && isspace(*(end - 1))) 345 end--; 346 347 if (begin >= end) 348 return; 349 350 cp = begin; 351 for (i = 0; i < MAXNAMELEN - 1; i++) { 352 if (cp >= end) 353 break; 354 c = *cp; 355 if (c == ':' || c == '=' || c == '/' || isspace(c) || 356 !isprint(c)) 357 buf[i] = '-'; 358 else 359 buf[i] = c; 360 cp++; 361 } 362 buf[i] = 0; 363 } 364 365 void 366 fmd_create(fmd_t *dp, const char *arg0, const char *root, const char *conf) 367 { 368 fmd_conf_path_t *pap; 369 char file[PATH_MAX]; 370 const char *name, *psn, *csn; 371 fmd_stat_t *sp; 372 int i; 373 374 smbios_hdl_t *shp; 375 smbios_system_t s1; 376 smbios_info_t s2; 377 id_t id; 378 379 di_prom_handle_t promh = DI_PROM_HANDLE_NIL; 380 di_node_t rooth = DI_NODE_NIL; 381 char *bufp; 382 383 (void) sysinfo(SI_PLATFORM, _fmd_plat, sizeof (_fmd_plat)); 384 (void) sysinfo(SI_ARCHITECTURE, _fmd_isa, sizeof (_fmd_isa)); 385 (void) uname(&_fmd_uts); 386 387 if ((shp = smbios_open(NULL, SMB_VERSION, 0, NULL)) != NULL) { 388 if ((id = smbios_info_system(shp, &s1)) != SMB_ERR && 389 smbios_info_common(shp, id, &s2) != SMB_ERR) 390 fmd_cleanup_auth_str(_fmd_prod, s2.smbi_product); 391 392 if ((psn = smbios_psn(shp)) != NULL) 393 fmd_cleanup_auth_str(_fmd_psn, psn); 394 395 if ((csn = smbios_csn(shp)) != NULL) 396 fmd_cleanup_auth_str(_fmd_csn, csn); 397 398 smbios_close(shp); 399 } else if ((rooth = di_init("/", DINFOPROP)) != DI_NODE_NIL && 400 (promh = di_prom_init()) != DI_PROM_HANDLE_NIL) { 401 if (di_prom_prop_lookup_bytes(promh, rooth, "chassis-sn", 402 (unsigned char **)&bufp) != -1) { 403 fmd_cleanup_auth_str(_fmd_csn, bufp); 404 } 405 } 406 407 if (promh != DI_PROM_HANDLE_NIL) 408 di_prom_fini(promh); 409 if (rooth != DI_NODE_NIL) 410 di_fini(rooth); 411 412 bzero(dp, sizeof (fmd_t)); 413 414 dp->d_version = _fmd_version; 415 dp->d_pname = fmd_strbasename(arg0); 416 dp->d_pid = getpid(); 417 418 if (pthread_key_create(&dp->d_key, NULL) != 0) 419 fmd_error(EFMD_EXIT, "failed to create pthread key"); 420 421 (void) pthread_mutex_init(&dp->d_xprt_lock, NULL); 422 (void) pthread_mutex_init(&dp->d_err_lock, NULL); 423 (void) pthread_mutex_init(&dp->d_thr_lock, NULL); 424 (void) pthread_mutex_init(&dp->d_mod_lock, NULL); 425 (void) pthread_mutex_init(&dp->d_stats_lock, NULL); 426 (void) pthread_mutex_init(&dp->d_topo_lock, NULL); 427 (void) pthread_rwlock_init(&dp->d_log_lock, NULL); 428 (void) pthread_mutex_init(&dp->d_fmd_lock, NULL); 429 (void) pthread_cond_init(&dp->d_fmd_cv, NULL); 430 431 /* 432 * A small number of properties must be set manually before we open 433 * the root configuration file. These include any settings for our 434 * memory allocator and path expansion token values, because these 435 * values are needed by the routines in fmd_conf.c itself. After 436 * the root configuration file is processed, we reset these properties 437 * based upon the latest values from the configuration file. 438 */ 439 dp->d_alloc_msecs = 10; 440 dp->d_alloc_tries = 3; 441 dp->d_str_buckets = 211; 442 443 dp->d_rootdir = root ? root : ""; 444 dp->d_platform = _fmd_plat; 445 dp->d_machine = _fmd_uts.machine; 446 dp->d_isaname = _fmd_isa; 447 448 dp->d_conf = fmd_conf_open(conf, sizeof (_fmd_conf) / 449 sizeof (_fmd_conf[0]), _fmd_conf, FMD_CONF_DEFER); 450 451 if (dp->d_conf == NULL) { 452 fmd_error(EFMD_EXIT, 453 "failed to load required configuration properties\n"); 454 } 455 456 (void) fmd_conf_getprop(dp->d_conf, "alloc.msecs", &dp->d_alloc_msecs); 457 (void) fmd_conf_getprop(dp->d_conf, "alloc.tries", &dp->d_alloc_tries); 458 (void) fmd_conf_getprop(dp->d_conf, "strbuckets", &dp->d_str_buckets); 459 460 (void) fmd_conf_getprop(dp->d_conf, "platform", &dp->d_platform); 461 (void) fmd_conf_getprop(dp->d_conf, "machine", &dp->d_machine); 462 (void) fmd_conf_getprop(dp->d_conf, "isaname", &dp->d_isaname); 463 464 /* 465 * Manually specified rootdirs override config files, so only update 466 * d_rootdir based on the config files we parsed if no 'root' was set. 467 */ 468 if (root == NULL) 469 (void) fmd_conf_getprop(dp->d_conf, "rootdir", &dp->d_rootdir); 470 else 471 (void) fmd_conf_setprop(dp->d_conf, "rootdir", dp->d_rootdir); 472 473 /* 474 * Once the base conf file properties are loaded, lookup the values 475 * of $conf_path and $conf_file and merge in any other conf files. 476 */ 477 (void) fmd_conf_getprop(dp->d_conf, "conf_path", &pap); 478 (void) fmd_conf_getprop(dp->d_conf, "conf_file", &name); 479 480 for (i = 0; i < pap->cpa_argc; i++) { 481 (void) snprintf(file, sizeof (file), 482 "%s/%s", pap->cpa_argv[i], name); 483 if (access(file, F_OK) == 0) 484 fmd_conf_merge(dp->d_conf, file); 485 } 486 487 /* 488 * Update the value of fmd.d_fg based on "fg". We cache this property 489 * because it must be accessed deep within fmd at fmd_verror() time. 490 * Update any other properties that must be cached for performance. 491 */ 492 (void) fmd_conf_getprop(fmd.d_conf, "fg", &fmd.d_fg); 493 (void) fmd_conf_getprop(fmd.d_conf, "xprt.ttl", &fmd.d_xprt_ttl); 494 495 /* 496 * Initialize our custom libnvpair allocator and create an nvlist for 497 * authority elements corresponding to this instance of the daemon. 498 */ 499 (void) nv_alloc_init(&dp->d_nva, &fmd_nv_alloc_ops); 500 dp->d_auth = fmd_protocol_authority(); 501 502 /* 503 * The fmd_module_t for the root module must be created manually. Most 504 * of it remains unused and zero, except for the few things we fill in. 505 */ 506 dp->d_rmod = fmd_zalloc(sizeof (fmd_module_t), FMD_SLEEP); 507 dp->d_rmod->mod_name = fmd_strdup(dp->d_pname, FMD_SLEEP); 508 dp->d_rmod->mod_fmri = fmd_protocol_fmri_module(dp->d_rmod); 509 510 fmd_list_append(&dp->d_mod_list, dp->d_rmod); 511 fmd_module_hold(dp->d_rmod); 512 513 (void) pthread_mutex_init(&dp->d_rmod->mod_lock, NULL); 514 (void) pthread_cond_init(&dp->d_rmod->mod_cv, NULL); 515 (void) pthread_mutex_init(&dp->d_rmod->mod_stats_lock, NULL); 516 517 dp->d_rmod->mod_thread = fmd_thread_xcreate(dp->d_rmod, pthread_self()); 518 dp->d_rmod->mod_stats = fmd_zalloc(sizeof (fmd_modstat_t), FMD_SLEEP); 519 dp->d_rmod->mod_ustat = fmd_ustat_create(); 520 521 if (pthread_setspecific(dp->d_key, dp->d_rmod->mod_thread) != 0) 522 fmd_error(EFMD_EXIT, "failed to attach main thread key"); 523 524 if ((dp->d_stats = (fmd_statistics_t *)fmd_ustat_insert( 525 dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC, sizeof (_fmd_stats) / 526 sizeof (fmd_stat_t), (fmd_stat_t *)&_fmd_stats, NULL)) == NULL) 527 fmd_error(EFMD_EXIT, "failed to initialize statistics"); 528 529 (void) pthread_mutex_lock(&dp->d_rmod->mod_lock); 530 dp->d_rmod->mod_flags |= FMD_MOD_INIT; 531 (void) pthread_mutex_unlock(&dp->d_rmod->mod_lock); 532 533 /* 534 * In addition to inserting the _fmd_stats collection of program-wide 535 * statistics, we also insert a statistic named after each of our 536 * errors and update these counts in fmd_verror() (see fmd_subr.c). 537 */ 538 dp->d_errstats = sp = fmd_zalloc(sizeof (fmd_stat_t) * 539 (EFMD_END - EFMD_UNKNOWN), FMD_SLEEP); 540 541 for (i = 0; i < EFMD_END - EFMD_UNKNOWN; i++, sp++) { 542 (void) snprintf(sp->fmds_name, sizeof (sp->fmds_name), "err.%s", 543 strrchr(fmd_errclass(EFMD_UNKNOWN + i), '.') + 1); 544 sp->fmds_type = FMD_TYPE_UINT64; 545 } 546 547 (void) fmd_ustat_insert(dp->d_rmod->mod_ustat, FMD_USTAT_NOALLOC, 548 EFMD_END - EFMD_UNKNOWN, dp->d_errstats, NULL); 549 } 550 551 void 552 fmd_destroy(fmd_t *dp) 553 { 554 fmd_module_t *mp; 555 fmd_case_t *cp; 556 int core; 557 558 (void) fmd_conf_getprop(fmd.d_conf, "core", &core); 559 560 fmd_rpc_fini(); 561 562 if (dp->d_xprt_ids != NULL) 563 fmd_xprt_suspend_all(); 564 565 /* 566 * Unload the self-diagnosis module first. This ensures that it does 567 * not get confused as we start unloading other modules, etc. We must 568 * hold the dispq lock as a writer while doing so since it uses d_self. 569 */ 570 if (dp->d_self != NULL) { 571 fmd_module_t *self; 572 573 (void) pthread_rwlock_wrlock(&dp->d_disp->dq_lock); 574 self = dp->d_self; 575 dp->d_self = NULL; 576 (void) pthread_rwlock_unlock(&dp->d_disp->dq_lock); 577 578 fmd_module_unload(self); 579 fmd_module_rele(self); 580 } 581 582 /* 583 * Unload modules in reverse order *except* for the root module, which 584 * is first in the list. This allows it to keep its thread and trace. 585 */ 586 for (mp = fmd_list_prev(&dp->d_mod_list); mp != dp->d_rmod; ) { 587 fmd_module_unload(mp); 588 mp = fmd_list_prev(mp); 589 } 590 591 if (dp->d_mod_hash != NULL) { 592 fmd_modhash_destroy(dp->d_mod_hash); 593 dp->d_mod_hash = NULL; 594 } 595 596 /* 597 * Close both log files now that modules are no longer active. We must 598 * set these pointers to NULL in case any subsequent errors occur. 599 */ 600 if (dp->d_errlog != NULL) { 601 fmd_log_rele(dp->d_errlog); 602 dp->d_errlog = NULL; 603 } 604 605 if (dp->d_fltlog != NULL) { 606 fmd_log_rele(dp->d_fltlog); 607 dp->d_fltlog = NULL; 608 } 609 610 /* 611 * Now destroy the resource cache: each ASRU contains a case reference, 612 * which may in turn contain a pointer to a referenced owning module. 613 */ 614 if (dp->d_asrus != NULL) { 615 fmd_asru_hash_destroy(dp->d_asrus); 616 dp->d_asrus = NULL; 617 } 618 619 /* 620 * Now that all data structures that refer to modules are torn down, 621 * no modules should be remaining on the module list except for d_rmod. 622 * If we trip one of these assertions, we're missing a rele somewhere. 623 */ 624 ASSERT(fmd_list_prev(&dp->d_mod_list) == dp->d_rmod); 625 ASSERT(fmd_list_next(&dp->d_mod_list) == dp->d_rmod); 626 627 /* 628 * Now destroy the root module. We clear its thread key first so any 629 * calls to fmd_trace() inside of the module code will be ignored. 630 */ 631 (void) pthread_setspecific(dp->d_key, NULL); 632 fmd_module_lock(dp->d_rmod); 633 634 while ((cp = fmd_list_next(&dp->d_rmod->mod_cases)) != NULL) 635 fmd_case_discard(cp, B_FALSE); 636 637 fmd_module_unlock(dp->d_rmod); 638 fmd_free(dp->d_rmod->mod_stats, sizeof (fmd_modstat_t)); 639 dp->d_rmod->mod_stats = NULL; 640 641 (void) pthread_mutex_lock(&dp->d_rmod->mod_lock); 642 dp->d_rmod->mod_flags |= FMD_MOD_FINI; 643 (void) pthread_mutex_unlock(&dp->d_rmod->mod_lock); 644 645 fmd_module_rele(dp->d_rmod); 646 ASSERT(fmd_list_next(&dp->d_mod_list) == NULL); 647 648 /* 649 * Now destroy the remaining global data structures. If 'core' was 650 * set to true, force a core dump so we can check for memory leaks. 651 */ 652 if (dp->d_cases != NULL) 653 fmd_case_hash_destroy(dp->d_cases); 654 if (dp->d_disp != NULL) 655 fmd_dispq_destroy(dp->d_disp); 656 if (dp->d_timers != NULL) 657 fmd_timerq_destroy(dp->d_timers); 658 if (dp->d_schemes != NULL) 659 fmd_scheme_hash_destroy(dp->d_schemes); 660 if (dp->d_xprt_ids != NULL) 661 fmd_idspace_destroy(dp->d_xprt_ids); 662 663 if (dp->d_errstats != NULL) { 664 fmd_free(dp->d_errstats, 665 sizeof (fmd_stat_t) * (EFMD_END - EFMD_UNKNOWN)); 666 } 667 668 if (dp->d_conf != NULL) 669 fmd_conf_close(dp->d_conf); 670 671 fmd_topo_fini(); 672 673 nvlist_free(dp->d_auth); 674 (void) nv_alloc_fini(&dp->d_nva); 675 dp->d_clockops->fto_fini(dp->d_clockptr); 676 677 (void) pthread_key_delete(dp->d_key); 678 bzero(dp, sizeof (fmd_t)); 679 680 if (core) 681 fmd_panic("forcing core dump at user request\n"); 682 } 683 684 /*ARGSUSED*/ 685 static void 686 fmd_gc(fmd_t *dp, id_t id, hrtime_t hrt) 687 { 688 hrtime_t delta; 689 690 if (id != 0) { 691 TRACE((FMD_DBG_MOD, "garbage collect start")); 692 fmd_modhash_apply(dp->d_mod_hash, fmd_module_gc); 693 TRACE((FMD_DBG_MOD, "garbage collect end")); 694 695 (void) pthread_rwlock_rdlock(&dp->d_log_lock); 696 fmd_log_update(dp->d_errlog); 697 (void) pthread_rwlock_unlock(&dp->d_log_lock); 698 } 699 700 (void) fmd_conf_getprop(dp->d_conf, "gc_interval", &delta); 701 (void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids, 702 (fmd_timer_f *)fmd_gc, dp, NULL, delta); 703 } 704 705 /*ARGSUSED*/ 706 static void 707 fmd_clear_aged_rsrcs(fmd_t *dp, id_t id, hrtime_t hrt) 708 { 709 hrtime_t period; 710 711 fmd_asru_clear_aged_rsrcs(); 712 (void) fmd_conf_getprop(dp->d_conf, "rsrc.pollperiod", &period); 713 (void) fmd_timerq_install(dp->d_timers, dp->d_rmod->mod_timerids, 714 (fmd_timer_f *)fmd_clear_aged_rsrcs, dp, NULL, period); 715 } 716 717 /* 718 * Events are committed to the errlog after cases are checkpointed. If fmd 719 * crashes before an event is ever associated with a module, this function will 720 * be called to replay it to all subscribers. If fmd crashes in between the 721 * subscriber checkpointing and committing the event in the error log, the 722 * module will have seen the event and we don't want to replay it. So we look 723 * for the event in all modules and transition it to the proper state. If 724 * it is found, we commit it to the error log and do not replay it. The in- 725 * memory case search used by fmd_module_contains() et al isn't particularly 726 * efficient, but it is faster than doing read i/o's on every case event to 727 * check their status or write i/o's on every event to replay to update states. 728 * We can improve the efficiency of this lookup algorithm later if necessary. 729 */ 730 /*ARGSUSED*/ 731 static void 732 fmd_err_replay(fmd_log_t *lp, fmd_event_t *ep, fmd_t *dp) 733 { 734 fmd_module_t *mp; 735 fmd_stat_t *sp; 736 737 (void) pthread_mutex_lock(&dp->d_mod_lock); 738 739 for (mp = fmd_list_next(&dp->d_mod_list); 740 mp != NULL; mp = fmd_list_next(mp)) { 741 if (fmd_module_contains(mp, ep)) { 742 fmd_module_hold(mp); 743 break; 744 } 745 } 746 747 (void) pthread_mutex_unlock(&dp->d_mod_lock); 748 749 if (mp != NULL) { 750 fmd_event_commit(ep); 751 fmd_module_rele(mp); 752 sp = &dp->d_stats->ds_log_partials; 753 } else { 754 fmd_dispq_dispatch(dp->d_disp, ep, FMD_EVENT_DATA(ep)); 755 sp = &dp->d_stats->ds_log_replayed; 756 } 757 758 (void) pthread_mutex_lock(&dp->d_stats_lock); 759 sp->fmds_value.ui64++; 760 (void) pthread_mutex_unlock(&dp->d_stats_lock); 761 } 762 763 void 764 fmd_door_server(void *dip) 765 { 766 fmd_dprintf(FMD_DBG_XPRT, "door server starting for %p\n", dip); 767 (void) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); 768 (void) door_return(NULL, 0, NULL, 0); 769 } 770 771 /* 772 * Custom door server create callback. Any fmd services that use doors will 773 * require those threads to have their fmd-specific TSD initialized, etc. 774 */ 775 static void 776 fmd_door(door_info_t *dip) 777 { 778 if (fmd_thread_create(fmd.d_rmod, fmd_door_server, dip) == NULL) 779 fmd_panic("failed to create server for door %p", (void *)dip); 780 } 781 782 /* 783 * This signal handler is installed for the client.thrsig signal to be used to 784 * force an auxiliary thread to wake up from a system call and return EINTR in 785 * response to a module's use of fmd_thr_signal(). We also trace the event. 786 */ 787 static void 788 fmd_signal(int sig) 789 { 790 TRACE((FMD_DBG_MOD, "module thread received sig #%d", sig)); 791 } 792 793 void 794 fmd_run(fmd_t *dp, int pfd) 795 { 796 char *nodc_key[] = { FMD_FLT_NODC, NULL }; 797 char *repair_key[] = { FM_LIST_REPAIRED_CLASS, NULL }; 798 char *resolve_key[] = { FM_LIST_RESOLVED_CLASS, NULL }; 799 char *update_key[] = { FM_LIST_UPDATED_CLASS, NULL }; 800 char code_str[128]; 801 struct sigaction act; 802 803 int status = FMD_EXIT_SUCCESS; 804 const char *name; 805 fmd_conf_path_t *pap; 806 fmd_event_t *e; 807 int dbout; 808 809 /* 810 * Cache all the current debug property settings in d_fmd_debug, 811 * d_fmd_dbout, d_hdl_debug, and d_hdl_dbout. If a given debug mask 812 * is non-zero and the corresponding dbout mask is zero, set dbout 813 * to a sensible default value based on whether we have daemonized. 814 */ 815 (void) fmd_conf_getprop(dp->d_conf, "dbout", &dbout); 816 817 if (dp->d_fmd_debug != 0 && dbout == 0) 818 dp->d_fmd_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG; 819 else 820 dp->d_fmd_dbout = dbout; 821 822 (void) fmd_conf_getprop(dp->d_conf, "client.debug", &dp->d_hdl_debug); 823 (void) fmd_conf_getprop(dp->d_conf, "client.dbout", &dbout); 824 825 if (dp->d_hdl_debug != 0 && dbout == 0) 826 dp->d_hdl_dbout = dp->d_fg? FMD_DBOUT_STDERR : FMD_DBOUT_SYSLOG; 827 else 828 dp->d_hdl_dbout = dbout; 829 830 /* 831 * Initialize remaining major program data structures such as the 832 * clock, dispatch queues, log files, module hash collections, etc. 833 * This work is done here rather than in fmd_create() to permit the -o 834 * command-line option to modify properties after fmd_create() is done. 835 */ 836 name = dp->d_rootdir != NULL && 837 *dp->d_rootdir != '\0' ? dp->d_rootdir : NULL; 838 839 /* 840 * The clock must be initialized before fmd_topo_init() because 841 * fmd_topo_update() calls fmd_time_gethrtime(). 842 */ 843 dp->d_clockptr = dp->d_clockops->fto_init(); 844 845 fmd_topo_init(); 846 847 dp->d_xprt_ids = fmd_idspace_create("xprt_ids", 1, INT_MAX); 848 fmd_xprt_suspend_all(); 849 850 (void) door_server_create(fmd_door); 851 852 dp->d_rmod->mod_timerids = fmd_idspace_create(dp->d_pname, 1, 16); 853 dp->d_timers = fmd_timerq_create(); 854 dp->d_disp = fmd_dispq_create(); 855 dp->d_cases = fmd_case_hash_create(); 856 857 /* 858 * The root module's mod_queue is created with limit zero, making it 859 * act like /dev/null; anything inserted here is simply ignored. 860 */ 861 dp->d_rmod->mod_queue = fmd_eventq_create(dp->d_rmod, 862 &dp->d_rmod->mod_stats->ms_evqstat, &dp->d_rmod->mod_stats_lock, 0); 863 864 /* 865 * Once our subsystems that use signals have been set up, install the 866 * signal handler for the fmd_thr_signal() API. Verify that the signal 867 * being used for this purpose doesn't conflict with something else. 868 */ 869 (void) fmd_conf_getprop(dp->d_conf, "client.thrsig", &dp->d_thr_sig); 870 871 if (sigaction(dp->d_thr_sig, NULL, &act) != 0) { 872 fmd_error(EFMD_EXIT, "invalid signal selected for " 873 "client.thrsig property: %d\n", dp->d_thr_sig); 874 } 875 876 if (act.sa_handler != SIG_IGN && act.sa_handler != SIG_DFL) { 877 fmd_error(EFMD_EXIT, "signal selected for client.thrsig " 878 "property is already in use: %d\n", dp->d_thr_sig); 879 } 880 881 act.sa_handler = fmd_signal; 882 act.sa_flags = 0; 883 884 (void) sigemptyset(&act.sa_mask); 885 (void) sigaction(dp->d_thr_sig, &act, NULL); 886 887 (void) fmd_conf_getprop(dp->d_conf, "schemedir", &name); 888 dp->d_schemes = fmd_scheme_hash_create(dp->d_rootdir, name); 889 890 (void) fmd_conf_getprop(dp->d_conf, "log.rsrc", &name); 891 dp->d_asrus = fmd_asru_hash_create(dp->d_rootdir, name); 892 893 (void) fmd_conf_getprop(dp->d_conf, "log.error", &name); 894 dp->d_errlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_ERROR); 895 896 (void) fmd_conf_getprop(dp->d_conf, "log.fault", &name); 897 dp->d_fltlog = fmd_log_open(dp->d_rootdir, name, FMD_LOG_FAULT); 898 899 if (dp->d_asrus == NULL || dp->d_errlog == NULL || dp->d_fltlog == NULL) 900 fmd_error(EFMD_EXIT, "failed to initialize log files\n"); 901 902 /* 903 * Before loading modules, create an empty control event which will act 904 * as a global barrier for module event processing. Each module we 905 * load successfully will insert it at their head of their event queue, 906 * and then pause inside of fmd_ctl_rele() after dequeuing the event. 907 * This module barrier is required for two reasons: 908 * 909 * (a) During module loading, the restoration of case checkpoints may 910 * result in a list.* event being recreated for which the intended 911 * subscriber has not yet loaded depending on the load order. Such 912 * events could then result in spurious "no subscriber" errors. 913 * 914 * (b) During errlog replay, a sequence of errors from a long time ago 915 * may be replayed, and the module may attempt to install relative 916 * timers associated with one or more of these events. If errlog 917 * replay were "racing" with active module threads, an event E1 918 * that resulted in a relative timer T at time E1 + N nsec could 919 * fire prior to an event E2 being enqueued, even if the relative 920 * time ordering was E1 < E2 < E1 + N, causing mis-diagnosis. 921 */ 922 dp->d_mod_event = e = fmd_event_create(FMD_EVT_CTL, 923 FMD_HRT_NOW, NULL, fmd_ctl_init(NULL)); 924 925 fmd_event_hold(e); 926 927 /* 928 * Once all data structures are initialized, we load all of our modules 929 * in order according to class in order to load up any subscriptions. 930 * Once built-in modules are loaded, we detach from our waiting parent. 931 */ 932 dp->d_mod_hash = fmd_modhash_create(); 933 934 if (fmd_builtin_loadall(dp->d_mod_hash) != 0 && !dp->d_fg) 935 fmd_error(EFMD_EXIT, "failed to initialize fault manager\n"); 936 937 (void) fmd_conf_getprop(dp->d_conf, "self.name", &name); 938 dp->d_self = fmd_modhash_lookup(dp->d_mod_hash, name); 939 940 if (dp->d_self != NULL) { 941 if (fmd_module_dc_key2code(dp->d_self, nodc_key, code_str, 942 sizeof (code_str)) == 0) 943 (void) fmd_conf_setprop(dp->d_conf, "nodiagcode", 944 code_str); 945 if (fmd_module_dc_key2code(dp->d_self, repair_key, code_str, 946 sizeof (code_str)) == 0) 947 (void) fmd_conf_setprop(dp->d_conf, "repaircode", 948 code_str); 949 if (fmd_module_dc_key2code(dp->d_self, resolve_key, code_str, 950 sizeof (code_str)) == 0) 951 (void) fmd_conf_setprop(dp->d_conf, "resolvecode", 952 code_str); 953 if (fmd_module_dc_key2code(dp->d_self, update_key, code_str, 954 sizeof (code_str)) == 0) 955 (void) fmd_conf_setprop(dp->d_conf, "updatecode", 956 code_str); 957 } 958 959 fmd_rpc_init(); 960 dp->d_running = 1; /* we are now officially an active fmd */ 961 962 /* 963 * Now that we're running, if a pipe fd was specified, write an exit 964 * status to it to indicate that our parent process can safely detach. 965 * Then proceed to loading the remaining non-built-in modules. 966 */ 967 if (pfd >= 0) 968 (void) write(pfd, &status, sizeof (status)); 969 970 /* 971 * Before loading all modules, repopulate the ASRU cache from its 972 * persistent repository on disk. Then during module loading, the 973 * restoration of checkpoint files will reparent any active cases. 974 */ 975 fmd_asru_hash_refresh(dp->d_asrus); 976 977 (void) fmd_conf_getprop(dp->d_conf, "plugin.path", &pap); 978 fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_rtld_ops, ".so"); 979 980 (void) fmd_conf_getprop(dp->d_conf, "agent.path", &pap); 981 fmd_modhash_loadall(dp->d_mod_hash, pap, &fmd_proc_ops, NULL); 982 983 /* 984 * With all modules loaded, replay fault events from the ASRU cache for 985 * any ASRUs that must be retired, replay error events from the errlog 986 * that did not finish processing the last time ran, and then release 987 * the global module barrier by executing a final rele on d_mod_event. 988 */ 989 fmd_asru_hash_replay(dp->d_asrus); 990 991 (void) pthread_rwlock_rdlock(&dp->d_log_lock); 992 fmd_log_replay(dp->d_errlog, (fmd_log_f *)fmd_err_replay, dp); 993 fmd_log_update(dp->d_errlog); 994 (void) pthread_rwlock_unlock(&dp->d_log_lock); 995 996 dp->d_mod_event = NULL; 997 fmd_event_rele(e); 998 999 /* 1000 * Now replay list.updated and list.repaired events 1001 */ 1002 fmd_case_repair_replay(); 1003 1004 /* 1005 * Finally, awaken any threads associated with receiving events from 1006 * open transports and tell them to proceed with fmd_xprt_recv(). 1007 */ 1008 fmd_xprt_resume_all(); 1009 fmd_gc(dp, 0, 0); 1010 fmd_clear_aged_rsrcs(dp, 0, 0); 1011 1012 (void) pthread_mutex_lock(&dp->d_fmd_lock); 1013 dp->d_booted = 1; 1014 (void) pthread_cond_broadcast(&dp->d_fmd_cv); 1015 (void) pthread_mutex_unlock(&dp->d_fmd_lock); 1016 } 1017 1018 void 1019 fmd_help(fmd_t *dp) 1020 { 1021 const fmd_conf_mode_t *cmp; 1022 1023 (void) printf("Usage: %s -o debug=mode[,mode]\n", dp->d_pname); 1024 1025 for (cmp = _fmd_debug_modes; cmp->cm_name != NULL; cmp++) 1026 (void) printf("\t%s\t%s\n", cmp->cm_name, cmp->cm_desc); 1027 } 1028