xref: /illumos-gate/usr/src/lib/fm/topo/modules/common/ipmi/ipmi_enum.c (revision 856f710c9dc323b39da5935194d7928ffb99b67f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2017, Joyent, Inc.
25  */
26 
27 #include <assert.h>
28 #include <fm/libtopo.h>
29 #include <fm/topo_mod.h>
30 #include <sys/fm/protocol.h>
31 #include <string.h>
32 
/*
 * Names used by this module: the property group it creates on nodes, the
 * properties it reads from that group when determining presence, and the
 * facility provider module it loads to enumerate sensors.
 */
#define	TOPO_PGROUP_IPMI		"ipmi"
#define	TOPO_PROP_IPMI_ENTITY_REF	"entity_ref"
#define	TOPO_PROP_IPMI_ENTITY_PRESENT	"entity_present"
#define	FAC_PROV_IPMI			"fac_prov_ipmi"
37 
38 typedef struct ipmi_enum_data {
39 	topo_mod_t		*ed_mod;
40 	tnode_t			*ed_pnode;
41 	const char		*ed_name;
42 	char			*ed_label;
43 	uint8_t			ed_entity;
44 	topo_instance_t		ed_instance;
45 	ipmi_sdr_fru_locator_t	*ed_frusdr;
46 } ipmi_enum_data_t;
47 
48 static int ipmi_present(topo_mod_t *, tnode_t *, topo_version_t, nvlist_t *,
49     nvlist_t **);
50 static int ipmi_enum(topo_mod_t *, tnode_t *, const char *,
51     topo_instance_t, topo_instance_t, void *, void *);
52 static int ipmi_post_process(topo_mod_t *, tnode_t *);
53 
54 extern int ipmi_fru_label(topo_mod_t *mod, tnode_t *node,
55     topo_version_t vers, nvlist_t *in, nvlist_t **out);
56 
57 extern int ipmi_fru_fmri(topo_mod_t *mod, tnode_t *node,
58     topo_version_t vers, nvlist_t *in, nvlist_t **out);
59 
60 static const topo_method_t ipmi_methods[] = {
61 	{ TOPO_METH_PRESENT, TOPO_METH_PRESENT_DESC,
62 	    TOPO_METH_PRESENT_VERSION0, TOPO_STABILITY_INTERNAL, ipmi_present },
63 	{ "ipmi_fru_label", "Property method", 0,
64 	    TOPO_STABILITY_INTERNAL, ipmi_fru_label},
65 	{ "ipmi_fru_fmri", "Property method", 0,
66 	    TOPO_STABILITY_INTERNAL, ipmi_fru_fmri},
67 	{ TOPO_METH_SENSOR_FAILURE, TOPO_METH_SENSOR_FAILURE_DESC,
68 	    TOPO_METH_SENSOR_FAILURE_VERSION, TOPO_STABILITY_INTERNAL,
69 	    topo_method_sensor_failure },
70 	{ NULL }
71 };
72 
73 const topo_modops_t ipmi_ops = { ipmi_enum, NULL };
74 
75 const topo_modinfo_t ipmi_info =
76 	{ "ipmi", FM_FMRI_SCHEME_HC, TOPO_VERSION, &ipmi_ops };
77 
78 /*
79  * Determine if the entity is present.
80  */
81 /*ARGSUSED*/
82 static int
83 ipmi_present(topo_mod_t *mod, tnode_t *tn, topo_version_t version,
84     nvlist_t *in, nvlist_t **out)
85 {
86 	ipmi_handle_t *ihp;
87 	ipmi_entity_t *ep;
88 	boolean_t present;
89 	nvlist_t *nvl;
90 	int err, i;
91 	char *name, **names;
92 	ipmi_sdr_t *sdrp;
93 	uint_t nelems;
94 
95 	if ((ihp = topo_mod_ipmi_hold(mod)) == NULL)
96 		return (topo_mod_seterrno(mod, ETOPO_METHOD_UNKNOWN));
97 
98 	ep = topo_node_getspecific(tn);
99 	if (ep == NULL) {
100 		if (topo_prop_get_string(tn, TOPO_PGROUP_IPMI,
101 		    TOPO_PROP_IPMI_ENTITY_PRESENT, &name, &err) == 0) {
102 			/*
103 			 * Some broken IPMI implementations don't export correct
104 			 * entities, so referring to an entity isn't sufficient.
105 			 * For these platforms, we allow the XML to specify a
106 			 * single SDR record that represents the current present
107 			 * state.
108 			 */
109 			if ((sdrp = ipmi_sdr_lookup(ihp, name)) == NULL ||
110 			    ipmi_entity_present_sdr(ihp, sdrp, &present) != 0) {
111 				topo_mod_dprintf(mod,
112 				    "Failed to get present state of %s (%s)\n",
113 				    name, ipmi_errmsg(ihp));
114 				topo_mod_strfree(mod, name);
115 				topo_mod_ipmi_rele(mod);
116 				return (-1);
117 			}
118 
119 			topo_mod_dprintf(mod,
120 			    "ipmi_entity_present_sdr(%s) = %d\n", name,
121 			    present);
122 			topo_mod_strfree(mod, name);
123 		} else {
124 			if (topo_prop_get_string_array(tn, TOPO_PGROUP_IPMI,
125 			    TOPO_PROP_IPMI_ENTITY_REF, &names, &nelems, &err)
126 			    != 0) {
127 				/*
128 				 * Not all nodes have an entity_ref attribute.
129 				 * For these cases, return ENOTSUP so that we
130 				 * fall back to the default hc presence
131 				 * detection.
132 				 */
133 				topo_mod_ipmi_rele(mod);
134 				return (topo_mod_seterrno(mod,
135 				    ETOPO_METHOD_NOTSUP));
136 			}
137 
138 			for (i = 0; i < nelems; i++)
139 				if ((ep = ipmi_entity_lookup_sdr(ihp, names[i]))
140 				    != NULL)
141 					break;
142 
143 			for (i = 0; i < nelems; i++)
144 				topo_mod_strfree(mod, names[i]);
145 			topo_mod_free(mod, names, (nelems * sizeof (char *)));
146 
147 			if (ep == NULL) {
148 				topo_mod_dprintf(mod,
149 				    "Failed to get present state of %s=%d\n",
150 				    topo_node_name(tn), topo_node_instance(tn));
151 				topo_mod_ipmi_rele(mod);
152 				return (-1);
153 			}
154 			topo_node_setspecific(tn, ep);
155 		}
156 	}
157 
158 	if (ep != NULL) {
159 		if (ipmi_entity_present(ihp, ep, &present) != 0) {
160 			topo_mod_dprintf(mod,
161 			    "ipmi_entity_present() failed: %s",
162 			    ipmi_errmsg(ihp));
163 			topo_mod_ipmi_rele(mod);
164 			return (-1);
165 		}
166 
167 		topo_mod_dprintf(mod,
168 		    "ipmi_entity_present(%d, %d) = %d\n", ep->ie_type,
169 		    ep->ie_instance, present);
170 	}
171 
172 	topo_mod_ipmi_rele(mod);
173 
174 	if (topo_mod_nvalloc(mod, &nvl, NV_UNIQUE_NAME) != 0)
175 		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
176 
177 	if (nvlist_add_uint32(nvl, TOPO_METH_PRESENT_RET, present) != 0) {
178 		nvlist_free(nvl);
179 		return (topo_mod_seterrno(mod, EMOD_FMRI_NVL));
180 	}
181 
182 	*out = nvl;
183 
184 	return (0);
185 }
186 
187 /*
188  * This determines if the entity has a FRU locator record set, in which case we
189  * treat this as a FRU, even if it's part of an association.
190  */
191 /*ARGSUSED*/
192 static int
193 ipmi_check_sdr(ipmi_handle_t *ihp, ipmi_entity_t *ep, const char *name,
194     ipmi_sdr_t *sdrp, void *data)
195 {
196 	ipmi_enum_data_t *edp = data;
197 
198 	if (sdrp->is_type == IPMI_SDR_TYPE_FRU_LOCATOR)
199 		edp->ed_frusdr = (ipmi_sdr_fru_locator_t *)sdrp->is_record;
200 
201 	return (0);
202 }
203 
204 /*
205  * Main entity enumerator.  If we find a matching entity type, then instantiate
206  * a topo node.
207  */
208 static int
209 ipmi_check_entity(ipmi_handle_t *ihp, ipmi_entity_t *ep, void *data)
210 {
211 	ipmi_enum_data_t *edp = data;
212 	ipmi_enum_data_t cdata;
213 	tnode_t *pnode = edp->ed_pnode;
214 	topo_mod_t *mod = edp->ed_mod;
215 	topo_mod_t *fmod = topo_mod_getspecific(mod);
216 	nvlist_t *auth, *fmri;
217 	tnode_t *tn;
218 	topo_pgroup_info_t pgi;
219 	char *frudata = NULL, *part = NULL, *rev = NULL, *serial = NULL;
220 	ipmi_fru_prod_info_t fruprod = {0};
221 	ipmi_fru_brd_info_t frubrd = {0};
222 	int err;
223 	const char *labelname;
224 	char label[64];
225 	size_t len;
226 
227 	/*
228 	 * Some questionable IPMI implementations group psu and fan entities
229 	 * under things like motherboard or chassis entities.  So even if this
230 	 * entity type isn't typically associated with fans and psus, if it has
231 	 * children, then regardless of the type we need to decend down and
232 	 * iterate over them.
233 	 */
234 	if (ep->ie_type != edp->ed_entity) {
235 		if (ep->ie_children != 0 &&
236 		    ipmi_entity_iter_children(ihp, ep, ipmi_check_entity,
237 		    data) != 0)
238 			return (1);
239 		return (0);
240 	}
241 
242 	/*
243 	 * The purpose of power and cooling domains is to group psus and fans
244 	 * together.  Unfortunately, some broken IPMI implementations declare
245 	 * domains that don't contain other elements.  Since the end goal is to
246 	 * only enumerate psus and fans, we'll just ignore such elements.
247 	 */
248 	if ((ep->ie_type == IPMI_ET_POWER_DOMAIN ||
249 	    ep->ie_type == IPMI_ET_COOLING_DOMAIN) &&
250 	    ep->ie_children == 0)
251 		return (0);
252 
253 	if ((auth = topo_mod_auth(mod, pnode)) == NULL) {
254 		topo_mod_dprintf(mod, "topo_mod_auth() failed: %s",
255 		    topo_mod_errmsg(mod));
256 		return (1);
257 	}
258 
259 	/*
260 	 * Determine if there's a FRU record associated with this entity.  If
261 	 * so, then read in the FRU identity info so that it can be included
262 	 * in the authority portion of the FMRI.
263 	 *
264 	 * topo_mod_hcfmri() will safely except NULL values for the part,
265 	 * rev and serial params, so we opt to simply drive on in the face of
266 	 * any strdup failures.
267 	 */
268 	edp->ed_frusdr = NULL;
269 	(void) ipmi_entity_iter_sdr(ihp, ep, ipmi_check_sdr, edp);
270 	if (edp->ed_frusdr != NULL &&
271 	    ipmi_fru_read(ihp, edp->ed_frusdr, &frudata) != -1) {
272 		if (ipmi_fru_parse_product(ihp, frudata, &fruprod) == 0) {
273 			part = strdup(fruprod.ifpi_part_number);
274 			rev = strdup(fruprod.ifpi_product_version);
275 			serial = strdup(fruprod.ifpi_product_serial);
276 		} else if (ipmi_fru_parse_board(ihp, frudata, &frubrd) == 0) {
277 			part = strdup(frubrd.ifbi_part_number);
278 			serial = strdup(frubrd.ifbi_product_serial);
279 		}
280 	}
281 	free(frudata);
282 
283 	if ((fmri = topo_mod_hcfmri(mod, pnode, FM_HC_SCHEME_VERSION,
284 	    edp->ed_name, edp->ed_instance, NULL, auth, part, rev,
285 	    serial)) == NULL) {
286 		nvlist_free(auth);
287 		free(part);
288 		free(rev);
289 		free(serial);
290 		topo_mod_dprintf(mod, "topo_mod_hcfmri() failed: %s",
291 		    topo_mod_errmsg(mod));
292 		return (1);
293 	}
294 	nvlist_free(auth);
295 	free(part);
296 	free(rev);
297 	free(serial);
298 
299 	if ((tn = topo_node_bind(mod, pnode, edp->ed_name,
300 	    edp->ed_instance, fmri)) == NULL) {
301 		nvlist_free(fmri);
302 		topo_mod_dprintf(mod, "topo_node_bind() failed: %s",
303 		    topo_mod_errmsg(mod));
304 		return (1);
305 	}
306 
307 	/*
308 	 * We inherit our label from our parent, appending our label in the
309 	 * process.  This results in defaults labels of the form "FM 1 FAN 0"
310 	 * by default when given a hierarchy.
311 	 */
312 	if (edp->ed_label != NULL)
313 		(void) snprintf(label, sizeof (label), "%s ", edp->ed_label);
314 	else
315 		label[0] = '\0';
316 
317 	switch (edp->ed_entity) {
318 	case IPMI_ET_POWER_DOMAIN:
319 		labelname = "PM";
320 		break;
321 
322 	case IPMI_ET_PSU:
323 		labelname = "PSU";
324 		break;
325 
326 	case IPMI_ET_COOLING_DOMAIN:
327 		labelname = "FM";
328 		break;
329 
330 	case IPMI_ET_FAN:
331 		labelname = "FAN";
332 		break;
333 	}
334 
335 	len = strlen(label);
336 	(void) snprintf(label + len, sizeof (label) - len, "%s %d",
337 	    labelname, edp->ed_instance);
338 
339 	nvlist_free(fmri);
340 	edp->ed_instance++;
341 
342 	if (topo_node_label_set(tn, label, &err) != 0) {
343 		topo_mod_dprintf(mod, "failed to set label: %s\n",
344 		    topo_strerror(err));
345 		return (1);
346 	}
347 
348 	/*
349 	 * Store IPMI entity details as properties on the node
350 	 */
351 	pgi.tpi_name = TOPO_PGROUP_IPMI;
352 	pgi.tpi_namestab = TOPO_STABILITY_PRIVATE;
353 	pgi.tpi_datastab = TOPO_STABILITY_PRIVATE;
354 	pgi.tpi_version = TOPO_VERSION;
355 	if (topo_pgroup_create(tn, &pgi, &err) != 0) {
356 		if (err != ETOPO_PROP_DEFD) {
357 			topo_mod_dprintf(mod, "failed to create propgroup "
358 			    "%s: %s\n", TOPO_PGROUP_IPMI, topo_strerror(err));
359 			return (1);
360 		}
361 	}
362 
363 	/*
364 	 * Add properties to contain the IPMI entity id and instance.  This
365 	 * will be used by the fac_prov_ipmi module to discover and enumerate
366 	 * facility nodes for any associated sensors.
367 	 */
368 	if (topo_prop_set_uint32(tn, TOPO_PGROUP_IPMI, TOPO_PROP_IPMI_ENTITY_ID,
369 	    TOPO_PROP_IMMUTABLE, ep->ie_type, &err) != 0 ||
370 	    topo_prop_set_uint32(tn, TOPO_PGROUP_IPMI,
371 	    TOPO_PROP_IPMI_ENTITY_INST, TOPO_PROP_IMMUTABLE, ep->ie_instance,
372 	    &err) != 0) {
373 		topo_mod_dprintf(mod, "failed to add ipmi properties (%s)",
374 		    topo_strerror(err));
375 		return (1);
376 	}
377 	if (topo_method_register(mod, tn, ipmi_methods) != 0) {
378 		topo_mod_dprintf(mod, "topo_method_register() failed: %s",
379 		    topo_mod_errmsg(mod));
380 		return (1);
381 	}
382 
383 	/*
384 	 * Invoke the tmo_enum callback from the fac_prov_ipmi module on this
385 	 * node.  This will have the effect of registering a method on this node
386 	 * for enumerating sensors.
387 	 */
388 	if (fmod == NULL && (fmod = topo_mod_load(mod, FAC_PROV_IPMI,
389 	    TOPO_VERSION)) == NULL) {
390 		topo_mod_dprintf(mod, "failed to load %s: %s",
391 		    FAC_PROV_IPMI, topo_mod_errmsg(mod));
392 		return (-1);
393 	}
394 	topo_mod_setspecific(mod, fmod);
395 
396 	if (topo_mod_enumerate(fmod, tn, FAC_PROV_IPMI, FAC_PROV_IPMI, 0, 0,
397 	    NULL) != 0) {
398 		topo_mod_dprintf(mod, "facility provider enum failed (%s)",
399 		    topo_mod_errmsg(mod));
400 		return (1);
401 	}
402 
403 	/*
404 	 * If we are a child of a non-chassis node, and there isn't an explicit
405 	 * FRU locator record, then propagate the parent's FRU.  Otherwise, set
406 	 * the FRU to be the same as the resource.
407 	 */
408 	if (strcmp(topo_node_name(pnode), CHASSIS) == 0 ||
409 	    edp->ed_frusdr != NULL) {
410 		if (topo_node_resource(tn, &fmri, &err) != 0) {
411 			topo_mod_dprintf(mod, "topo_node_resource() failed: %s",
412 			    topo_strerror(err));
413 			(void) topo_mod_seterrno(mod, err);
414 			return (1);
415 		}
416 	} else {
417 		if (topo_node_fru(pnode, &fmri, NULL, &err) != 0) {
418 			topo_mod_dprintf(mod, "topo_node_fru() failed: %s",
419 			    topo_strerror(err));
420 			(void) topo_mod_seterrno(mod, err);
421 			return (1);
422 		}
423 	}
424 
425 	if (topo_node_fru_set(tn, fmri, 0, &err) != 0) {
426 		nvlist_free(fmri);
427 		topo_mod_dprintf(mod, "topo_node_fru_set() failed: %s",
428 		    topo_strerror(err));
429 		(void) topo_mod_seterrno(mod, err);
430 		return (1);
431 	}
432 
433 	topo_node_setspecific(tn, ep);
434 
435 	nvlist_free(fmri);
436 
437 	/*
438 	 * Iterate over children, once for recursive domains and once for
439 	 * psu/fans.
440 	 */
441 	if (ep->ie_children != 0 &&
442 	    (ep->ie_type == IPMI_ET_POWER_DOMAIN ||
443 	    ep->ie_type == IPMI_ET_COOLING_DOMAIN)) {
444 		cdata.ed_mod = edp->ed_mod;
445 		cdata.ed_pnode = tn;
446 		cdata.ed_instance = 0;
447 		cdata.ed_name = edp->ed_name;
448 		cdata.ed_entity = edp->ed_entity;
449 		cdata.ed_label = label;
450 
451 		if (ipmi_entity_iter_children(ihp, ep,
452 		    ipmi_check_entity, &cdata) != 0)
453 			return (1);
454 
455 		switch (cdata.ed_entity) {
456 		case IPMI_ET_POWER_DOMAIN:
457 			cdata.ed_entity = IPMI_ET_PSU;
458 			cdata.ed_name = PSU;
459 			break;
460 
461 		case IPMI_ET_COOLING_DOMAIN:
462 			cdata.ed_entity = IPMI_ET_FAN;
463 			cdata.ed_name = FAN;
464 			break;
465 		}
466 
467 		if (ipmi_entity_iter_children(ihp, ep,
468 		    ipmi_check_entity, &cdata) != 0)
469 			return (1);
470 	}
471 
472 	return (0);
473 }
474 
475 /*
476  * libtopo enumeration point.  This simply iterates over entities looking for
477  * the appropriate type.
478  */
479 /*ARGSUSED*/
480 static int
481 ipmi_enum(topo_mod_t *mod, tnode_t *rnode, const char *name,
482     topo_instance_t min, topo_instance_t max, void *arg, void *unused)
483 {
484 	ipmi_handle_t *ihp;
485 	ipmi_enum_data_t data;
486 	int ret;
487 
488 	/*
489 	 * If the node being passed in ISN'T the chassis node, then we're being
490 	 * asked to post-process a statically defined node.
491 	 */
492 	if (strcmp(topo_node_name(rnode), CHASSIS) != 0) {
493 		if (ipmi_post_process(mod, rnode) != 0) {
494 			topo_mod_dprintf(mod, "post processing of node %s=%d "
495 			    "failed!", topo_node_name(rnode),
496 			    topo_node_instance(rnode));
497 			return (-1);
498 		}
499 		return (0);
500 	}
501 
502 	if (strcmp(name, POWERMODULE) == 0) {
503 		data.ed_entity = IPMI_ET_POWER_DOMAIN;
504 	} else if (strcmp(name, PSU) == 0) {
505 		data.ed_entity = IPMI_ET_PSU;
506 	} else if (strcmp(name, FANMODULE) == 0) {
507 		data.ed_entity = IPMI_ET_COOLING_DOMAIN;
508 	} else if (strcmp(name, FAN) == 0) {
509 		data.ed_entity = IPMI_ET_FAN;
510 	} else {
511 		topo_mod_dprintf(mod, "unknown enumeration type '%s'",
512 		    name);
513 		return (-1);
514 	}
515 
516 	if ((ihp = topo_mod_ipmi_hold(mod)) == NULL)
517 		return (0);
518 
519 	data.ed_mod = mod;
520 	data.ed_pnode = rnode;
521 	data.ed_name = name;
522 	data.ed_instance = 0;
523 	data.ed_label = NULL;
524 
525 	if ((ret = ipmi_entity_iter(ihp, ipmi_check_entity, &data)) != 0) {
526 		/*
527 		 * We don't return failure if IPMI enumeration fails.  This may
528 		 * be due to the SP being unavailable or an otherwise transient
529 		 * event.
530 		 */
531 		if (ret < 0) {
532 			topo_mod_dprintf(mod,
533 			    "failed to enumerate entities: %s",
534 			    ipmi_errmsg(ihp));
535 		} else {
536 			topo_mod_ipmi_rele(mod);
537 			return (-1);
538 		}
539 	}
540 
541 	topo_mod_ipmi_rele(mod);
542 	return (0);
543 }
544 
545 static int
546 ipmi_post_process(topo_mod_t *mod, tnode_t *tn)
547 {
548 	if (topo_method_register(mod, tn, ipmi_methods) != 0) {
549 		topo_mod_dprintf(mod, "ipmi_post_process() failed: %s",
550 		    topo_mod_errmsg(mod));
551 		return (1);
552 	}
553 	return (0);
554 }
555 
556 /*ARGSUSED*/
557 int
558 _topo_init(topo_mod_t *mod, topo_version_t version)
559 {
560 	if (getenv("TOPOIPMIDEBUG") != NULL)
561 		topo_mod_setdebug(mod);
562 
563 	if (topo_mod_register(mod, &ipmi_info, TOPO_VERSION) != 0) {
564 		topo_mod_dprintf(mod, "module registration failed: %s\n",
565 		    topo_mod_errmsg(mod));
566 		return (-1); /* mod errno already set */
567 	}
568 
569 	topo_mod_dprintf(mod, "IPMI enumerator initialized\n");
570 	return (0);
571 }
572 
573 void
574 _topo_fini(topo_mod_t *mod)
575 {
576 	/*
577 	 * This is the logical, and probably only safe spot where we could
578 	 * unload fac_prov_ipmi.  But unfortunately, calling topo_mod_unload()
579 	 * in the context of a module's _topo_fini entry point would result
580 	 * in recursively grabbing the modhash lock and we'd deadlock.
581 	 *
582 	 * Unfortunately, libtopo doesn't currently have a mechanism for
583 	 * expressing and handling intermodule dependencies, so we're left
584 	 * with this situation where once a module loads another module,
585 	 * it's going to be with us until we teardown the process.
586 	 */
587 	topo_mod_unregister(mod);
588 }
589