xref: /titanic_50/usr/src/cmd/rcm_daemon/common/pool_rcm.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <sys/types.h>
32 #include <unistd.h>
33 #include <errno.h>
34 #include <libintl.h>
35 #include <string.h>
36 #include <rcm_module.h>
37 #include <sys/pset.h>
38 
39 #include <pool.h>
40 
41 /*
42  * RCM module ops.
43  */
44 static int pool_register(rcm_handle_t *);
45 static int pool_unregister(rcm_handle_t *);
46 static int pool_get_info(rcm_handle_t *, char *, id_t, uint_t, char **,
47     char **, nvlist_t *, rcm_info_t **);
48 static int pool_request_suspend(rcm_handle_t *, char *, id_t,
49     timespec_t *, uint_t, char **, rcm_info_t **);
50 static int pool_notify_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
51     rcm_info_t **);
52 static int pool_notify_remove(rcm_handle_t *, char *, id_t, uint_t,
53     char **, rcm_info_t **);
54 static int pool_request_offline(rcm_handle_t *, char *, id_t, uint_t,
55     char **, rcm_info_t **);
56 static int pool_notify_online(rcm_handle_t *, char *, id_t, uint_t, char **,
57     rcm_info_t **);
58 static int pool_request_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
59     nvlist_t *, char **, rcm_info_t **);
60 static int pool_notify_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
61     nvlist_t *, char **, rcm_info_t **);
62 
63 /*
64  * Pool-specific callback functions.
65  */
66 static int pset_validate_remove(nvlist_t *, char **);
67 
68 static struct {
69 	const char *rsrc;
70 	int (*capacity_change_cb)(nvlist_t *, char **);
71 } registrations[] = {
72 	{ "SUNW_cpu", pset_validate_remove },
73 	{ NULL, NULL }
74 };
75 
76 static int registered = 0;
77 
78 static struct rcm_mod_ops pool_ops = {
79 	RCM_MOD_OPS_VERSION,
80 	pool_register,
81 	pool_unregister,
82 	pool_get_info,
83 	pool_request_suspend,
84 	pool_notify_resume,
85 	pool_request_offline,
86 	pool_notify_online,
87 	pool_notify_remove,
88 	pool_request_capacity_change,
89 	pool_notify_capacity_change,
90 	NULL
91 };
92 
93 struct rcm_mod_ops *
94 rcm_mod_init(void)
95 {
96 	rcm_log_message(RCM_TRACE1, "Pools RCM module created\n");
97 	return (&pool_ops);
98 }
99 
100 
101 int
102 rcm_mod_fini(void)
103 {
104 	rcm_log_message(RCM_TRACE1, "Pools RCM module unloaded\n");
105 	return (RCM_SUCCESS);
106 }
107 
/*
 * rcm_mod_info()
 *	Returns a constant, statically allocated description of this module.
 *	The caller must not modify or free it.
 */
const char *
rcm_mod_info(void)
{
	static const char description[] = "Pools RCM module %I%";

	return (description);
}
113 
114 static int
115 pool_check_pset(pool_conf_t *conf, pool_resource_t *res,
116     processorid_t *del_cpus, char **errorp)
117 {
118 	int64_t tmp;
119 	int i, j;
120 	uint_t num_cpus;
121 	uint64_t min_cpus;
122 	uint_t num_found = 0;
123 	processorid_t *cpulist;
124 	psetid_t psetid;
125 	pool_value_t *pval;
126 	pool_elem_t *elem = pool_resource_to_elem(conf, res);
127 
128 	if ((pval = pool_value_alloc()) == NULL)
129 		return (-1);
130 	if (pool_get_property(conf, elem, "pset.min", pval) != POC_UINT) {
131 		rcm_log_message(RCM_ERROR,
132 		    gettext("POOL: cannot find property 'pset.min' in pset\n"));
133 		pool_value_free(pval);
134 		return (-1);
135 	}
136 	(void) pool_value_get_uint64(pval, &min_cpus);
137 	if (pool_get_property(conf, elem, "pset.sys_id", pval) != POC_INT) {
138 		rcm_log_message(RCM_ERROR,
139 		    gettext("POOL: cannot get pset.sys_id\n"));
140 		pool_value_free(pval);
141 		return (-1);
142 	}
143 	(void) pool_value_get_int64(pval, &tmp);
144 	pool_value_free(pval);
145 	psetid = (psetid_t)tmp;
146 	rcm_log_message(RCM_TRACE1, "POOL: checking pset: %d\n", psetid);
147 
148 	rcm_log_message(RCM_TRACE1, "POOL: min_cpus is %llu\n", min_cpus);
149 	if (pset_info(psetid, NULL, &num_cpus, NULL) != 0) {
150 		rcm_log_message(RCM_ERROR,
151 		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
152 		    strerror(errno));
153 		return (-1);
154 	}
155 	if ((cpulist = malloc(num_cpus * sizeof (processorid_t))) == NULL) {
156 		rcm_log_message(RCM_ERROR,
157 		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
158 		return (-1);
159 	}
160 	if (pset_info(psetid, NULL, &num_cpus, cpulist) != 0) {
161 		free(cpulist);
162 		rcm_log_message(RCM_ERROR,
163 		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
164 		    strerror(errno));
165 		return (-1);
166 	}
167 	for (i = 0; del_cpus[i] != -1; i++)
168 		for (j = 0; j < num_cpus; j++)
169 			if (cpulist[j] == del_cpus[i])
170 				num_found++;
171 	free(cpulist);
172 	if (num_found > 0 && (num_cpus - num_found) < (uint_t)min_cpus) {
173 		int len;
174 		char *errval;
175 		const char *errfmt =
176 		    gettext("POOL: processor set (%1$d) would go "
177 		    "below its minimum value of %2$u\n");
178 
179 		/*
180 		 * We would go below the min value. Fail this request.
181 		 */
182 		len = strlen(errfmt) + 4 * 2; /* 4 digits for psetid and min */
183 		if ((errval = malloc((len + 1) * sizeof (char))) != NULL) {
184 			(void) snprintf(errval, len + 1, errfmt, psetid,
185 			    (uint_t)min_cpus);
186 			*errorp = errval;
187 		}
188 
189 		rcm_log_message(RCM_ERROR, (char *)errfmt, psetid,
190 		    (uint_t)min_cpus);
191 
192 		return (-1);
193 	}
194 	rcm_log_message(RCM_TRACE1, "POOL: pset %d is fine\n", psetid);
195 	return (0);
196 }
197 
198 /*
199  * pset_validate_remove()
200  * 	Check to see if the requested cpu removal would be acceptable.
201  * 	Returns RCM_FAILURE if not.
202  */
203 static int
204 pset_validate_remove(nvlist_t *nvl, char **errorp)
205 {
206 	int error = RCM_SUCCESS;
207 	int32_t old_total, new_total, removed_total;
208 	processorid_t *removed_list = NULL; /* list terminated by (-1). */
209 	processorid_t *old_cpu_list = NULL, *new_cpu_list = NULL;
210 	int i, j;
211 	pool_conf_t *conf;
212 	pool_value_t *pvals[] = { NULL, NULL };
213 	pool_resource_t **res = NULL;
214 	uint_t nelem;
215 	const char *generic_error = gettext("POOL: Error processing request\n");
216 
217 	if ((conf = pool_conf_alloc()) == NULL)
218 		return (RCM_FAILURE);
219 	if (pool_conf_open(conf, pool_dynamic_location(), PO_RDONLY) < 0) {
220 		rcm_log_message(RCM_TRACE1,
221 		    "POOL: failed to parse config file: '%s'\n",
222 		    pool_dynamic_location());
223 		pool_conf_free(conf);
224 		return (RCM_SUCCESS);
225 	}
226 
227 	if ((error = nvlist_lookup_int32(nvl, "old_total", &old_total)) != 0) {
228 		(void) pool_conf_close(conf);
229 		pool_conf_free(conf);
230 		rcm_log_message(RCM_ERROR,
231 		    gettext("POOL: unable to find 'old_total' in nvlist: %s\n"),
232 		    strerror(error));
233 		*errorp = strdup(generic_error);
234 		return (RCM_FAILURE);
235 	}
236 	if ((error = nvlist_lookup_int32(nvl, "new_total", &new_total)) != 0) {
237 		(void) pool_conf_close(conf);
238 		pool_conf_free(conf);
239 		rcm_log_message(RCM_ERROR,
240 		    gettext("POOL: unable to find 'new_total' in nvlist: %s\n"),
241 		    strerror(error));
242 		*errorp = strdup(generic_error);
243 		return (RCM_FAILURE);
244 	}
245 	if (new_total >= old_total) {
246 		(void) pool_conf_close(conf);
247 		pool_conf_free(conf);
248 		/*
249 		 * This doesn't look like a cpu removal.
250 		 */
251 		rcm_log_message(RCM_TRACE1,
252 		    gettext("POOL: 'old_total' (%d) is less than 'new_total' "
253 			    "(%d)\n"), old_total, new_total);
254 		return (RCM_SUCCESS);
255 	}
256 	if ((removed_list = malloc((old_total - new_total + 1) * sizeof (int)))
257 	    == NULL) {
258 		rcm_log_message(RCM_ERROR,
259 		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
260 
261 		error = RCM_FAILURE;
262 		goto out;
263 	}
264 	if ((error = nvlist_lookup_int32_array(nvl, "old_cpu_list",
265 		    &old_cpu_list, &nelem)) != 0) {
266 		rcm_log_message(RCM_ERROR,
267 		    gettext("POOL: 'old_cpu_list' not found in nvlist: %s\n"),
268 		    strerror(error));
269 		error = RCM_FAILURE;
270 		goto out;
271 	}
272 	if ((int32_t)nelem != old_total) {
273 		rcm_log_message(RCM_ERROR,
274 		    gettext("POOL: 'old_cpu_list' size mismatch: %1$d vs "
275 		    "%2$d\n"), nelem, old_total);
276 		error = RCM_FAILURE;
277 		goto out;
278 	}
279 	if ((error = nvlist_lookup_int32_array(nvl, "new_cpu_list",
280 		    &new_cpu_list, &nelem)) != 0) {
281 		rcm_log_message(RCM_ERROR,
282 		    gettext("POOL: 'new_cpu_list' not found in nvlist: %s\n"),
283 		    strerror(error));
284 		error = RCM_FAILURE;
285 		goto out;
286 	}
287 	if (nelem != new_total) {
288 		rcm_log_message(RCM_ERROR,
289 		    gettext("POOL: 'new_cpu_list' size mismatch: %1$d vs "
290 		    "%2$d\n"), nelem, new_total);
291 		error = RCM_FAILURE;
292 		goto out;
293 	}
294 
295 	for (i = 0, removed_total = 0; i < old_total; i++) {
296 		for (j = 0; j < new_total; j++)
297 			if (old_cpu_list[i] == new_cpu_list[j])
298 				break;
299 		if (j == new_total) /* not found in new_cpu_list */
300 			removed_list[removed_total++] = old_cpu_list[i];
301 	}
302 	removed_list[removed_total] = -1;
303 
304 	if (removed_total != (old_total - new_total)) {
305 		rcm_log_message(RCM_ERROR,
306 		    gettext("POOL: error finding removed cpu list\n"));
307 		error = RCM_FAILURE;
308 		goto out;
309 	}
310 	if ((pvals[0] = pool_value_alloc()) == NULL) {
311 		rcm_log_message(RCM_ERROR, gettext("POOL: pool_value_alloc"
312 		    " failed: %s\n"), strerror(errno));
313 		error = RCM_FAILURE;
314 		goto out;
315 	}
316 	/*
317 	 * Look for resources with "'type' = 'pset'"
318 	 */
319 	pool_value_set_name(pvals[0], "type");
320 	pool_value_set_string(pvals[0], "pset");
321 	if ((res = pool_query_resources(conf, &nelem, pvals)) == NULL) {
322 		rcm_log_message(RCM_ERROR,
323 		    gettext("POOL: No psets found in configuration\n"));
324 		pool_value_free(pvals[0]);
325 		error =	 RCM_FAILURE;
326 		goto out;
327 	}
328 	pool_value_free(pvals[0]);
329 	for (i = 0; res[i] != NULL; i++)
330 		/*
331 		 * Ask each pset if removing these cpus would cause it to go
332 		 * below it's minimum value.
333 		 */
334 		if (pool_check_pset(conf, res[i], removed_list, errorp) < 0) {
335 			error = RCM_FAILURE;
336 			break;
337 		}
338 	free(res);
339 out:
340 	if (removed_list)
341 		free(removed_list);
342 	if (conf) {
343 		(void) pool_conf_close(conf);
344 		pool_conf_free(conf);
345 	}
346 
347 	/*
348 	 * Set the error string if not already set.
349 	 */
350 	if (error != RCM_SUCCESS && *errorp == NULL)
351 		*errorp = strdup(generic_error);
352 	return (error);
353 }
354 
355 /*
356  * Returns RCM_SUCCESS in a number of error cases, since RCM_FAILURE would
357  * mean that the capacity change would be disallowed by this module,
358  * which is not what we mean.
359  */
360 static int
361 pool_request_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
362     uint_t flags, nvlist_t *nvlist, char **errorp, rcm_info_t **dependent_info)
363 {
364 	int i;
365 
366 	*errorp = NULL;
367 	rcm_log_message(RCM_TRACE1,
368 	    "POOL: requesting capacity change for: %s (flag: %d)\n",
369 	    rsrcname, flags);
370 	if (flags & RCM_FORCE) {
371 		rcm_log_message(RCM_TRACE1,
372 		    "POOL: Allowing forced operation to pass through...\n");
373 		return (RCM_SUCCESS);
374 	}
375 	for (i = 0; registrations[i].rsrc != NULL; i++) {
376 		if (strcmp(rsrcname, registrations[i].rsrc) == 0) {
377 			return ((*registrations[i].capacity_change_cb)(nvlist,
378 			    errorp));
379 		}
380 	}
381 
382 	return (RCM_SUCCESS);
383 }
384 
385 static int
386 pool_notify_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
387     uint_t flags, nvlist_t *nvlist, char **info, rcm_info_t **dependent_info)
388 {
389 	rcm_log_message(RCM_TRACE1,
390 	    "POOL: notifying capacity change for: %s (flags: %d)\n",
391 	    rsrcname, flags);
392 	return (RCM_SUCCESS);
393 }
394 
395 static int
396 pool_register(rcm_handle_t *hdl)
397 {
398 	int i;
399 
400 	rcm_log_message(RCM_TRACE1, "Registering Pools RCM module\n");
401 	if (registered)
402 		return (RCM_SUCCESS);
403 	registered++;
404 	for (i = 0; registrations[i].rsrc != NULL; i++) {
405 		if (rcm_register_capacity(hdl, (char *)registrations[i].rsrc,
406 				    0, NULL) != RCM_SUCCESS) {
407 				rcm_log_message(RCM_ERROR,
408 				    gettext("POOL: failed to register capacity "
409 				    "change for '%s'\n"),
410 				    registrations[i].rsrc);
411 			}
412 	}
413 	return (RCM_SUCCESS);
414 }
415 
416 static int
417 pool_unregister(rcm_handle_t *hdl)
418 {
419 	int i;
420 
421 	rcm_log_message(RCM_TRACE1, "Pools RCM un-registered\n");
422 	if (registered) {
423 		registered--;
424 		for (i = 0; registrations[i].rsrc != NULL; i++)
425 			if (rcm_unregister_capacity(hdl,
426 			    (char *)registrations[i].rsrc, 0) != RCM_SUCCESS) {
427 				rcm_log_message(RCM_ERROR,
428 				    gettext("POOL: unregister capacity failed "
429 				    "for '%s'\n"), registrations[i].rsrc);
430 			}
431 	}
432 	return (RCM_SUCCESS);
433 }
434 
435 static int
436 pool_get_info(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
437     char **infop, char **errorp, nvlist_t *props, rcm_info_t **dependent_info)
438 {
439 	rcm_log_message(RCM_TRACE1, "POOL: RCM get info: '%s'\n", rsrcname);
440 	if ((*infop = strdup(gettext("POOL: In use by pool(4) subsystem")))
441 	    == NULL) {
442 		rcm_log_message(RCM_ERROR, gettext("POOL: get info(%s) malloc "
443 		    "failure\n"), rsrcname);
444 		*infop = NULL;
445 		*errorp = NULL;
446 		return (RCM_FAILURE);
447 	}
448 	return (RCM_SUCCESS);
449 }
450 
451 
452 static int
453 pool_request_suspend(rcm_handle_t *hdl, char *rsrcname,
454     id_t id, timespec_t *time, uint_t flags, char **reason,
455     rcm_info_t **dependent_info)
456 {
457 	rcm_log_message(RCM_TRACE1,
458 	    "POOL: requesting suspend for: %s\n", rsrcname);
459 	return (RCM_SUCCESS);
460 }
461 
462 static int
463 pool_notify_resume(rcm_handle_t *hdl, char *rsrcname,
464     id_t pid, uint_t flags, char **reason, rcm_info_t **dependent_info)
465 {
466 	rcm_log_message(RCM_TRACE1,
467 	    "POOL: notifying resume of: %s\n", rsrcname);
468 	return (RCM_SUCCESS);
469 }
470 
471 static int
472 pool_request_offline(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
473     char **reason, rcm_info_t **dependent_info)
474 {
475 	rcm_log_message(RCM_TRACE1,
476 	    "POOL: requesting offline for: %s\n", rsrcname);
477 	return (RCM_SUCCESS);
478 }
479 
480 static int
481 pool_notify_online(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flags,
482     char **reason, rcm_info_t **dependent_info)
483 {
484 	rcm_log_message(RCM_TRACE1,
485 	    "POOL: notifying online for: %s\n", rsrcname);
486 	return (RCM_SUCCESS);
487 }
488 static int
489 pool_notify_remove(rcm_handle_t *hdl, char *rsrcname, id_t pid,
490     uint_t flag, char **reason, rcm_info_t **dependent_info)
491 {
492 	rcm_log_message(RCM_TRACE1,
493 	    "POOL: notifying removal of: %s\n", rsrcname);
494 	return (RCM_SUCCESS);
495 }
496