xref: /illumos-gate/usr/src/cmd/rcm_daemon/common/pool_rcm.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32 #include <errno.h>
33 #include <libintl.h>
34 #include <string.h>
35 #include <rcm_module.h>
36 #include <sys/pset.h>
37 
38 #include <pool.h>
39 
40 /*
41  * RCM module ops.
42  */
43 static int pool_register(rcm_handle_t *);
44 static int pool_unregister(rcm_handle_t *);
45 static int pool_get_info(rcm_handle_t *, char *, id_t, uint_t, char **,
46     char **, nvlist_t *, rcm_info_t **);
47 static int pool_request_suspend(rcm_handle_t *, char *, id_t,
48     timespec_t *, uint_t, char **, rcm_info_t **);
49 static int pool_notify_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
50     rcm_info_t **);
51 static int pool_notify_remove(rcm_handle_t *, char *, id_t, uint_t,
52     char **, rcm_info_t **);
53 static int pool_request_offline(rcm_handle_t *, char *, id_t, uint_t,
54     char **, rcm_info_t **);
55 static int pool_notify_online(rcm_handle_t *, char *, id_t, uint_t, char **,
56     rcm_info_t **);
57 static int pool_request_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
58     nvlist_t *, char **, rcm_info_t **);
59 static int pool_notify_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
60     nvlist_t *, char **, rcm_info_t **);
61 
62 /*
63  * Pool-specific callback functions.
64  */
65 static int pset_validate_remove(nvlist_t *, char **);
66 
67 static struct {
68 	const char *rsrc;
69 	int (*capacity_change_cb)(nvlist_t *, char **);
70 } registrations[] = {
71 	{ "SUNW_cpu", pset_validate_remove },
72 	{ NULL, NULL }
73 };
74 
75 static int registered = 0;
76 
77 static struct rcm_mod_ops pool_ops = {
78 	RCM_MOD_OPS_VERSION,
79 	pool_register,
80 	pool_unregister,
81 	pool_get_info,
82 	pool_request_suspend,
83 	pool_notify_resume,
84 	pool_request_offline,
85 	pool_notify_online,
86 	pool_notify_remove,
87 	pool_request_capacity_change,
88 	pool_notify_capacity_change,
89 	NULL
90 };
91 
92 struct rcm_mod_ops *
93 rcm_mod_init(void)
94 {
95 	rcm_log_message(RCM_TRACE1, "Pools RCM module created\n");
96 	return (&pool_ops);
97 }
98 
99 
100 int
101 rcm_mod_fini(void)
102 {
103 	rcm_log_message(RCM_TRACE1, "Pools RCM module unloaded\n");
104 	return (RCM_SUCCESS);
105 }
106 
/*
 * rcm_mod_info()
 *	Returns a static, human-readable name and version string for this
 *	module.  The caller must not modify or free it.
 */
const char *
rcm_mod_info(void)
{
	static const char module_info[] = "Pools RCM module 1.4";

	return (module_info);
}
112 
113 static int
114 pool_check_pset(pool_conf_t *conf, pool_resource_t *res,
115     processorid_t *del_cpus, char **errorp)
116 {
117 	int64_t tmp;
118 	int i, j;
119 	uint_t num_cpus;
120 	uint64_t min_cpus;
121 	uint_t num_found = 0;
122 	processorid_t *cpulist;
123 	psetid_t psetid;
124 	pool_value_t *pval;
125 	pool_elem_t *elem = pool_resource_to_elem(conf, res);
126 
127 	if ((pval = pool_value_alloc()) == NULL)
128 		return (-1);
129 	if (pool_get_property(conf, elem, "pset.min", pval) != POC_UINT) {
130 		rcm_log_message(RCM_ERROR,
131 		    gettext("POOL: cannot find property 'pset.min' in pset\n"));
132 		pool_value_free(pval);
133 		return (-1);
134 	}
135 	(void) pool_value_get_uint64(pval, &min_cpus);
136 	if (pool_get_property(conf, elem, "pset.sys_id", pval) != POC_INT) {
137 		rcm_log_message(RCM_ERROR,
138 		    gettext("POOL: cannot get pset.sys_id\n"));
139 		pool_value_free(pval);
140 		return (-1);
141 	}
142 	(void) pool_value_get_int64(pval, &tmp);
143 	pool_value_free(pval);
144 	psetid = (psetid_t)tmp;
145 	rcm_log_message(RCM_TRACE1, "POOL: checking pset: %d\n", psetid);
146 
147 	rcm_log_message(RCM_TRACE1, "POOL: min_cpus is %llu\n", min_cpus);
148 	if (pset_info(psetid, NULL, &num_cpus, NULL) != 0) {
149 		rcm_log_message(RCM_ERROR,
150 		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
151 		    strerror(errno));
152 		return (-1);
153 	}
154 	if ((cpulist = malloc(num_cpus * sizeof (processorid_t))) == NULL) {
155 		rcm_log_message(RCM_ERROR,
156 		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
157 		return (-1);
158 	}
159 	if (pset_info(psetid, NULL, &num_cpus, cpulist) != 0) {
160 		free(cpulist);
161 		rcm_log_message(RCM_ERROR,
162 		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
163 		    strerror(errno));
164 		return (-1);
165 	}
166 	for (i = 0; del_cpus[i] != -1; i++)
167 		for (j = 0; j < num_cpus; j++)
168 			if (cpulist[j] == del_cpus[i])
169 				num_found++;
170 	free(cpulist);
171 	if (num_found > 0 && (num_cpus - num_found) < (uint_t)min_cpus) {
172 		int len;
173 		char *errval;
174 		const char *errfmt =
175 		    gettext("POOL: processor set (%1$d) would go "
176 		    "below its minimum value of %2$u\n");
177 
178 		/*
179 		 * We would go below the min value. Fail this request.
180 		 */
181 		len = strlen(errfmt) + 4 * 2; /* 4 digits for psetid and min */
182 		if ((errval = malloc((len + 1) * sizeof (char))) != NULL) {
183 			(void) snprintf(errval, len + 1, errfmt, psetid,
184 			    (uint_t)min_cpus);
185 			*errorp = errval;
186 		}
187 
188 		rcm_log_message(RCM_ERROR, (char *)errfmt, psetid,
189 		    (uint_t)min_cpus);
190 
191 		return (-1);
192 	}
193 	rcm_log_message(RCM_TRACE1, "POOL: pset %d is fine\n", psetid);
194 	return (0);
195 }
196 
197 /*
198  * pset_validate_remove()
199  * 	Check to see if the requested cpu removal would be acceptable.
200  * 	Returns RCM_FAILURE if not.
201  */
202 static int
203 pset_validate_remove(nvlist_t *nvl, char **errorp)
204 {
205 	int error = RCM_SUCCESS;
206 	int32_t old_total, new_total, removed_total;
207 	processorid_t *removed_list = NULL; /* list terminated by (-1). */
208 	processorid_t *old_cpu_list = NULL, *new_cpu_list = NULL;
209 	int i, j;
210 	pool_conf_t *conf;
211 	pool_value_t *pvals[] = { NULL, NULL };
212 	pool_resource_t **res = NULL;
213 	uint_t nelem;
214 	const char *generic_error = gettext("POOL: Error processing request\n");
215 
216 	if ((conf = pool_conf_alloc()) == NULL)
217 		return (RCM_FAILURE);
218 	if (pool_conf_open(conf, pool_dynamic_location(), PO_RDONLY) < 0) {
219 		rcm_log_message(RCM_TRACE1,
220 		    "POOL: failed to parse config file: '%s'\n",
221 		    pool_dynamic_location());
222 		pool_conf_free(conf);
223 		return (RCM_SUCCESS);
224 	}
225 
226 	if ((error = nvlist_lookup_int32(nvl, "old_total", &old_total)) != 0) {
227 		(void) pool_conf_close(conf);
228 		pool_conf_free(conf);
229 		rcm_log_message(RCM_ERROR,
230 		    gettext("POOL: unable to find 'old_total' in nvlist: %s\n"),
231 		    strerror(error));
232 		*errorp = strdup(generic_error);
233 		return (RCM_FAILURE);
234 	}
235 	if ((error = nvlist_lookup_int32(nvl, "new_total", &new_total)) != 0) {
236 		(void) pool_conf_close(conf);
237 		pool_conf_free(conf);
238 		rcm_log_message(RCM_ERROR,
239 		    gettext("POOL: unable to find 'new_total' in nvlist: %s\n"),
240 		    strerror(error));
241 		*errorp = strdup(generic_error);
242 		return (RCM_FAILURE);
243 	}
244 	if (new_total >= old_total) {
245 		(void) pool_conf_close(conf);
246 		pool_conf_free(conf);
247 		/*
248 		 * This doesn't look like a cpu removal.
249 		 */
250 		rcm_log_message(RCM_TRACE1,
251 		    gettext("POOL: 'old_total' (%d) is less than 'new_total' "
252 			    "(%d)\n"), old_total, new_total);
253 		return (RCM_SUCCESS);
254 	}
255 	if ((removed_list = malloc((old_total - new_total + 1) * sizeof (int)))
256 	    == NULL) {
257 		rcm_log_message(RCM_ERROR,
258 		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
259 
260 		error = RCM_FAILURE;
261 		goto out;
262 	}
263 	if ((error = nvlist_lookup_int32_array(nvl, "old_cpu_list",
264 		    &old_cpu_list, &nelem)) != 0) {
265 		rcm_log_message(RCM_ERROR,
266 		    gettext("POOL: 'old_cpu_list' not found in nvlist: %s\n"),
267 		    strerror(error));
268 		error = RCM_FAILURE;
269 		goto out;
270 	}
271 	if ((int32_t)nelem != old_total) {
272 		rcm_log_message(RCM_ERROR,
273 		    gettext("POOL: 'old_cpu_list' size mismatch: %1$d vs "
274 		    "%2$d\n"), nelem, old_total);
275 		error = RCM_FAILURE;
276 		goto out;
277 	}
278 	if ((error = nvlist_lookup_int32_array(nvl, "new_cpu_list",
279 		    &new_cpu_list, &nelem)) != 0) {
280 		rcm_log_message(RCM_ERROR,
281 		    gettext("POOL: 'new_cpu_list' not found in nvlist: %s\n"),
282 		    strerror(error));
283 		error = RCM_FAILURE;
284 		goto out;
285 	}
286 	if (nelem != new_total) {
287 		rcm_log_message(RCM_ERROR,
288 		    gettext("POOL: 'new_cpu_list' size mismatch: %1$d vs "
289 		    "%2$d\n"), nelem, new_total);
290 		error = RCM_FAILURE;
291 		goto out;
292 	}
293 
294 	for (i = 0, removed_total = 0; i < old_total; i++) {
295 		for (j = 0; j < new_total; j++)
296 			if (old_cpu_list[i] == new_cpu_list[j])
297 				break;
298 		if (j == new_total) /* not found in new_cpu_list */
299 			removed_list[removed_total++] = old_cpu_list[i];
300 	}
301 	removed_list[removed_total] = -1;
302 
303 	if (removed_total != (old_total - new_total)) {
304 		rcm_log_message(RCM_ERROR,
305 		    gettext("POOL: error finding removed cpu list\n"));
306 		error = RCM_FAILURE;
307 		goto out;
308 	}
309 	if ((pvals[0] = pool_value_alloc()) == NULL) {
310 		rcm_log_message(RCM_ERROR, gettext("POOL: pool_value_alloc"
311 		    " failed: %s\n"), strerror(errno));
312 		error = RCM_FAILURE;
313 		goto out;
314 	}
315 	/*
316 	 * Look for resources with "'type' = 'pset'"
317 	 */
318 	pool_value_set_name(pvals[0], "type");
319 	pool_value_set_string(pvals[0], "pset");
320 	if ((res = pool_query_resources(conf, &nelem, pvals)) == NULL) {
321 		rcm_log_message(RCM_ERROR,
322 		    gettext("POOL: No psets found in configuration\n"));
323 		pool_value_free(pvals[0]);
324 		error =	 RCM_FAILURE;
325 		goto out;
326 	}
327 	pool_value_free(pvals[0]);
328 	for (i = 0; res[i] != NULL; i++)
329 		/*
330 		 * Ask each pset if removing these cpus would cause it to go
331 		 * below it's minimum value.
332 		 */
333 		if (pool_check_pset(conf, res[i], removed_list, errorp) < 0) {
334 			error = RCM_FAILURE;
335 			break;
336 		}
337 	free(res);
338 out:
339 	if (removed_list)
340 		free(removed_list);
341 	if (conf) {
342 		(void) pool_conf_close(conf);
343 		pool_conf_free(conf);
344 	}
345 
346 	/*
347 	 * Set the error string if not already set.
348 	 */
349 	if (error != RCM_SUCCESS && *errorp == NULL)
350 		*errorp = strdup(generic_error);
351 	return (error);
352 }
353 
354 /*
355  * Returns RCM_SUCCESS in a number of error cases, since RCM_FAILURE would
356  * mean that the capacity change would be disallowed by this module,
357  * which is not what we mean.
358  */
359 static int
360 pool_request_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
361     uint_t flags, nvlist_t *nvlist, char **errorp, rcm_info_t **dependent_info)
362 {
363 	int i;
364 
365 	*errorp = NULL;
366 	rcm_log_message(RCM_TRACE1,
367 	    "POOL: requesting capacity change for: %s (flag: %d)\n",
368 	    rsrcname, flags);
369 	if (flags & RCM_FORCE) {
370 		rcm_log_message(RCM_TRACE1,
371 		    "POOL: Allowing forced operation to pass through...\n");
372 		return (RCM_SUCCESS);
373 	}
374 	for (i = 0; registrations[i].rsrc != NULL; i++) {
375 		if (strcmp(rsrcname, registrations[i].rsrc) == 0) {
376 			return ((*registrations[i].capacity_change_cb)(nvlist,
377 			    errorp));
378 		}
379 	}
380 
381 	return (RCM_SUCCESS);
382 }
383 
384 static int
385 pool_notify_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
386     uint_t flags, nvlist_t *nvlist, char **info, rcm_info_t **dependent_info)
387 {
388 	rcm_log_message(RCM_TRACE1,
389 	    "POOL: notifying capacity change for: %s (flags: %d)\n",
390 	    rsrcname, flags);
391 	return (RCM_SUCCESS);
392 }
393 
394 static int
395 pool_register(rcm_handle_t *hdl)
396 {
397 	int i;
398 
399 	rcm_log_message(RCM_TRACE1, "Registering Pools RCM module\n");
400 	if (registered)
401 		return (RCM_SUCCESS);
402 	registered++;
403 	for (i = 0; registrations[i].rsrc != NULL; i++) {
404 		if (rcm_register_capacity(hdl, (char *)registrations[i].rsrc,
405 				    0, NULL) != RCM_SUCCESS) {
406 				rcm_log_message(RCM_ERROR,
407 				    gettext("POOL: failed to register capacity "
408 				    "change for '%s'\n"),
409 				    registrations[i].rsrc);
410 			}
411 	}
412 	return (RCM_SUCCESS);
413 }
414 
415 static int
416 pool_unregister(rcm_handle_t *hdl)
417 {
418 	int i;
419 
420 	rcm_log_message(RCM_TRACE1, "Pools RCM un-registered\n");
421 	if (registered) {
422 		registered--;
423 		for (i = 0; registrations[i].rsrc != NULL; i++)
424 			if (rcm_unregister_capacity(hdl,
425 			    (char *)registrations[i].rsrc, 0) != RCM_SUCCESS) {
426 				rcm_log_message(RCM_ERROR,
427 				    gettext("POOL: unregister capacity failed "
428 				    "for '%s'\n"), registrations[i].rsrc);
429 			}
430 	}
431 	return (RCM_SUCCESS);
432 }
433 
434 static int
435 pool_get_info(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
436     char **infop, char **errorp, nvlist_t *props, rcm_info_t **dependent_info)
437 {
438 	rcm_log_message(RCM_TRACE1, "POOL: RCM get info: '%s'\n", rsrcname);
439 	if ((*infop = strdup(gettext("POOL: In use by pool(4) subsystem")))
440 	    == NULL) {
441 		rcm_log_message(RCM_ERROR, gettext("POOL: get info(%s) malloc "
442 		    "failure\n"), rsrcname);
443 		*infop = NULL;
444 		*errorp = NULL;
445 		return (RCM_FAILURE);
446 	}
447 	return (RCM_SUCCESS);
448 }
449 
450 
451 static int
452 pool_request_suspend(rcm_handle_t *hdl, char *rsrcname,
453     id_t id, timespec_t *time, uint_t flags, char **reason,
454     rcm_info_t **dependent_info)
455 {
456 	rcm_log_message(RCM_TRACE1,
457 	    "POOL: requesting suspend for: %s\n", rsrcname);
458 	return (RCM_SUCCESS);
459 }
460 
461 static int
462 pool_notify_resume(rcm_handle_t *hdl, char *rsrcname,
463     id_t pid, uint_t flags, char **reason, rcm_info_t **dependent_info)
464 {
465 	rcm_log_message(RCM_TRACE1,
466 	    "POOL: notifying resume of: %s\n", rsrcname);
467 	return (RCM_SUCCESS);
468 }
469 
470 static int
471 pool_request_offline(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
472     char **reason, rcm_info_t **dependent_info)
473 {
474 	rcm_log_message(RCM_TRACE1,
475 	    "POOL: requesting offline for: %s\n", rsrcname);
476 	return (RCM_SUCCESS);
477 }
478 
479 static int
480 pool_notify_online(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flags,
481     char **reason, rcm_info_t **dependent_info)
482 {
483 	rcm_log_message(RCM_TRACE1,
484 	    "POOL: notifying online for: %s\n", rsrcname);
485 	return (RCM_SUCCESS);
486 }
487 static int
488 pool_notify_remove(rcm_handle_t *hdl, char *rsrcname, id_t pid,
489     uint_t flag, char **reason, rcm_info_t **dependent_info)
490 {
491 	rcm_log_message(RCM_TRACE1,
492 	    "POOL: notifying removal of: %s\n", rsrcname);
493 	return (RCM_SUCCESS);
494 }
495