xref: /titanic_50/usr/src/cmd/rcm_daemon/common/pool_rcm.c (revision bba9e99ca8b1c0ead08de153667f8e4f78da4b2c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <unistd.h>
31 #include <errno.h>
32 #include <libintl.h>
33 #include <string.h>
34 #include <rcm_module.h>
35 #include <sys/pset.h>
36 
37 #include <pool.h>
38 
39 /*
40  * RCM module ops.
41  */
42 static int pool_register(rcm_handle_t *);
43 static int pool_unregister(rcm_handle_t *);
44 static int pool_get_info(rcm_handle_t *, char *, id_t, uint_t, char **,
45     char **, nvlist_t *, rcm_info_t **);
46 static int pool_request_suspend(rcm_handle_t *, char *, id_t,
47     timespec_t *, uint_t, char **, rcm_info_t **);
48 static int pool_notify_resume(rcm_handle_t *, char *, id_t, uint_t, char **,
49     rcm_info_t **);
50 static int pool_notify_remove(rcm_handle_t *, char *, id_t, uint_t,
51     char **, rcm_info_t **);
52 static int pool_request_offline(rcm_handle_t *, char *, id_t, uint_t,
53     char **, rcm_info_t **);
54 static int pool_notify_online(rcm_handle_t *, char *, id_t, uint_t, char **,
55     rcm_info_t **);
56 static int pool_request_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
57     nvlist_t *, char **, rcm_info_t **);
58 static int pool_notify_capacity_change(rcm_handle_t *, char *, id_t, uint_t,
59     nvlist_t *, char **, rcm_info_t **);
60 
61 /*
62  * Pool-specific callback functions.
63  */
64 static int pset_validate_remove(nvlist_t *, char **);
65 
66 static struct {
67 	const char *rsrc;
68 	int (*capacity_change_cb)(nvlist_t *, char **);
69 } registrations[] = {
70 	{ "SUNW_cpu", pset_validate_remove },
71 	{ NULL, NULL }
72 };
73 
74 static int registered = 0;
75 
76 static struct rcm_mod_ops pool_ops = {
77 	RCM_MOD_OPS_VERSION,
78 	pool_register,
79 	pool_unregister,
80 	pool_get_info,
81 	pool_request_suspend,
82 	pool_notify_resume,
83 	pool_request_offline,
84 	pool_notify_online,
85 	pool_notify_remove,
86 	pool_request_capacity_change,
87 	pool_notify_capacity_change,
88 	NULL
89 };
90 
91 struct rcm_mod_ops *
92 rcm_mod_init(void)
93 {
94 	rcm_log_message(RCM_TRACE1, "Pools RCM module created\n");
95 	return (&pool_ops);
96 }
97 
98 
99 int
100 rcm_mod_fini(void)
101 {
102 	rcm_log_message(RCM_TRACE1, "Pools RCM module unloaded\n");
103 	return (RCM_SUCCESS);
104 }
105 
/*
 * rcm_mod_info()
 *	Return the module's name and version as a constant string.  The
 *	string has static storage and must not be freed or modified.
 */
const char *
rcm_mod_info(void)
{
	static const char module_info[] = "Pools RCM module 1.4";

	return (module_info);
}
111 
112 static int
113 pool_check_pset(pool_conf_t *conf, pool_resource_t *res,
114     processorid_t *del_cpus, char **errorp)
115 {
116 	int64_t tmp;
117 	int i, j;
118 	uint_t num_cpus;
119 	uint64_t min_cpus;
120 	uint_t num_found = 0;
121 	processorid_t *cpulist;
122 	psetid_t psetid;
123 	pool_value_t *pval;
124 	pool_elem_t *elem = pool_resource_to_elem(conf, res);
125 
126 	if ((pval = pool_value_alloc()) == NULL)
127 		return (-1);
128 	if (pool_get_property(conf, elem, "pset.min", pval) != POC_UINT) {
129 		rcm_log_message(RCM_ERROR,
130 		    gettext("POOL: cannot find property 'pset.min' in pset\n"));
131 		pool_value_free(pval);
132 		return (-1);
133 	}
134 	(void) pool_value_get_uint64(pval, &min_cpus);
135 	if (pool_get_property(conf, elem, "pset.sys_id", pval) != POC_INT) {
136 		rcm_log_message(RCM_ERROR,
137 		    gettext("POOL: cannot get pset.sys_id\n"));
138 		pool_value_free(pval);
139 		return (-1);
140 	}
141 	(void) pool_value_get_int64(pval, &tmp);
142 	pool_value_free(pval);
143 	psetid = (psetid_t)tmp;
144 	rcm_log_message(RCM_TRACE1, "POOL: checking pset: %d\n", psetid);
145 
146 	rcm_log_message(RCM_TRACE1, "POOL: min_cpus is %llu\n", min_cpus);
147 	if (pset_info(psetid, NULL, &num_cpus, NULL) != 0) {
148 		rcm_log_message(RCM_ERROR,
149 		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
150 		    strerror(errno));
151 		return (-1);
152 	}
153 	if ((cpulist = malloc(num_cpus * sizeof (processorid_t))) == NULL) {
154 		rcm_log_message(RCM_ERROR,
155 		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
156 		return (-1);
157 	}
158 	if (pset_info(psetid, NULL, &num_cpus, cpulist) != 0) {
159 		free(cpulist);
160 		rcm_log_message(RCM_ERROR,
161 		    gettext("POOL: pset_info(%d) failed: %s\n"), psetid,
162 		    strerror(errno));
163 		return (-1);
164 	}
165 	for (i = 0; del_cpus[i] != -1; i++)
166 		for (j = 0; j < num_cpus; j++)
167 			if (cpulist[j] == del_cpus[i])
168 				num_found++;
169 	free(cpulist);
170 	if (num_found > 0 && (num_cpus - num_found) < (uint_t)min_cpus) {
171 		int len;
172 		char *errval;
173 		const char *errfmt =
174 		    gettext("POOL: processor set (%1$d) would go "
175 		    "below its minimum value of %2$u\n");
176 
177 		/*
178 		 * We would go below the min value. Fail this request.
179 		 */
180 		len = strlen(errfmt) + 4 * 2; /* 4 digits for psetid and min */
181 		if ((errval = malloc((len + 1) * sizeof (char))) != NULL) {
182 			(void) snprintf(errval, len + 1, errfmt, psetid,
183 			    (uint_t)min_cpus);
184 			*errorp = errval;
185 		}
186 
187 		rcm_log_message(RCM_ERROR, (char *)errfmt, psetid,
188 		    (uint_t)min_cpus);
189 
190 		return (-1);
191 	}
192 	rcm_log_message(RCM_TRACE1, "POOL: pset %d is fine\n", psetid);
193 	return (0);
194 }
195 
196 /*
197  * pset_validate_remove()
198  * 	Check to see if the requested cpu removal would be acceptable.
199  * 	Returns RCM_FAILURE if not.
200  */
201 static int
202 pset_validate_remove(nvlist_t *nvl, char **errorp)
203 {
204 	int error = RCM_SUCCESS;
205 	int32_t old_total, new_total, removed_total;
206 	processorid_t *removed_list = NULL; /* list terminated by (-1). */
207 	processorid_t *old_cpu_list = NULL, *new_cpu_list = NULL;
208 	int i, j;
209 	pool_conf_t *conf;
210 	pool_value_t *pvals[] = { NULL, NULL };
211 	pool_resource_t **res = NULL;
212 	uint_t nelem;
213 	const char *generic_error = gettext("POOL: Error processing request\n");
214 
215 	if ((conf = pool_conf_alloc()) == NULL)
216 		return (RCM_FAILURE);
217 	if (pool_conf_open(conf, pool_dynamic_location(), PO_RDONLY) < 0) {
218 		rcm_log_message(RCM_TRACE1,
219 		    "POOL: failed to parse config file: '%s'\n",
220 		    pool_dynamic_location());
221 		pool_conf_free(conf);
222 		return (RCM_SUCCESS);
223 	}
224 
225 	if ((error = nvlist_lookup_int32(nvl, "old_total", &old_total)) != 0) {
226 		(void) pool_conf_close(conf);
227 		pool_conf_free(conf);
228 		rcm_log_message(RCM_ERROR,
229 		    gettext("POOL: unable to find 'old_total' in nvlist: %s\n"),
230 		    strerror(error));
231 		*errorp = strdup(generic_error);
232 		return (RCM_FAILURE);
233 	}
234 	if ((error = nvlist_lookup_int32(nvl, "new_total", &new_total)) != 0) {
235 		(void) pool_conf_close(conf);
236 		pool_conf_free(conf);
237 		rcm_log_message(RCM_ERROR,
238 		    gettext("POOL: unable to find 'new_total' in nvlist: %s\n"),
239 		    strerror(error));
240 		*errorp = strdup(generic_error);
241 		return (RCM_FAILURE);
242 	}
243 	if (new_total >= old_total) {
244 		(void) pool_conf_close(conf);
245 		pool_conf_free(conf);
246 		/*
247 		 * This doesn't look like a cpu removal.
248 		 */
249 		rcm_log_message(RCM_TRACE1,
250 		    gettext("POOL: 'old_total' (%d) is less than 'new_total' "
251 		    "(%d)\n"), old_total, new_total);
252 		return (RCM_SUCCESS);
253 	}
254 	if ((removed_list = malloc((old_total - new_total + 1) * sizeof (int)))
255 	    == NULL) {
256 		rcm_log_message(RCM_ERROR,
257 		    gettext("POOL: malloc failed: %s\n"), strerror(errno));
258 
259 		error = RCM_FAILURE;
260 		goto out;
261 	}
262 	if ((error = nvlist_lookup_int32_array(nvl, "old_cpu_list",
263 	    &old_cpu_list, &nelem)) != 0) {
264 		rcm_log_message(RCM_ERROR,
265 		    gettext("POOL: 'old_cpu_list' not found in nvlist: %s\n"),
266 		    strerror(error));
267 		error = RCM_FAILURE;
268 		goto out;
269 	}
270 	if ((int32_t)nelem != old_total) {
271 		rcm_log_message(RCM_ERROR,
272 		    gettext("POOL: 'old_cpu_list' size mismatch: %1$d vs "
273 		    "%2$d\n"), nelem, old_total);
274 		error = RCM_FAILURE;
275 		goto out;
276 	}
277 	if ((error = nvlist_lookup_int32_array(nvl, "new_cpu_list",
278 	    &new_cpu_list, &nelem)) != 0) {
279 		rcm_log_message(RCM_ERROR,
280 		    gettext("POOL: 'new_cpu_list' not found in nvlist: %s\n"),
281 		    strerror(error));
282 		error = RCM_FAILURE;
283 		goto out;
284 	}
285 	if (nelem != new_total) {
286 		rcm_log_message(RCM_ERROR,
287 		    gettext("POOL: 'new_cpu_list' size mismatch: %1$d vs "
288 		    "%2$d\n"), nelem, new_total);
289 		error = RCM_FAILURE;
290 		goto out;
291 	}
292 
293 	for (i = 0, removed_total = 0; i < old_total; i++) {
294 		for (j = 0; j < new_total; j++)
295 			if (old_cpu_list[i] == new_cpu_list[j])
296 				break;
297 		if (j == new_total) /* not found in new_cpu_list */
298 			removed_list[removed_total++] = old_cpu_list[i];
299 	}
300 	removed_list[removed_total] = -1;
301 
302 	if (removed_total != (old_total - new_total)) {
303 		rcm_log_message(RCM_ERROR,
304 		    gettext("POOL: error finding removed cpu list\n"));
305 		error = RCM_FAILURE;
306 		goto out;
307 	}
308 	if ((pvals[0] = pool_value_alloc()) == NULL) {
309 		rcm_log_message(RCM_ERROR, gettext("POOL: pool_value_alloc"
310 		    " failed: %s\n"), strerror(errno));
311 		error = RCM_FAILURE;
312 		goto out;
313 	}
314 	/*
315 	 * Look for resources with "'type' = 'pset'"
316 	 */
317 	(void) pool_value_set_name(pvals[0], "type");
318 	(void) pool_value_set_string(pvals[0], "pset");
319 	if ((res = pool_query_resources(conf, &nelem, pvals)) == NULL) {
320 		rcm_log_message(RCM_ERROR,
321 		    gettext("POOL: No psets found in configuration\n"));
322 		pool_value_free(pvals[0]);
323 		error =	 RCM_FAILURE;
324 		goto out;
325 	}
326 	pool_value_free(pvals[0]);
327 	for (i = 0; res[i] != NULL; i++)
328 		/*
329 		 * Ask each pset if removing these cpus would cause it to go
330 		 * below it's minimum value.
331 		 */
332 		if (pool_check_pset(conf, res[i], removed_list, errorp) < 0) {
333 			error = RCM_FAILURE;
334 			break;
335 		}
336 	free(res);
337 out:
338 	if (removed_list)
339 		free(removed_list);
340 	if (conf) {
341 		(void) pool_conf_close(conf);
342 		pool_conf_free(conf);
343 	}
344 
345 	/*
346 	 * Set the error string if not already set.
347 	 */
348 	if (error != RCM_SUCCESS && *errorp == NULL)
349 		*errorp = strdup(generic_error);
350 	return (error);
351 }
352 
353 /*
354  * Returns RCM_SUCCESS in a number of error cases, since RCM_FAILURE would
355  * mean that the capacity change would be disallowed by this module,
356  * which is not what we mean.
357  */
358 static int
359 pool_request_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
360     uint_t flags, nvlist_t *nvlist, char **errorp, rcm_info_t **dependent_info)
361 {
362 	int i;
363 
364 	*errorp = NULL;
365 	rcm_log_message(RCM_TRACE1,
366 	    "POOL: requesting capacity change for: %s (flag: %d)\n",
367 	    rsrcname, flags);
368 	if (flags & RCM_FORCE) {
369 		rcm_log_message(RCM_TRACE1,
370 		    "POOL: Allowing forced operation to pass through...\n");
371 		return (RCM_SUCCESS);
372 	}
373 	for (i = 0; registrations[i].rsrc != NULL; i++) {
374 		if (strcmp(rsrcname, registrations[i].rsrc) == 0) {
375 			return ((*registrations[i].capacity_change_cb)(nvlist,
376 			    errorp));
377 		}
378 	}
379 
380 	return (RCM_SUCCESS);
381 }
382 
383 static int
384 pool_notify_capacity_change(rcm_handle_t *hdl, char *rsrcname, id_t id,
385     uint_t flags, nvlist_t *nvlist, char **info, rcm_info_t **dependent_info)
386 {
387 	rcm_log_message(RCM_TRACE1,
388 	    "POOL: notifying capacity change for: %s (flags: %d)\n",
389 	    rsrcname, flags);
390 	return (RCM_SUCCESS);
391 }
392 
393 static int
394 pool_register(rcm_handle_t *hdl)
395 {
396 	int i;
397 
398 	rcm_log_message(RCM_TRACE1, "Registering Pools RCM module\n");
399 	if (registered)
400 		return (RCM_SUCCESS);
401 	registered++;
402 	for (i = 0; registrations[i].rsrc != NULL; i++) {
403 		if (rcm_register_capacity(hdl, (char *)registrations[i].rsrc,
404 		    0, NULL) != RCM_SUCCESS) {
405 			rcm_log_message(RCM_ERROR,
406 			    gettext("POOL: failed to register capacity "
407 			    "change for '%s'\n"),
408 			    registrations[i].rsrc);
409 		}
410 	}
411 	return (RCM_SUCCESS);
412 }
413 
414 static int
415 pool_unregister(rcm_handle_t *hdl)
416 {
417 	int i;
418 
419 	rcm_log_message(RCM_TRACE1, "Pools RCM un-registered\n");
420 	if (registered) {
421 		registered--;
422 		for (i = 0; registrations[i].rsrc != NULL; i++)
423 			if (rcm_unregister_capacity(hdl,
424 			    (char *)registrations[i].rsrc, 0) != RCM_SUCCESS) {
425 				rcm_log_message(RCM_ERROR,
426 				    gettext("POOL: unregister capacity failed "
427 				    "for '%s'\n"), registrations[i].rsrc);
428 			}
429 	}
430 	return (RCM_SUCCESS);
431 }
432 
433 static int
434 pool_get_info(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
435     char **infop, char **errorp, nvlist_t *props, rcm_info_t **dependent_info)
436 {
437 	rcm_log_message(RCM_TRACE1, "POOL: RCM get info: '%s'\n", rsrcname);
438 	if ((*infop = strdup(gettext("POOL: In use by pool(4) subsystem")))
439 	    == NULL) {
440 		rcm_log_message(RCM_ERROR, gettext("POOL: get info(%s) malloc "
441 		    "failure\n"), rsrcname);
442 		*infop = NULL;
443 		*errorp = NULL;
444 		return (RCM_FAILURE);
445 	}
446 	return (RCM_SUCCESS);
447 }
448 
449 
450 static int
451 pool_request_suspend(rcm_handle_t *hdl, char *rsrcname,
452     id_t id, timespec_t *time, uint_t flags, char **reason,
453     rcm_info_t **dependent_info)
454 {
455 	rcm_log_message(RCM_TRACE1,
456 	    "POOL: requesting suspend for: %s\n", rsrcname);
457 	return (RCM_SUCCESS);
458 }
459 
460 static int
461 pool_notify_resume(rcm_handle_t *hdl, char *rsrcname,
462     id_t pid, uint_t flags, char **reason, rcm_info_t **dependent_info)
463 {
464 	rcm_log_message(RCM_TRACE1,
465 	    "POOL: notifying resume of: %s\n", rsrcname);
466 	return (RCM_SUCCESS);
467 }
468 
469 static int
470 pool_request_offline(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flag,
471     char **reason, rcm_info_t **dependent_info)
472 {
473 	rcm_log_message(RCM_TRACE1,
474 	    "POOL: requesting offline for: %s\n", rsrcname);
475 	return (RCM_SUCCESS);
476 }
477 
478 static int
479 pool_notify_online(rcm_handle_t *hdl, char *rsrcname, id_t pid, uint_t flags,
480     char **reason, rcm_info_t **dependent_info)
481 {
482 	rcm_log_message(RCM_TRACE1,
483 	    "POOL: notifying online for: %s\n", rsrcname);
484 	return (RCM_SUCCESS);
485 }
486 static int
487 pool_notify_remove(rcm_handle_t *hdl, char *rsrcname, id_t pid,
488     uint_t flag, char **reason, rcm_info_t **dependent_info)
489 {
490 	rcm_log_message(RCM_TRACE1,
491 	    "POOL: notifying removal of: %s\n", rsrcname);
492 	return (RCM_SUCCESS);
493 }
494