xref: /titanic_50/usr/src/lib/libdscfg/common/cfg_cluster.c (revision 2e107de79998f3036decec2454002940afb9a6ff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains the glue code that allows the NWS software to
28  * determine whether a cluster disk service is local to this node or
29  * not.
30  *
31  * See PSARC/1999/462 for more information on the interfaces from
32  * suncluster that are used here.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <sys/mkdev.h>
38 #include <sys/stat.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <strings.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <stdio.h>
46 #include <dlfcn.h>
47 
48 #include <sys/ncall/ncall.h>
49 #include <sys/nsctl/nsc_hash.h>
50 
51 #include "cfg_cluster.h"
52 #include "cfg_impl.h"
53 #include "cfg.h"
54 
/*
 * Static variables
 */

/*
 * Cluster nodeid of this node.  0xffff means "not determined yet" (it is
 * what cfg_cluster_init() tests for before running clinfo); 0 means this
 * host is not running as a cluster node.
 */
static scconf_nodeid_t cl_nodeid = (uint_t)0xffff;
/*
 * Name of this node as returned by scconf_get_nodename(); stays NULL when
 * that call reports SCCONF_EPERM.
 */
static char *cl_nodename = NULL;

/* dlopen() handles for the Sun Cluster libraries, NULL when not loaded */
static void *libscstat;
static void *libscconf;

/*
 * Hash table built by init_sc_entry(): keyed by device service name, each
 * entry is a hash_data_t.
 */
static hash_node_t **schash;
static int init_sc_entry();

/* Value stored in schash: name of a node that is primary for the service */
typedef struct hash_data_s {
	scstat_node_name_t	scstat_node_name;
} hash_data_t;

/*
 * Global variables
 */
/* Set to 1 once cfg_cluster_init() has completed successfully */
int cl_initialized = 0;
76 
77 
/*
 * Tell the linker to keep quiet.
 *
 * The scconf and scstat entry points used in this file are declared weak so
 * that the library can be linked without the Sun Cluster libraries being
 * present.  cfg_cluster_init() dlopen()s libscconf and libscstat with
 * RTLD_GLOBAL before any of these functions is called; presumably that is
 * what supplies the definitions at run time.
 */

#pragma weak scconf_get_nodename
#pragma weak scconf_strerr
#pragma weak scconf_get_ds_by_devt

#pragma weak scstat_get_ds_status
#pragma weak scstat_free_ds_status
#pragma weak scstat_strerr
89 
90 
91 /*
92  * Initialise the library if we have not done so before.
93  *
94  * - IMPORTANT -
95  *
96  * This must -never- be called from any command that can be started
97  * from /usr/cluster/lib/sc/run_reserve (and hence
98  * /usr/cluster/sbin/reconfig) or the system will deadlock
99  * during switchover.  This includes:
100  *
101  *       - svadm (no options, "print") -- called during sv switchover
102  *       - all boot commands
103  *
104  * - grab this node's cluster nodeid
105  * - attempt to dlopen() the suncluster shared libraries we need
106  * - grab this node's cluster nodename
107  *
108  * Returns:
109  *   0   - success
110  *  -1   - error, errno is set
111  */
112 
113 int
114 cfg_cluster_init(void)
115 {
116 	const char *scconf = "/usr/cluster/lib/libscconf.so.1";
117 	const char *scstat = "/usr/cluster/lib/libscstat.so.1";
118 #ifdef DEBUG
119 	char errbuf[SCCONF_MAXSTRINGLEN];
120 #endif
121 	scconf_nodeid_t id;
122 	scconf_errno_t err;
123 	char *name;
124 	FILE *pipe;
125 	int rc;
126 
127 	/*
128 	 * First check to see if we really are a cluster as clinfo -n can lie
129 	 */
130 	if (cl_nodeid == 0xffff) {
131 		rc = system("/usr/sbin/clinfo");
132 		if (rc != -1 && WEXITSTATUS(rc) == 1) {
133 			/* not a cluster */
134 			cl_initialized = 1;
135 			cl_nodeid = 0;
136 			return (0);
137 		}
138 
139 		pipe = popen("/usr/sbin/clinfo -n 2>/dev/null || echo 0", "r");
140 		if (pipe == NULL) {
141 #ifdef DEBUG
142 			fprintf(stderr, "unable to get nodeid: %s\n",
143 				strerror(errno));
144 #endif
145 			return (-1);
146 		}
147 
148 		if ((rc = fscanf(pipe, "%d", &id)) != 1) {
149 #ifdef DEBUG
150 			fprintf(stderr, "unable to get nodeid: %s\n",
151 				strerror(errno));
152 #endif
153 			return (-1);
154 		}
155 
156 		pclose(pipe);
157 
158 		cl_nodeid = id;
159 	}
160 
161 	/* Already loaded the Sun Cluster device tree */
162 	if (cl_initialized)
163 		return (0);
164 
165 	/*
166 	 * Try and dlopen the various libraries that we need
167 	 */
168 
169 	libscconf = dlopen(scconf, RTLD_LAZY | RTLD_GLOBAL);
170 	if (libscconf == NULL)
171 		goto error;
172 
173 	libscstat = dlopen(scstat, RTLD_LAZY | RTLD_GLOBAL);
174 	if (libscstat == NULL)
175 		goto error;
176 
177 	err = scconf_get_nodename(id, &name);
178 	if (err == SCCONF_EPERM) {
179 		cl_nodename = NULL;
180 	} else if (err != SCCONF_NOERR) {
181 #ifdef DEBUG
182 		scconf_strerr(errbuf, err);
183 		fprintf(stderr, "scconf_get_nodename: %d: %s\n", err, errbuf);
184 #endif
185 		goto error;
186 	} else
187 		cl_nodename = name;
188 
189 	/* Load the Sun Cluster device tree */
190 	init_sc_entry();
191 	cl_initialized = 1;
192 	return (0);
193 
194 error:	/* error cleanup */
195 	if (libscconf)
196 		dlclose(libscconf);
197 
198 	if (libscstat)
199 		dlclose(libscstat);
200 
201 	libscconf = NULL;
202 	libscstat = NULL;
203 
204 	errno = ENOSYS;
205 	return (-1);
206 }
207 
208 
209 /*
210  * cfg_issuncluster()
211  *
212  * Description:
213  *  Return the SunCluster nodeid of this node.
214  *
215  * Returns:
216  *  >0   - running in a SunCluster (value is nodeid of this node)
217  *   0   - not running in a cluster
218  *  -1   - failure; errno is set
219  */
220 
221 int
222 cfg_issuncluster()
223 {
224 	if (cfg_cluster_init() >= 0)
225 		return ((int)cl_nodeid);
226 	else
227 		return (-1);
228 }
/*
 * cfg_iscluster()
 *
 * Description:
 *  Alias for cfg_issuncluster(); the result is passed through unchanged.
 */
int
cfg_iscluster()
{
	int nodeid;

	nodeid = cfg_issuncluster();
	return (nodeid);
}
234 
235 /*
236  * cfg_l_dgname_islocal()
237  * Check if disk group is local on a non-SunCluster.
238  *
239  * Returns as cfg_dgname_islocal().
240  */
241 #ifndef lint
242 static int
243 cfg_l_dgname_islocal(char *dgname, char **othernode)
244 {
245 	const char *metaset = "/usr/sbin/metaset -s %s -o > /dev/null 2>&1";
246 	char command[1024];
247 	int rc;
248 
249 	if (snprintf(command, sizeof (command), metaset, dgname) >=
250 	    sizeof (command)) {
251 		errno = ENOMEM;
252 		return (-1);
253 	}
254 
255 	rc = system(command);
256 	if (rc < 0) {
257 		return (-1);
258 	}
259 
260 	if (WEXITSTATUS(rc) != 0) {
261 		if (othernode) {
262 			/* metaset doesn't tell us */
263 			*othernode = "unknown";
264 		}
265 
266 		return (0);
267 	}
268 
269 	return (1);
270 }
271 #endif
272 
273 /*
274  * cfg_dgname_islocal(char *dgname, char **othernode)
275  * -- determine if the named disk service is mastered on this node
276  *
277  * If the disk service is mastered on another node, that nodename
278  * will be returned in othernode (if not NULL).  It is up to the
279  * calling program to call free() on this value at a later time to
280  * free the memory allocated.
281  *
282  * Returns:
283  *   1   - disk service is mastered on this node
284  *   0   - disk service is not mastered on this node (*othernode set)
285  *   -1  - error (errno will be set)
286  */
287 
288 int
289 cfg_dgname_islocal(char *dgname, char **othernode)
290 {
291 	hash_data_t *data;
292 
293 	if (dgname == NULL || *dgname == '\0' || othernode == NULL) {
294 		errno = EINVAL;
295 		return (-1);
296 	}
297 
298 	/* Handle non-cluster configurations */
299 	if (cfg_cluster_init() < 0) {
300 		return (-1);
301 	} else 	if (cl_nodeid == 0) {
302 		/* it has to be local */
303 		return (1);
304 	}
305 
306 	/*
307 	 * lookup the current diskgroup name
308 	 */
309 	if (data = (hash_data_t *)nsc_lookup(schash, dgname)) {
310 		if (strcmp(data->scstat_node_name, cl_nodename)) {
311 			if (othernode)
312 			    *othernode = strdup(data->scstat_node_name);
313 			return (0);
314 		} else {
315 			return (1);
316 		}
317 	} else {
318 		errno = ENODEV;
319 		return (-1);
320 	}
321 }
322 
/*
 * cfg_dsk_prefix_len()
 * Return the length of a leading "dsk/" or "rdsk/" in s, or 0 if s starts
 * with neither.
 */
static size_t
cfg_dsk_prefix_len(const char *s)
{
	static const char dsk[] = "dsk/";
	const size_t dsklen = sizeof (dsk) - 1;

	if (strncmp(s, dsk, dsklen) == 0)
		return (dsklen);

	if (*s == 'r' && strncmp(s + 1, dsk, dsklen) == 0)
		return (dsklen + 1);

	return (0);
}

/*
 * cfg_l_dgname()
 * parse the disk group name from a device pathname on a non-SunCluster.
 *
 * Recognised layouts:
 *	SVM	/dev/md/dgname/{r}dsk/partition
 *	Veritas	/dev/vx/{r}dsk/dgname/partition
 *
 * buffer is zeroed first, so every non-NULL return is a terminated string.
 *
 * Returns as cfg_dgname():
 *	NULL			- error (errno is set):
 *				  EINVAL  pathname or buffer is NULL, or
 *					  pathname is not under /dev/
 *				  E2BIG   dgname does not fit in buffer
 *	ptr to empty string	- no dgname in pathname
 *	pointer to string	- dgname (stored in buffer)
 */

char *
cfg_l_dgname(const char *pathname, char *buffer, size_t buflen)
{
	static const char dev[] = "/dev/";
	static const char vx[] = "vx/";
	static const char md[] = "md/";
	const char *start, *slash;
	size_t skip, len;
	int chkdsk = 0;

	if (buffer == NULL) {
		errno = EINVAL;
		return ((char *)NULL);
	}

	(void) memset(buffer, 0, buflen);

	if (pathname == NULL ||
	    strncmp(pathname, dev, sizeof (dev) - 1) != 0) {
		/* not a device pathname */
		errno = EINVAL;
		return ((char *)NULL);
	}

	start = pathname + (sizeof (dev) - 1);

	if (strncmp(start, md, sizeof (md) - 1) == 0) {
		/*
		 * SVM --
		 * /dev/md/dgname/{r}dsk/partition
		 */
		start += sizeof (md) - 1;

		if (cfg_dsk_prefix_len(start) != 0) {
			/* /dev/md/{r}dsk/... - no dgname */
			return (buffer);
		}

		chkdsk = 1;	/* check for trailing {r}dsk */
	} else if (strncmp(start, vx, sizeof (vx) - 1) == 0) {
		/*
		 * Veritas --
		 * /dev/vx/{r}dsk/dgname/partition
		 */
		start += sizeof (vx) - 1;

		skip = cfg_dsk_prefix_len(start);
		if (skip == 0) {
			/* no dgname */
			return (buffer);
		}

		start += skip;
	} else {
		/* no dgname */
		return (buffer);
	}

	/* the dgname candidate runs up to the next '/' */
	slash = strchr(start, '/');
	if (slash == NULL) {
		/* no dgname */
		return (buffer);
	}

	len = (size_t)(slash - start);

	/* for SVM the component after the '/' must be {r}dsk */
	if (chkdsk && cfg_dsk_prefix_len(slash + 1) == 0) {
		/* no dgname */
		return (buffer);
	}

	if (len >= buflen) {
		errno = E2BIG;
		return ((char *)NULL);
	}

	(void) memcpy(buffer, start, len);
	buffer[len] = '\0';
	return (buffer);
}
428 
429 
430 /*
431  * cfg_dgname()
432  * determine which cluster resource group the pathname belongs to, if any
433  *
434  * Returns:
435  *	NULL			- error (errno is set)
436  *	ptr to NULL-string	- no dgname
437  *	pointer to string	- dgname
438  */
439 
440 char *
441 cfg_dgname(const char *pathname, char *buffer, size_t buflen)
442 {
443 	scconf_errno_t conferr;
444 	char *dsname = NULL;
445 	struct stat stb;
446 #ifdef DEBUG
447 	char errbuf[SCCONF_MAXSTRINGLEN];
448 #endif
449 
450 	bzero(buffer, buflen);
451 
452 	if (pathname == NULL || *pathname == '\0') {
453 		errno = EINVAL;
454 		return ((char *)NULL);
455 	}
456 
457 	/* Handle non-cluster configurations */
458 	if (cfg_cluster_init() < 0) {
459 		errno = EINVAL;
460 		return ((char *)NULL);
461 	} else 	if (cl_nodeid == 0) {
462 		/* must be local - return NULL-string dgname */
463 		return (buffer);
464 	}
465 
466 	if (stat(pathname, &stb) < 0) {
467 		errno = EINVAL;
468 		return ((char *)NULL);
469 	}
470 
471 	conferr = scconf_get_ds_by_devt(major(stb.st_rdev),
472 	    minor(stb.st_rdev), &dsname);
473 
474 	if (conferr == SCCONF_ENOEXIST) {
475 		return (buffer);
476 	} else if (conferr != SCCONF_NOERR) {
477 #ifdef DEBUG
478 		scconf_strerr(errbuf, conferr);
479 		fprintf(stderr,
480 		    "scconf_get_ds_by_devt: %d: %s\n", conferr, errbuf);
481 #endif
482 		errno = EINVAL;
483 		return ((char *)NULL);
484 	}
485 
486 	strncpy(buffer, dsname, buflen);
487 	free(dsname);
488 
489 	return (buffer);
490 }
491 
492 
493 /*
494  * init_sc_entry
495  *
496  * Add an entry into the sclist and the schash for future lookups.
497  *
498  * - IMPORTANT -
499  *
500  * This must -never- be called from any command that can be started
501  * from /usr/cluster/lib/sc/run_reserve (and hence
502  * /usr/cluster/sbin/reconfig) or the system will deadlock
503  * during switchover.  This includes:
504  *
505  *       - svadm (no options, "print") -- called during sv switchover
506  *       - all boot commands
507  *
508  * Return values:
509  *  -1  An error occurred.
510  *   0  Entry added
511  *   1  Entry already exists.
512  */
513 static int
514 init_sc_entry()
515 {
516 	scstat_ds_node_state_t *dsn;
517 	scstat_ds_name_t dsname;
518 	scstat_ds_t *dsstatus, *dsp;
519 	scstat_errno_t err;
520 #ifdef DEBUG
521 	char errbuf[SCCONF_MAXSTRINGLEN];
522 #endif
523 
524 	hash_data_t *hdp;
525 
526 	/*
527 	 * Allocate a hash table
528 	 */
529 	if ((schash = nsc_create_hash()) == NULL)
530 		return (-1);
531 
532 	/*
533 	 * the API is broken here - the function is written to expect
534 	 * the first argument to be (scstat_ds_name_t), but the function
535 	 * declaration in scstat.h requires (scstat_ds_name_t *).
536 	 *
537 	 * We just cast it to get rid of the compiler warnings.
538 	 * If "dsname" is NULL, information for all device services is returned
539 	 */
540 	dsstatus = NULL;
541 	dsname = NULL;
542 	/* LINTED pointer alignment */
543 	err = scstat_get_ds_status((scstat_ds_name_t *)dsname, &dsstatus);
544 	if (err != SCSTAT_ENOERR) {
545 #ifdef DEBUG
546 		scstat_strerr(err, errbuf);
547 		fprintf(stderr, "scstat_get_ds_status(): %d: %s\n",
548 		    err, errbuf);
549 #endif
550 		errno = ENOSYS;
551 		return (-1);
552 	}
553 
554 	if (dsstatus == NULL) {
555 		errno = ENODEV;
556 		return (-1);
557 	}
558 
559 	/*
560 	 * Traverse scstat_ds list, saving away resource in out hash table
561 	 */
562 	for (dsp = dsstatus; dsp; dsp = dsp->scstat_ds_next) {
563 
564 		/* Skip over NULL scstat_ds_name's */
565 		if ((dsp->scstat_ds_name == NULL) ||
566 		    (dsp->scstat_ds_name[0] == '\0'))
567 			continue;
568 
569 		/* See element exits already, error if so */
570 		if (nsc_lookup(schash, dsp->scstat_ds_name)) {
571 			fprintf(stderr, "scstat_get_ds_status: duplicate %s",
572 				dsp->scstat_ds_name);
573 			errno = EEXIST;
574 			return (-1);
575 		}
576 
577 		/* Traverse the node status list */
578 		for (dsn = dsp->scstat_node_state_list; dsn;
579 					dsn = dsn->scstat_node_next) {
580 			/*
581 			 * Only keep trace of primary nodes
582 			 */
583 			if (dsn->scstat_node_state != SCSTAT_PRIMARY)
584 				continue;
585 
586 			/* Create an element to insert */
587 			hdp = (hash_data_t *)malloc(sizeof (hash_data_t));
588 			hdp->scstat_node_name = strdup(dsn->scstat_node_name);
589 			nsc_insert_node(schash, hdp, dsp->scstat_ds_name);
590 		}
591 	}
592 
593 	/*
594 	 * Free up scstat resources
595 	 */
596 	scstat_free_ds_status(dsstatus);
597 	return (0);
598 }
599