1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * This file contains the glue code that allows the NWS software to
28 * determine whether a cluster disk service is local to this node or
29 * not.
30 *
31 * See PSARC/1999/462 for more information on the interfaces from
32 * suncluster that are used here.
33 */
34
35 #include <sys/types.h>
36 #include <sys/wait.h>
37 #include <sys/mkdev.h>
38 #include <sys/stat.h>
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <strings.h>
43 #include <errno.h>
44 #include <fcntl.h>
45 #include <stdio.h>
46 #include <dlfcn.h>
47
48 #include <sys/ncall/ncall.h>
49 #include <sys/nsctl/nsc_hash.h>
50
51 #include "cfg_cluster.h"
52 #include "cfg_impl.h"
53 #include "cfg.h"
54
55 /*
56 * Static variables
57 */
58
59 static scconf_nodeid_t cl_nodeid = (uint_t)0xffff;
60 static char *cl_nodename = NULL;
61
62 static void *libscstat;
63 static void *libscconf;
64
65 static hash_node_t **schash;
66 static int init_sc_entry();
67
68 typedef struct hash_data_s {
69 scstat_node_name_t scstat_node_name;
70 } hash_data_t;
71
72 /*
73 * Global variables
74 */
75 int cl_initialized = 0;
76
77
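/*
 * Tell the linker to keep quiet: the Sun Cluster entry points used in
 * this file are declared weak so that they need not be resolved at link
 * time.  They must only be called after cfg_cluster_init() has
 * dlopen()ed the cluster libraries.
 */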
81
82 #pragma weak scconf_get_nodename
83 #pragma weak scconf_strerr
84 #pragma weak scconf_get_ds_by_devt
85
86 #pragma weak scstat_get_ds_status
87 #pragma weak scstat_free_ds_status
88 #pragma weak scstat_strerr
89
90
91 /*
92 * Initialise the library if we have not done so before.
93 *
94 * - IMPORTANT -
95 *
96 * This must -never- be called from any command that can be started
97 * from /usr/cluster/lib/sc/run_reserve (and hence
98 * /usr/cluster/sbin/reconfig) or the system will deadlock
99 * during switchover. This includes:
100 *
101 * - svadm (no options, "print") -- called during sv switchover
102 * - all boot commands
103 *
104 * - grab this node's cluster nodeid
105 * - attempt to dlopen() the suncluster shared libraries we need
106 * - grab this node's cluster nodename
107 *
108 * Returns:
109 * 0 - success
110 * -1 - error, errno is set
111 */
112
113 int
cfg_cluster_init(void)114 cfg_cluster_init(void)
115 {
116 const char *scconf = "/usr/cluster/lib/libscconf.so.1";
117 const char *scstat = "/usr/cluster/lib/libscstat.so.1";
118 #ifdef DEBUG
119 char errbuf[SCCONF_MAXSTRINGLEN];
120 #endif
121 scconf_nodeid_t id;
122 scconf_errno_t err;
123 char *name;
124 FILE *pipe;
125 int rc;
126
127 /*
128 * First check to see if we really are a cluster as clinfo -n can lie
129 */
130 if (cl_nodeid == 0xffff) {
131 rc = system("/usr/sbin/clinfo");
132 if (rc != -1 && WEXITSTATUS(rc) == 1) {
133 /* not a cluster */
134 cl_initialized = 1;
135 cl_nodeid = 0;
136 return (0);
137 }
138
139 pipe = popen("/usr/sbin/clinfo -n 2>/dev/null || echo 0", "r");
140 if (pipe == NULL) {
141 #ifdef DEBUG
142 fprintf(stderr, "unable to get nodeid: %s\n",
143 strerror(errno));
144 #endif
145 return (-1);
146 }
147
148 if ((rc = fscanf(pipe, "%d", &id)) != 1) {
149 #ifdef DEBUG
150 fprintf(stderr, "unable to get nodeid: %s\n",
151 strerror(errno));
152 #endif
153 return (-1);
154 }
155
156 pclose(pipe);
157
158 cl_nodeid = id;
159 }
160
161 /* Already loaded the Sun Cluster device tree */
162 if (cl_initialized)
163 return (0);
164
165 /*
166 * Try and dlopen the various libraries that we need
167 */
168
169 libscconf = dlopen(scconf, RTLD_LAZY | RTLD_GLOBAL);
170 if (libscconf == NULL)
171 goto error;
172
173 libscstat = dlopen(scstat, RTLD_LAZY | RTLD_GLOBAL);
174 if (libscstat == NULL)
175 goto error;
176
177 err = scconf_get_nodename(id, &name);
178 if (err == SCCONF_EPERM) {
179 cl_nodename = NULL;
180 } else if (err != SCCONF_NOERR) {
181 #ifdef DEBUG
182 scconf_strerr(errbuf, err);
183 fprintf(stderr, "scconf_get_nodename: %d: %s\n", err, errbuf);
184 #endif
185 goto error;
186 } else
187 cl_nodename = name;
188
189 /* Load the Sun Cluster device tree */
190 init_sc_entry();
191 cl_initialized = 1;
192 return (0);
193
194 error: /* error cleanup */
195 if (libscconf)
196 dlclose(libscconf);
197
198 if (libscstat)
199 dlclose(libscstat);
200
201 libscconf = NULL;
202 libscstat = NULL;
203
204 errno = ENOSYS;
205 return (-1);
206 }
207
208
209 /*
210 * cfg_issuncluster()
211 *
212 * Description:
213 * Return the SunCluster nodeid of this node.
214 *
215 * Returns:
216 * >0 - running in a SunCluster (value is nodeid of this node)
217 * 0 - not running in a cluster
218 * -1 - failure; errno is set
219 */
220
221 int
cfg_issuncluster()222 cfg_issuncluster()
223 {
224 if (cfg_cluster_init() >= 0)
225 return ((int)cl_nodeid);
226 else
227 return (-1);
228 }
/*
 * cfg_iscluster()
 *
 * Description:
 *	Alternative name for cfg_issuncluster(); the call is forwarded
 *	unchanged.
 *
 * Returns:
 *	Whatever cfg_issuncluster() returns.
 */
int
cfg_iscluster()
{
	int nodeid;

	nodeid = cfg_issuncluster();
	return (nodeid);
}
234
/*
 * cfg_l_dgname_islocal()
 *	Check if disk group is local on a non-SunCluster, by running
 *	"metaset -s <dgname> -o" and looking at its exit status.
 *
 * Returns as cfg_dgname_islocal():
 *	1	- metaset exited with status 0
 *	0	- metaset exited with a non-zero status; if othernode is
 *		  not NULL, *othernode is set to a malloc'ed copy of
 *		  "unknown" which the caller must free()
 *	-1	- error, errno is set (EINVAL for a NULL dgname, ENOMEM if
 *		  the command line does not fit or memory is exhausted,
 *		  EIO if metaset did not exit normally)
 *
 * NOTE(review): dgname is pasted into a shell command line without any
 * quoting.  Callers must not pass names that come from untrusted input.
 */
#ifndef lint
static int
cfg_l_dgname_islocal(char *dgname, char **othernode)
{
	const char *metaset = "/usr/sbin/metaset -s %s -o > /dev/null 2>&1";
	char command[1024];
	char *unknown;
	int n;
	int rc;

	if (dgname == NULL) {
		errno = EINVAL;
		return (-1);
	}

	n = snprintf(command, sizeof (command), metaset, dgname);
	if (n < 0 || (size_t)n >= sizeof (command)) {
		errno = ENOMEM;
		return (-1);
	}

	rc = system(command);
	if (rc < 0) {
		return (-1);
	}

	/* The exit status is only meaningful after a normal exit. */
	if (!WIFEXITED(rc)) {
		errno = EIO;
		return (-1);
	}

	if (WEXITSTATUS(rc) != 0) {
		if (othernode) {
			/*
			 * metaset doesn't tell us who the owner is.  Hand
			 * back heap memory, as callers free() this value.
			 */
			unknown = strdup("unknown");
			if (unknown == NULL) {
				errno = ENOMEM;
				return (-1);
			}
			*othernode = unknown;
		}

		return (0);
	}

	return (1);
}
#endif
272
273 /*
274 * cfg_dgname_islocal(char *dgname, char **othernode)
275 * -- determine if the named disk service is mastered on this node
276 *
277 * If the disk service is mastered on another node, that nodename
278 * will be returned in othernode (if not NULL). It is up to the
279 * calling program to call free() on this value at a later time to
280 * free the memory allocated.
281 *
282 * Returns:
283 * 1 - disk service is mastered on this node
284 * 0 - disk service is not mastered on this node (*othernode set)
285 * -1 - error (errno will be set)
286 */
287
288 int
cfg_dgname_islocal(char * dgname,char ** othernode)289 cfg_dgname_islocal(char *dgname, char **othernode)
290 {
291 hash_data_t *data;
292
293 if (dgname == NULL || *dgname == '\0' || othernode == NULL) {
294 errno = EINVAL;
295 return (-1);
296 }
297
298 /* Handle non-cluster configurations */
299 if (cfg_cluster_init() < 0) {
300 return (-1);
301 } else if (cl_nodeid == 0) {
302 /* it has to be local */
303 return (1);
304 }
305
306 /*
307 * lookup the current diskgroup name
308 */
309 if (data = (hash_data_t *)nsc_lookup(schash, dgname)) {
310 if (strcmp(data->scstat_node_name, cl_nodename)) {
311 if (othernode)
312 *othernode = strdup(data->scstat_node_name);
313 return (0);
314 } else {
315 return (1);
316 }
317 } else {
318 errno = ENODEV;
319 return (-1);
320 }
321 }
322
/*
 * cfg_l_dgname()
 *	parse the disk group name from a device pathname on a non-SunCluster.
 *
 * Recognised layouts:
 *	SVM	- /dev/md/dgname/{r}dsk/partition
 *	Veritas	- /dev/vx/{r}dsk/dgname/partition
 *
 * The buffer is zeroed first, so every successful return hands back a
 * NUL-terminated string.
 *
 * Returns as cfg_dgname():
 *	NULL			- error (errno is set):
 *				  EINVAL - NULL argument, zero buflen, or
 *					   pathname is not under /dev/
 *				  E2BIG  - dgname does not fit in buffer
 *	ptr to NULL-string	- no dgname
 *	pointer to string	- dgname
 */

char *
cfg_l_dgname(const char *pathname, char *buffer, size_t buflen)
{
	const char *dev = "/dev/";
	const char *vx = "vx/";
	const char *md = "md/";
	const char *dsk = "dsk/";
	const char *start, *cp;
	size_t dsklen, ll, len;
	int chkdsk = 0;

	if (pathname == NULL || buffer == NULL || buflen == 0) {
		errno = EINVAL;
		return ((char *)NULL);
	}

	(void) memset(buffer, 0, buflen);
	dsklen = strlen(dsk);

	ll = strlen(dev);
	if (strncmp(pathname, dev, ll) != 0) {
		/* not a device pathname */
		errno = EINVAL;
		return ((char *)NULL);
	}

	start = pathname + ll;

	if (strncmp(start, md, (ll = strlen(md))) == 0) {
		/*
		 * SVM --
		 * /dev/md/dgname/{r}dsk/partition
		 */

		start += ll;

		if (strncmp(start, dsk, dsklen) == 0 ||
		    (*start == 'r' &&
		    strncmp((start + 1), dsk, dsklen) == 0)) {
			/* no dgname */
			return (buffer);
		}

		chkdsk = 1;	/* check for trailing {r}dsk */
	} else if (strncmp(start, vx, (ll = strlen(vx))) == 0) {
		/*
		 * Veritas --
		 * /dev/vx/{r}dsk/dgname/partition
		 */

		start += ll;

		if (*start == 'r' && strncmp((start + 1), dsk, dsklen) == 0) {
			start += dsklen + 1;
		} else if (strncmp(start, dsk, dsklen) == 0) {
			start += dsklen;
		} else {
			/* no dgname */
			return (buffer);
		}
	} else {
		/* no dgname */
		return (buffer);
	}

	/* The dgname candidate runs from start up to the next '/'. */
	cp = strchr(start, '/');
	if (cp == NULL) {
		/* no dgname */
		return (buffer);
	}
	len = (size_t)(cp - start);

	if (chkdsk) {
		cp++;	/* skip the '/' */

		if ((*cp != 'r' || strncmp((cp + 1), dsk, dsklen) != 0) &&
		    strncmp(cp, dsk, dsklen) != 0) {
			/* no dgname */
			return (buffer);
		}
	}

	if (len >= buflen) {
		errno = E2BIG;
		return ((char *)NULL);
	}

	/* buffer was zeroed above, so the copy stays NUL-terminated. */
	(void) memcpy(buffer, start, len);
	return (buffer);
}
428
429
430 /*
431 * cfg_dgname()
432 * determine which cluster resource group the pathname belongs to, if any
433 *
434 * Returns:
435 * NULL - error (errno is set)
436 * ptr to NULL-string - no dgname
437 * pointer to string - dgname
438 */
439
440 char *
cfg_dgname(const char * pathname,char * buffer,size_t buflen)441 cfg_dgname(const char *pathname, char *buffer, size_t buflen)
442 {
443 scconf_errno_t conferr;
444 char *dsname = NULL;
445 struct stat stb;
446 #ifdef DEBUG
447 char errbuf[SCCONF_MAXSTRINGLEN];
448 #endif
449
450 bzero(buffer, buflen);
451
452 if (pathname == NULL || *pathname == '\0') {
453 errno = EINVAL;
454 return ((char *)NULL);
455 }
456
457 /* Handle non-cluster configurations */
458 if (cfg_cluster_init() < 0) {
459 errno = EINVAL;
460 return ((char *)NULL);
461 } else if (cl_nodeid == 0) {
462 /* must be local - return NULL-string dgname */
463 return (buffer);
464 }
465
466 if (stat(pathname, &stb) < 0) {
467 errno = EINVAL;
468 return ((char *)NULL);
469 }
470
471 conferr = scconf_get_ds_by_devt(major(stb.st_rdev),
472 minor(stb.st_rdev), &dsname);
473
474 if (conferr == SCCONF_ENOEXIST) {
475 return (buffer);
476 } else if (conferr != SCCONF_NOERR) {
477 #ifdef DEBUG
478 scconf_strerr(errbuf, conferr);
479 fprintf(stderr,
480 "scconf_get_ds_by_devt: %d: %s\n", conferr, errbuf);
481 #endif
482 errno = EINVAL;
483 return ((char *)NULL);
484 }
485
486 strncpy(buffer, dsname, buflen);
487 free(dsname);
488
489 return (buffer);
490 }
491
492
493 /*
494 * init_sc_entry
495 *
496 * Add an entry into the sclist and the schash for future lookups.
497 *
498 * - IMPORTANT -
499 *
500 * This must -never- be called from any command that can be started
501 * from /usr/cluster/lib/sc/run_reserve (and hence
502 * /usr/cluster/sbin/reconfig) or the system will deadlock
503 * during switchover. This includes:
504 *
505 * - svadm (no options, "print") -- called during sv switchover
506 * - all boot commands
507 *
508 * Return values:
509 * -1 An error occurred.
510 * 0 Entry added
511 * 1 Entry already exists.
512 */
513 static int
init_sc_entry()514 init_sc_entry()
515 {
516 scstat_ds_node_state_t *dsn;
517 scstat_ds_name_t dsname;
518 scstat_ds_t *dsstatus, *dsp;
519 scstat_errno_t err;
520 #ifdef DEBUG
521 char errbuf[SCCONF_MAXSTRINGLEN];
522 #endif
523
524 hash_data_t *hdp;
525
526 /*
527 * Allocate a hash table
528 */
529 if ((schash = nsc_create_hash()) == NULL)
530 return (-1);
531
532 /*
533 * the API is broken here - the function is written to expect
534 * the first argument to be (scstat_ds_name_t), but the function
535 * declaration in scstat.h requires (scstat_ds_name_t *).
536 *
537 * We just cast it to get rid of the compiler warnings.
538 * If "dsname" is NULL, information for all device services is returned
539 */
540 dsstatus = NULL;
541 dsname = NULL;
542 /* LINTED pointer alignment */
543 err = scstat_get_ds_status((scstat_ds_name_t *)dsname, &dsstatus);
544 if (err != SCSTAT_ENOERR) {
545 #ifdef DEBUG
546 scstat_strerr(err, errbuf);
547 fprintf(stderr, "scstat_get_ds_status(): %d: %s\n",
548 err, errbuf);
549 #endif
550 errno = ENOSYS;
551 return (-1);
552 }
553
554 if (dsstatus == NULL) {
555 errno = ENODEV;
556 return (-1);
557 }
558
559 /*
560 * Traverse scstat_ds list, saving away resource in out hash table
561 */
562 for (dsp = dsstatus; dsp; dsp = dsp->scstat_ds_next) {
563
564 /* Skip over NULL scstat_ds_name's */
565 if ((dsp->scstat_ds_name == NULL) ||
566 (dsp->scstat_ds_name[0] == '\0'))
567 continue;
568
569 /* See element exits already, error if so */
570 if (nsc_lookup(schash, dsp->scstat_ds_name)) {
571 fprintf(stderr, "scstat_get_ds_status: duplicate %s",
572 dsp->scstat_ds_name);
573 errno = EEXIST;
574 return (-1);
575 }
576
577 /* Traverse the node status list */
578 for (dsn = dsp->scstat_node_state_list; dsn;
579 dsn = dsn->scstat_node_next) {
580 /*
581 * Only keep trace of primary nodes
582 */
583 if (dsn->scstat_node_state != SCSTAT_PRIMARY)
584 continue;
585
586 /* Create an element to insert */
587 hdp = (hash_data_t *)malloc(sizeof (hash_data_t));
588 hdp->scstat_node_name = strdup(dsn->scstat_node_name);
589 nsc_insert_node(schash, hdp, dsp->scstat_ds_name);
590 }
591 }
592
593 /*
594 * Free up scstat resources
595 */
596 scstat_free_ds_status(dsstatus);
597 return (0);
598 }
599