1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <wait.h>
30 #include <sys/time.h>
31 #include <strings.h>
32 #include <meta.h>
33 #include <syslog.h>
34
/*
 * Table of per-message-type properties, defined outside this file.
 * Code in this file indexes it by md_mn_msgtype_t and reads the class,
 * handler, submessage generator, timeout and retry/tick settings of an entry.
 */
extern md_mn_msg_tbl_entry_t msg_table[];
36
/*
 * When contacting the local rpc.mdcommd we always want to do that using
 * the IPv4 version of localhost.  This is the host name handed to
 * meta_client_create() and cl_sto() throughout this file.
 */
#define	LOCALHOST_IPv4	"127.0.0.1"
42
43 md_mn_msgclass_t
mdmn_get_message_class(md_mn_msgtype_t msgtype)44 mdmn_get_message_class(md_mn_msgtype_t msgtype)
45 {
46 return (msg_table[msgtype].mte_class);
47 }
48
49 void (*
mdmn_get_handler(md_mn_msgtype_t msgtype)50 mdmn_get_handler(md_mn_msgtype_t msgtype))
51 (md_mn_msg_t *msg, uint_t flags, md_mn_result_t *res)
52 {
53 return (msg_table[msgtype].mte_handler);
54 }
55
56 int (*
mdmn_get_submessage_generator(md_mn_msgtype_t msgtype)57 mdmn_get_submessage_generator(md_mn_msgtype_t msgtype))
58 (md_mn_msg_t *msg, md_mn_msg_t **msglist)
59 {
60 return (msg_table[msgtype].mte_smgen);
61 }
62
63 time_t
mdmn_get_timeout(md_mn_msgtype_t msgtype)64 mdmn_get_timeout(md_mn_msgtype_t msgtype)
65 {
66 return (msg_table[msgtype].mte_timeout);
67 }
68
69
70 void
ldump_msg(char * prefix,md_mn_msg_t * msg)71 ldump_msg(char *prefix, md_mn_msg_t *msg)
72 {
73 (void) fprintf(stderr, "%s &msg = 0x%x\n", prefix, (uint_t)msg);
74 (void) fprintf(stderr, "%s ID = (%d, 0x%llx-%d)\n", prefix,
75 MSGID_ELEMS(msg->msg_msgid));
76 (void) fprintf(stderr, "%s sender = %d\n", prefix, msg->msg_sender);
77 (void) fprintf(stderr, "%s flags = 0x%x\n",
78 prefix, msg->msg_flags);
79 (void) fprintf(stderr, "%s setno = %d\n", prefix, msg->msg_setno);
80 (void) fprintf(stderr, "%s recipient = %d\n",
81 prefix, msg->msg_recipient);
82 (void) fprintf(stderr, "%s type = %d\n", prefix, msg->msg_type);
83 (void) fprintf(stderr, "%s size = %d\n",
84 prefix, msg->msg_event_size);
85 }
86
/* Name of the commd daemon as it is put into the RPC error messages */
#define	COMMD_PROGNAME	"rpc.mdcommd"

/*
 * Defined outside this file.  mdmn_handle_RPC_error() treats the returned
 * value as a bit mask that is tested with (1 << RPC status).
 */
extern uint_t meta_rpc_err_mask(void);
90
91 /*
92 * If a clnt_call gets an RPC error, force the message out here with details.
93 * This would be nice to send to commd_debug(), but we can't call rpc.mdcommd
94 * code from libmeta.
95 */
96 static void
mdmn_handle_RPC_error(CLIENT * clnt,char * ident,md_mn_nodeid_t nid)97 mdmn_handle_RPC_error(CLIENT *clnt, char *ident, md_mn_nodeid_t nid)
98 {
99 /*
100 * This is sized for a max message which would look like this:
101 * "mdmn_wakeup_initiator: rpc.mdcommd node 4294967295"
102 */
103 char errstr[51];
104 struct rpc_err e;
105
106 CLNT_GETERR((CLIENT *) clnt, &e);
107 if (meta_rpc_err_mask() & (1 << e.re_status)) {
108 if (nid == 0) {
109 (void) snprintf(errstr, sizeof (errstr),
110 "%s: %s node (local)", ident, COMMD_PROGNAME);
111 } else {
112 (void) snprintf(errstr, sizeof (errstr),
113 "%s: %s node %d", ident, COMMD_PROGNAME, nid);
114 }
115 syslog(LOG_WARNING, "mdmn_handle_RPC_error: %s",
116 clnt_sperror(clnt, errstr));
117 }
118 }
119
/*
 * Timeout value (25 seconds) passed to every clnt_call() made by the RPC
 * stubs in this file.
 * Default timeout can be changed using clnt_control()
 */
static struct timeval TIMEOUT = { 25, 0 };
122
123 md_mn_result_t *
mdmn_send_2(argp,clnt,nid)124 mdmn_send_2(argp, clnt, nid)
125 md_mn_msg_t *argp;
126 CLIENT *clnt;
127 md_mn_nodeid_t nid;
128 {
129 enum clnt_stat res;
130 md_mn_result_t *clnt_res = Zalloc(sizeof (md_mn_result_t));
131
132 res = clnt_call(clnt, mdmn_send,
133 (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
134 (xdrproc_t)xdr_md_mn_result_t, (caddr_t)clnt_res, TIMEOUT);
135
136 if (res == RPC_SUCCESS) {
137 return (clnt_res);
138 }
139 mdmn_handle_RPC_error(clnt, "mdmn_send", nid);
140 Free(clnt_res);
141 return (NULL);
142 }
143
144 int *
mdmn_work_2(argp,clnt,nid)145 mdmn_work_2(argp, clnt, nid)
146 md_mn_msg_t *argp;
147 CLIENT *clnt;
148 md_mn_nodeid_t nid;
149 {
150 enum clnt_stat res;
151 int *clnt_res = Zalloc(sizeof (int));
152
153 res = clnt_call(clnt, mdmn_work,
154 (xdrproc_t)xdr_md_mn_msg_t, (caddr_t)argp,
155 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
156
157 if (res == RPC_SUCCESS) {
158 return (clnt_res);
159 }
160 mdmn_handle_RPC_error(clnt, "mdmn_work", nid);
161 Free(clnt_res);
162 return (NULL);
163 }
164
165 int *
mdmn_wakeup_initiator_2(argp,clnt,nid)166 mdmn_wakeup_initiator_2(argp, clnt, nid)
167 md_mn_result_t *argp;
168 CLIENT *clnt;
169 md_mn_nodeid_t nid;
170 {
171 enum clnt_stat res;
172 int *clnt_res = Zalloc(sizeof (int));
173
174 res = clnt_call(clnt, mdmn_wakeup_initiator,
175 (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
176 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
177
178 if (res == RPC_SUCCESS) {
179 return (clnt_res);
180 }
181 mdmn_handle_RPC_error(clnt, "mdmn_wakeup_initiator", nid);
182 Free(clnt_res);
183 return (NULL);
184 }
185
186 int *
mdmn_wakeup_master_2(argp,clnt,nid)187 mdmn_wakeup_master_2(argp, clnt, nid)
188 md_mn_result_t *argp;
189 CLIENT *clnt;
190 md_mn_nodeid_t nid;
191 {
192 enum clnt_stat res;
193 int *clnt_res = Zalloc(sizeof (int));
194
195 res = clnt_call(clnt, mdmn_wakeup_master,
196 (xdrproc_t)xdr_md_mn_result_t, (caddr_t)argp,
197 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
198
199 if (res == RPC_SUCCESS) {
200 return (clnt_res);
201 }
202 mdmn_handle_RPC_error(clnt, "mdmn_wakeup_master", nid);
203 Free(clnt_res);
204 return (NULL);
205 }
206
207 int *
mdmn_comm_lock_2(argp,clnt,nid)208 mdmn_comm_lock_2(argp, clnt, nid)
209 md_mn_set_and_class_t *argp;
210 CLIENT *clnt;
211 md_mn_nodeid_t nid;
212 {
213 enum clnt_stat res;
214 int *clnt_res = Zalloc(sizeof (int));
215
216 res = clnt_call(clnt, mdmn_comm_lock,
217 (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
218 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
219
220 if (res == RPC_SUCCESS) {
221 return (clnt_res);
222 }
223 mdmn_handle_RPC_error(clnt, "mdmn_comm_lock", nid);
224 Free(clnt_res);
225 return (NULL);
226 }
227
228 int *
mdmn_comm_unlock_2(argp,clnt,nid)229 mdmn_comm_unlock_2(argp, clnt, nid)
230 md_mn_set_and_class_t *argp;
231 CLIENT *clnt;
232 md_mn_nodeid_t nid;
233 {
234 enum clnt_stat res;
235 int *clnt_res = Zalloc(sizeof (int));
236
237 res = clnt_call(clnt, mdmn_comm_unlock,
238 (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
239 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
240
241 if (res == RPC_SUCCESS) {
242 return (clnt_res);
243 }
244 mdmn_handle_RPC_error(clnt, "mdmn_comm_unlock", nid);
245 Free(clnt_res);
246 return (NULL);
247 }
248
249 int *
mdmn_comm_suspend_2(argp,clnt,nid)250 mdmn_comm_suspend_2(argp, clnt, nid)
251 md_mn_set_and_class_t *argp;
252 CLIENT *clnt;
253 md_mn_nodeid_t nid;
254 {
255 enum clnt_stat res;
256 int *clnt_res = Zalloc(sizeof (int));
257
258 res = clnt_call(clnt, mdmn_comm_suspend,
259 (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
260 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
261
262 if (res == RPC_SUCCESS) {
263 return (clnt_res);
264 }
265 mdmn_handle_RPC_error(clnt, "mdmn_comm_suspend", nid);
266 Free(clnt_res);
267 return (NULL);
268 }
269
270 int *
mdmn_comm_resume_2(argp,clnt,nid)271 mdmn_comm_resume_2(argp, clnt, nid)
272 md_mn_set_and_class_t *argp;
273 CLIENT *clnt;
274 md_mn_nodeid_t nid;
275 {
276 enum clnt_stat res;
277 int *clnt_res = Zalloc(sizeof (int));
278
279 res = clnt_call(clnt, mdmn_comm_resume,
280 (xdrproc_t)xdr_md_mn_set_and_class_t, (caddr_t)argp,
281 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
282
283 if (res == RPC_SUCCESS) {
284 return (clnt_res);
285 }
286 mdmn_handle_RPC_error(clnt, "mdmn_comm_resume", nid);
287 Free(clnt_res);
288 return (NULL);
289 }
290
291 int *
mdmn_comm_reinit_set_2(argp,clnt,nid)292 mdmn_comm_reinit_set_2(argp, clnt, nid)
293 set_t *argp;
294 CLIENT *clnt;
295 md_mn_nodeid_t nid;
296 {
297 enum clnt_stat res;
298 int *clnt_res = Zalloc(sizeof (int));
299
300 res = clnt_call(clnt, mdmn_comm_reinit_set,
301 (xdrproc_t)xdr_set_t, (caddr_t)argp,
302 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
303
304 if (res == RPC_SUCCESS) {
305 return (clnt_res);
306 }
307 mdmn_handle_RPC_error(clnt, "mdmn_comm_reinit_set", nid);
308 Free(clnt_res);
309 return (NULL);
310 }
311
312 int *
mdmn_comm_msglock_2(argp,clnt,nid)313 mdmn_comm_msglock_2(argp, clnt, nid)
314 md_mn_type_and_lock_t *argp;
315 CLIENT *clnt;
316 md_mn_nodeid_t nid;
317 {
318 enum clnt_stat res;
319 int *clnt_res = Zalloc(sizeof (int));
320
321 res = clnt_call(clnt, mdmn_comm_msglock,
322 (xdrproc_t)xdr_md_mn_type_and_lock_t, (caddr_t)argp,
323 (xdrproc_t)xdr_int, (caddr_t)clnt_res, TIMEOUT);
324
325 if (res == RPC_SUCCESS) {
326 return (clnt_res);
327 }
328 mdmn_handle_RPC_error(clnt, "mdmn_comm_msglock", nid);
329 Free(clnt_res);
330 return (NULL);
331 }
332
333
/*
 * Length of one tick in microseconds.  The mte_ticks1 / mte_ticks2 values
 * taken from msg_table are multiplied by this before being passed to
 * usleep() between retries.
 */
#define	USECS_PER_TICK	10000
335
336
337 /*
338 * Let the kernel create a clusterwide unique message ID
339 *
340 * returns 0 on success
341 * 1 on failure
342 */
343
344 int
mdmn_create_msgid(md_mn_msgid_t * msgid)345 mdmn_create_msgid(md_mn_msgid_t *msgid)
346 {
347 md_error_t mde = mdnullerror;
348
349 if (msgid == NULL) {
350 return (1); /* failure */
351 }
352
353 if (metaioctl(MD_IOCGUNIQMSGID, msgid, &mde, NULL) != 0) {
354 msgid->mid_nid = ~0u;
355 msgid->mid_time = 0LL;
356 return (1); /* failure */
357 }
358
359 /*
360 * mid_smid and mid_oclass are only used for submessages.
361 * mdmn_create_msgid is never called for submessages, as they inherit
362 * the message ID from their parent.
363 * Thus we can safely null out the following fields.
364 */
365 msgid->mid_smid = 0;
366 msgid->mid_oclass = 0;
367
368 /* if the node_id is not set yet, somethings seems to be wrong */
369 if (msgid->mid_nid == ~0u) {
370 return (1); /* failure */
371 }
372
373 return (0); /* success */
374 }
375
376 md_mn_result_t *
copy_result(md_mn_result_t * res)377 copy_result(md_mn_result_t *res)
378 {
379 md_mn_result_t *nres;
380 nres = Zalloc(sizeof (md_mn_result_t));
381 /* It's MSGID_COPY(from, to); */
382 MSGID_COPY(&(res->mmr_msgid), &(nres->mmr_msgid));
383 nres->mmr_msgtype = res->mmr_msgtype;
384 nres->mmr_setno = res->mmr_setno;
385 nres->mmr_flags = res->mmr_flags;
386 nres->mmr_sender = res->mmr_sender;
387 nres->mmr_failing_node = res->mmr_failing_node;
388 nres->mmr_comm_state = res->mmr_comm_state;
389 nres->mmr_exitval = res->mmr_exitval;
390 nres->mmr_out_size = res->mmr_out_size;
391 nres->mmr_err_size = res->mmr_err_size;
392 if (res->mmr_out_size > 0) {
393 nres->mmr_out = Zalloc(res->mmr_out_size);
394 bcopy(res->mmr_out, nres->mmr_out, res->mmr_out_size);
395 }
396 if (res->mmr_err_size > 0) {
397 nres->mmr_err = Zalloc(res->mmr_err_size);
398 bcopy(res->mmr_err, nres->mmr_err, res->mmr_err_size);
399 }
400 if (res->mmr_ep.host != '\0') {
401 nres->mmr_ep.host = strdup(res->mmr_ep.host);
402 }
403 if (res->mmr_ep.extra != '\0') {
404 nres->mmr_ep.extra = strdup(res->mmr_ep.extra);
405 }
406 if (res->mmr_ep.name != '\0') {
407 nres->mmr_ep.name = strdup(res->mmr_ep.name);
408 }
409 return (nres);
410 }
411
412 void
free_result(md_mn_result_t * res)413 free_result(md_mn_result_t *res)
414 {
415 if (res->mmr_out_size > 0) {
416 Free(res->mmr_out);
417 }
418 if (res->mmr_err_size > 0) {
419 Free(res->mmr_err);
420 }
421 if (res->mmr_ep.host != '\0') {
422 Free(res->mmr_ep.host);
423 }
424 if (res->mmr_ep.extra != '\0') {
425 Free(res->mmr_ep.extra);
426 }
427 if (res->mmr_ep.name != '\0') {
428 Free(res->mmr_ep.name);
429 }
430 Free(res);
431 }
432
433
434 /* allocate a new message and copy a given message into it */
435 md_mn_msg_t *
copy_msg(md_mn_msg_t * msg,md_mn_msg_t * dest)436 copy_msg(md_mn_msg_t *msg, md_mn_msg_t *dest)
437 {
438 md_mn_msg_t *nmsg;
439
440 nmsg = dest;
441
442 if (nmsg == NULL) {
443 nmsg = Zalloc(sizeof (md_mn_msg_t));
444 }
445 if (nmsg->msg_event_data == NULL) {
446 nmsg->msg_event_data = Zalloc(msg->msg_event_size);
447 }
448 /* It's MSGID_COPY(from, to); */
449 MSGID_COPY(&(msg->msg_msgid), &(nmsg->msg_msgid));
450 nmsg->msg_sender = msg->msg_sender;
451 nmsg->msg_flags = msg->msg_flags;
452 nmsg->msg_setno = msg->msg_setno;
453 nmsg->msg_type = msg->msg_type;
454 nmsg->msg_recipient = msg->msg_recipient;
455 nmsg->msg_event_size = msg->msg_event_size;
456 if (msg->msg_event_size > 0) {
457 bcopy(msg->msg_event_data, nmsg->msg_event_data,
458 msg->msg_event_size);
459 }
460 return (nmsg);
461 }
462
463 void
copy_msg_2(md_mn_msg_t * msg,md_mn_msg_od_t * msgod,int direction)464 copy_msg_2(md_mn_msg_t *msg, md_mn_msg_od_t *msgod, int direction)
465 {
466 assert((direction == MD_MN_COPY_TO_ONDISK) ||
467 (direction == MD_MN_COPY_TO_INCORE));
468
469 if (direction == MD_MN_COPY_TO_ONDISK) {
470 MSGID_COPY(&(msg->msg_msgid), &(msgod->msg_msgid));
471 msgod->msg_sender = msg->msg_sender;
472 msgod->msg_flags = msg->msg_flags;
473 msgod->msg_setno = msg->msg_setno;
474 msgod->msg_type = msg->msg_type;
475 msgod->msg_recipient = msg->msg_recipient;
476 msgod->msg_od_event_size = msg->msg_event_size;
477 /* paranoid checks */
478 if (msg->msg_event_size != 0 && msg->msg_event_data != NULL)
479 bcopy(msg->msg_event_data,
480 &msgod->msg_od_event_data[0], msg->msg_event_size);
481 } else {
482 MSGID_COPY(&(msgod->msg_msgid), &(msg->msg_msgid));
483 msg->msg_sender = msgod->msg_sender;
484 msg->msg_flags = msgod->msg_flags;
485 msg->msg_setno = msgod->msg_setno;
486 msg->msg_type = msgod->msg_type;
487 msg->msg_recipient = msgod->msg_recipient;
488 msg->msg_event_size = msgod->msg_od_event_size;
489 if (msg->msg_event_data == NULL)
490 msg->msg_event_data = Zalloc(msg->msg_event_size);
491
492 bcopy(&msgod->msg_od_event_data[0],
493 msg->msg_event_data, msgod->msg_od_event_size);
494 }
495 }
496
497 /* Free a message */
498 void
free_msg(md_mn_msg_t * msg)499 free_msg(md_mn_msg_t *msg)
500 {
501 if (msg->msg_event_size > 0) {
502 Free(msg->msg_event_data);
503 }
504 Free(msg);
505 }
506
507
/* The following declarations are only for the next two routines */

/* Head of the list of currently unused RPC clients to the local commd */
md_mn_client_list_t *mdmn_clients;

/* Held while the mdmn_clients list is read or changed */
mutex_t	mcl_mutex;

/* Flag values for mdmn_get_local_clnt() */
#define	MNGLC_INIT_ONLY	0x0001	/* just initialize the mutex */
#define	MNGLC_FOR_REAL	0x0002	/* do real work: hand out a client */
515 /*
516 * mdmn_get_local_clnt(flag)
517 * If there is a client in the free pool, get one,
518 * If no client is available, create one.
519 * Every multithreaded application that uses mdmn_send_message must call it
520 * single threaded first with special flags so we do the initialization
521 * stuff in a safe environment.
522 *
523 * Input: MNGLC_INIT_ONLY: just initializes the mutex
524 * MNGLC_FOR_REAL : do real work
525 * Output:
526 * An rpc client for sending rpc requests to the local commd
527 * NULL in case of an error
528 *
529 */
530 static CLIENT *
mdmn_get_local_clnt(uint_t flag)531 mdmn_get_local_clnt(uint_t flag)
532 {
533 CLIENT *local_daemon;
534 static int inited = 0;
535 md_mn_client_list_t *tmp;
536
537 if (inited == 0) {
538 (void) mutex_init(&mcl_mutex, USYNC_THREAD, NULL);
539 inited = 1;
540 }
541
542 if (flag == MNGLC_INIT_ONLY)
543 return ((CLIENT *)NULL);
544
545 (void) mutex_lock(&mcl_mutex);
546 if (mdmn_clients == (md_mn_client_list_t *)NULL) {
547 /* if there is no entry, create a client and return a it */
548 local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD,
549 TWO, "tcp");
550 } else {
551 /*
552 * If there is an entry from a previous put operation,
553 * remove it from the head of the list and free the list stuff
554 * around it. Then return the client
555 */
556 local_daemon = mdmn_clients->mcl_clnt;
557 tmp = mdmn_clients;
558 mdmn_clients = mdmn_clients->mcl_next;
559 Free(tmp);
560 }
561 (void) mutex_unlock(&mcl_mutex);
562
563
564 if (local_daemon == (CLIENT *)NULL) {
565 clnt_pcreateerror("local_daemon");
566 }
567
568 return (local_daemon);
569 }
570
571 /*
572 * mdmn_put_local_clnt()
573 * returns a no longer used client to the pool
574 *
575 * Input: an RPC client
576 * Output: void
577 */
578 static void
mdmn_put_local_clnt(CLIENT * local_daemon)579 mdmn_put_local_clnt(CLIENT *local_daemon)
580 {
581 md_mn_client_list_t *tmp;
582
583 (void) mutex_lock(&mcl_mutex);
584
585 tmp = mdmn_clients;
586 mdmn_clients = (md_mn_client_list_t *)
587 malloc(sizeof (md_mn_client_list_t));
588 mdmn_clients->mcl_clnt = local_daemon;
589 mdmn_clients->mcl_next = tmp;
590
591 (void) mutex_unlock(&mcl_mutex);
592 }
593
594 /*
595 * This is the regular interface for sending a message.
596 * This function only passes through all arguments to
597 * mdmn_send_message_with_msgid() and adds a NULL for the message ID.
598 *
599 * Normally, you don't have already a message ID for the message you want
600 * to send. Only in case of replaying a previously logged message,
601 * a msgid is already attached to it.
602 * In that case mdmn_send_message_with_msgid() has to be called directly.
603 *
604 * The recipient argument is almost always unused, and is therefore typically
605 * set to zero, as zero is an invalid cluster nodeid. The exceptions are the
606 * marking and clearing of the DRL from a node that is not currently the
607 * owner. In these cases, the recipient argument will be the nodeid of the
608 * mirror owner, and MD_MSGF_DIRECTED will be set in the flags. Non-owner
609 * nodes will not receive these messages.
610 *
611 * Return values / CAVEAT EMPTOR: see mdmn_send_message_with_msgid()
612 */
613
614 int
mdmn_send_message(set_t setno,md_mn_msgtype_t type,uint_t flags,md_mn_nodeid_t recipient,char * data,int size,md_mn_result_t ** result,md_error_t * ep)615 mdmn_send_message(
616 set_t setno,
617 md_mn_msgtype_t type,
618 uint_t flags,
619 md_mn_nodeid_t recipient,
620 char *data,
621 int size,
622 md_mn_result_t **result,
623 md_error_t *ep)
624 {
625 return (mdmn_send_message_with_msgid(setno, type, flags,
626 recipient, data, size, result, MD_NULL_MSGID, ep));
627 }
628 /*
629 * mdmn_send_message_with_msgid()
630 * Create a message from the given pieces of data and hand it over
631 * to the local commd.
632 * This may fail for various reasons (rpc error / class busy / class locked ...)
633 * Some error types are immediately deadly, others will cause retries
634 * until the request is fulfilled or until the retries are ecxceeded.
635 *
636 * In case an error is returned it is up to the user to decide what to do.
637 *
638 * Returns:
639 * 0 on success
640 * 1 if retries1 exceeded
641 * 2 if retries2 exceeded
642 * -1 if connecting to the local daemon failed
643 * -2 if the RPC call to the local daemon failed
644 * -3 if this node hasn't yet joined the set
645 * -4 if any other problem occured
646 *
647 * CAVEAT EMPTOR:
648 * The caller is responsible for calling free_result() when finished with
649 * the results!
650 */
651 int
mdmn_send_message_with_msgid(set_t setno,md_mn_msgtype_t type,uint_t flags,md_mn_nodeid_t recipient,char * data,int size,md_mn_result_t ** result,md_mn_msgid_t * msgid,md_error_t * ep)652 mdmn_send_message_with_msgid(
653 set_t setno,
654 md_mn_msgtype_t type,
655 uint_t flags,
656 md_mn_nodeid_t recipient,
657 char *data,
658 int size,
659 md_mn_result_t **result,
660 md_mn_msgid_t *msgid,
661 md_error_t *ep)
662 {
663 uint_t retry1, ticks1, retry2, ticks2;
664 int retval;
665
666 CLIENT *local_daemon;
667 struct timeval timeout;
668
669 md_mn_msg_t msg;
670 md_mn_result_t *resp;
671
672 /*
673 * Special case for multithreaded applications:
674 * When starting up, the application should call mdmn_send_message
675 * single threaded with all parameters set to NULL.
676 * When we detect this we know, we safely can do initialization
677 * stuff here.
678 * We only check for set and type being zero
679 */
680 if ((setno == 0) && (type == 0)) {
681 /* do all needed initializations here */
682 (void) mdmn_get_local_clnt(MNGLC_INIT_ONLY);
683 return (0); /* success */
684 }
685
686
687 /* did the caller specify space to store the result pointer? */
688 if (result == (md_mn_result_t **)NULL) {
689 syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
690 "FATAL, can not allocate result structure\n"));
691 return (-4);
692 }
693 *result = NULL;
694
695 /* Replay messages already have their msgID */
696 if ((flags & MD_MSGF_REPLAY_MSG) == 0) {
697 if (mdmn_create_msgid(&msg.msg_msgid) != 0) {
698 syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
699 "FATAL, can not create message ID\n"));
700 return (-4);
701 }
702 } else {
703 /* in this case a message ID must be specified */
704 assert(msgid != MD_NULL_MSGID);
705 MSGID_COPY(msgid, &msg.msg_msgid);
706 }
707
708
709 /*
710 * When setting the flags, additionally apply the
711 * default flags for this message type.
712 */
713 msg.msg_flags = flags;
714 msg.msg_setno = setno;
715 msg.msg_recipient = recipient;
716 msg.msg_type = type;
717 msg.msg_event_size = size;
718 msg.msg_event_data = data;
719
720 /*
721 * For the timeout pick the specific timeout for the message times the
722 * the maximum number of nodes.
723 * This is a better estimate than 1 hour or 3 days or never.
724 */
725 timeout.tv_sec = mdmn_get_timeout(type) * NNODES;
726 timeout.tv_usec = 0;
727
728 if (flags & MD_MSGF_VERBOSE) {
729 syslog(LOG_INFO, "send_message: ID=(%d, 0x%llx-%d)\n",
730 MSGID_ELEMS(msg.msg_msgid));
731 }
732
733 /* get an RPC client to the local commd */
734 local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
735 if (local_daemon == (CLIENT *)NULL) {
736 return (-1);
737 }
738 clnt_control(local_daemon, CLSET_TIMEOUT, (char *)&timeout);
739
740 retry1 = msg_table[type].mte_retry1;
741 ticks1 = msg_table[type].mte_ticks1;
742 retry2 = msg_table[type].mte_retry2;
743 ticks2 = msg_table[type].mte_ticks2;
744
745 /*
746 * run that loop until:
747 * - commstate is Ok
748 * - deadly commstate occured
749 * - retries1 or retries2 exceeded
750 */
751 for (; ; ) {
752 *result = mdmn_send_2(&msg, local_daemon, 0);
753 resp = *result;
754 if (resp != (md_mn_result_t *)NULL) {
755 /* Bingo! */
756 if (resp->mmr_comm_state == MDMNE_ACK) {
757 retval = 0;
758 goto out;
759 }
760 /* Hmm... what if there's no handler? */
761 if (resp->mmr_comm_state == MDMNE_NO_HANDLER) {
762 retval = 0;
763 goto out;
764
765 }
766 /*
767 * This node didn't yet join the disk set. It is not
768 * supposed to send any messages then.
769 * This is deadly (no retries)
770 */
771 if (resp->mmr_comm_state == MDMNE_NOT_JOINED) {
772 retval = -3;
773 goto out;
774
775 }
776 /* these two are deadly too (no retries) */
777 if ((resp->mmr_comm_state == MDMNE_NO_WAKEUP_ENTRY) ||
778 (resp->mmr_comm_state == MDMNE_LOG_FAIL)) {
779 retval = -4;
780 goto out;
781
782 }
783 /* Class busy? Use retry1 */
784 if (resp->mmr_comm_state == MDMNE_CLASS_BUSY) {
785 if (retry1-- == 0) {
786 retval = 1; /* retry1 exceeded */
787 goto out;
788 }
789 (void) usleep(ticks1 * USECS_PER_TICK);
790 free_result(resp);
791
792 if (flags & MD_MSGF_VERBOSE)
793 (void) printf("#Resend1 ID=(%d, "
794 "0x%llx-%d)\n",
795 MSGID_ELEMS(msg.msg_msgid));
796 continue;
797 }
798 if ((resp->mmr_comm_state == MDMNE_CLASS_LOCKED) ||
799 (resp->mmr_comm_state == MDMNE_ABORT)) {
800 /*
801 * Be patient, wait for 1 secs and try again.
802 * It's not likely that the ABORT condition ever
803 * goes away, but it won't hurt to retry
804 */
805 free_result(resp);
806 (void) sleep(1);
807 continue;
808 }
809 if (resp->mmr_comm_state == MDMNE_SUSPENDED) {
810 if (flags & MD_MSGF_FAIL_ON_SUSPEND) {
811 /* caller wants us to fail here */
812 (void) mddserror(ep,
813 MDE_DS_NOTNOW_RECONFIG, setno,
814 mynode(), mynode(), NULL);
815 retval = -4;
816 goto out;
817 } else {
818 /* wait for 1 secs and try again. */
819 free_result(resp);
820 (void) sleep(1);
821 continue;
822 }
823 }
824 } else {
825 /*
826 * If we get a NULL back from the rpc call, try to
827 * reinitialize the client.
828 * Depending on retries2 we try again, or not.
829 */
830 syslog(LOG_INFO,
831 "send_message: ID=(%d, 0x%llx-%d) resp = NULL\n",
832 MSGID_ELEMS(msg.msg_msgid));
833
834 clnt_destroy(local_daemon);
835 local_daemon = mdmn_get_local_clnt(MNGLC_FOR_REAL);
836
837 if (local_daemon == (CLIENT *)NULL) {
838 return (-1);
839 }
840 clnt_control(local_daemon, CLSET_TIMEOUT,
841 (char *)&timeout);
842 }
843
844 /*
845 * If we are here, either resp is zero or resp is non-zero
846 * but some commstate not mentioned above occured.
847 * In either case we use retry2
848 */
849 if (retry2-- == 0) {
850 syslog(LOG_INFO, dgettext(TEXT_DOMAIN,
851 "send_message: (%d, 0x%llx-%d) retry2 exceeded\n"),
852 MSGID_ELEMS(msg.msg_msgid));
853
854 retval = 2; /* retry2 exceeded */
855 goto out;
856 }
857 if (flags & MD_MSGF_VERBOSE) {
858 syslog(LOG_DEBUG, dgettext(TEXT_DOMAIN,
859 "send_message: (%d, 0x%llx-%d) resend on retry2\n"),
860 MSGID_ELEMS(msg.msg_msgid));
861 }
862
863 (void) usleep(ticks2 * USECS_PER_TICK);
864
865 if (resp != (md_mn_result_t *)NULL) {
866 free_result(resp);
867 }
868 }
869 out:
870 mdmn_put_local_clnt(local_daemon);
871 return (retval);
872 }
873
874 /*
875 * suspend the commd for a given set/class combination.
876 *
877 * Parameter:
878 * set number or 0 (meaning all sets)
879 * class number or 0 (meaning all classes)
880 *
881 * Returns:
882 * 0 on success (set is suspended and all messages drained)
883 * MDE_DS_COMMDCTL_SUSPEND_NYD if set is not yet drained
884 * MDE_DS_COMMDCTL_SUSPEND_FAIL if any failure occurred
885 */
886 int
mdmn_suspend(set_t setno,md_mn_msgclass_t class,long timeout)887 mdmn_suspend(set_t setno, md_mn_msgclass_t class, long timeout)
888 {
889 int *resp;
890 CLIENT *local_daemon;
891 md_mn_set_and_class_t msc;
892 md_error_t xep = mdnullerror;
893
894 if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) {
895 return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
896 }
897 local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
898 "tcp");
899 if (local_daemon == (CLIENT *)NULL) {
900 clnt_pcreateerror("local_daemon");
901 return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
902 }
903
904 if (timeout != 0) {
905 if (cl_sto(local_daemon, LOCALHOST_IPv4, timeout, &xep) != 0) {
906 clnt_destroy(local_daemon);
907 return (1);
908 }
909 }
910
911 msc.msc_set = setno;
912 msc.msc_class = class;
913 msc.msc_flags = 0;
914
915 resp = mdmn_comm_suspend_2(&msc, local_daemon, 0);
916 clnt_destroy(local_daemon);
917
918 if (resp == NULL) {
919 return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
920 }
921
922 if (*resp == MDMNE_ACK) {
923 /* set successfully drained, no outstanding messages */
924 return (0);
925 }
926 if (*resp != MDMNE_SET_NOT_DRAINED) {
927 /* some error occurred */
928 return (MDE_DS_COMMDCTL_SUSPEND_FAIL);
929 }
930
931 /* still outstanding messages, return not yet drained failure */
932 return (MDE_DS_COMMDCTL_SUSPEND_NYD);
933 }
934
935 /*
936 * resume the commd for a given set/class combination.
937 *
938 * Parameter:
939 * set number or 0 (meaning all sets)
940 * class number or 0 (meaning all classes)
941 *
942 * Returns:
943 * 0 on success
944 * MDE_DS_COMMDCTL_RESUME_FAIL on failure
945 */
946 int
mdmn_resume(set_t setno,md_mn_msgclass_t class,uint_t flags,long timeout)947 mdmn_resume(set_t setno, md_mn_msgclass_t class, uint_t flags, long timeout)
948 {
949 md_mn_set_and_class_t msc;
950 int ret = MDE_DS_COMMDCTL_RESUME_FAIL;
951 int *resp;
952 CLIENT *local_daemon;
953 md_error_t xep = mdnullerror;
954
955 if ((setno >= MD_MAXSETS) || (class >= MD_MN_NCLASSES)) {
956 return (MDE_DS_COMMDCTL_RESUME_FAIL);
957 }
958 local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
959 "tcp");
960 if (local_daemon == (CLIENT *)NULL) {
961 clnt_pcreateerror("local_daemon");
962 return (MDE_DS_COMMDCTL_RESUME_FAIL);
963 }
964
965 if (timeout != 0) {
966 if (cl_sto(local_daemon, LOCALHOST_IPv4, timeout, &xep) != 0) {
967 clnt_destroy(local_daemon);
968 return (1);
969 }
970 }
971
972 msc.msc_set = setno;
973 msc.msc_class = class;
974 msc.msc_flags = flags;
975
976 resp = mdmn_comm_resume_2(&msc, local_daemon, 0);
977
978 if (resp != NULL) {
979 if (*resp == MDMNE_ACK) {
980 ret = 0;
981 }
982 Free(resp);
983 }
984
985 clnt_destroy(local_daemon);
986 return (ret);
987 }
988
989 /*
990 * abort all communication
991 *
992 * returns void, because: if *this* get's an error what do you want to do?
993 */
994 void
mdmn_abort(void)995 mdmn_abort(void)
996 {
997 char *dummy = "abort";
998 md_mn_result_t *resultp = NULL;
999 md_error_t mdne = mdnullerror;
1000
1001 (void) mdmn_send_message(0, /* No set is needed for this message */
1002 MD_MN_MSG_ABORT, MD_MSGF_LOCAL_ONLY, 0,
1003 dummy, sizeof (dummy), &resultp, &mdne);
1004
1005 if (resultp != NULL) {
1006 Free(resultp);
1007 }
1008 }
1009
1010 /*
1011 * trigger the reinitialization for a given set.
1012 *
1013 * Parameter: set number
1014 *
1015 * Returns:
1016 * 0 on success
1017 * 1 on failure
1018 */
1019 int
mdmn_reinit_set(set_t setno,long timeout)1020 mdmn_reinit_set(set_t setno, long timeout)
1021 {
1022 int ret = 1;
1023 int *resp;
1024 CLIENT *local_daemon;
1025 md_error_t xep = mdnullerror;
1026
1027 if ((setno == 0) || (setno >= MD_MAXSETS)) {
1028 return (1);
1029 }
1030 local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
1031 "tcp");
1032 if (local_daemon == (CLIENT *)NULL) {
1033 clnt_pcreateerror("local_daemon");
1034 return (1);
1035 }
1036
1037 if (timeout != 0) {
1038 if (cl_sto(local_daemon, LOCALHOST_IPv4, timeout, &xep) != 0) {
1039 clnt_destroy(local_daemon);
1040 return (1);
1041 }
1042 }
1043
1044 resp = mdmn_comm_reinit_set_2(&setno, local_daemon, 0);
1045
1046 if (resp != NULL) {
1047 if (*resp == MDMNE_ACK) {
1048 ret = 0;
1049 }
1050 Free(resp);
1051 }
1052
1053 clnt_destroy(local_daemon);
1054 return (ret);
1055 }
1056
1057
1058 /*
1059 * Lock a single message type from being processed on this node
1060 *
1061 * Parameter: md_mn_msgtype_t msgtype, uint_t locktype
1062 *
1063 * Returns:
1064 * 0 on success
1065 * 1 on failure
1066 */
1067 int
mdmn_msgtype_lock(md_mn_msgtype_t msgtype,uint_t locktype)1068 mdmn_msgtype_lock(md_mn_msgtype_t msgtype, uint_t locktype)
1069 {
1070 int ret = 1;
1071 int *resp;
1072 CLIENT *local_daemon;
1073 md_mn_type_and_lock_t mmtl;
1074
1075
1076 if ((msgtype == 0) || (msgtype >= MD_MN_NMESSAGES)) {
1077 return (1);
1078 }
1079 local_daemon = meta_client_create(LOCALHOST_IPv4, MDMN_COMMD, TWO,
1080 "tcp");
1081 if (local_daemon == (CLIENT *)NULL) {
1082 clnt_pcreateerror("local_daemon");
1083 return (1);
1084 }
1085 mmtl.mmtl_type = msgtype;
1086 mmtl.mmtl_lock = locktype;
1087
1088 resp = mdmn_comm_msglock_2(&mmtl, local_daemon, 0);
1089
1090 if (resp != NULL) {
1091 if (*resp == MDMNE_ACK) {
1092 ret = 0;
1093 }
1094 Free(resp);
1095 }
1096
1097 clnt_destroy(local_daemon);
1098 return (ret);
1099 }
1100