1 /*- 2 * Copyright (c) 2009-2010 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Pawel Jakub Dawidek under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/types.h> 34 #include <sys/wait.h> 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <pthread.h> 39 #include <signal.h> 40 #include <stdio.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "hast.h" 45 #include "hastd.h" 46 #include "hast_proto.h" 47 #include "hooks.h" 48 #include "nv.h" 49 #include "pjdlog.h" 50 #include "proto.h" 51 #include "subr.h" 52 53 #include "control.h" 54 55 void 56 child_cleanup(struct hast_resource *res) 57 { 58 59 proto_close(res->hr_ctrl); 60 res->hr_ctrl = NULL; 61 if (res->hr_event != NULL) { 62 proto_close(res->hr_event); 63 res->hr_event = NULL; 64 } 65 res->hr_workerpid = 0; 66 } 67 68 static void 69 control_set_role_common(struct hastd_config *cfg, struct nv *nvout, 70 uint8_t role, struct hast_resource *res, const char *name, unsigned int no) 71 { 72 int oldrole; 73 74 /* Name is always needed. */ 75 if (name != NULL) 76 nv_add_string(nvout, name, "resource%u", no); 77 78 if (res == NULL) { 79 assert(cfg != NULL); 80 assert(name != NULL); 81 82 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 83 if (strcmp(res->hr_name, name) == 0) 84 break; 85 } 86 if (res == NULL) { 87 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 88 return; 89 } 90 } 91 assert(res != NULL); 92 93 /* Send previous role back. */ 94 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 95 96 /* Nothing changed, return here. */ 97 if (role == res->hr_role) 98 return; 99 100 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 101 pjdlog_info("Role changed to %s.", role2str(role)); 102 103 /* Change role to the new one. */ 104 oldrole = res->hr_role; 105 res->hr_role = role; 106 pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role)); 107 108 /* 109 * If previous role was primary or secondary we have to kill process 110 * doing that work. 111 */ 112 if (res->hr_workerpid != 0) { 113 if (kill(res->hr_workerpid, SIGTERM) < 0) { 114 pjdlog_errno(LOG_WARNING, 115 "Unable to kill worker process %u", 116 (unsigned int)res->hr_workerpid); 117 } else if (waitpid(res->hr_workerpid, NULL, 0) != 118 res->hr_workerpid) { 119 pjdlog_errno(LOG_WARNING, 120 "Error while waiting for worker process %u", 121 (unsigned int)res->hr_workerpid); 122 } else { 123 pjdlog_debug(1, "Worker process %u stopped.", 124 (unsigned int)res->hr_workerpid); 125 } 126 child_cleanup(res); 127 } 128 129 /* Start worker process if we are changing to primary. */ 130 if (role == HAST_ROLE_PRIMARY) 131 hastd_primary(res); 132 pjdlog_prefix_set("%s", ""); 133 hook_exec(res->hr_exec, "role", res->hr_name, role2str(oldrole), 134 role2str(res->hr_role), NULL); 135 } 136 137 void 138 control_set_role(struct hast_resource *res, uint8_t role) 139 { 140 141 control_set_role_common(NULL, NULL, role, res, NULL, 0); 142 } 143 144 static void 145 control_status_worker(struct hast_resource *res, struct nv *nvout, 146 unsigned int no) 147 { 148 struct nv *cnvin, *cnvout; 149 const char *str; 150 int error; 151 152 cnvin = cnvout = NULL; 153 error = 0; 154 155 /* 156 * Prepare and send command to worker process. 157 */ 158 cnvout = nv_alloc(); 159 nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd"); 160 error = nv_error(cnvout); 161 if (error != 0) { 162 /* LOG */ 163 goto end; 164 } 165 if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) { 166 error = errno; 167 /* LOG */ 168 goto end; 169 } 170 171 /* 172 * Receive response. 173 */ 174 if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) { 175 error = errno; 176 /* LOG */ 177 goto end; 178 } 179 180 error = nv_get_int64(cnvin, "error"); 181 if (error != 0) 182 goto end; 183 184 if ((str = nv_get_string(cnvin, "status")) == NULL) { 185 error = ENOENT; 186 /* LOG */ 187 goto end; 188 } 189 nv_add_string(nvout, str, "status%u", no); 190 nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no); 191 nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"), 192 "extentsize%u", no); 193 nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"), 194 "keepdirty%u", no); 195 end: 196 if (cnvin != NULL) 197 nv_free(cnvin); 198 if (cnvout != NULL) 199 nv_free(cnvout); 200 if (error != 0) 201 nv_add_int16(nvout, error, "error"); 202 } 203 204 static void 205 control_status(struct hastd_config *cfg, struct nv *nvout, 206 struct hast_resource *res, const char *name, unsigned int no) 207 { 208 209 assert(cfg != NULL); 210 assert(nvout != NULL); 211 assert(name != NULL); 212 213 /* Name is always needed. */ 214 nv_add_string(nvout, name, "resource%u", no); 215 216 if (res == NULL) { 217 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 218 if (strcmp(res->hr_name, name) == 0) 219 break; 220 } 221 if (res == NULL) { 222 nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no); 223 return; 224 } 225 } 226 assert(res != NULL); 227 nv_add_string(nvout, res->hr_provname, "provname%u", no); 228 nv_add_string(nvout, res->hr_localpath, "localpath%u", no); 229 nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no); 230 switch (res->hr_replication) { 231 case HAST_REPLICATION_FULLSYNC: 232 nv_add_string(nvout, "fullsync", "replication%u", no); 233 break; 234 case HAST_REPLICATION_MEMSYNC: 235 nv_add_string(nvout, "memsync", "replication%u", no); 236 break; 237 case HAST_REPLICATION_ASYNC: 238 nv_add_string(nvout, "async", "replication%u", no); 239 break; 240 default: 241 nv_add_string(nvout, "unknown", "replication%u", no); 242 break; 243 } 244 nv_add_string(nvout, role2str(res->hr_role), "role%u", no); 245 246 switch (res->hr_role) { 247 case HAST_ROLE_PRIMARY: 248 assert(res->hr_workerpid != 0); 249 /* FALLTHROUGH */ 250 case HAST_ROLE_SECONDARY: 251 if (res->hr_workerpid != 0) 252 break; 253 /* FALLTHROUGH */ 254 default: 255 return; 256 } 257 258 /* 259 * If we are here, it means that we have a worker process, which we 260 * want to ask some questions. 261 */ 262 control_status_worker(res, nvout, no); 263 } 264 265 void 266 control_handle(struct hastd_config *cfg) 267 { 268 struct proto_conn *conn; 269 struct nv *nvin, *nvout; 270 unsigned int ii; 271 const char *str; 272 uint8_t cmd, role; 273 int error; 274 275 if (proto_accept(cfg->hc_controlconn, &conn) < 0) { 276 pjdlog_errno(LOG_ERR, "Unable to accept control connection"); 277 return; 278 } 279 280 nvin = nvout = NULL; 281 role = HAST_ROLE_UNDEF; 282 283 if (hast_proto_recv_hdr(conn, &nvin) < 0) { 284 pjdlog_errno(LOG_ERR, "Unable to receive control header"); 285 nvin = NULL; 286 goto close; 287 } 288 289 /* Obtain command code. 0 means that nv_get_uint8() failed. */ 290 cmd = nv_get_uint8(nvin, "cmd"); 291 if (cmd == 0) { 292 pjdlog_error("Control header is missing 'cmd' field."); 293 error = EHAST_INVALID; 294 goto close; 295 } 296 297 /* Allocate outgoing nv structure. */ 298 nvout = nv_alloc(); 299 if (nvout == NULL) { 300 pjdlog_error("Unable to allocate header for control response."); 301 error = EHAST_NOMEMORY; 302 goto close; 303 } 304 305 error = 0; 306 307 str = nv_get_string(nvin, "resource0"); 308 if (str == NULL) { 309 pjdlog_error("Control header is missing 'resource0' field."); 310 error = EHAST_INVALID; 311 goto fail; 312 } 313 if (cmd == HASTCTL_SET_ROLE) { 314 role = nv_get_uint8(nvin, "role"); 315 switch (role) { 316 case HAST_ROLE_INIT: /* Is that valid to set, hmm? */ 317 case HAST_ROLE_PRIMARY: 318 case HAST_ROLE_SECONDARY: 319 break; 320 default: 321 pjdlog_error("Invalid role received (%hhu).", role); 322 error = EHAST_INVALID; 323 goto fail; 324 } 325 } 326 if (strcmp(str, "all") == 0) { 327 struct hast_resource *res; 328 329 /* All configured resources. */ 330 331 ii = 0; 332 TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) { 333 switch (cmd) { 334 case HASTCTL_SET_ROLE: 335 control_set_role_common(cfg, nvout, role, res, 336 res->hr_name, ii++); 337 break; 338 case HASTCTL_STATUS: 339 control_status(cfg, nvout, res, res->hr_name, 340 ii++); 341 break; 342 default: 343 pjdlog_error("Invalid command received (%hhu).", 344 cmd); 345 error = EHAST_UNIMPLEMENTED; 346 goto fail; 347 } 348 } 349 } else { 350 /* Only selected resources. */ 351 352 for (ii = 0; ; ii++) { 353 str = nv_get_string(nvin, "resource%u", ii); 354 if (str == NULL) 355 break; 356 switch (cmd) { 357 case HASTCTL_SET_ROLE: 358 control_set_role_common(cfg, nvout, role, NULL, 359 str, ii); 360 break; 361 case HASTCTL_STATUS: 362 control_status(cfg, nvout, NULL, str, ii); 363 break; 364 default: 365 pjdlog_error("Invalid command received (%hhu).", 366 cmd); 367 error = EHAST_UNIMPLEMENTED; 368 goto fail; 369 } 370 } 371 } 372 if (nv_error(nvout) != 0) 373 goto close; 374 fail: 375 if (error != 0) 376 nv_add_int16(nvout, error, "error"); 377 378 if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) 379 pjdlog_errno(LOG_ERR, "Unable to send control response"); 380 close: 381 if (nvin != NULL) 382 nv_free(nvin); 383 if (nvout != NULL) 384 nv_free(nvout); 385 proto_close(conn); 386 } 387 388 /* 389 * Thread handles control requests from the parent. 390 */ 391 void * 392 ctrl_thread(void *arg) 393 { 394 struct hast_resource *res = arg; 395 struct nv *nvin, *nvout; 396 uint8_t cmd; 397 398 for (;;) { 399 if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) { 400 if (sigexit_received) 401 pthread_exit(NULL); 402 pjdlog_errno(LOG_ERR, 403 "Unable to receive control message"); 404 kill(getpid(), SIGTERM); 405 pthread_exit(NULL); 406 } 407 cmd = nv_get_uint8(nvin, "cmd"); 408 if (cmd == 0) { 409 pjdlog_error("Control message is missing 'cmd' field."); 410 nv_free(nvin); 411 continue; 412 } 413 nv_free(nvin); 414 nvout = nv_alloc(); 415 switch (cmd) { 416 case HASTCTL_STATUS: 417 if (res->hr_remotein != NULL && 418 res->hr_remoteout != NULL) { 419 nv_add_string(nvout, "complete", "status"); 420 } else { 421 nv_add_string(nvout, "degraded", "status"); 422 } 423 nv_add_uint32(nvout, (uint32_t)res->hr_extentsize, 424 "extentsize"); 425 if (res->hr_role == HAST_ROLE_PRIMARY) { 426 nv_add_uint32(nvout, 427 (uint32_t)res->hr_keepdirty, "keepdirty"); 428 nv_add_uint64(nvout, 429 (uint64_t)(activemap_ndirty(res->hr_amp) * 430 res->hr_extentsize), "dirty"); 431 } else { 432 nv_add_uint32(nvout, (uint32_t)0, "keepdirty"); 433 nv_add_uint64(nvout, (uint64_t)0, "dirty"); 434 } 435 break; 436 default: 437 nv_add_int16(nvout, EINVAL, "error"); 438 break; 439 } 440 if (nv_error(nvout) != 0) { 441 pjdlog_error("Unable to create answer on control message."); 442 nv_free(nvout); 443 continue; 444 } 445 if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) { 446 pjdlog_errno(LOG_ERR, 447 "Unable to send reply to control message"); 448 } 449 nv_free(nvout); 450 } 451 /* NOTREACHED */ 452 return (NULL); 453 } 454