1 /*- 2 * Copyright (c) 2016 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/kernel.h> 29 #include <sys/conf.h> 30 #include <sys/uio.h> 31 #include <sys/bus.h> 32 #include <sys/malloc.h> 33 #include <sys/mbuf.h> 34 #include <sys/module.h> 35 #include <sys/lock.h> 36 #include <sys/taskqueue.h> 37 #include <sys/selinfo.h> 38 #include <sys/sysctl.h> 39 #include <sys/poll.h> 40 #include <sys/proc.h> 41 #include <sys/queue.h> 42 #include <sys/kthread.h> 43 #include <sys/syscallsubr.h> 44 #include <sys/sysproto.h> 45 #include <sys/un.h> 46 #include <sys/endian.h> 47 #include <sys/sema.h> 48 #include <sys/signal.h> 49 #include <sys/syslog.h> 50 #include <sys/systm.h> 51 #include <sys/mutex.h> 52 #include <sys/callout.h> 53 54 #include <dev/hyperv/include/hyperv.h> 55 #include <dev/hyperv/utilities/hv_utilreg.h> 56 #include <dev/hyperv/utilities/vmbus_icreg.h> 57 #include <dev/hyperv/utilities/vmbus_icvar.h> 58 59 #include "hv_snapshot.h" 60 #include "vmbus_if.h" 61 62 #define VSS_MAJOR 5 63 #define VSS_MINOR 0 64 #define VSS_MSGVER VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR) 65 66 #define VSS_FWVER_MAJOR 3 67 #define VSS_FWVER VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0) 68 69 #define TIMEOUT_LIMIT (15) // seconds 70 enum hv_vss_op { 71 VSS_OP_CREATE = 0, 72 VSS_OP_DELETE, 73 VSS_OP_HOT_BACKUP, 74 VSS_OP_GET_DM_INFO, 75 VSS_OP_BU_COMPLETE, 76 /* 77 * Following operations are only supported with IC version >= 5.0 78 */ 79 VSS_OP_FREEZE, /* Freeze the file systems in the VM */ 80 VSS_OP_THAW, /* Unfreeze the file systems */ 81 VSS_OP_AUTO_RECOVER, 82 VSS_OP_COUNT /* Number of operations, must be last */ 83 }; 84 85 /* 86 * Header for all VSS messages. 87 */ 88 struct hv_vss_hdr { 89 struct vmbus_icmsg_hdr ic_hdr; 90 uint8_t operation; 91 uint8_t reserved[7]; 92 } __packed; 93 94 95 /* 96 * Flag values for the hv_vss_check_feature. Here supports only 97 * one value. 98 */ 99 #define VSS_HBU_NO_AUTO_RECOVERY 0x00000005 100 101 struct hv_vss_check_feature { 102 uint32_t flags; 103 } __packed; 104 105 struct hv_vss_check_dm_info { 106 uint32_t flags; 107 } __packed; 108 109 struct hv_vss_msg { 110 union { 111 struct hv_vss_hdr vss_hdr; 112 } hdr; 113 union { 114 struct hv_vss_check_feature vss_cf; 115 struct hv_vss_check_dm_info dm_info; 116 } body; 117 } __packed; 118 119 struct hv_vss_req { 120 struct hv_vss_opt_msg opt_msg; /* used to communicate with daemon */ 121 struct hv_vss_msg msg; /* used to communicate with host */ 122 } __packed; 123 124 /* hv_vss debug control */ 125 static int hv_vss_log = 0; 126 127 #define hv_vss_log_error(...) do { \ 128 if (hv_vss_log > 0) \ 129 log(LOG_ERR, "hv_vss: " __VA_ARGS__); \ 130 } while (0) 131 132 #define hv_vss_log_info(...) do { \ 133 if (hv_vss_log > 1) \ 134 log(LOG_INFO, "hv_vss: " __VA_ARGS__); \ 135 } while (0) 136 137 static const struct vmbus_ic_desc vmbus_vss_descs[] = { 138 { 139 .ic_guid = { .hv_guid = { 140 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, 141 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40} }, 142 .ic_desc = "Hyper-V VSS" 143 }, 144 VMBUS_IC_DESC_END 145 }; 146 147 static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"}; 148 149 /* character device prototypes */ 150 static d_open_t hv_vss_dev_open; 151 static d_close_t hv_vss_dev_close; 152 static d_poll_t hv_vss_dev_daemon_poll; 153 static d_ioctl_t hv_vss_dev_daemon_ioctl; 154 155 static d_open_t hv_appvss_dev_open; 156 static d_close_t hv_appvss_dev_close; 157 static d_poll_t hv_appvss_dev_poll; 158 static d_ioctl_t hv_appvss_dev_ioctl; 159 160 /* hv_vss character device structure */ 161 static struct cdevsw hv_vss_cdevsw = 162 { 163 .d_version = D_VERSION, 164 .d_open = hv_vss_dev_open, 165 .d_close = hv_vss_dev_close, 166 .d_poll = hv_vss_dev_daemon_poll, 167 .d_ioctl = hv_vss_dev_daemon_ioctl, 168 .d_name = FS_VSS_DEV_NAME, 169 }; 170 171 static struct cdevsw hv_appvss_cdevsw = 172 { 173 .d_version = D_VERSION, 174 .d_open = hv_appvss_dev_open, 175 .d_close = hv_appvss_dev_close, 176 .d_poll = hv_appvss_dev_poll, 177 .d_ioctl = hv_appvss_dev_ioctl, 178 .d_name = APP_VSS_DEV_NAME, 179 }; 180 181 struct hv_vss_sc; 182 /* 183 * Global state to track cdev 184 */ 185 struct hv_vss_dev_sc { 186 /* 187 * msg was transferred from host to notify queue, and 188 * ack queue. Finally, it was recyled to free list. 189 */ 190 STAILQ_HEAD(, hv_vss_req_internal) to_notify_queue; 191 STAILQ_HEAD(, hv_vss_req_internal) to_ack_queue; 192 struct hv_vss_sc *sc; 193 struct proc *proc_task; 194 struct selinfo hv_vss_selinfo; 195 }; 196 /* 197 * Global state to track and synchronize the transaction requests from the host. 198 * The VSS allows user to register their function to do freeze/thaw for application. 199 * VSS kernel will notify both vss daemon and user application if it is registered. 200 * The implementation state transition is illustrated by: 201 * https://clovertrail.github.io/assets/vssdot.png 202 */ 203 typedef struct hv_vss_sc { 204 struct vmbus_ic_softc util_sc; 205 device_t dev; 206 207 struct task task; 208 209 /* 210 * mutex is used to protect access of list/queue, 211 * callout in request is also used this mutex. 212 */ 213 struct mtx pending_mutex; 214 /* 215 * req_free_list contains all free items 216 */ 217 LIST_HEAD(, hv_vss_req_internal) req_free_list; 218 219 /* Indicates if daemon registered with driver */ 220 boolean_t register_done; 221 222 boolean_t app_register_done; 223 224 /* cdev for file system freeze/thaw */ 225 struct cdev *hv_vss_dev; 226 /* cdev for application freeze/thaw */ 227 struct cdev *hv_appvss_dev; 228 229 /* sc for app */ 230 struct hv_vss_dev_sc app_sc; 231 /* sc for deamon */ 232 struct hv_vss_dev_sc daemon_sc; 233 } hv_vss_sc; 234 235 typedef struct hv_vss_req_internal { 236 LIST_ENTRY(hv_vss_req_internal) link; 237 STAILQ_ENTRY(hv_vss_req_internal) slink; 238 struct hv_vss_req vss_req; 239 240 /* Rcv buffer for communicating with the host*/ 241 uint8_t *rcv_buf; 242 /* Length of host message */ 243 uint32_t host_msg_len; 244 /* Host message id */ 245 uint64_t host_msg_id; 246 247 hv_vss_sc *sc; 248 249 struct callout callout; 250 } hv_vss_req_internal; 251 252 #define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id) \ 253 do { \ 254 STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) { \ 255 if (reqp->vss_req.opt_msg.msgid == id) { \ 256 STAILQ_REMOVE(queue, \ 257 reqp, hv_vss_req_internal, link); \ 258 break; \ 259 } \ 260 } \ 261 } while (0) 262 263 static bool 264 hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc) 265 { 266 return (!sc->register_done && sc->daemon_sc.proc_task); 267 } 268 269 /* 270 * Callback routine that gets called whenever there is a message from host 271 */ 272 static void 273 hv_vss_callback(struct vmbus_channel *chan __unused, void *context) 274 { 275 hv_vss_sc *sc = (hv_vss_sc*)context; 276 if (hv_vss_is_daemon_killed_after_launch(sc)) 277 hv_vss_log_info("%s: daemon was killed!\n", __func__); 278 if (sc->register_done || sc->daemon_sc.proc_task) { 279 hv_vss_log_info("%s: Queuing work item\n", __func__); 280 if (hv_vss_is_daemon_killed_after_launch(sc)) 281 hv_vss_log_info("%s: daemon was killed!\n", __func__); 282 taskqueue_enqueue(taskqueue_thread, &sc->task); 283 } else { 284 hv_vss_log_info("%s: daemon has never been registered\n", __func__); 285 } 286 hv_vss_log_info("%s: received msg from host\n", __func__); 287 } 288 /* 289 * Send the response back to the host. 290 */ 291 static void 292 hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch, 293 uint32_t recvlen, uint64_t requestid, uint32_t error) 294 { 295 struct vmbus_icmsg_hdr *hv_icmsg_hdrp; 296 297 hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf; 298 299 hv_icmsg_hdrp->ic_status = error; 300 hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; 301 302 error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0, 303 rcv_buf, recvlen, requestid); 304 if (error) 305 hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n", 306 __func__, error); 307 } 308 309 static void 310 hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status) 311 { 312 struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf; 313 hv_vss_sc *sc = reqp->sc; 314 if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) { 315 msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; 316 } 317 hv_vss_log_info("%s, %s response %s to host\n", __func__, 318 vss_opt_name[reqp->vss_req.opt_msg.opt], 319 status == HV_S_OK ? "Success" : "Fail"); 320 hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev), 321 reqp->host_msg_len, reqp->host_msg_id, status); 322 /* recycle the request */ 323 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); 324 } 325 326 static void 327 hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status) 328 { 329 mtx_lock(&reqp->sc->pending_mutex); 330 hv_vss_notify_host_result_locked(reqp, status); 331 mtx_unlock(&reqp->sc->pending_mutex); 332 } 333 334 static void 335 hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp, 336 struct hv_vss_opt_msg *userdata) 337 { 338 struct hv_vss_req *hv_vss_dev_buf; 339 hv_vss_dev_buf = &reqp->vss_req; 340 hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE; 341 switch (reqp->vss_req.msg.hdr.vss_hdr.operation) { 342 case VSS_OP_FREEZE: 343 hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE; 344 break; 345 case VSS_OP_THAW: 346 hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW; 347 break; 348 case VSS_OP_HOT_BACKUP: 349 hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK; 350 break; 351 } 352 *userdata = hv_vss_dev_buf->opt_msg; 353 hv_vss_log_info("%s, read data from user for " 354 "%s (%ju) \n", __func__, vss_opt_name[userdata->opt], 355 (uintmax_t)userdata->msgid); 356 } 357 358 /** 359 * Remove the request id from app notifiy or ack queue, 360 * and recyle the request by inserting it to free list. 361 * 362 * When app was notified but not yet sending ack, the request 363 * should locate in either notify queue or ack queue. 364 */ 365 static struct hv_vss_req_internal* 366 hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id) 367 { 368 struct hv_vss_req_internal *reqp, *tmp; 369 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue, 370 slink, tmp, req_id); 371 if (reqp == NULL) 372 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue, 373 slink, tmp, req_id); 374 if (reqp == NULL) 375 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue, 376 slink, tmp, req_id); 377 if (reqp == NULL) 378 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink, 379 tmp, req_id); 380 return (reqp); 381 } 382 /** 383 * Actions for daemon who has been notified. 384 */ 385 static void 386 hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 387 { 388 struct hv_vss_req_internal *reqp; 389 mtx_lock(&dev_sc->sc->pending_mutex); 390 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) { 391 reqp = STAILQ_FIRST(&dev_sc->to_notify_queue); 392 hv_vss_cp_vssreq_to_user(reqp, userdata); 393 STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink); 394 /* insert the msg to queue for write */ 395 STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink); 396 userdata->status = VSS_SUCCESS; 397 } else { 398 /* Timeout occur, thus request was removed from queue. */ 399 hv_vss_log_info("%s: notify queue is empty!\n", __func__); 400 userdata->status = VSS_FAIL; 401 } 402 mtx_unlock(&dev_sc->sc->pending_mutex); 403 } 404 405 static void 406 hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp) 407 { 408 uint32_t opt = reqp->vss_req.opt_msg.opt; 409 mtx_lock(&dev_sc->sc->pending_mutex); 410 STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink); 411 hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__, 412 vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid, 413 &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon"); 414 mtx_unlock(&dev_sc->sc->pending_mutex); 415 selwakeup(&dev_sc->hv_vss_selinfo); 416 } 417 418 /** 419 * Actions for daemon who has acknowledged. 420 */ 421 static void 422 hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 423 { 424 struct hv_vss_req_internal *reqp, *tmp; 425 uint64_t req_id; 426 int opt; 427 uint32_t status; 428 429 opt = userdata->opt; 430 req_id = userdata->msgid; 431 status = userdata->status; 432 /* make sure the reserved fields are all zeros. */ 433 memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - 434 __offsetof(struct hv_vss_opt_msg, reserved)); 435 mtx_lock(&dev_sc->sc->pending_mutex); 436 SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); 437 mtx_unlock(&dev_sc->sc->pending_mutex); 438 if (reqp == NULL) { 439 hv_vss_log_info("%s Timeout: fail to find daemon ack request\n", 440 __func__); 441 userdata->status = VSS_FAIL; 442 return; 443 } 444 KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); 445 hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__, 446 status, vss_opt_name[opt], (uintmax_t)req_id); 447 switch (opt) { 448 case HV_VSS_CHECK: 449 case HV_VSS_FREEZE: 450 callout_drain(&reqp->callout); 451 hv_vss_notify_host_result(reqp, 452 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 453 break; 454 case HV_VSS_THAW: 455 if (dev_sc->sc->app_register_done) { 456 if (status == VSS_SUCCESS) { 457 hv_vss_notify(&dev_sc->sc->app_sc, reqp); 458 } else { 459 /* handle error */ 460 callout_drain(&reqp->callout); 461 hv_vss_notify_host_result(reqp, HV_E_FAIL); 462 } 463 } else { 464 callout_drain(&reqp->callout); 465 hv_vss_notify_host_result(reqp, 466 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 467 } 468 break; 469 } 470 } 471 472 /** 473 * Actions for app who has acknowledged. 474 */ 475 static void 476 hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 477 { 478 struct hv_vss_req_internal *reqp, *tmp; 479 uint64_t req_id; 480 int opt; 481 uint8_t status; 482 483 opt = userdata->opt; 484 req_id = userdata->msgid; 485 status = userdata->status; 486 /* make sure the reserved fields are all zeros. */ 487 memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - 488 __offsetof(struct hv_vss_opt_msg, reserved)); 489 mtx_lock(&dev_sc->sc->pending_mutex); 490 SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); 491 mtx_unlock(&dev_sc->sc->pending_mutex); 492 if (reqp == NULL) { 493 hv_vss_log_info("%s Timeout: fail to find app ack request\n", 494 __func__); 495 userdata->status = VSS_FAIL; 496 return; 497 } 498 KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); 499 hv_vss_log_info("%s, get response %d from app for %s (%ju) \n", 500 __func__, status, vss_opt_name[opt], (uintmax_t)req_id); 501 if (dev_sc->sc->register_done) { 502 switch (opt) { 503 case HV_VSS_CHECK: 504 case HV_VSS_FREEZE: 505 if (status == VSS_SUCCESS) { 506 hv_vss_notify(&dev_sc->sc->daemon_sc, reqp); 507 } else { 508 /* handle error */ 509 callout_drain(&reqp->callout); 510 hv_vss_notify_host_result(reqp, HV_E_FAIL); 511 } 512 break; 513 case HV_VSS_THAW: 514 callout_drain(&reqp->callout); 515 hv_vss_notify_host_result(reqp, 516 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 517 break; 518 } 519 } else { 520 hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__); 521 } 522 } 523 524 static int 525 hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 526 { 527 struct proc *td_proc; 528 td_proc = td->td_proc; 529 530 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 531 hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", 532 __func__, td_proc->p_comm, FS_VSS_DEV_NAME); 533 534 if (dev_sc->sc->register_done) 535 return (EBUSY); 536 537 dev_sc->sc->register_done = true; 538 hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc); 539 540 dev_sc->proc_task = curproc; 541 return (0); 542 } 543 544 static int 545 hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, 546 struct thread *td) 547 { 548 struct proc *td_proc; 549 td_proc = td->td_proc; 550 551 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 552 553 hv_vss_log_info("%s: %s closes device \"%s\"\n", 554 __func__, td_proc->p_comm, FS_VSS_DEV_NAME); 555 dev_sc->sc->register_done = false; 556 return (0); 557 } 558 559 static int 560 hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 561 struct thread *td) 562 { 563 struct proc *td_proc; 564 struct hv_vss_dev_sc *sc; 565 566 td_proc = td->td_proc; 567 sc = (struct hv_vss_dev_sc*)dev->si_drv1; 568 569 hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); 570 571 struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; 572 switch(cmd) { 573 case IOCHVVSSREAD: 574 hv_vss_notified(sc, userdata); 575 break; 576 case IOCHVVSSWRITE: 577 hv_vss_daemon_acked(sc, userdata); 578 break; 579 } 580 return (0); 581 } 582 583 /* 584 * hv_vss_daemon poll invokes this function to check if data is available 585 * for daemon to read. 586 */ 587 static int 588 hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td) 589 { 590 int revent = 0; 591 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 592 593 mtx_lock(&dev_sc->sc->pending_mutex); 594 /** 595 * if there is data ready, inform daemon's poll 596 */ 597 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) 598 revent = POLLIN; 599 if (revent == 0) 600 selrecord(td, &dev_sc->hv_vss_selinfo); 601 hv_vss_log_info("%s return 0x%x\n", __func__, revent); 602 mtx_unlock(&dev_sc->sc->pending_mutex); 603 return (revent); 604 } 605 606 static int 607 hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 608 { 609 struct proc *td_proc; 610 td_proc = td->td_proc; 611 612 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 613 hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", 614 __func__, td_proc->p_comm, APP_VSS_DEV_NAME); 615 616 if (dev_sc->sc->app_register_done) 617 return (EBUSY); 618 619 dev_sc->sc->app_register_done = true; 620 dev_sc->proc_task = curproc; 621 return (0); 622 } 623 624 static int 625 hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, 626 struct thread *td) 627 { 628 struct proc *td_proc; 629 td_proc = td->td_proc; 630 631 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 632 633 hv_vss_log_info("%s: %s closes device \"%s\".\n", 634 __func__, td_proc->p_comm, APP_VSS_DEV_NAME); 635 dev_sc->sc->app_register_done = false; 636 return (0); 637 } 638 639 static int 640 hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 641 struct thread *td) 642 { 643 struct proc *td_proc; 644 struct hv_vss_dev_sc *dev_sc; 645 646 td_proc = td->td_proc; 647 dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 648 649 hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); 650 651 struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; 652 switch(cmd) { 653 case IOCHVVSSREAD: 654 hv_vss_notified(dev_sc, userdata); 655 break; 656 case IOCHVVSSWRITE: 657 hv_vss_app_acked(dev_sc, userdata); 658 break; 659 } 660 return (0); 661 } 662 663 /* 664 * hv_vss_daemon poll invokes this function to check if data is available 665 * for daemon to read. 666 */ 667 static int 668 hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td) 669 { 670 int revent = 0; 671 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 672 673 mtx_lock(&dev_sc->sc->pending_mutex); 674 /** 675 * if there is data ready, inform daemon's poll 676 */ 677 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) 678 revent = POLLIN; 679 if (revent == 0) 680 selrecord(td, &dev_sc->hv_vss_selinfo); 681 hv_vss_log_info("%s return 0x%x\n", __func__, revent); 682 mtx_unlock(&dev_sc->sc->pending_mutex); 683 return (revent); 684 } 685 686 static void 687 hv_vss_timeout(void *arg) 688 { 689 hv_vss_req_internal *reqp = arg; 690 hv_vss_req_internal *request __diagused; 691 hv_vss_sc* sc = reqp->sc; 692 uint64_t req_id = reqp->vss_req.opt_msg.msgid; 693 /* This thread is locked */ 694 KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!")); 695 request = hv_vss_drain_req_queue_locked(sc, req_id); 696 KASSERT(request != NULL, ("timeout but fail to find request")); 697 hv_vss_notify_host_result_locked(reqp, HV_E_FAIL); 698 } 699 700 /* 701 * This routine is called whenever a message is received from the host 702 */ 703 static void 704 hv_vss_init_req(hv_vss_req_internal *reqp, 705 uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc) 706 { 707 struct timespec vm_ts; 708 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 709 710 memset(reqp, 0, __offsetof(hv_vss_req_internal, callout)); 711 reqp->host_msg_len = recvlen; 712 reqp->host_msg_id = requestid; 713 reqp->rcv_buf = vss_buf; 714 reqp->sc = sc; 715 memcpy(&reqp->vss_req.msg, 716 (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg)); 717 /* set the opt for users */ 718 switch (msg->hdr.vss_hdr.operation) { 719 case VSS_OP_FREEZE: 720 reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE; 721 break; 722 case VSS_OP_THAW: 723 reqp->vss_req.opt_msg.opt = HV_VSS_THAW; 724 break; 725 case VSS_OP_HOT_BACKUP: 726 reqp->vss_req.opt_msg.opt = HV_VSS_CHECK; 727 break; 728 } 729 /* Use a timestamp as msg request ID */ 730 nanotime(&vm_ts); 731 reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec; 732 } 733 734 static hv_vss_req_internal* 735 hv_vss_get_new_req_locked(hv_vss_sc *sc) 736 { 737 hv_vss_req_internal *reqp; 738 if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) || 739 !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) || 740 !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) || 741 !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { 742 /* 743 * There is request coming from host before 744 * finishing previous requests 745 */ 746 hv_vss_log_info("%s: Warning: there is new request " 747 "coming before finishing previous requests\n", __func__); 748 return (NULL); 749 } 750 if (LIST_EMPTY(&sc->req_free_list)) { 751 /* TODO Error: no buffer */ 752 hv_vss_log_info("Error: No buffer\n"); 753 return (NULL); 754 } 755 reqp = LIST_FIRST(&sc->req_free_list); 756 LIST_REMOVE(reqp, link); 757 return (reqp); 758 } 759 760 static void 761 hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt) 762 { 763 hv_vss_sc *sc = reqp->sc; 764 /* 765 * Freeze/Check notification sequence: kernel -> app -> daemon(fs) 766 * Thaw notification sequence: kernel -> daemon(fs) -> app 767 * 768 * We should wake up the daemon, in case it's doing poll(). 769 * The response should be received after 5s, otherwise, trigger timeout. 770 */ 771 switch (opt) { 772 case VSS_OP_FREEZE: 773 case VSS_OP_HOT_BACKUP: 774 if (sc->app_register_done) 775 hv_vss_notify(&sc->app_sc, reqp); 776 else 777 hv_vss_notify(&sc->daemon_sc, reqp); 778 callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, 779 hv_vss_timeout, reqp); 780 break; 781 case VSS_OP_THAW: 782 hv_vss_notify(&sc->daemon_sc, reqp); 783 callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, 784 hv_vss_timeout, reqp); 785 break; 786 } 787 } 788 789 /* 790 * Function to read the vss request buffer from host 791 * and interact with daemon 792 */ 793 static void 794 hv_vss_process_request(void *context, int pending __unused) 795 { 796 uint8_t *vss_buf; 797 struct vmbus_channel *channel; 798 uint32_t recvlen = 0; 799 uint64_t requestid; 800 struct vmbus_icmsg_hdr *icmsghdrp; 801 int ret = 0; 802 hv_vss_sc *sc; 803 hv_vss_req_internal *reqp; 804 805 hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__); 806 807 sc = (hv_vss_sc*)context; 808 vss_buf = sc->util_sc.ic_buf; 809 channel = vmbus_get_channel(sc->dev); 810 811 recvlen = sc->util_sc.ic_buflen; 812 ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); 813 KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); 814 /* XXX check recvlen to make sure that it contains enough data */ 815 816 while ((ret == 0) && (recvlen > 0)) { 817 icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf; 818 819 if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) { 820 ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf, 821 &recvlen, VSS_FWVER, VSS_MSGVER); 822 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 823 recvlen, requestid, ret); 824 hv_vss_log_info("%s: version negotiated\n", __func__); 825 } else if (!hv_vss_is_daemon_killed_after_launch(sc)) { 826 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 827 switch(msg->hdr.vss_hdr.operation) { 828 case VSS_OP_FREEZE: 829 case VSS_OP_THAW: 830 case VSS_OP_HOT_BACKUP: 831 mtx_lock(&sc->pending_mutex); 832 reqp = hv_vss_get_new_req_locked(sc); 833 mtx_unlock(&sc->pending_mutex); 834 if (reqp == NULL) { 835 /* ignore this request from host */ 836 break; 837 } 838 hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc); 839 hv_vss_log_info("%s: receive %s (%ju) from host\n", 840 __func__, 841 vss_opt_name[reqp->vss_req.opt_msg.opt], 842 (uintmax_t)reqp->vss_req.opt_msg.msgid); 843 hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation); 844 break; 845 case VSS_OP_GET_DM_INFO: 846 hv_vss_log_info("%s: receive GET_DM_INFO from host\n", 847 __func__); 848 msg->body.dm_info.flags = 0; 849 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 850 recvlen, requestid, HV_S_OK); 851 break; 852 default: 853 device_printf(sc->dev, "Unknown opt from host: %d\n", 854 msg->hdr.vss_hdr.operation); 855 break; 856 } 857 } else { 858 /* daemon was killed for some reason after it was launched */ 859 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 860 switch(msg->hdr.vss_hdr.operation) { 861 case VSS_OP_FREEZE: 862 hv_vss_log_info("%s: response fail for FREEZE\n", 863 __func__); 864 break; 865 case VSS_OP_THAW: 866 hv_vss_log_info("%s: response fail for THAW\n", 867 __func__); 868 break; 869 case VSS_OP_HOT_BACKUP: 870 hv_vss_log_info("%s: response fail for HOT_BACKUP\n", 871 __func__); 872 msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; 873 break; 874 case VSS_OP_GET_DM_INFO: 875 hv_vss_log_info("%s: response fail for GET_DM_INFO\n", 876 __func__); 877 msg->body.dm_info.flags = 0; 878 break; 879 default: 880 device_printf(sc->dev, "Unknown opt from host: %d\n", 881 msg->hdr.vss_hdr.operation); 882 break; 883 } 884 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 885 recvlen, requestid, HV_E_FAIL); 886 } 887 /* 888 * Try reading next buffer 889 */ 890 recvlen = sc->util_sc.ic_buflen; 891 ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); 892 KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); 893 /* XXX check recvlen to make sure that it contains enough data */ 894 895 hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n", 896 __func__, context, ret, recvlen); 897 } 898 } 899 900 static int 901 hv_vss_probe(device_t dev) 902 { 903 return (vmbus_ic_probe(dev, vmbus_vss_descs)); 904 } 905 906 static int 907 hv_vss_init_send_receive_queue(device_t dev) 908 { 909 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 910 int i; 911 const int max_list = 4; /* It is big enough for the list */ 912 struct hv_vss_req_internal* reqp; 913 914 LIST_INIT(&sc->req_free_list); 915 STAILQ_INIT(&sc->daemon_sc.to_notify_queue); 916 STAILQ_INIT(&sc->daemon_sc.to_ack_queue); 917 STAILQ_INIT(&sc->app_sc.to_notify_queue); 918 STAILQ_INIT(&sc->app_sc.to_ack_queue); 919 920 for (i = 0; i < max_list; i++) { 921 reqp = malloc(sizeof(struct hv_vss_req_internal), 922 M_DEVBUF, M_WAITOK|M_ZERO); 923 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); 924 callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0); 925 } 926 return (0); 927 } 928 929 static int 930 hv_vss_destroy_send_receive_queue(device_t dev) 931 { 932 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 933 hv_vss_req_internal* reqp; 934 935 while (!LIST_EMPTY(&sc->req_free_list)) { 936 reqp = LIST_FIRST(&sc->req_free_list); 937 LIST_REMOVE(reqp, link); 938 free(reqp, M_DEVBUF); 939 } 940 941 while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) { 942 reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue); 943 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink); 944 free(reqp, M_DEVBUF); 945 } 946 947 while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) { 948 reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue); 949 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink); 950 free(reqp, M_DEVBUF); 951 } 952 953 while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) { 954 reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue); 955 STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink); 956 free(reqp, M_DEVBUF); 957 } 958 959 while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { 960 reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue); 961 STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink); 962 free(reqp, M_DEVBUF); 963 } 964 return (0); 965 } 966 967 static int 968 hv_vss_attach(device_t dev) 969 { 970 int error; 971 struct sysctl_oid_list *child; 972 struct sysctl_ctx_list *ctx; 973 974 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 975 976 sc->dev = dev; 977 mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF); 978 979 ctx = device_get_sysctl_ctx(dev); 980 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 981 982 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log", 983 CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level"); 984 985 TASK_INIT(&sc->task, 0, hv_vss_process_request, sc); 986 hv_vss_init_send_receive_queue(dev); 987 /* create character device for file system freeze/thaw */ 988 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 989 &sc->hv_vss_dev, 990 &hv_vss_cdevsw, 991 0, 992 UID_ROOT, 993 GID_WHEEL, 994 0640, 995 FS_VSS_DEV_NAME); 996 997 if (error != 0) { 998 hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error); 999 return (error); 1000 } 1001 sc->hv_vss_dev->si_drv1 = &sc->daemon_sc; 1002 sc->daemon_sc.sc = sc; 1003 /* create character device for application freeze/thaw */ 1004 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 1005 &sc->hv_appvss_dev, 1006 &hv_appvss_cdevsw, 1007 0, 1008 UID_ROOT, 1009 GID_WHEEL, 1010 0640, 1011 APP_VSS_DEV_NAME); 1012 1013 if (error != 0) { 1014 hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error); 1015 return (error); 1016 } 1017 sc->hv_appvss_dev->si_drv1 = &sc->app_sc; 1018 sc->app_sc.sc = sc; 1019 1020 return (vmbus_ic_attach(dev, hv_vss_callback)); 1021 } 1022 1023 static int 1024 hv_vss_detach(device_t dev) 1025 { 1026 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 1027 mtx_destroy(&sc->pending_mutex); 1028 if (sc->daemon_sc.proc_task != NULL) { 1029 PROC_LOCK(sc->daemon_sc.proc_task); 1030 kern_psignal(sc->daemon_sc.proc_task, SIGKILL); 1031 PROC_UNLOCK(sc->daemon_sc.proc_task); 1032 } 1033 if (sc->app_sc.proc_task != NULL) { 1034 PROC_LOCK(sc->app_sc.proc_task); 1035 kern_psignal(sc->app_sc.proc_task, SIGKILL); 1036 PROC_UNLOCK(sc->app_sc.proc_task); 1037 } 1038 hv_vss_destroy_send_receive_queue(dev); 1039 destroy_dev(sc->hv_vss_dev); 1040 destroy_dev(sc->hv_appvss_dev); 1041 return (vmbus_ic_detach(dev)); 1042 } 1043 1044 static device_method_t vss_methods[] = { 1045 /* Device interface */ 1046 DEVMETHOD(device_probe, hv_vss_probe), 1047 DEVMETHOD(device_attach, hv_vss_attach), 1048 DEVMETHOD(device_detach, hv_vss_detach), 1049 { 0, 0 } 1050 }; 1051 1052 static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)}; 1053 1054 DRIVER_MODULE(hv_vss, vmbus, vss_driver, NULL, NULL); 1055 MODULE_VERSION(hv_vss, 1); 1056 MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1); 1057