1 /*- 2 * Copyright (c) 2016 Microsoft Corp. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/param.h> 29 #include <sys/kernel.h> 30 #include <sys/conf.h> 31 #include <sys/uio.h> 32 #include <sys/bus.h> 33 #include <sys/malloc.h> 34 #include <sys/mbuf.h> 35 #include <sys/module.h> 36 #include <sys/lock.h> 37 #include <sys/taskqueue.h> 38 #include <sys/selinfo.h> 39 #include <sys/sysctl.h> 40 #include <sys/poll.h> 41 #include <sys/proc.h> 42 #include <sys/queue.h> 43 #include <sys/kthread.h> 44 #include <sys/syscallsubr.h> 45 #include <sys/sysproto.h> 46 #include <sys/un.h> 47 #include <sys/endian.h> 48 #include <sys/sema.h> 49 #include <sys/signal.h> 50 #include <sys/syslog.h> 51 #include <sys/systm.h> 52 #include <sys/mutex.h> 53 #include <sys/callout.h> 54 55 #include <dev/hyperv/include/hyperv.h> 56 #include <dev/hyperv/utilities/hv_utilreg.h> 57 #include <dev/hyperv/utilities/vmbus_icreg.h> 58 #include <dev/hyperv/utilities/vmbus_icvar.h> 59 60 #include "hv_snapshot.h" 61 #include "vmbus_if.h" 62 63 #define VSS_MAJOR 5 64 #define VSS_MINOR 0 65 #define VSS_MSGVER VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR) 66 67 #define VSS_FWVER_MAJOR 3 68 #define VSS_FWVER VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0) 69 70 #define TIMEOUT_LIMIT (15) // seconds 71 enum hv_vss_op { 72 VSS_OP_CREATE = 0, 73 VSS_OP_DELETE, 74 VSS_OP_HOT_BACKUP, 75 VSS_OP_GET_DM_INFO, 76 VSS_OP_BU_COMPLETE, 77 /* 78 * Following operations are only supported with IC version >= 5.0 79 */ 80 VSS_OP_FREEZE, /* Freeze the file systems in the VM */ 81 VSS_OP_THAW, /* Unfreeze the file systems */ 82 VSS_OP_AUTO_RECOVER, 83 VSS_OP_COUNT /* Number of operations, must be last */ 84 }; 85 86 /* 87 * Header for all VSS messages. 88 */ 89 struct hv_vss_hdr { 90 struct vmbus_icmsg_hdr ic_hdr; 91 uint8_t operation; 92 uint8_t reserved[7]; 93 } __packed; 94 95 96 /* 97 * Flag values for the hv_vss_check_feature. Here supports only 98 * one value. 99 */ 100 #define VSS_HBU_NO_AUTO_RECOVERY 0x00000005 101 102 struct hv_vss_check_feature { 103 uint32_t flags; 104 } __packed; 105 106 struct hv_vss_check_dm_info { 107 uint32_t flags; 108 } __packed; 109 110 struct hv_vss_msg { 111 union { 112 struct hv_vss_hdr vss_hdr; 113 } hdr; 114 union { 115 struct hv_vss_check_feature vss_cf; 116 struct hv_vss_check_dm_info dm_info; 117 } body; 118 } __packed; 119 120 struct hv_vss_req { 121 struct hv_vss_opt_msg opt_msg; /* used to communicate with daemon */ 122 struct hv_vss_msg msg; /* used to communicate with host */ 123 } __packed; 124 125 /* hv_vss debug control */ 126 static int hv_vss_log = 0; 127 128 #define hv_vss_log_error(...) do { \ 129 if (hv_vss_log > 0) \ 130 log(LOG_ERR, "hv_vss: " __VA_ARGS__); \ 131 } while (0) 132 133 #define hv_vss_log_info(...) do { \ 134 if (hv_vss_log > 1) \ 135 log(LOG_INFO, "hv_vss: " __VA_ARGS__); \ 136 } while (0) 137 138 static const struct vmbus_ic_desc vmbus_vss_descs[] = { 139 { 140 .ic_guid = { .hv_guid = { 141 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, 142 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40} }, 143 .ic_desc = "Hyper-V VSS" 144 }, 145 VMBUS_IC_DESC_END 146 }; 147 148 static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"}; 149 150 /* character device prototypes */ 151 static d_open_t hv_vss_dev_open; 152 static d_close_t hv_vss_dev_close; 153 static d_poll_t hv_vss_dev_daemon_poll; 154 static d_ioctl_t hv_vss_dev_daemon_ioctl; 155 156 static d_open_t hv_appvss_dev_open; 157 static d_close_t hv_appvss_dev_close; 158 static d_poll_t hv_appvss_dev_poll; 159 static d_ioctl_t hv_appvss_dev_ioctl; 160 161 /* hv_vss character device structure */ 162 static struct cdevsw hv_vss_cdevsw = 163 { 164 .d_version = D_VERSION, 165 .d_open = hv_vss_dev_open, 166 .d_close = hv_vss_dev_close, 167 .d_poll = hv_vss_dev_daemon_poll, 168 .d_ioctl = hv_vss_dev_daemon_ioctl, 169 .d_name = FS_VSS_DEV_NAME, 170 }; 171 172 static struct cdevsw hv_appvss_cdevsw = 173 { 174 .d_version = D_VERSION, 175 .d_open = hv_appvss_dev_open, 176 .d_close = hv_appvss_dev_close, 177 .d_poll = hv_appvss_dev_poll, 178 .d_ioctl = hv_appvss_dev_ioctl, 179 .d_name = APP_VSS_DEV_NAME, 180 }; 181 182 struct hv_vss_sc; 183 /* 184 * Global state to track cdev 185 */ 186 struct hv_vss_dev_sc { 187 /* 188 * msg was transferred from host to notify queue, and 189 * ack queue. Finally, it was recyled to free list. 190 */ 191 STAILQ_HEAD(, hv_vss_req_internal) to_notify_queue; 192 STAILQ_HEAD(, hv_vss_req_internal) to_ack_queue; 193 struct hv_vss_sc *sc; 194 struct proc *proc_task; 195 struct selinfo hv_vss_selinfo; 196 }; 197 /* 198 * Global state to track and synchronize the transaction requests from the host. 199 * The VSS allows user to register their function to do freeze/thaw for application. 200 * VSS kernel will notify both vss daemon and user application if it is registered. 201 * The implementation state transition is illustrated by: 202 * https://clovertrail.github.io/assets/vssdot.png 203 */ 204 typedef struct hv_vss_sc { 205 struct vmbus_ic_softc util_sc; 206 device_t dev; 207 208 struct task task; 209 210 /* 211 * mutex is used to protect access of list/queue, 212 * callout in request is also used this mutex. 213 */ 214 struct mtx pending_mutex; 215 /* 216 * req_free_list contains all free items 217 */ 218 LIST_HEAD(, hv_vss_req_internal) req_free_list; 219 220 /* Indicates if daemon registered with driver */ 221 boolean_t register_done; 222 223 boolean_t app_register_done; 224 225 /* cdev for file system freeze/thaw */ 226 struct cdev *hv_vss_dev; 227 /* cdev for application freeze/thaw */ 228 struct cdev *hv_appvss_dev; 229 230 /* sc for app */ 231 struct hv_vss_dev_sc app_sc; 232 /* sc for deamon */ 233 struct hv_vss_dev_sc daemon_sc; 234 } hv_vss_sc; 235 236 typedef struct hv_vss_req_internal { 237 LIST_ENTRY(hv_vss_req_internal) link; 238 STAILQ_ENTRY(hv_vss_req_internal) slink; 239 struct hv_vss_req vss_req; 240 241 /* Rcv buffer for communicating with the host*/ 242 uint8_t *rcv_buf; 243 /* Length of host message */ 244 uint32_t host_msg_len; 245 /* Host message id */ 246 uint64_t host_msg_id; 247 248 hv_vss_sc *sc; 249 250 struct callout callout; 251 } hv_vss_req_internal; 252 253 #define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id) \ 254 do { \ 255 STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) { \ 256 if (reqp->vss_req.opt_msg.msgid == id) { \ 257 STAILQ_REMOVE(queue, \ 258 reqp, hv_vss_req_internal, link); \ 259 break; \ 260 } \ 261 } \ 262 } while (0) 263 264 static bool 265 hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc) 266 { 267 return (!sc->register_done && sc->daemon_sc.proc_task); 268 } 269 270 /* 271 * Callback routine that gets called whenever there is a message from host 272 */ 273 static void 274 hv_vss_callback(struct vmbus_channel *chan __unused, void *context) 275 { 276 hv_vss_sc *sc = (hv_vss_sc*)context; 277 if (hv_vss_is_daemon_killed_after_launch(sc)) 278 hv_vss_log_info("%s: daemon was killed!\n", __func__); 279 if (sc->register_done || sc->daemon_sc.proc_task) { 280 hv_vss_log_info("%s: Queuing work item\n", __func__); 281 if (hv_vss_is_daemon_killed_after_launch(sc)) 282 hv_vss_log_info("%s: daemon was killed!\n", __func__); 283 taskqueue_enqueue(taskqueue_thread, &sc->task); 284 } else { 285 hv_vss_log_info("%s: daemon has never been registered\n", __func__); 286 } 287 hv_vss_log_info("%s: received msg from host\n", __func__); 288 } 289 /* 290 * Send the response back to the host. 291 */ 292 static void 293 hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch, 294 uint32_t recvlen, uint64_t requestid, uint32_t error) 295 { 296 struct vmbus_icmsg_hdr *hv_icmsg_hdrp; 297 298 hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf; 299 300 hv_icmsg_hdrp->ic_status = error; 301 hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; 302 303 error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0, 304 rcv_buf, recvlen, requestid); 305 if (error) 306 hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n", 307 __func__, error); 308 } 309 310 static void 311 hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status) 312 { 313 struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf; 314 hv_vss_sc *sc = reqp->sc; 315 if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) { 316 msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; 317 } 318 hv_vss_log_info("%s, %s response %s to host\n", __func__, 319 vss_opt_name[reqp->vss_req.opt_msg.opt], 320 status == HV_S_OK ? "Success" : "Fail"); 321 hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev), 322 reqp->host_msg_len, reqp->host_msg_id, status); 323 /* recycle the request */ 324 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); 325 } 326 327 static void 328 hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status) 329 { 330 mtx_lock(&reqp->sc->pending_mutex); 331 hv_vss_notify_host_result_locked(reqp, status); 332 mtx_unlock(&reqp->sc->pending_mutex); 333 } 334 335 static void 336 hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp, 337 struct hv_vss_opt_msg *userdata) 338 { 339 struct hv_vss_req *hv_vss_dev_buf; 340 hv_vss_dev_buf = &reqp->vss_req; 341 hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE; 342 switch (reqp->vss_req.msg.hdr.vss_hdr.operation) { 343 case VSS_OP_FREEZE: 344 hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE; 345 break; 346 case VSS_OP_THAW: 347 hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW; 348 break; 349 case VSS_OP_HOT_BACKUP: 350 hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK; 351 break; 352 } 353 *userdata = hv_vss_dev_buf->opt_msg; 354 hv_vss_log_info("%s, read data from user for " 355 "%s (%ju) \n", __func__, vss_opt_name[userdata->opt], 356 (uintmax_t)userdata->msgid); 357 } 358 359 /** 360 * Remove the request id from app notifiy or ack queue, 361 * and recyle the request by inserting it to free list. 362 * 363 * When app was notified but not yet sending ack, the request 364 * should locate in either notify queue or ack queue. 365 */ 366 static struct hv_vss_req_internal* 367 hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id) 368 { 369 struct hv_vss_req_internal *reqp, *tmp; 370 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue, 371 slink, tmp, req_id); 372 if (reqp == NULL) 373 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue, 374 slink, tmp, req_id); 375 if (reqp == NULL) 376 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue, 377 slink, tmp, req_id); 378 if (reqp == NULL) 379 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink, 380 tmp, req_id); 381 return (reqp); 382 } 383 /** 384 * Actions for daemon who has been notified. 385 */ 386 static void 387 hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 388 { 389 struct hv_vss_req_internal *reqp; 390 mtx_lock(&dev_sc->sc->pending_mutex); 391 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) { 392 reqp = STAILQ_FIRST(&dev_sc->to_notify_queue); 393 hv_vss_cp_vssreq_to_user(reqp, userdata); 394 STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink); 395 /* insert the msg to queue for write */ 396 STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink); 397 userdata->status = VSS_SUCCESS; 398 } else { 399 /* Timeout occur, thus request was removed from queue. */ 400 hv_vss_log_info("%s: notify queue is empty!\n", __func__); 401 userdata->status = VSS_FAIL; 402 } 403 mtx_unlock(&dev_sc->sc->pending_mutex); 404 } 405 406 static void 407 hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp) 408 { 409 uint32_t opt = reqp->vss_req.opt_msg.opt; 410 mtx_lock(&dev_sc->sc->pending_mutex); 411 STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink); 412 hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__, 413 vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid, 414 &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon"); 415 mtx_unlock(&dev_sc->sc->pending_mutex); 416 selwakeup(&dev_sc->hv_vss_selinfo); 417 } 418 419 /** 420 * Actions for daemon who has acknowledged. 421 */ 422 static void 423 hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 424 { 425 struct hv_vss_req_internal *reqp, *tmp; 426 uint64_t req_id; 427 int opt; 428 uint32_t status; 429 430 opt = userdata->opt; 431 req_id = userdata->msgid; 432 status = userdata->status; 433 /* make sure the reserved fields are all zeros. */ 434 memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - 435 __offsetof(struct hv_vss_opt_msg, reserved)); 436 mtx_lock(&dev_sc->sc->pending_mutex); 437 SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); 438 mtx_unlock(&dev_sc->sc->pending_mutex); 439 if (reqp == NULL) { 440 hv_vss_log_info("%s Timeout: fail to find daemon ack request\n", 441 __func__); 442 userdata->status = VSS_FAIL; 443 return; 444 } 445 KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); 446 hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__, 447 status, vss_opt_name[opt], (uintmax_t)req_id); 448 switch (opt) { 449 case HV_VSS_CHECK: 450 case HV_VSS_FREEZE: 451 callout_drain(&reqp->callout); 452 hv_vss_notify_host_result(reqp, 453 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 454 break; 455 case HV_VSS_THAW: 456 if (dev_sc->sc->app_register_done) { 457 if (status == VSS_SUCCESS) { 458 hv_vss_notify(&dev_sc->sc->app_sc, reqp); 459 } else { 460 /* handle error */ 461 callout_drain(&reqp->callout); 462 hv_vss_notify_host_result(reqp, HV_E_FAIL); 463 } 464 } else { 465 callout_drain(&reqp->callout); 466 hv_vss_notify_host_result(reqp, 467 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 468 } 469 break; 470 } 471 } 472 473 /** 474 * Actions for app who has acknowledged. 475 */ 476 static void 477 hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 478 { 479 struct hv_vss_req_internal *reqp, *tmp; 480 uint64_t req_id; 481 int opt; 482 uint8_t status; 483 484 opt = userdata->opt; 485 req_id = userdata->msgid; 486 status = userdata->status; 487 /* make sure the reserved fields are all zeros. */ 488 memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - 489 __offsetof(struct hv_vss_opt_msg, reserved)); 490 mtx_lock(&dev_sc->sc->pending_mutex); 491 SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); 492 mtx_unlock(&dev_sc->sc->pending_mutex); 493 if (reqp == NULL) { 494 hv_vss_log_info("%s Timeout: fail to find app ack request\n", 495 __func__); 496 userdata->status = VSS_FAIL; 497 return; 498 } 499 KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); 500 hv_vss_log_info("%s, get response %d from app for %s (%ju) \n", 501 __func__, status, vss_opt_name[opt], (uintmax_t)req_id); 502 if (dev_sc->sc->register_done) { 503 switch (opt) { 504 case HV_VSS_CHECK: 505 case HV_VSS_FREEZE: 506 if (status == VSS_SUCCESS) { 507 hv_vss_notify(&dev_sc->sc->daemon_sc, reqp); 508 } else { 509 /* handle error */ 510 callout_drain(&reqp->callout); 511 hv_vss_notify_host_result(reqp, HV_E_FAIL); 512 } 513 break; 514 case HV_VSS_THAW: 515 callout_drain(&reqp->callout); 516 hv_vss_notify_host_result(reqp, 517 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 518 break; 519 } 520 } else { 521 hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__); 522 } 523 } 524 525 static int 526 hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 527 { 528 struct proc *td_proc; 529 td_proc = td->td_proc; 530 531 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 532 hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", 533 __func__, td_proc->p_comm, FS_VSS_DEV_NAME); 534 535 if (dev_sc->sc->register_done) 536 return (EBUSY); 537 538 dev_sc->sc->register_done = true; 539 hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc); 540 541 dev_sc->proc_task = curproc; 542 return (0); 543 } 544 545 static int 546 hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, 547 struct thread *td) 548 { 549 struct proc *td_proc; 550 td_proc = td->td_proc; 551 552 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 553 554 hv_vss_log_info("%s: %s closes device \"%s\"\n", 555 __func__, td_proc->p_comm, FS_VSS_DEV_NAME); 556 dev_sc->sc->register_done = false; 557 return (0); 558 } 559 560 static int 561 hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 562 struct thread *td) 563 { 564 struct proc *td_proc; 565 struct hv_vss_dev_sc *sc; 566 567 td_proc = td->td_proc; 568 sc = (struct hv_vss_dev_sc*)dev->si_drv1; 569 570 hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); 571 572 struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; 573 switch(cmd) { 574 case IOCHVVSSREAD: 575 hv_vss_notified(sc, userdata); 576 break; 577 case IOCHVVSSWRITE: 578 hv_vss_daemon_acked(sc, userdata); 579 break; 580 } 581 return (0); 582 } 583 584 /* 585 * hv_vss_daemon poll invokes this function to check if data is available 586 * for daemon to read. 587 */ 588 static int 589 hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td) 590 { 591 int revent = 0; 592 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 593 594 mtx_lock(&dev_sc->sc->pending_mutex); 595 /** 596 * if there is data ready, inform daemon's poll 597 */ 598 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) 599 revent = POLLIN; 600 if (revent == 0) 601 selrecord(td, &dev_sc->hv_vss_selinfo); 602 hv_vss_log_info("%s return 0x%x\n", __func__, revent); 603 mtx_unlock(&dev_sc->sc->pending_mutex); 604 return (revent); 605 } 606 607 static int 608 hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 609 { 610 struct proc *td_proc; 611 td_proc = td->td_proc; 612 613 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 614 hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", 615 __func__, td_proc->p_comm, APP_VSS_DEV_NAME); 616 617 if (dev_sc->sc->app_register_done) 618 return (EBUSY); 619 620 dev_sc->sc->app_register_done = true; 621 dev_sc->proc_task = curproc; 622 return (0); 623 } 624 625 static int 626 hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, 627 struct thread *td) 628 { 629 struct proc *td_proc; 630 td_proc = td->td_proc; 631 632 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 633 634 hv_vss_log_info("%s: %s closes device \"%s\".\n", 635 __func__, td_proc->p_comm, APP_VSS_DEV_NAME); 636 dev_sc->sc->app_register_done = false; 637 return (0); 638 } 639 640 static int 641 hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 642 struct thread *td) 643 { 644 struct proc *td_proc; 645 struct hv_vss_dev_sc *dev_sc; 646 647 td_proc = td->td_proc; 648 dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 649 650 hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); 651 652 struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; 653 switch(cmd) { 654 case IOCHVVSSREAD: 655 hv_vss_notified(dev_sc, userdata); 656 break; 657 case IOCHVVSSWRITE: 658 hv_vss_app_acked(dev_sc, userdata); 659 break; 660 } 661 return (0); 662 } 663 664 /* 665 * hv_vss_daemon poll invokes this function to check if data is available 666 * for daemon to read. 667 */ 668 static int 669 hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td) 670 { 671 int revent = 0; 672 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 673 674 mtx_lock(&dev_sc->sc->pending_mutex); 675 /** 676 * if there is data ready, inform daemon's poll 677 */ 678 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) 679 revent = POLLIN; 680 if (revent == 0) 681 selrecord(td, &dev_sc->hv_vss_selinfo); 682 hv_vss_log_info("%s return 0x%x\n", __func__, revent); 683 mtx_unlock(&dev_sc->sc->pending_mutex); 684 return (revent); 685 } 686 687 static void 688 hv_vss_timeout(void *arg) 689 { 690 hv_vss_req_internal *reqp = arg; 691 hv_vss_req_internal *request __diagused; 692 hv_vss_sc* sc = reqp->sc; 693 uint64_t req_id = reqp->vss_req.opt_msg.msgid; 694 /* This thread is locked */ 695 KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!")); 696 request = hv_vss_drain_req_queue_locked(sc, req_id); 697 KASSERT(request != NULL, ("timeout but fail to find request")); 698 hv_vss_notify_host_result_locked(reqp, HV_E_FAIL); 699 } 700 701 /* 702 * This routine is called whenever a message is received from the host 703 */ 704 static void 705 hv_vss_init_req(hv_vss_req_internal *reqp, 706 uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc) 707 { 708 struct timespec vm_ts; 709 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 710 711 memset(reqp, 0, __offsetof(hv_vss_req_internal, callout)); 712 reqp->host_msg_len = recvlen; 713 reqp->host_msg_id = requestid; 714 reqp->rcv_buf = vss_buf; 715 reqp->sc = sc; 716 memcpy(&reqp->vss_req.msg, 717 (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg)); 718 /* set the opt for users */ 719 switch (msg->hdr.vss_hdr.operation) { 720 case VSS_OP_FREEZE: 721 reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE; 722 break; 723 case VSS_OP_THAW: 724 reqp->vss_req.opt_msg.opt = HV_VSS_THAW; 725 break; 726 case VSS_OP_HOT_BACKUP: 727 reqp->vss_req.opt_msg.opt = HV_VSS_CHECK; 728 break; 729 } 730 /* Use a timestamp as msg request ID */ 731 nanotime(&vm_ts); 732 reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec; 733 } 734 735 static hv_vss_req_internal* 736 hv_vss_get_new_req_locked(hv_vss_sc *sc) 737 { 738 hv_vss_req_internal *reqp; 739 if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) || 740 !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) || 741 !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) || 742 !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { 743 /* 744 * There is request coming from host before 745 * finishing previous requests 746 */ 747 hv_vss_log_info("%s: Warning: there is new request " 748 "coming before finishing previous requests\n", __func__); 749 return (NULL); 750 } 751 if (LIST_EMPTY(&sc->req_free_list)) { 752 /* TODO Error: no buffer */ 753 hv_vss_log_info("Error: No buffer\n"); 754 return (NULL); 755 } 756 reqp = LIST_FIRST(&sc->req_free_list); 757 LIST_REMOVE(reqp, link); 758 return (reqp); 759 } 760 761 static void 762 hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt) 763 { 764 hv_vss_sc *sc = reqp->sc; 765 /* 766 * Freeze/Check notification sequence: kernel -> app -> daemon(fs) 767 * Thaw notification sequence: kernel -> daemon(fs) -> app 768 * 769 * We should wake up the daemon, in case it's doing poll(). 770 * The response should be received after 5s, otherwise, trigger timeout. 771 */ 772 switch (opt) { 773 case VSS_OP_FREEZE: 774 case VSS_OP_HOT_BACKUP: 775 if (sc->app_register_done) 776 hv_vss_notify(&sc->app_sc, reqp); 777 else 778 hv_vss_notify(&sc->daemon_sc, reqp); 779 callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, 780 hv_vss_timeout, reqp); 781 break; 782 case VSS_OP_THAW: 783 hv_vss_notify(&sc->daemon_sc, reqp); 784 callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, 785 hv_vss_timeout, reqp); 786 break; 787 } 788 } 789 790 /* 791 * Function to read the vss request buffer from host 792 * and interact with daemon 793 */ 794 static void 795 hv_vss_process_request(void *context, int pending __unused) 796 { 797 uint8_t *vss_buf; 798 struct vmbus_channel *channel; 799 uint32_t recvlen = 0; 800 uint64_t requestid; 801 struct vmbus_icmsg_hdr *icmsghdrp; 802 int ret = 0; 803 hv_vss_sc *sc; 804 hv_vss_req_internal *reqp; 805 806 hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__); 807 808 sc = (hv_vss_sc*)context; 809 vss_buf = sc->util_sc.ic_buf; 810 channel = vmbus_get_channel(sc->dev); 811 812 recvlen = sc->util_sc.ic_buflen; 813 ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); 814 KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); 815 /* XXX check recvlen to make sure that it contains enough data */ 816 817 while ((ret == 0) && (recvlen > 0)) { 818 icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf; 819 820 if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) { 821 ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf, 822 &recvlen, VSS_FWVER, VSS_MSGVER); 823 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 824 recvlen, requestid, ret); 825 hv_vss_log_info("%s: version negotiated\n", __func__); 826 } else if (!hv_vss_is_daemon_killed_after_launch(sc)) { 827 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 828 switch(msg->hdr.vss_hdr.operation) { 829 case VSS_OP_FREEZE: 830 case VSS_OP_THAW: 831 case VSS_OP_HOT_BACKUP: 832 mtx_lock(&sc->pending_mutex); 833 reqp = hv_vss_get_new_req_locked(sc); 834 mtx_unlock(&sc->pending_mutex); 835 if (reqp == NULL) { 836 /* ignore this request from host */ 837 break; 838 } 839 hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc); 840 hv_vss_log_info("%s: receive %s (%ju) from host\n", 841 __func__, 842 vss_opt_name[reqp->vss_req.opt_msg.opt], 843 (uintmax_t)reqp->vss_req.opt_msg.msgid); 844 hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation); 845 break; 846 case VSS_OP_GET_DM_INFO: 847 hv_vss_log_info("%s: receive GET_DM_INFO from host\n", 848 __func__); 849 msg->body.dm_info.flags = 0; 850 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 851 recvlen, requestid, HV_S_OK); 852 break; 853 default: 854 device_printf(sc->dev, "Unknown opt from host: %d\n", 855 msg->hdr.vss_hdr.operation); 856 break; 857 } 858 } else { 859 /* daemon was killed for some reason after it was launched */ 860 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 861 switch(msg->hdr.vss_hdr.operation) { 862 case VSS_OP_FREEZE: 863 hv_vss_log_info("%s: response fail for FREEZE\n", 864 __func__); 865 break; 866 case VSS_OP_THAW: 867 hv_vss_log_info("%s: response fail for THAW\n", 868 __func__); 869 break; 870 case VSS_OP_HOT_BACKUP: 871 hv_vss_log_info("%s: response fail for HOT_BACKUP\n", 872 __func__); 873 msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; 874 break; 875 case VSS_OP_GET_DM_INFO: 876 hv_vss_log_info("%s: response fail for GET_DM_INFO\n", 877 __func__); 878 msg->body.dm_info.flags = 0; 879 break; 880 default: 881 device_printf(sc->dev, "Unknown opt from host: %d\n", 882 msg->hdr.vss_hdr.operation); 883 break; 884 } 885 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 886 recvlen, requestid, HV_E_FAIL); 887 } 888 /* 889 * Try reading next buffer 890 */ 891 recvlen = sc->util_sc.ic_buflen; 892 ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); 893 KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); 894 /* XXX check recvlen to make sure that it contains enough data */ 895 896 hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n", 897 __func__, context, ret, recvlen); 898 } 899 } 900 901 static int 902 hv_vss_probe(device_t dev) 903 { 904 return (vmbus_ic_probe(dev, vmbus_vss_descs)); 905 } 906 907 static int 908 hv_vss_init_send_receive_queue(device_t dev) 909 { 910 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 911 int i; 912 const int max_list = 4; /* It is big enough for the list */ 913 struct hv_vss_req_internal* reqp; 914 915 LIST_INIT(&sc->req_free_list); 916 STAILQ_INIT(&sc->daemon_sc.to_notify_queue); 917 STAILQ_INIT(&sc->daemon_sc.to_ack_queue); 918 STAILQ_INIT(&sc->app_sc.to_notify_queue); 919 STAILQ_INIT(&sc->app_sc.to_ack_queue); 920 921 for (i = 0; i < max_list; i++) { 922 reqp = malloc(sizeof(struct hv_vss_req_internal), 923 M_DEVBUF, M_WAITOK|M_ZERO); 924 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); 925 callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0); 926 } 927 return (0); 928 } 929 930 static int 931 hv_vss_destroy_send_receive_queue(device_t dev) 932 { 933 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 934 hv_vss_req_internal* reqp; 935 936 while (!LIST_EMPTY(&sc->req_free_list)) { 937 reqp = LIST_FIRST(&sc->req_free_list); 938 LIST_REMOVE(reqp, link); 939 free(reqp, M_DEVBUF); 940 } 941 942 while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) { 943 reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue); 944 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink); 945 free(reqp, M_DEVBUF); 946 } 947 948 while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) { 949 reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue); 950 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink); 951 free(reqp, M_DEVBUF); 952 } 953 954 while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) { 955 reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue); 956 STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink); 957 free(reqp, M_DEVBUF); 958 } 959 960 while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { 961 reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue); 962 STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink); 963 free(reqp, M_DEVBUF); 964 } 965 return (0); 966 } 967 968 static int 969 hv_vss_attach(device_t dev) 970 { 971 int error; 972 struct sysctl_oid_list *child; 973 struct sysctl_ctx_list *ctx; 974 975 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 976 977 sc->dev = dev; 978 mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF); 979 980 ctx = device_get_sysctl_ctx(dev); 981 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 982 983 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log", 984 CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level"); 985 986 TASK_INIT(&sc->task, 0, hv_vss_process_request, sc); 987 hv_vss_init_send_receive_queue(dev); 988 /* create character device for file system freeze/thaw */ 989 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 990 &sc->hv_vss_dev, 991 &hv_vss_cdevsw, 992 0, 993 UID_ROOT, 994 GID_WHEEL, 995 0640, 996 FS_VSS_DEV_NAME); 997 998 if (error != 0) { 999 hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error); 1000 return (error); 1001 } 1002 sc->hv_vss_dev->si_drv1 = &sc->daemon_sc; 1003 sc->daemon_sc.sc = sc; 1004 /* create character device for application freeze/thaw */ 1005 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 1006 &sc->hv_appvss_dev, 1007 &hv_appvss_cdevsw, 1008 0, 1009 UID_ROOT, 1010 GID_WHEEL, 1011 0640, 1012 APP_VSS_DEV_NAME); 1013 1014 if (error != 0) { 1015 hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error); 1016 return (error); 1017 } 1018 sc->hv_appvss_dev->si_drv1 = &sc->app_sc; 1019 sc->app_sc.sc = sc; 1020 1021 return (vmbus_ic_attach(dev, hv_vss_callback)); 1022 } 1023 1024 static int 1025 hv_vss_detach(device_t dev) 1026 { 1027 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 1028 mtx_destroy(&sc->pending_mutex); 1029 if (sc->daemon_sc.proc_task != NULL) { 1030 PROC_LOCK(sc->daemon_sc.proc_task); 1031 kern_psignal(sc->daemon_sc.proc_task, SIGKILL); 1032 PROC_UNLOCK(sc->daemon_sc.proc_task); 1033 } 1034 if (sc->app_sc.proc_task != NULL) { 1035 PROC_LOCK(sc->app_sc.proc_task); 1036 kern_psignal(sc->app_sc.proc_task, SIGKILL); 1037 PROC_UNLOCK(sc->app_sc.proc_task); 1038 } 1039 hv_vss_destroy_send_receive_queue(dev); 1040 destroy_dev(sc->hv_vss_dev); 1041 destroy_dev(sc->hv_appvss_dev); 1042 return (vmbus_ic_detach(dev)); 1043 } 1044 1045 static device_method_t vss_methods[] = { 1046 /* Device interface */ 1047 DEVMETHOD(device_probe, hv_vss_probe), 1048 DEVMETHOD(device_attach, hv_vss_attach), 1049 DEVMETHOD(device_detach, hv_vss_detach), 1050 { 0, 0 } 1051 }; 1052 1053 static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)}; 1054 1055 DRIVER_MODULE(hv_vss, vmbus, vss_driver, NULL, NULL); 1056 MODULE_VERSION(hv_vss, 1); 1057 MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1); 1058