/*-
 * Copyright (c) 2016 Microsoft Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/kernel.h> 32 #include <sys/conf.h> 33 #include <sys/uio.h> 34 #include <sys/bus.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/module.h> 38 #include <sys/lock.h> 39 #include <sys/taskqueue.h> 40 #include <sys/selinfo.h> 41 #include <sys/sysctl.h> 42 #include <sys/poll.h> 43 #include <sys/proc.h> 44 #include <sys/queue.h> 45 #include <sys/kthread.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysproto.h> 48 #include <sys/un.h> 49 #include <sys/endian.h> 50 #include <sys/sema.h> 51 #include <sys/signal.h> 52 #include <sys/syslog.h> 53 #include <sys/systm.h> 54 #include <sys/mutex.h> 55 #include <sys/callout.h> 56 57 #include <dev/hyperv/include/hyperv.h> 58 #include <dev/hyperv/utilities/hv_utilreg.h> 59 #include <dev/hyperv/utilities/vmbus_icreg.h> 60 #include <dev/hyperv/utilities/vmbus_icvar.h> 61 62 #include "hv_snapshot.h" 63 #include "vmbus_if.h" 64 65 #define VSS_MAJOR 5 66 #define VSS_MINOR 0 67 #define VSS_MSGVER VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR) 68 69 #define VSS_FWVER_MAJOR 3 70 #define VSS_FWVER VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0) 71 72 #define TIMEOUT_LIMIT (15) // seconds 73 enum hv_vss_op { 74 VSS_OP_CREATE = 0, 75 VSS_OP_DELETE, 76 VSS_OP_HOT_BACKUP, 77 VSS_OP_GET_DM_INFO, 78 VSS_OP_BU_COMPLETE, 79 /* 80 * Following operations are only supported with IC version >= 5.0 81 */ 82 VSS_OP_FREEZE, /* Freeze the file systems in the VM */ 83 VSS_OP_THAW, /* Unfreeze the file systems */ 84 VSS_OP_AUTO_RECOVER, 85 VSS_OP_COUNT /* Number of operations, must be last */ 86 }; 87 88 /* 89 * Header for all VSS messages. 90 */ 91 struct hv_vss_hdr { 92 struct vmbus_icmsg_hdr ic_hdr; 93 uint8_t operation; 94 uint8_t reserved[7]; 95 } __packed; 96 97 98 /* 99 * Flag values for the hv_vss_check_feature. Here supports only 100 * one value. 
101 */ 102 #define VSS_HBU_NO_AUTO_RECOVERY 0x00000005 103 104 struct hv_vss_check_feature { 105 uint32_t flags; 106 } __packed; 107 108 struct hv_vss_check_dm_info { 109 uint32_t flags; 110 } __packed; 111 112 struct hv_vss_msg { 113 union { 114 struct hv_vss_hdr vss_hdr; 115 } hdr; 116 union { 117 struct hv_vss_check_feature vss_cf; 118 struct hv_vss_check_dm_info dm_info; 119 } body; 120 } __packed; 121 122 struct hv_vss_req { 123 struct hv_vss_opt_msg opt_msg; /* used to communicate with daemon */ 124 struct hv_vss_msg msg; /* used to communicate with host */ 125 } __packed; 126 127 /* hv_vss debug control */ 128 static int hv_vss_log = 0; 129 130 #define hv_vss_log_error(...) do { \ 131 if (hv_vss_log > 0) \ 132 log(LOG_ERR, "hv_vss: " __VA_ARGS__); \ 133 } while (0) 134 135 #define hv_vss_log_info(...) do { \ 136 if (hv_vss_log > 1) \ 137 log(LOG_INFO, "hv_vss: " __VA_ARGS__); \ 138 } while (0) 139 140 static const struct vmbus_ic_desc vmbus_vss_descs[] = { 141 { 142 .ic_guid = { .hv_guid = { 143 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, 144 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40} }, 145 .ic_desc = "Hyper-V VSS" 146 }, 147 VMBUS_IC_DESC_END 148 }; 149 150 static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"}; 151 152 /* character device prototypes */ 153 static d_open_t hv_vss_dev_open; 154 static d_close_t hv_vss_dev_close; 155 static d_poll_t hv_vss_dev_daemon_poll; 156 static d_ioctl_t hv_vss_dev_daemon_ioctl; 157 158 static d_open_t hv_appvss_dev_open; 159 static d_close_t hv_appvss_dev_close; 160 static d_poll_t hv_appvss_dev_poll; 161 static d_ioctl_t hv_appvss_dev_ioctl; 162 163 /* hv_vss character device structure */ 164 static struct cdevsw hv_vss_cdevsw = 165 { 166 .d_version = D_VERSION, 167 .d_open = hv_vss_dev_open, 168 .d_close = hv_vss_dev_close, 169 .d_poll = hv_vss_dev_daemon_poll, 170 .d_ioctl = hv_vss_dev_daemon_ioctl, 171 .d_name = FS_VSS_DEV_NAME, 172 }; 173 174 static struct cdevsw hv_appvss_cdevsw = 
175 { 176 .d_version = D_VERSION, 177 .d_open = hv_appvss_dev_open, 178 .d_close = hv_appvss_dev_close, 179 .d_poll = hv_appvss_dev_poll, 180 .d_ioctl = hv_appvss_dev_ioctl, 181 .d_name = APP_VSS_DEV_NAME, 182 }; 183 184 struct hv_vss_sc; 185 /* 186 * Global state to track cdev 187 */ 188 struct hv_vss_dev_sc { 189 /* 190 * msg was transferred from host to notify queue, and 191 * ack queue. Finally, it was recyled to free list. 192 */ 193 STAILQ_HEAD(, hv_vss_req_internal) to_notify_queue; 194 STAILQ_HEAD(, hv_vss_req_internal) to_ack_queue; 195 struct hv_vss_sc *sc; 196 struct proc *proc_task; 197 struct selinfo hv_vss_selinfo; 198 }; 199 /* 200 * Global state to track and synchronize the transaction requests from the host. 201 * The VSS allows user to register their function to do freeze/thaw for application. 202 * VSS kernel will notify both vss daemon and user application if it is registered. 203 * The implementation state transition is illustrated by: 204 * https://clovertrail.github.io/assets/vssdot.png 205 */ 206 typedef struct hv_vss_sc { 207 struct vmbus_ic_softc util_sc; 208 device_t dev; 209 210 struct task task; 211 212 /* 213 * mutex is used to protect access of list/queue, 214 * callout in request is also used this mutex. 
215 */ 216 struct mtx pending_mutex; 217 /* 218 * req_free_list contains all free items 219 */ 220 LIST_HEAD(, hv_vss_req_internal) req_free_list; 221 222 /* Indicates if daemon registered with driver */ 223 boolean_t register_done; 224 225 boolean_t app_register_done; 226 227 /* cdev for file system freeze/thaw */ 228 struct cdev *hv_vss_dev; 229 /* cdev for application freeze/thaw */ 230 struct cdev *hv_appvss_dev; 231 232 /* sc for app */ 233 struct hv_vss_dev_sc app_sc; 234 /* sc for deamon */ 235 struct hv_vss_dev_sc daemon_sc; 236 } hv_vss_sc; 237 238 typedef struct hv_vss_req_internal { 239 LIST_ENTRY(hv_vss_req_internal) link; 240 STAILQ_ENTRY(hv_vss_req_internal) slink; 241 struct hv_vss_req vss_req; 242 243 /* Rcv buffer for communicating with the host*/ 244 uint8_t *rcv_buf; 245 /* Length of host message */ 246 uint32_t host_msg_len; 247 /* Host message id */ 248 uint64_t host_msg_id; 249 250 hv_vss_sc *sc; 251 252 struct callout callout; 253 } hv_vss_req_internal; 254 255 #define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id) \ 256 do { \ 257 STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) { \ 258 if (reqp->vss_req.opt_msg.msgid == id) { \ 259 STAILQ_REMOVE(queue, \ 260 reqp, hv_vss_req_internal, link); \ 261 break; \ 262 } \ 263 } \ 264 } while (0) 265 266 static bool 267 hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc) 268 { 269 return (!sc->register_done && sc->daemon_sc.proc_task); 270 } 271 272 /* 273 * Callback routine that gets called whenever there is a message from host 274 */ 275 static void 276 hv_vss_callback(struct vmbus_channel *chan __unused, void *context) 277 { 278 hv_vss_sc *sc = (hv_vss_sc*)context; 279 if (hv_vss_is_daemon_killed_after_launch(sc)) 280 hv_vss_log_info("%s: daemon was killed!\n", __func__); 281 if (sc->register_done || sc->daemon_sc.proc_task) { 282 hv_vss_log_info("%s: Queuing work item\n", __func__); 283 if (hv_vss_is_daemon_killed_after_launch(sc)) 284 hv_vss_log_info("%s: daemon was killed!\n", __func__); 285 
taskqueue_enqueue(taskqueue_thread, &sc->task); 286 } else { 287 hv_vss_log_info("%s: daemon has never been registered\n", __func__); 288 } 289 hv_vss_log_info("%s: received msg from host\n", __func__); 290 } 291 /* 292 * Send the response back to the host. 293 */ 294 static void 295 hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch, 296 uint32_t recvlen, uint64_t requestid, uint32_t error) 297 { 298 struct vmbus_icmsg_hdr *hv_icmsg_hdrp; 299 300 hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf; 301 302 hv_icmsg_hdrp->ic_status = error; 303 hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; 304 305 error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0, 306 rcv_buf, recvlen, requestid); 307 if (error) 308 hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n", 309 __func__, error); 310 } 311 312 static void 313 hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status) 314 { 315 struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf; 316 hv_vss_sc *sc = reqp->sc; 317 if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) { 318 msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; 319 } 320 hv_vss_log_info("%s, %s response %s to host\n", __func__, 321 vss_opt_name[reqp->vss_req.opt_msg.opt], 322 status == HV_S_OK ? 
"Success" : "Fail"); 323 hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev), 324 reqp->host_msg_len, reqp->host_msg_id, status); 325 /* recycle the request */ 326 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); 327 } 328 329 static void 330 hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status) 331 { 332 mtx_lock(&reqp->sc->pending_mutex); 333 hv_vss_notify_host_result_locked(reqp, status); 334 mtx_unlock(&reqp->sc->pending_mutex); 335 } 336 337 static void 338 hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp, 339 struct hv_vss_opt_msg *userdata) 340 { 341 struct hv_vss_req *hv_vss_dev_buf; 342 hv_vss_dev_buf = &reqp->vss_req; 343 hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE; 344 switch (reqp->vss_req.msg.hdr.vss_hdr.operation) { 345 case VSS_OP_FREEZE: 346 hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE; 347 break; 348 case VSS_OP_THAW: 349 hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW; 350 break; 351 case VSS_OP_HOT_BACKUP: 352 hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK; 353 break; 354 } 355 *userdata = hv_vss_dev_buf->opt_msg; 356 hv_vss_log_info("%s, read data from user for " 357 "%s (%ju) \n", __func__, vss_opt_name[userdata->opt], 358 (uintmax_t)userdata->msgid); 359 } 360 361 /** 362 * Remove the request id from app notifiy or ack queue, 363 * and recyle the request by inserting it to free list. 364 * 365 * When app was notified but not yet sending ack, the request 366 * should locate in either notify queue or ack queue. 
367 */ 368 static struct hv_vss_req_internal* 369 hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id) 370 { 371 struct hv_vss_req_internal *reqp, *tmp; 372 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue, 373 slink, tmp, req_id); 374 if (reqp == NULL) 375 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue, 376 slink, tmp, req_id); 377 if (reqp == NULL) 378 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue, 379 slink, tmp, req_id); 380 if (reqp == NULL) 381 SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink, 382 tmp, req_id); 383 return (reqp); 384 } 385 /** 386 * Actions for daemon who has been notified. 387 */ 388 static void 389 hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 390 { 391 struct hv_vss_req_internal *reqp; 392 mtx_lock(&dev_sc->sc->pending_mutex); 393 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) { 394 reqp = STAILQ_FIRST(&dev_sc->to_notify_queue); 395 hv_vss_cp_vssreq_to_user(reqp, userdata); 396 STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink); 397 /* insert the msg to queue for write */ 398 STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink); 399 userdata->status = VSS_SUCCESS; 400 } else { 401 /* Timeout occur, thus request was removed from queue. */ 402 hv_vss_log_info("%s: notify queue is empty!\n", __func__); 403 userdata->status = VSS_FAIL; 404 } 405 mtx_unlock(&dev_sc->sc->pending_mutex); 406 } 407 408 static void 409 hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp) 410 { 411 uint32_t opt = reqp->vss_req.opt_msg.opt; 412 mtx_lock(&dev_sc->sc->pending_mutex); 413 STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink); 414 hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__, 415 vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid, 416 &dev_sc->sc->app_sc == dev_sc ? 
"app" : "daemon"); 417 mtx_unlock(&dev_sc->sc->pending_mutex); 418 selwakeup(&dev_sc->hv_vss_selinfo); 419 } 420 421 /** 422 * Actions for daemon who has acknowledged. 423 */ 424 static void 425 hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 426 { 427 struct hv_vss_req_internal *reqp, *tmp; 428 uint64_t req_id; 429 int opt; 430 uint32_t status; 431 432 opt = userdata->opt; 433 req_id = userdata->msgid; 434 status = userdata->status; 435 /* make sure the reserved fields are all zeros. */ 436 memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - 437 __offsetof(struct hv_vss_opt_msg, reserved)); 438 mtx_lock(&dev_sc->sc->pending_mutex); 439 SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); 440 mtx_unlock(&dev_sc->sc->pending_mutex); 441 if (reqp == NULL) { 442 hv_vss_log_info("%s Timeout: fail to find daemon ack request\n", 443 __func__); 444 userdata->status = VSS_FAIL; 445 return; 446 } 447 KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); 448 hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__, 449 status, vss_opt_name[opt], (uintmax_t)req_id); 450 switch (opt) { 451 case HV_VSS_CHECK: 452 case HV_VSS_FREEZE: 453 callout_drain(&reqp->callout); 454 hv_vss_notify_host_result(reqp, 455 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 456 break; 457 case HV_VSS_THAW: 458 if (dev_sc->sc->app_register_done) { 459 if (status == VSS_SUCCESS) { 460 hv_vss_notify(&dev_sc->sc->app_sc, reqp); 461 } else { 462 /* handle error */ 463 callout_drain(&reqp->callout); 464 hv_vss_notify_host_result(reqp, HV_E_FAIL); 465 } 466 } else { 467 callout_drain(&reqp->callout); 468 hv_vss_notify_host_result(reqp, 469 status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); 470 } 471 break; 472 } 473 } 474 475 /** 476 * Actions for app who has acknowledged. 
477 */ 478 static void 479 hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) 480 { 481 struct hv_vss_req_internal *reqp, *tmp; 482 uint64_t req_id; 483 int opt; 484 uint8_t status; 485 486 opt = userdata->opt; 487 req_id = userdata->msgid; 488 status = userdata->status; 489 /* make sure the reserved fields are all zeros. */ 490 memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - 491 __offsetof(struct hv_vss_opt_msg, reserved)); 492 mtx_lock(&dev_sc->sc->pending_mutex); 493 SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); 494 mtx_unlock(&dev_sc->sc->pending_mutex); 495 if (reqp == NULL) { 496 hv_vss_log_info("%s Timeout: fail to find app ack request\n", 497 __func__); 498 userdata->status = VSS_FAIL; 499 return; 500 } 501 KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); 502 hv_vss_log_info("%s, get response %d from app for %s (%ju) \n", 503 __func__, status, vss_opt_name[opt], (uintmax_t)req_id); 504 if (dev_sc->sc->register_done) { 505 switch (opt) { 506 case HV_VSS_CHECK: 507 case HV_VSS_FREEZE: 508 if (status == VSS_SUCCESS) { 509 hv_vss_notify(&dev_sc->sc->daemon_sc, reqp); 510 } else { 511 /* handle error */ 512 callout_drain(&reqp->callout); 513 hv_vss_notify_host_result(reqp, HV_E_FAIL); 514 } 515 break; 516 case HV_VSS_THAW: 517 callout_drain(&reqp->callout); 518 hv_vss_notify_host_result(reqp, 519 status == VSS_SUCCESS ? 
HV_S_OK : HV_E_FAIL); 520 break; 521 } 522 } else { 523 hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__); 524 } 525 } 526 527 static int 528 hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 529 { 530 struct proc *td_proc; 531 td_proc = td->td_proc; 532 533 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 534 hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", 535 __func__, td_proc->p_comm, FS_VSS_DEV_NAME); 536 537 if (dev_sc->sc->register_done) 538 return (EBUSY); 539 540 dev_sc->sc->register_done = true; 541 hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc); 542 543 dev_sc->proc_task = curproc; 544 return (0); 545 } 546 547 static int 548 hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, 549 struct thread *td) 550 { 551 struct proc *td_proc; 552 td_proc = td->td_proc; 553 554 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 555 556 hv_vss_log_info("%s: %s closes device \"%s\"\n", 557 __func__, td_proc->p_comm, FS_VSS_DEV_NAME); 558 dev_sc->sc->register_done = false; 559 return (0); 560 } 561 562 static int 563 hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 564 struct thread *td) 565 { 566 struct proc *td_proc; 567 struct hv_vss_dev_sc *sc; 568 569 td_proc = td->td_proc; 570 sc = (struct hv_vss_dev_sc*)dev->si_drv1; 571 572 hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); 573 574 struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; 575 switch(cmd) { 576 case IOCHVVSSREAD: 577 hv_vss_notified(sc, userdata); 578 break; 579 case IOCHVVSSWRITE: 580 hv_vss_daemon_acked(sc, userdata); 581 break; 582 } 583 return (0); 584 } 585 586 /* 587 * hv_vss_daemon poll invokes this function to check if data is available 588 * for daemon to read. 
589 */ 590 static int 591 hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td) 592 { 593 int revent = 0; 594 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 595 596 mtx_lock(&dev_sc->sc->pending_mutex); 597 /** 598 * if there is data ready, inform daemon's poll 599 */ 600 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) 601 revent = POLLIN; 602 if (revent == 0) 603 selrecord(td, &dev_sc->hv_vss_selinfo); 604 hv_vss_log_info("%s return 0x%x\n", __func__, revent); 605 mtx_unlock(&dev_sc->sc->pending_mutex); 606 return (revent); 607 } 608 609 static int 610 hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 611 { 612 struct proc *td_proc; 613 td_proc = td->td_proc; 614 615 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 616 hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", 617 __func__, td_proc->p_comm, APP_VSS_DEV_NAME); 618 619 if (dev_sc->sc->app_register_done) 620 return (EBUSY); 621 622 dev_sc->sc->app_register_done = true; 623 dev_sc->proc_task = curproc; 624 return (0); 625 } 626 627 static int 628 hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, 629 struct thread *td) 630 { 631 struct proc *td_proc; 632 td_proc = td->td_proc; 633 634 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 635 636 hv_vss_log_info("%s: %s closes device \"%s\".\n", 637 __func__, td_proc->p_comm, APP_VSS_DEV_NAME); 638 dev_sc->sc->app_register_done = false; 639 return (0); 640 } 641 642 static int 643 hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, 644 struct thread *td) 645 { 646 struct proc *td_proc; 647 struct hv_vss_dev_sc *dev_sc; 648 649 td_proc = td->td_proc; 650 dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 651 652 hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); 653 654 struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; 655 switch(cmd) { 656 case IOCHVVSSREAD: 657 
hv_vss_notified(dev_sc, userdata); 658 break; 659 case IOCHVVSSWRITE: 660 hv_vss_app_acked(dev_sc, userdata); 661 break; 662 } 663 return (0); 664 } 665 666 /* 667 * hv_vss_daemon poll invokes this function to check if data is available 668 * for daemon to read. 669 */ 670 static int 671 hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td) 672 { 673 int revent = 0; 674 struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; 675 676 mtx_lock(&dev_sc->sc->pending_mutex); 677 /** 678 * if there is data ready, inform daemon's poll 679 */ 680 if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) 681 revent = POLLIN; 682 if (revent == 0) 683 selrecord(td, &dev_sc->hv_vss_selinfo); 684 hv_vss_log_info("%s return 0x%x\n", __func__, revent); 685 mtx_unlock(&dev_sc->sc->pending_mutex); 686 return (revent); 687 } 688 689 static void 690 hv_vss_timeout(void *arg) 691 { 692 hv_vss_req_internal *reqp = arg; 693 hv_vss_req_internal *request __diagused; 694 hv_vss_sc* sc = reqp->sc; 695 uint64_t req_id = reqp->vss_req.opt_msg.msgid; 696 /* This thread is locked */ 697 KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!")); 698 request = hv_vss_drain_req_queue_locked(sc, req_id); 699 KASSERT(request != NULL, ("timeout but fail to find request")); 700 hv_vss_notify_host_result_locked(reqp, HV_E_FAIL); 701 } 702 703 /* 704 * This routine is called whenever a message is received from the host 705 */ 706 static void 707 hv_vss_init_req(hv_vss_req_internal *reqp, 708 uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc) 709 { 710 struct timespec vm_ts; 711 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 712 713 memset(reqp, 0, __offsetof(hv_vss_req_internal, callout)); 714 reqp->host_msg_len = recvlen; 715 reqp->host_msg_id = requestid; 716 reqp->rcv_buf = vss_buf; 717 reqp->sc = sc; 718 memcpy(&reqp->vss_req.msg, 719 (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg)); 720 /* set the opt for users */ 721 switch 
(msg->hdr.vss_hdr.operation) { 722 case VSS_OP_FREEZE: 723 reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE; 724 break; 725 case VSS_OP_THAW: 726 reqp->vss_req.opt_msg.opt = HV_VSS_THAW; 727 break; 728 case VSS_OP_HOT_BACKUP: 729 reqp->vss_req.opt_msg.opt = HV_VSS_CHECK; 730 break; 731 } 732 /* Use a timestamp as msg request ID */ 733 nanotime(&vm_ts); 734 reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec; 735 } 736 737 static hv_vss_req_internal* 738 hv_vss_get_new_req_locked(hv_vss_sc *sc) 739 { 740 hv_vss_req_internal *reqp; 741 if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) || 742 !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) || 743 !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) || 744 !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { 745 /* 746 * There is request coming from host before 747 * finishing previous requests 748 */ 749 hv_vss_log_info("%s: Warning: there is new request " 750 "coming before finishing previous requests\n", __func__); 751 return (NULL); 752 } 753 if (LIST_EMPTY(&sc->req_free_list)) { 754 /* TODO Error: no buffer */ 755 hv_vss_log_info("Error: No buffer\n"); 756 return (NULL); 757 } 758 reqp = LIST_FIRST(&sc->req_free_list); 759 LIST_REMOVE(reqp, link); 760 return (reqp); 761 } 762 763 static void 764 hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt) 765 { 766 hv_vss_sc *sc = reqp->sc; 767 /* 768 * Freeze/Check notification sequence: kernel -> app -> daemon(fs) 769 * Thaw notification sequence: kernel -> daemon(fs) -> app 770 * 771 * We should wake up the daemon, in case it's doing poll(). 772 * The response should be received after 5s, otherwise, trigger timeout. 
773 */ 774 switch (opt) { 775 case VSS_OP_FREEZE: 776 case VSS_OP_HOT_BACKUP: 777 if (sc->app_register_done) 778 hv_vss_notify(&sc->app_sc, reqp); 779 else 780 hv_vss_notify(&sc->daemon_sc, reqp); 781 callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, 782 hv_vss_timeout, reqp); 783 break; 784 case VSS_OP_THAW: 785 hv_vss_notify(&sc->daemon_sc, reqp); 786 callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, 787 hv_vss_timeout, reqp); 788 break; 789 } 790 } 791 792 /* 793 * Function to read the vss request buffer from host 794 * and interact with daemon 795 */ 796 static void 797 hv_vss_process_request(void *context, int pending __unused) 798 { 799 uint8_t *vss_buf; 800 struct vmbus_channel *channel; 801 uint32_t recvlen = 0; 802 uint64_t requestid; 803 struct vmbus_icmsg_hdr *icmsghdrp; 804 int ret = 0; 805 hv_vss_sc *sc; 806 hv_vss_req_internal *reqp; 807 808 hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__); 809 810 sc = (hv_vss_sc*)context; 811 vss_buf = sc->util_sc.ic_buf; 812 channel = vmbus_get_channel(sc->dev); 813 814 recvlen = sc->util_sc.ic_buflen; 815 ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); 816 KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); 817 /* XXX check recvlen to make sure that it contains enough data */ 818 819 while ((ret == 0) && (recvlen > 0)) { 820 icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf; 821 822 if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) { 823 ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf, 824 &recvlen, VSS_FWVER, VSS_MSGVER); 825 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 826 recvlen, requestid, ret); 827 hv_vss_log_info("%s: version negotiated\n", __func__); 828 } else if (!hv_vss_is_daemon_killed_after_launch(sc)) { 829 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 830 switch(msg->hdr.vss_hdr.operation) { 831 case VSS_OP_FREEZE: 832 case VSS_OP_THAW: 833 case VSS_OP_HOT_BACKUP: 834 mtx_lock(&sc->pending_mutex); 835 reqp = hv_vss_get_new_req_locked(sc); 
836 mtx_unlock(&sc->pending_mutex); 837 if (reqp == NULL) { 838 /* ignore this request from host */ 839 break; 840 } 841 hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc); 842 hv_vss_log_info("%s: receive %s (%ju) from host\n", 843 __func__, 844 vss_opt_name[reqp->vss_req.opt_msg.opt], 845 (uintmax_t)reqp->vss_req.opt_msg.msgid); 846 hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation); 847 break; 848 case VSS_OP_GET_DM_INFO: 849 hv_vss_log_info("%s: receive GET_DM_INFO from host\n", 850 __func__); 851 msg->body.dm_info.flags = 0; 852 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 853 recvlen, requestid, HV_S_OK); 854 break; 855 default: 856 device_printf(sc->dev, "Unknown opt from host: %d\n", 857 msg->hdr.vss_hdr.operation); 858 break; 859 } 860 } else { 861 /* daemon was killed for some reason after it was launched */ 862 struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; 863 switch(msg->hdr.vss_hdr.operation) { 864 case VSS_OP_FREEZE: 865 hv_vss_log_info("%s: response fail for FREEZE\n", 866 __func__); 867 break; 868 case VSS_OP_THAW: 869 hv_vss_log_info("%s: response fail for THAW\n", 870 __func__); 871 break; 872 case VSS_OP_HOT_BACKUP: 873 hv_vss_log_info("%s: response fail for HOT_BACKUP\n", 874 __func__); 875 msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; 876 break; 877 case VSS_OP_GET_DM_INFO: 878 hv_vss_log_info("%s: response fail for GET_DM_INFO\n", 879 __func__); 880 msg->body.dm_info.flags = 0; 881 break; 882 default: 883 device_printf(sc->dev, "Unknown opt from host: %d\n", 884 msg->hdr.vss_hdr.operation); 885 break; 886 } 887 hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), 888 recvlen, requestid, HV_E_FAIL); 889 } 890 /* 891 * Try reading next buffer 892 */ 893 recvlen = sc->util_sc.ic_buflen; 894 ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); 895 KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); 896 /* XXX check recvlen to make sure that it contains enough data */ 897 898 
hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n", 899 __func__, context, ret, recvlen); 900 } 901 } 902 903 static int 904 hv_vss_probe(device_t dev) 905 { 906 return (vmbus_ic_probe(dev, vmbus_vss_descs)); 907 } 908 909 static int 910 hv_vss_init_send_receive_queue(device_t dev) 911 { 912 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 913 int i; 914 const int max_list = 4; /* It is big enough for the list */ 915 struct hv_vss_req_internal* reqp; 916 917 LIST_INIT(&sc->req_free_list); 918 STAILQ_INIT(&sc->daemon_sc.to_notify_queue); 919 STAILQ_INIT(&sc->daemon_sc.to_ack_queue); 920 STAILQ_INIT(&sc->app_sc.to_notify_queue); 921 STAILQ_INIT(&sc->app_sc.to_ack_queue); 922 923 for (i = 0; i < max_list; i++) { 924 reqp = malloc(sizeof(struct hv_vss_req_internal), 925 M_DEVBUF, M_WAITOK|M_ZERO); 926 LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); 927 callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0); 928 } 929 return (0); 930 } 931 932 static int 933 hv_vss_destroy_send_receive_queue(device_t dev) 934 { 935 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 936 hv_vss_req_internal* reqp; 937 938 while (!LIST_EMPTY(&sc->req_free_list)) { 939 reqp = LIST_FIRST(&sc->req_free_list); 940 LIST_REMOVE(reqp, link); 941 free(reqp, M_DEVBUF); 942 } 943 944 while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) { 945 reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue); 946 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink); 947 free(reqp, M_DEVBUF); 948 } 949 950 while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) { 951 reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue); 952 STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink); 953 free(reqp, M_DEVBUF); 954 } 955 956 while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) { 957 reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue); 958 STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink); 959 free(reqp, M_DEVBUF); 960 } 961 962 while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { 963 reqp = 
STAILQ_FIRST(&sc->app_sc.to_ack_queue); 964 STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink); 965 free(reqp, M_DEVBUF); 966 } 967 return (0); 968 } 969 970 static int 971 hv_vss_attach(device_t dev) 972 { 973 int error; 974 struct sysctl_oid_list *child; 975 struct sysctl_ctx_list *ctx; 976 977 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 978 979 sc->dev = dev; 980 mtx_init(&sc->pending_mutex, "hv_vss pending mutex", NULL, MTX_DEF); 981 982 ctx = device_get_sysctl_ctx(dev); 983 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 984 985 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log", 986 CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level"); 987 988 TASK_INIT(&sc->task, 0, hv_vss_process_request, sc); 989 hv_vss_init_send_receive_queue(dev); 990 /* create character device for file system freeze/thaw */ 991 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 992 &sc->hv_vss_dev, 993 &hv_vss_cdevsw, 994 0, 995 UID_ROOT, 996 GID_WHEEL, 997 0640, 998 FS_VSS_DEV_NAME); 999 1000 if (error != 0) { 1001 hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error); 1002 return (error); 1003 } 1004 sc->hv_vss_dev->si_drv1 = &sc->daemon_sc; 1005 sc->daemon_sc.sc = sc; 1006 /* create character device for application freeze/thaw */ 1007 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, 1008 &sc->hv_appvss_dev, 1009 &hv_appvss_cdevsw, 1010 0, 1011 UID_ROOT, 1012 GID_WHEEL, 1013 0640, 1014 APP_VSS_DEV_NAME); 1015 1016 if (error != 0) { 1017 hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error); 1018 return (error); 1019 } 1020 sc->hv_appvss_dev->si_drv1 = &sc->app_sc; 1021 sc->app_sc.sc = sc; 1022 1023 return (vmbus_ic_attach(dev, hv_vss_callback)); 1024 } 1025 1026 static int 1027 hv_vss_detach(device_t dev) 1028 { 1029 hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); 1030 mtx_destroy(&sc->pending_mutex); 1031 if (sc->daemon_sc.proc_task != NULL) { 1032 PROC_LOCK(sc->daemon_sc.proc_task); 1033 
kern_psignal(sc->daemon_sc.proc_task, SIGKILL); 1034 PROC_UNLOCK(sc->daemon_sc.proc_task); 1035 } 1036 if (sc->app_sc.proc_task != NULL) { 1037 PROC_LOCK(sc->app_sc.proc_task); 1038 kern_psignal(sc->app_sc.proc_task, SIGKILL); 1039 PROC_UNLOCK(sc->app_sc.proc_task); 1040 } 1041 hv_vss_destroy_send_receive_queue(dev); 1042 destroy_dev(sc->hv_vss_dev); 1043 destroy_dev(sc->hv_appvss_dev); 1044 return (vmbus_ic_detach(dev)); 1045 } 1046 1047 static device_method_t vss_methods[] = { 1048 /* Device interface */ 1049 DEVMETHOD(device_probe, hv_vss_probe), 1050 DEVMETHOD(device_attach, hv_vss_attach), 1051 DEVMETHOD(device_detach, hv_vss_detach), 1052 { 0, 0 } 1053 }; 1054 1055 static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)}; 1056 1057 DRIVER_MODULE(hv_vss, vmbus, vss_driver, NULL, NULL); 1058 MODULE_VERSION(hv_vss, 1); 1059 MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1); 1060