1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2015 Joyent, Inc. All rights reserved. 14 */ 15 16 /* 17 * varpd persistence backend 18 */ 19 20 #include <sys/types.h> 21 #include <sys/stat.h> 22 #include <fcntl.h> 23 #include <stdlib.h> 24 #include <unistd.h> 25 #include <errno.h> 26 #include <strings.h> 27 #include <librename.h> 28 #include <md5.h> 29 #include <sys/sysmacros.h> 30 #include <dirent.h> 31 #include <sys/mman.h> 32 #include <umem.h> 33 #include <sys/debug.h> 34 35 #include <libvarpd_impl.h> 36 37 static uint8_t varpd_persist_magic[4] = { 38 'v', 39 'a', 40 'r', 41 'p', 42 }; 43 44 #define VARPD_PERSIST_MAXWRITE 4096 45 #define VARPD_PERSIST_VERSION_ONE 1 46 #define VARPD_PERSIST_SUFFIX ".varpd" 47 48 typedef struct varpd_persist_header { 49 uint8_t vph_magic[4]; 50 uint32_t vph_version; 51 uint8_t vph_md5[16]; 52 } varpd_persist_header_t; 53 54 void 55 libvarpd_persist_init(varpd_impl_t *vip) 56 { 57 vip->vdi_persistfd = -1; 58 if (rwlock_init(&vip->vdi_pfdlock, USYNC_THREAD, NULL) != 0) 59 libvarpd_panic("failed to create rw vdi_pfdlock"); 60 } 61 62 void 63 libvarpd_persist_fini(varpd_impl_t *vip) 64 { 65 /* 66 * Clean up for someone that left something behind. 67 */ 68 if (vip->vdi_persistfd != -1) { 69 if (close(vip->vdi_persistfd) != 0) 70 libvarpd_panic("failed to close persist fd %d: %d", 71 vip->vdi_persistfd, errno); 72 vip->vdi_persistfd = -1; 73 } 74 if (rwlock_destroy(&vip->vdi_pfdlock) != 0) 75 libvarpd_panic("failed to destroy rw vdi_pfdlock"); 76 } 77 78 int 79 libvarpd_persist_enable(varpd_handle_t *vhp, const char *rootdir) 80 { 81 int fd; 82 struct stat st; 83 varpd_impl_t *vip = (varpd_impl_t *)vhp; 84 85 fd = open(rootdir, O_RDONLY); 86 if (fd < 0) 87 return (errno); 88 89 if (fstat(fd, &st) != 0) { 90 int ret = errno; 91 if (close(fd) != 0) 92 libvarpd_panic("failed to close rootdir fd (%s) %d: %d", 93 rootdir, fd, errno); 94 return (ret); 95 } 96 97 if (!S_ISDIR(st.st_mode)) { 98 if (close(fd) != 0) 99 libvarpd_panic("failed to close rootdir fd (%s) %d: %d", 100 rootdir, fd, errno); 101 return (EINVAL); 102 } 103 104 105 VERIFY0(rw_wrlock(&vip->vdi_pfdlock)); 106 if (vip->vdi_persistfd != -1) { 107 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 108 if (close(fd) != 0) 109 libvarpd_panic("failed to close rootdir fd (%s) %d: %d", 110 rootdir, fd, errno); 111 return (EEXIST); 112 } 113 vip->vdi_persistfd = fd; 114 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 115 116 return (0); 117 } 118 119 static int 120 libvarpd_persist_write(int fd, const void *buf, size_t buflen) 121 { 122 ssize_t ret; 123 off_t off = 0; 124 125 while (buflen > 0) { 126 ret = write(fd, (void *)((uintptr_t)buf + off), 127 MIN(buflen, VARPD_PERSIST_MAXWRITE)); 128 if (ret == -1 && errno == EINTR) 129 continue; 130 if (ret == -1) 131 return (errno); 132 133 off += ret; 134 buflen -= ret; 135 } 136 137 return (0); 138 } 139 140 static int 141 libvarpd_persist_nvlist(int dirfd, uint64_t id, nvlist_t *nvl) 142 { 143 int err, fd; 144 size_t size; 145 varpd_persist_header_t hdr; 146 librename_atomic_t *lrap; 147 char *buf = NULL, *name; 148 149 if ((err = nvlist_pack(nvl, &buf, &size, NV_ENCODE_XDR, 0)) != 0) 150 return (err); 151 152 if (asprintf(&name, "%llu%s", (unsigned long long)id, ".varpd") == -1) { 153 err = errno; 154 free(buf); 155 return (err); 156 } 157 158 if ((err = librename_atomic_fdinit(dirfd, name, NULL, 0600, 0, 159 &lrap)) != 0) { 160 free(name); 161 free(buf); 162 return (err); 163 } 164 165 fd = librename_atomic_fd(lrap); 166 167 bzero(&hdr, sizeof (varpd_persist_header_t)); 168 bcopy(varpd_persist_magic, hdr.vph_magic, sizeof (varpd_persist_magic)); 169 hdr.vph_version = VARPD_PERSIST_VERSION_ONE; 170 md5_calc(hdr.vph_md5, buf, size); 171 172 if ((err = libvarpd_persist_write(fd, &hdr, 173 sizeof (varpd_persist_header_t))) != 0) { 174 librename_atomic_fini(lrap); 175 free(name); 176 free(buf); 177 return (err); 178 } 179 180 if ((err = libvarpd_persist_write(fd, buf, size)) != 0) { 181 librename_atomic_fini(lrap); 182 free(name); 183 free(buf); 184 return (err); 185 } 186 187 do { 188 err = librename_atomic_commit(lrap); 189 } while (err == EINTR); 190 191 librename_atomic_fini(lrap); 192 free(name); 193 free(buf); 194 return (err); 195 } 196 197 int 198 libvarpd_persist_instance(varpd_impl_t *vip, varpd_instance_t *inst) 199 { 200 int err = 0; 201 nvlist_t *nvl = NULL, *cvl = NULL; 202 203 VERIFY0(rw_rdlock(&vip->vdi_pfdlock)); 204 /* Check if persistence exists */ 205 if (vip->vdi_persistfd == -1) 206 goto out; 207 208 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0)) != 0) 209 goto out; 210 211 if ((err = nvlist_alloc(&cvl, NV_UNIQUE_NAME, 0)) != 0) 212 goto out; 213 214 if ((err = nvlist_add_uint64(nvl, "vri_id", inst->vri_id)) != 0) 215 goto out; 216 217 if ((err = nvlist_add_uint32(nvl, "vri_linkid", inst->vri_linkid)) != 0) 218 goto out; 219 220 if ((err = nvlist_add_uint32(nvl, "vri_dest", 221 (uint32_t)inst->vri_dest)) != 0) 222 goto out; 223 if ((err = nvlist_add_uint32(nvl, "vri_mode", 224 (uint32_t)inst->vri_mode)) != 0) 225 goto out; 226 227 if ((err = nvlist_add_string(nvl, "vri_plugin", 228 inst->vri_plugin->vpp_name)) != 0) 229 goto out; 230 231 err = inst->vri_plugin->vpp_ops->vpo_save(inst->vri_private, cvl); 232 if (err != 0) 233 goto out; 234 235 if ((err = nvlist_add_nvlist(nvl, "vri_private", cvl)) != 0) 236 goto out; 237 238 err = libvarpd_persist_nvlist(vip->vdi_persistfd, inst->vri_id, nvl); 239 out: 240 nvlist_free(nvl); 241 nvlist_free(cvl); 242 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 243 return (err); 244 } 245 246 void 247 libvarpd_torch_instance(varpd_impl_t *vip, varpd_instance_t *inst) 248 { 249 char buf[32]; 250 int ret; 251 252 VERIFY0(rw_rdlock(&vip->vdi_pfdlock)); 253 if (vip->vdi_persistfd == -1) { 254 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 255 return; 256 } 257 258 if (snprintf(buf, sizeof (buf), "%lld.varpd", inst->vri_id) >= 32) 259 libvarpd_panic("somehow exceeded static value for " 260 "libvarpd_torch_instance buffer"); 261 262 do { 263 ret = unlinkat(vip->vdi_persistfd, buf, 0); 264 } while (ret == -1 && errno == EINTR); 265 if (ret != 0) { 266 switch (errno) { 267 case ENOENT: 268 break; 269 default: 270 libvarpd_panic("failed to unlinkat %d`%s: %s", 271 vip->vdi_persistfd, buf, strerror(errno)); 272 } 273 } 274 275 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 276 } 277 278 static int 279 libvarpd_persist_restore_instance(varpd_impl_t *vip, nvlist_t *nvl) 280 { 281 int err; 282 nvlist_t *pvl; 283 uint64_t id, flags, vid; 284 uint32_t linkid, dest, mode; 285 char *pluginstr; 286 varpd_plugin_t *plugin; 287 overlay_plugin_dest_t adest; 288 varpd_instance_t *inst, lookup; 289 290 if (nvlist_lookup_uint64(nvl, "vri_id", &id) != 0) 291 return (EINVAL); 292 293 if (nvlist_lookup_uint32(nvl, "vri_linkid", &linkid) != 0) 294 return (EINVAL); 295 296 if (nvlist_lookup_uint32(nvl, "vri_dest", &dest) != 0) 297 return (EINVAL); 298 299 if (nvlist_lookup_uint32(nvl, "vri_mode", &mode) != 0) 300 return (EINVAL); 301 302 if (nvlist_lookup_string(nvl, "vri_plugin", &pluginstr) != 0) 303 return (EINVAL); 304 305 if (nvlist_lookup_nvlist(nvl, "vri_private", &pvl) != 0) 306 return (EINVAL); 307 308 plugin = libvarpd_plugin_lookup(vip, pluginstr); 309 if (plugin == NULL) 310 return (EINVAL); 311 312 if (plugin->vpp_mode != mode) 313 return (EINVAL); 314 315 if (libvarpd_overlay_info(vip, linkid, &adest, &flags, &vid) != 0) 316 return (EINVAL); 317 318 if (dest != adest) 319 return (EINVAL); 320 321 inst = umem_alloc(sizeof (varpd_instance_t), UMEM_DEFAULT); 322 if (inst == NULL) 323 libvarpd_panic("failed to allocate instance for restore"); 324 325 inst->vri_id = id_alloc_specific(vip->vdi_idspace, id); 326 if (inst->vri_id != id) { 327 umem_free(inst, sizeof (varpd_instance_t)); 328 return (EINVAL); 329 } 330 331 inst->vri_linkid = linkid; 332 inst->vri_vnetid = vid; 333 inst->vri_mode = plugin->vpp_mode; 334 inst->vri_dest = dest; 335 inst->vri_plugin = plugin; 336 inst->vri_impl = vip; 337 inst->vri_flags = 0; 338 if (plugin->vpp_ops->vpo_restore(pvl, (varpd_provider_handle_t *)inst, 339 dest, &inst->vri_private) != 0) { 340 id_free(vip->vdi_idspace, id); 341 umem_free(inst, sizeof (varpd_instance_t)); 342 return (EINVAL); 343 } 344 345 if (mutex_init(&inst->vri_lock, USYNC_THREAD | LOCK_ERRORCHECK, 346 NULL) != 0) 347 libvarpd_panic("failed to create vri_lock mutex"); 348 349 mutex_enter(&vip->vdi_lock); 350 lookup.vri_id = inst->vri_id; 351 if (avl_find(&vip->vdi_instances, &lookup, NULL) != NULL) 352 libvarpd_panic("found duplicate instance with id %d", 353 lookup.vri_id); 354 avl_add(&vip->vdi_instances, inst); 355 lookup.vri_linkid = inst->vri_linkid; 356 if (avl_find(&vip->vdi_linstances, &lookup, NULL) != NULL) 357 libvarpd_panic("found duplicate linstance with id %d", 358 lookup.vri_linkid); 359 avl_add(&vip->vdi_linstances, inst); 360 mutex_exit(&vip->vdi_lock); 361 362 if (plugin->vpp_ops->vpo_start(inst->vri_private) != 0) { 363 libvarpd_instance_destroy((varpd_instance_handle_t *)inst); 364 return (EINVAL); 365 } 366 367 if (flags & OVERLAY_TARG_INFO_F_ACTIVE) 368 (void) libvarpd_overlay_disassociate(inst); 369 370 if (libvarpd_overlay_associate(inst) != 0) { 371 libvarpd_instance_destroy((varpd_instance_handle_t *)inst); 372 return (EINVAL); 373 } 374 375 if (flags & OVERLAY_TARG_INFO_F_DEGRADED) { 376 if ((err = libvarpd_overlay_restore(inst)) != 0) { 377 libvarpd_panic("failed to restore instance %p: %d\n", 378 inst, err); 379 } 380 } 381 382 mutex_enter(&inst->vri_lock); 383 inst->vri_flags |= VARPD_INSTANCE_F_ACTIVATED; 384 mutex_exit(&inst->vri_lock); 385 386 return (0); 387 } 388 389 static int 390 libvarpd_persist_restore_one(varpd_impl_t *vip, int fd) 391 { 392 int err; 393 size_t fsize; 394 struct stat st; 395 void *buf, *datap; 396 varpd_persist_header_t *hdr; 397 uint8_t md5[16]; 398 nvlist_t *nvl; 399 400 if (fstat(fd, &st) != 0) 401 return (errno); 402 403 if (st.st_size <= sizeof (varpd_persist_header_t)) 404 return (EINVAL); 405 fsize = st.st_size - sizeof (varpd_persist_header_t); 406 407 buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); 408 if (buf == MAP_FAILED) 409 return (errno); 410 411 hdr = buf; 412 if (bcmp(varpd_persist_magic, hdr->vph_magic, 413 sizeof (varpd_persist_magic)) != 0) { 414 if (munmap(buf, st.st_size) != 0) 415 libvarpd_panic("failed to munmap %p: %d", buf, errno); 416 return (EINVAL); 417 } 418 419 if (hdr->vph_version != VARPD_PERSIST_VERSION_ONE) { 420 if (munmap(buf, st.st_size) != 0) 421 libvarpd_panic("failed to munmap %p: %d", buf, errno); 422 return (EINVAL); 423 } 424 425 datap = (void *)((uintptr_t)buf + sizeof (varpd_persist_header_t)); 426 md5_calc(md5, datap, fsize); 427 if (bcmp(md5, hdr->vph_md5, sizeof (uint8_t) * 16) != 0) { 428 if (munmap(buf, st.st_size) != 0) 429 libvarpd_panic("failed to munmap %p: %d", buf, errno); 430 return (EINVAL); 431 } 432 433 err = nvlist_unpack(datap, fsize, &nvl, 0); 434 if (munmap(buf, st.st_size) != 0) 435 libvarpd_panic("failed to munmap %p: %d", buf, errno); 436 437 if (err != 0) 438 return (EINVAL); 439 440 err = libvarpd_persist_restore_instance(vip, nvl); 441 nvlist_free(nvl); 442 return (err); 443 } 444 445 /* ARGSUSED */ 446 static int 447 libvarpd_check_degrade_cb(varpd_impl_t *vip, datalink_id_t linkid, void *arg) 448 { 449 varpd_instance_t *inst; 450 451 mutex_enter(&vip->vdi_lock); 452 for (inst = avl_first(&vip->vdi_instances); inst != NULL; 453 inst = AVL_NEXT(&vip->vdi_instances, inst)) { 454 if (inst->vri_linkid == linkid) { 455 mutex_exit(&vip->vdi_lock); 456 return (0); 457 } 458 } 459 460 mutex_exit(&vip->vdi_lock); 461 462 (void) libvarpd_overlay_degrade_datalink(vip, linkid, 463 "no varpd instance exists"); 464 return (0); 465 } 466 467 static void 468 libvarpd_check_degrade(varpd_impl_t *vip) 469 { 470 (void) libvarpd_overlay_iter(vip, libvarpd_check_degrade_cb, NULL); 471 } 472 473 int 474 libvarpd_persist_restore(varpd_handle_t *vhp) 475 { 476 int dirfd; 477 int ret = 0; 478 DIR *dirp = NULL; 479 struct dirent *dp; 480 varpd_impl_t *vip = (varpd_impl_t *)vhp; 481 482 VERIFY0(rw_rdlock(&vip->vdi_pfdlock)); 483 if ((dirfd = dup(vip->vdi_persistfd)) < 0) { 484 ret = errno; 485 goto out; 486 } 487 488 if ((dirp = fdopendir(dirfd)) == NULL) { 489 ret = errno; 490 if (close(dirfd) != 0) 491 libvarpd_panic("failed to close dirfd %d: %d", 492 dirfd, errno); 493 goto out; 494 } 495 496 for (;;) { 497 int fd; 498 uint64_t id; 499 char *eptr; 500 struct stat st; 501 502 errno = 0; 503 dp = readdir(dirp); 504 if (dp == NULL) { 505 ret = errno; 506 break; 507 } 508 509 if (strcmp(dp->d_name, ".") == 0 || 510 strcmp(dp->d_name, "..") == 0) 511 continue; 512 513 /* 514 * Leave files that we don't recognize alone. A valid file has 515 * the format `%llu.varpd`. 516 */ 517 errno = 0; 518 id = strtoull(dp->d_name, &eptr, 10); 519 if ((id == 0 && errno == EINVAL) || 520 (id == ULLONG_MAX && errno == ERANGE)) 521 continue; 522 523 if (strcmp(eptr, VARPD_PERSIST_SUFFIX) != 0) 524 continue; 525 526 fd = openat(vip->vdi_persistfd, dp->d_name, O_RDONLY); 527 if (fd < 0) { 528 ret = errno; 529 break; 530 } 531 532 if (fstat(fd, &st) != 0) { 533 ret = errno; 534 break; 535 } 536 537 if (!S_ISREG(st.st_mode)) { 538 if (close(fd) != 0) 539 libvarpd_panic("failed to close fd (%s) %d: " 540 "%d\n", dp->d_name, fd, errno); 541 continue; 542 } 543 544 ret = libvarpd_persist_restore_one(vip, fd); 545 if (close(fd) != 0) 546 libvarpd_panic("failed to close fd (%s) %d: " 547 "%d\n", dp->d_name, fd, errno); 548 /* 549 * This is an invalid file. We'll unlink it to save us this 550 * trouble in the future. 551 */ 552 if (ret != 0) { 553 if (unlinkat(vip->vdi_persistfd, dp->d_name, 0) != 0) { 554 ret = errno; 555 break; 556 } 557 } 558 } 559 560 libvarpd_check_degrade(vip); 561 562 out: 563 if (dirp != NULL) 564 (void) closedir(dirp); 565 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 566 return (ret); 567 } 568 569 int 570 libvarpd_persist_disable(varpd_handle_t *vhp) 571 { 572 varpd_impl_t *vip = (varpd_impl_t *)vhp; 573 574 VERIFY0(rw_wrlock(&vip->vdi_pfdlock)); 575 if (vip->vdi_persistfd == -1) { 576 mutex_exit(&vip->vdi_lock); 577 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 578 return (ENOENT); 579 } 580 if (close(vip->vdi_persistfd) != 0) 581 libvarpd_panic("failed to close persist fd %d: %d", 582 vip->vdi_persistfd, errno); 583 vip->vdi_persistfd = -1; 584 VERIFY0(rw_unlock(&vip->vdi_pfdlock)); 585 return (0); 586 } 587