1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/errno.h> 31 #include <sys/uio.h> 32 #include <sys/buf.h> 33 #include <sys/modctl.h> 34 #include <sys/open.h> 35 #include <sys/file.h> 36 #include <sys/kmem.h> 37 #include <sys/conf.h> 38 #include <sys/cmn_err.h> 39 #include <sys/stat.h> 40 #include <sys/zfs_ioctl.h> 41 #include <sys/zap.h> 42 #include <sys/spa.h> 43 #include <sys/vdev.h> 44 #include <sys/dmu.h> 45 #include <sys/dsl_dir.h> 46 #include <sys/dsl_dataset.h> 47 #include <sys/dsl_prop.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/sunldi.h> 51 #include <sys/policy.h> 52 #include <sys/zone.h> 53 #include <sys/nvpair.h> 54 #include <sys/pathname.h> 55 #include <sys/mount.h> 56 #include <sys/sdt.h> 57 #include <sys/fs/zfs.h> 58 #include <sys/zfs_ctldir.h> 59 60 #include "zfs_namecheck.h" 61 62 extern struct modlfs zfs_modlfs; 63 64 extern void zfs_init(void); 65 extern void zfs_fini(void); 66 67 ldi_ident_t zfs_li = NULL; 68 dev_info_t *zfs_dip; 69 70 typedef int zfs_ioc_func_t(zfs_cmd_t *); 71 typedef int zfs_secpolicy_func_t(const char *, const char *, cred_t *); 72 73 typedef struct zfs_ioc_vec { 74 zfs_ioc_func_t *zvec_func; 75 zfs_secpolicy_func_t *zvec_secpolicy; 76 enum { 77 no_name, 78 pool_name, 79 dataset_name 80 } zvec_namecheck; 81 } zfs_ioc_vec_t; 82 83 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */ 84 void 85 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 86 { 87 const char *newfile; 88 char buf[256]; 89 va_list adx; 90 91 /* 92 * Get rid of annoying "../common/" prefix to filename. 93 */ 94 newfile = strrchr(file, '/'); 95 if (newfile != NULL) { 96 newfile = newfile + 1; /* Get rid of leading / */ 97 } else { 98 newfile = file; 99 } 100 101 va_start(adx, fmt); 102 (void) vsnprintf(buf, sizeof (buf), fmt, adx); 103 va_end(adx); 104 105 /* 106 * To get this data, use the zfs-dprintf probe as so: 107 * dtrace -q -n 'zfs-dprintf \ 108 * /stringof(arg0) == "dbuf.c"/ \ 109 * {printf("%s: %s", stringof(arg1), stringof(arg3))}' 110 * arg0 = file name 111 * arg1 = function name 112 * arg2 = line number 113 * arg3 = message 114 */ 115 DTRACE_PROBE4(zfs__dprintf, 116 char *, newfile, char *, func, int, line, char *, buf); 117 } 118 119 /* 120 * Policy for top-level read operations (list pools). Requires no privileges, 121 * and can be used in the local zone, as there is no associated dataset. 122 */ 123 /* ARGSUSED */ 124 static int 125 zfs_secpolicy_none(const char *unused1, const char *unused2, cred_t *cr) 126 { 127 return (0); 128 } 129 130 /* 131 * Policy for dataset read operations (list children, get statistics). Requires 132 * no privileges, but must be visible in the local zone. 133 */ 134 /* ARGSUSED */ 135 static int 136 zfs_secpolicy_read(const char *dataset, const char *unused, cred_t *cr) 137 { 138 if (INGLOBALZONE(curproc) || 139 zone_dataset_visible(dataset, NULL)) 140 return (0); 141 142 return (ENOENT); 143 } 144 145 static int 146 zfs_dozonecheck(const char *dataset, cred_t *cr) 147 { 148 uint64_t zoned; 149 int writable = 1; 150 151 /* 152 * The dataset must be visible by this zone -- check this first 153 * so they don't see EPERM on something they shouldn't know about. 154 */ 155 if (!INGLOBALZONE(curproc) && 156 !zone_dataset_visible(dataset, &writable)) 157 return (ENOENT); 158 159 if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) 160 return (ENOENT); 161 162 if (INGLOBALZONE(curproc)) { 163 /* 164 * If the fs is zoned, only root can access it from the 165 * global zone. 166 */ 167 if (secpolicy_zfs(cr) && zoned) 168 return (EPERM); 169 } else { 170 /* 171 * If we are in a local zone, the 'zoned' property must be set. 172 */ 173 if (!zoned) 174 return (EPERM); 175 176 /* must be writable by this zone */ 177 if (!writable) 178 return (EPERM); 179 } 180 return (0); 181 } 182 183 /* 184 * Policy for dataset write operations (create children, set properties, etc). 185 * Requires SYS_MOUNT privilege, and must be writable in the local zone. 186 */ 187 /* ARGSUSED */ 188 int 189 zfs_secpolicy_write(const char *dataset, const char *unused, cred_t *cr) 190 { 191 int error; 192 193 if (error = zfs_dozonecheck(dataset, cr)) 194 return (error); 195 196 return (secpolicy_zfs(cr)); 197 } 198 199 /* 200 * Policy for operations that want to write a dataset's parent: 201 * create, destroy, snapshot, clone, restore. 202 */ 203 static int 204 zfs_secpolicy_parent(const char *dataset, const char *unused, cred_t *cr) 205 { 206 char parentname[MAXNAMELEN]; 207 char *cp; 208 209 /* 210 * Remove the @bla or /bla from the end of the name to get the parent. 211 */ 212 (void) strncpy(parentname, dataset, sizeof (parentname)); 213 cp = strrchr(parentname, '@'); 214 if (cp != NULL) { 215 cp[0] = '\0'; 216 } else { 217 cp = strrchr(parentname, '/'); 218 if (cp == NULL) 219 return (ENOENT); 220 cp[0] = '\0'; 221 222 } 223 224 return (zfs_secpolicy_write(parentname, unused, cr)); 225 } 226 227 /* 228 * Policy for dataset write operations (create children, set properties, etc). 229 * Requires SYS_MOUNT privilege, and must be writable in the local zone. 230 */ 231 static int 232 zfs_secpolicy_setprop(const char *dataset, const char *prop, cred_t *cr) 233 { 234 int error; 235 236 if (error = zfs_dozonecheck(dataset, cr)) 237 return (error); 238 239 if (strcmp(prop, "zoned") == 0) { 240 /* 241 * Disallow setting of 'zoned' from within a local zone. 242 */ 243 if (!INGLOBALZONE(curproc)) 244 return (EPERM); 245 } 246 247 return (secpolicy_zfs(cr)); 248 } 249 250 /* 251 * Security policy for setting the quota. This is the same as 252 * zfs_secpolicy_write, except that the local zone may not change the quota at 253 * the zone-property setpoint. 254 */ 255 /* ARGSUSED */ 256 static int 257 zfs_secpolicy_quota(const char *dataset, const char *unused, cred_t *cr) 258 { 259 int error; 260 261 if (error = zfs_dozonecheck(dataset, cr)) 262 return (error); 263 264 if (!INGLOBALZONE(curproc)) { 265 uint64_t zoned; 266 char setpoint[MAXNAMELEN]; 267 int dslen; 268 /* 269 * Unprivileged users are allowed to modify the quota 270 * on things *under* (ie. contained by) the thing they 271 * own. 272 */ 273 if (dsl_prop_get_integer(dataset, "zoned", &zoned, setpoint)) 274 return (EPERM); 275 if (!zoned) /* this shouldn't happen */ 276 return (EPERM); 277 dslen = strlen(dataset); 278 if (dslen <= strlen(setpoint)) 279 return (EPERM); 280 } 281 282 return (secpolicy_zfs(cr)); 283 } 284 285 /* 286 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires 287 * SYS_CONFIG privilege, which is not available in a local zone. 288 */ 289 /* ARGSUSED */ 290 static int 291 zfs_secpolicy_config(const char *unused, const char *unused2, cred_t *cr) 292 { 293 if (secpolicy_sys_config(cr, B_FALSE) != 0) 294 return (EPERM); 295 296 return (0); 297 } 298 299 /* 300 * Policy for fault injection. Requires all privileges. 301 */ 302 /* ARGSUSED */ 303 static int 304 zfs_secpolicy_inject(const char *unused, const char *unused2, cred_t *cr) 305 { 306 return (secpolicy_zinject(cr)); 307 } 308 309 /* 310 * Returns the nvlist as specified by the user in the zfs_cmd_t. 311 */ 312 static int 313 get_config(zfs_cmd_t *zc, nvlist_t **nvp) 314 { 315 char *packed; 316 size_t size; 317 int error; 318 nvlist_t *config = NULL; 319 320 /* 321 * Read in and unpack the user-supplied nvlist. By this point, we know 322 * that the user has the SYS_CONFIG privilege, so allocating arbitrary 323 * sized regions of memory should not be a problem. 324 */ 325 if ((size = zc->zc_config_src_size) == 0) 326 return (EINVAL); 327 328 packed = kmem_alloc(size, KM_SLEEP); 329 330 if ((error = xcopyin((void *)(uintptr_t)zc->zc_config_src, packed, 331 size)) != 0) { 332 kmem_free(packed, size); 333 return (error); 334 } 335 336 if ((error = nvlist_unpack(packed, size, &config, 0)) != 0) { 337 kmem_free(packed, size); 338 return (error); 339 } 340 341 kmem_free(packed, size); 342 343 *nvp = config; 344 return (0); 345 } 346 347 static int 348 zfs_ioc_pool_create(zfs_cmd_t *zc) 349 { 350 int error; 351 nvlist_t *config; 352 353 if ((error = get_config(zc, &config)) != 0) 354 return (error); 355 356 error = spa_create(zc->zc_name, config, zc->zc_root[0] == '\0' ? 357 NULL : zc->zc_root); 358 359 nvlist_free(config); 360 361 return (error); 362 } 363 364 static int 365 zfs_ioc_pool_destroy(zfs_cmd_t *zc) 366 { 367 return (spa_destroy(zc->zc_name)); 368 } 369 370 static int 371 zfs_ioc_pool_import(zfs_cmd_t *zc) 372 { 373 int error; 374 nvlist_t *config; 375 uint64_t guid; 376 377 if ((error = get_config(zc, &config)) != 0) 378 return (error); 379 380 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || 381 guid != zc->zc_guid) 382 error = EINVAL; 383 else 384 error = spa_import(zc->zc_name, config, 385 zc->zc_root[0] == '\0' ? NULL : zc->zc_root); 386 387 nvlist_free(config); 388 389 return (error); 390 } 391 392 static int 393 zfs_ioc_pool_export(zfs_cmd_t *zc) 394 { 395 return (spa_export(zc->zc_name, NULL)); 396 } 397 398 static int 399 zfs_ioc_pool_configs(zfs_cmd_t *zc) 400 { 401 nvlist_t *configs; 402 char *packed = NULL; 403 size_t size = 0; 404 int error; 405 406 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) 407 return (EEXIST); 408 409 VERIFY(nvlist_pack(configs, &packed, &size, NV_ENCODE_NATIVE, 410 KM_SLEEP) == 0); 411 412 if (size > zc->zc_config_dst_size) 413 error = ENOMEM; 414 else 415 error = xcopyout(packed, (void *)(uintptr_t)zc->zc_config_dst, 416 size); 417 418 zc->zc_config_dst_size = size; 419 420 kmem_free(packed, size); 421 nvlist_free(configs); 422 423 return (error); 424 } 425 426 static int 427 zfs_ioc_pool_stats(zfs_cmd_t *zc) 428 { 429 nvlist_t *config; 430 char *packed = NULL; 431 size_t size = 0; 432 int error; 433 int ret = 0; 434 435 error = spa_get_stats(zc->zc_name, &config, zc->zc_root, 436 sizeof (zc->zc_root)); 437 438 if (config != NULL) { 439 VERIFY(nvlist_pack(config, &packed, &size, 440 NV_ENCODE_NATIVE, KM_SLEEP) == 0); 441 442 if (size > zc->zc_config_dst_size) 443 ret = ENOMEM; 444 else if (xcopyout(packed, (void *)(uintptr_t)zc->zc_config_dst, 445 size)) 446 ret = EFAULT; 447 448 zc->zc_config_dst_size = size; 449 450 kmem_free(packed, size); 451 nvlist_free(config); 452 453 /* 454 * The config may be present even if 'error' is non-zero. 455 * In this case we return success, and preserve the real errno 456 * in 'zc_cookie'. 457 */ 458 zc->zc_cookie = error; 459 } else { 460 ret = error; 461 } 462 463 return (ret); 464 } 465 466 /* 467 * Try to import the given pool, returning pool stats as appropriate so that 468 * user land knows which devices are available and overall pool health. 469 */ 470 static int 471 zfs_ioc_pool_tryimport(zfs_cmd_t *zc) 472 { 473 nvlist_t *tryconfig, *config; 474 char *packed = NULL; 475 size_t size = 0; 476 int error; 477 478 if ((error = get_config(zc, &tryconfig)) != 0) 479 return (error); 480 481 config = spa_tryimport(tryconfig); 482 483 nvlist_free(tryconfig); 484 485 if (config == NULL) 486 return (EINVAL); 487 488 VERIFY(nvlist_pack(config, &packed, &size, NV_ENCODE_NATIVE, 489 KM_SLEEP) == 0); 490 491 if (size > zc->zc_config_dst_size) 492 error = ENOMEM; 493 else 494 error = xcopyout(packed, (void *)(uintptr_t)zc->zc_config_dst, 495 size); 496 497 zc->zc_config_dst_size = size; 498 499 kmem_free(packed, size); 500 nvlist_free(config); 501 502 return (error); 503 } 504 505 static int 506 zfs_ioc_pool_scrub(zfs_cmd_t *zc) 507 { 508 spa_t *spa; 509 int error; 510 511 error = spa_open(zc->zc_name, &spa, FTAG); 512 if (error == 0) { 513 error = spa_scrub(spa, zc->zc_cookie, B_FALSE); 514 spa_close(spa, FTAG); 515 } 516 return (error); 517 } 518 519 static int 520 zfs_ioc_pool_freeze(zfs_cmd_t *zc) 521 { 522 spa_t *spa; 523 int error; 524 525 error = spa_open(zc->zc_name, &spa, FTAG); 526 if (error == 0) { 527 spa_freeze(spa); 528 spa_close(spa, FTAG); 529 } 530 return (error); 531 } 532 533 static int 534 zfs_ioc_pool_upgrade(zfs_cmd_t *zc) 535 { 536 spa_t *spa; 537 int error; 538 539 error = spa_open(zc->zc_name, &spa, FTAG); 540 if (error == 0) { 541 spa_upgrade(spa); 542 spa_close(spa, FTAG); 543 } 544 return (error); 545 } 546 547 static int 548 zfs_ioc_vdev_add(zfs_cmd_t *zc) 549 { 550 spa_t *spa; 551 int error; 552 nvlist_t *config; 553 554 error = spa_open(zc->zc_name, &spa, FTAG); 555 if (error != 0) 556 return (error); 557 558 if ((error = get_config(zc, &config)) == 0) { 559 error = spa_vdev_add(spa, config); 560 nvlist_free(config); 561 } 562 563 spa_close(spa, FTAG); 564 return (error); 565 } 566 567 static int 568 zfs_ioc_vdev_remove(zfs_cmd_t *zc) 569 { 570 spa_t *spa; 571 int error; 572 573 error = spa_open(zc->zc_name, &spa, FTAG); 574 if (error != 0) 575 return (error); 576 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); 577 spa_close(spa, FTAG); 578 return (error); 579 } 580 581 static int 582 zfs_ioc_vdev_online(zfs_cmd_t *zc) 583 { 584 spa_t *spa; 585 int error; 586 587 error = spa_open(zc->zc_name, &spa, FTAG); 588 if (error != 0) 589 return (error); 590 error = vdev_online(spa, zc->zc_guid); 591 spa_close(spa, FTAG); 592 return (error); 593 } 594 595 static int 596 zfs_ioc_vdev_offline(zfs_cmd_t *zc) 597 { 598 spa_t *spa; 599 int istmp = zc->zc_cookie; 600 int error; 601 602 error = spa_open(zc->zc_name, &spa, FTAG); 603 if (error != 0) 604 return (error); 605 error = vdev_offline(spa, zc->zc_guid, istmp); 606 spa_close(spa, FTAG); 607 return (error); 608 } 609 610 static int 611 zfs_ioc_vdev_attach(zfs_cmd_t *zc) 612 { 613 spa_t *spa; 614 int replacing = zc->zc_cookie; 615 nvlist_t *config; 616 int error; 617 618 error = spa_open(zc->zc_name, &spa, FTAG); 619 if (error != 0) 620 return (error); 621 622 if ((error = get_config(zc, &config)) == 0) { 623 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing); 624 nvlist_free(config); 625 } 626 627 spa_close(spa, FTAG); 628 return (error); 629 } 630 631 static int 632 zfs_ioc_vdev_detach(zfs_cmd_t *zc) 633 { 634 spa_t *spa; 635 int error; 636 637 error = spa_open(zc->zc_name, &spa, FTAG); 638 if (error != 0) 639 return (error); 640 641 error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE); 642 643 spa_close(spa, FTAG); 644 return (error); 645 } 646 647 static int 648 zfs_ioc_vdev_setpath(zfs_cmd_t *zc) 649 { 650 spa_t *spa; 651 char *path = zc->zc_prop_value; 652 uint64_t guid = zc->zc_guid; 653 int error; 654 655 error = spa_open(zc->zc_name, &spa, FTAG); 656 if (error != 0) 657 return (error); 658 659 error = spa_vdev_setpath(spa, guid, path); 660 661 spa_close(spa, FTAG); 662 return (error); 663 } 664 665 static int 666 zfs_ioc_objset_stats(zfs_cmd_t *zc) 667 { 668 objset_t *os = NULL; 669 int error; 670 nvlist_t *nv; 671 size_t sz; 672 char *buf; 673 674 retry: 675 error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, 676 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 677 if (error != 0) { 678 /* 679 * This is ugly: dmu_objset_open() can return EBUSY if 680 * the objset is held exclusively. Fortunately this hold is 681 * only for a short while, so we retry here. 682 * This avoids user code having to handle EBUSY, 683 * for example for a "zfs list". 684 */ 685 if (error == EBUSY) { 686 delay(1); 687 goto retry; 688 } 689 return (error); 690 } 691 692 dmu_objset_stats(os, &zc->zc_objset_stats); 693 694 if (zc->zc_config_src != NULL && 695 (error = dsl_prop_get_all(os, &nv)) == 0) { 696 VERIFY(nvlist_size(nv, &sz, NV_ENCODE_NATIVE) == 0); 697 if (sz > zc->zc_config_src_size) { 698 zc->zc_config_src_size = sz; 699 error = ENOMEM; 700 } else { 701 buf = kmem_alloc(sz, KM_SLEEP); 702 VERIFY(nvlist_pack(nv, &buf, &sz, 703 NV_ENCODE_NATIVE, 0) == 0); 704 error = xcopyout(buf, 705 (void *)(uintptr_t)zc->zc_config_src, sz); 706 kmem_free(buf, sz); 707 } 708 nvlist_free(nv); 709 } 710 711 if (!error && zc->zc_objset_stats.dds_type == DMU_OST_ZVOL) 712 error = zvol_get_stats(zc, os); 713 714 spa_altroot(dmu_objset_spa(os), zc->zc_root, sizeof (zc->zc_root)); 715 716 dmu_objset_close(os); 717 return (error); 718 } 719 720 static int 721 zfs_ioc_dataset_list_next(zfs_cmd_t *zc) 722 { 723 objset_t *os; 724 int error; 725 char *p; 726 727 retry: 728 error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, 729 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 730 if (error != 0) { 731 /* 732 * This is ugly: dmu_objset_open() can return EBUSY if 733 * the objset is held exclusively. Fortunately this hold is 734 * only for a short while, so we retry here. 735 * This avoids user code having to handle EBUSY, 736 * for example for a "zfs list". 737 */ 738 if (error == EBUSY) { 739 delay(1); 740 goto retry; 741 } 742 if (error == ENOENT) 743 error = ESRCH; 744 return (error); 745 } 746 747 p = strrchr(zc->zc_name, '/'); 748 if (p == NULL || p[1] != '\0') 749 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); 750 p = zc->zc_name + strlen(zc->zc_name); 751 752 do { 753 error = dmu_dir_list_next(os, 754 sizeof (zc->zc_name) - (p - zc->zc_name), p, 755 NULL, &zc->zc_cookie); 756 if (error == ENOENT) 757 error = ESRCH; 758 } while (error == 0 && !INGLOBALZONE(curproc) && 759 !zone_dataset_visible(zc->zc_name, NULL)); 760 761 /* 762 * If it's a hidden dataset (ie. with a '$' in its name), don't 763 * try to get stats for it. Userland will skip over it. 764 */ 765 if (error == 0 && strchr(zc->zc_name, '$') == NULL) 766 error = zfs_ioc_objset_stats(zc); /* fill in the stats */ 767 768 dmu_objset_close(os); 769 return (error); 770 } 771 772 static int 773 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) 774 { 775 objset_t *os; 776 int error; 777 778 retry: 779 error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, 780 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 781 if (error != 0) { 782 /* 783 * This is ugly: dmu_objset_open() can return EBUSY if 784 * the objset is held exclusively. Fortunately this hold is 785 * only for a short while, so we retry here. 786 * This avoids user code having to handle EBUSY, 787 * for example for a "zfs list". 788 */ 789 if (error == EBUSY) { 790 delay(1); 791 goto retry; 792 } 793 if (error == ENOENT) 794 error = ESRCH; 795 return (error); 796 } 797 798 /* 799 * A dataset name of maximum length cannot have any snapshots, 800 * so exit immediately. 801 */ 802 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { 803 dmu_objset_close(os); 804 return (ESRCH); 805 } 806 807 error = dmu_snapshot_list_next(os, 808 sizeof (zc->zc_name) - strlen(zc->zc_name), 809 zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie); 810 if (error == ENOENT) 811 error = ESRCH; 812 813 if (error == 0) 814 error = zfs_ioc_objset_stats(zc); /* fill in the stats */ 815 816 dmu_objset_close(os); 817 return (error); 818 } 819 820 static int 821 zfs_ioc_set_prop(zfs_cmd_t *zc) 822 { 823 return (dsl_prop_set(zc->zc_name, zc->zc_prop_name, 824 zc->zc_intsz, zc->zc_numints, zc->zc_prop_value)); 825 } 826 827 static int 828 zfs_ioc_set_quota(zfs_cmd_t *zc) 829 { 830 return (dsl_dir_set_quota(zc->zc_name, zc->zc_cookie)); 831 } 832 833 static int 834 zfs_ioc_set_reservation(zfs_cmd_t *zc) 835 { 836 return (dsl_dir_set_reservation(zc->zc_name, zc->zc_cookie)); 837 } 838 839 static int 840 zfs_ioc_set_volsize(zfs_cmd_t *zc) 841 { 842 return (zvol_set_volsize(zc)); 843 } 844 845 static int 846 zfs_ioc_set_volblocksize(zfs_cmd_t *zc) 847 { 848 return (zvol_set_volblocksize(zc)); 849 } 850 851 static int 852 zfs_ioc_create_minor(zfs_cmd_t *zc) 853 { 854 return (zvol_create_minor(zc)); 855 } 856 857 static int 858 zfs_ioc_remove_minor(zfs_cmd_t *zc) 859 { 860 return (zvol_remove_minor(zc)); 861 } 862 863 /* 864 * Search the vfs list for a specified resource. Returns a pointer to it 865 * or NULL if no suitable entry is found. The caller of this routine 866 * is responsible for releasing the returned vfs pointer. 867 */ 868 static vfs_t * 869 zfs_get_vfs(const char *resource) 870 { 871 struct vfs *vfsp; 872 struct vfs *vfs_found = NULL; 873 874 vfs_list_read_lock(); 875 vfsp = rootvfs; 876 do { 877 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) { 878 VFS_HOLD(vfsp); 879 vfs_found = vfsp; 880 break; 881 } 882 vfsp = vfsp->vfs_next; 883 } while (vfsp != rootvfs); 884 vfs_list_unlock(); 885 return (vfs_found); 886 } 887 888 static void 889 zfs_create_cb(objset_t *os, void *arg, dmu_tx_t *tx) 890 { 891 zfs_cmd_t *zc = arg; 892 zfs_create_fs(os, (cred_t *)(uintptr_t)zc->zc_cred, tx); 893 } 894 895 static int 896 zfs_ioc_create(zfs_cmd_t *zc) 897 { 898 objset_t *clone; 899 int error = 0; 900 void (*cbfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 901 dmu_objset_type_t type = zc->zc_objset_type; 902 903 switch (type) { 904 905 case DMU_OST_ZFS: 906 cbfunc = zfs_create_cb; 907 break; 908 909 case DMU_OST_ZVOL: 910 cbfunc = zvol_create_cb; 911 break; 912 913 default: 914 cbfunc = NULL; 915 } 916 if (strchr(zc->zc_name, '@')) 917 return (EINVAL); 918 919 if (zc->zc_filename[0] != '\0') { 920 /* 921 * We're creating a clone of an existing snapshot. 922 */ 923 zc->zc_filename[sizeof (zc->zc_filename) - 1] = '\0'; 924 if (dataset_namecheck(zc->zc_filename, NULL, NULL) != 0) 925 return (EINVAL); 926 927 error = dmu_objset_open(zc->zc_filename, type, 928 DS_MODE_STANDARD | DS_MODE_READONLY, &clone); 929 if (error) 930 return (error); 931 error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL); 932 dmu_objset_close(clone); 933 } else { 934 if (cbfunc == NULL) 935 return (EINVAL); 936 /* 937 * We're creating a new dataset. 938 */ 939 if (type == DMU_OST_ZVOL) { 940 941 if ((error = zvol_check_volblocksize(zc)) != 0) 942 return (error); 943 944 if ((error = zvol_check_volsize(zc, 945 zc->zc_volblocksize)) != 0) 946 return (error); 947 } 948 error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc, zc); 949 } 950 return (error); 951 } 952 953 static int 954 zfs_ioc_snapshot(zfs_cmd_t *zc) 955 { 956 if (snapshot_namecheck(zc->zc_prop_value, NULL, NULL) != 0) 957 return (EINVAL); 958 return (dmu_objset_snapshot(zc->zc_name, 959 zc->zc_prop_value, zc->zc_cookie)); 960 } 961 962 static int 963 zfs_unmount_snap(char *name, void *arg) 964 { 965 char *snapname = arg; 966 char *cp; 967 vfs_t *vfsp = NULL; 968 969 /* 970 * Snapshots (which are under .zfs control) must be unmounted 971 * before they can be destroyed. 972 */ 973 974 if (snapname) { 975 (void) strcat(name, "@"); 976 (void) strcat(name, snapname); 977 vfsp = zfs_get_vfs(name); 978 cp = strchr(name, '@'); 979 *cp = '\0'; 980 } else if (strchr(name, '@')) { 981 vfsp = zfs_get_vfs(name); 982 } 983 984 if (vfsp) { 985 /* 986 * Always force the unmount for snapshots. 987 */ 988 int flag = MS_FORCE; 989 int err; 990 991 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { 992 VFS_RELE(vfsp); 993 return (err); 994 } 995 VFS_RELE(vfsp); 996 if ((err = dounmount(vfsp, flag, kcred)) != 0) 997 return (err); 998 } 999 return (0); 1000 } 1001 1002 static int 1003 zfs_ioc_destroy_snaps(zfs_cmd_t *zc) 1004 { 1005 int err; 1006 1007 if (snapshot_namecheck(zc->zc_prop_value, NULL, NULL) != 0) 1008 return (EINVAL); 1009 err = dmu_objset_find(zc->zc_name, 1010 zfs_unmount_snap, zc->zc_prop_value, DS_FIND_CHILDREN); 1011 if (err) 1012 return (err); 1013 return (dmu_snapshots_destroy(zc->zc_name, zc->zc_prop_value)); 1014 } 1015 1016 static int 1017 zfs_ioc_destroy(zfs_cmd_t *zc) 1018 { 1019 if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { 1020 int err = zfs_unmount_snap(zc->zc_name, NULL); 1021 if (err) 1022 return (err); 1023 } 1024 1025 return (dmu_objset_destroy(zc->zc_name)); 1026 } 1027 1028 static int 1029 zfs_ioc_rollback(zfs_cmd_t *zc) 1030 { 1031 return (dmu_objset_rollback(zc->zc_name)); 1032 } 1033 1034 static int 1035 zfs_ioc_rename(zfs_cmd_t *zc) 1036 { 1037 zc->zc_prop_value[sizeof (zc->zc_prop_value) - 1] = '\0'; 1038 if (dataset_namecheck(zc->zc_prop_value, NULL, NULL) != 0) 1039 return (EINVAL); 1040 1041 if (strchr(zc->zc_name, '@') != NULL && 1042 zc->zc_objset_type == DMU_OST_ZFS) { 1043 int err = zfs_unmount_snap(zc->zc_name, NULL); 1044 if (err) 1045 return (err); 1046 } 1047 1048 return (dmu_objset_rename(zc->zc_name, zc->zc_prop_value)); 1049 } 1050 1051 static int 1052 zfs_ioc_recvbackup(zfs_cmd_t *zc) 1053 { 1054 file_t *fp; 1055 int error, fd; 1056 1057 fd = zc->zc_cookie; 1058 fp = getf(fd); 1059 if (fp == NULL) 1060 return (EBADF); 1061 error = dmu_recvbackup(zc->zc_filename, &zc->zc_begin_record, 1062 &zc->zc_cookie, (boolean_t)zc->zc_numints, fp->f_vnode, 1063 fp->f_offset); 1064 releasef(fd); 1065 return (error); 1066 } 1067 1068 static int 1069 zfs_ioc_sendbackup(zfs_cmd_t *zc) 1070 { 1071 objset_t *fromsnap = NULL; 1072 objset_t *tosnap; 1073 file_t *fp; 1074 int error; 1075 1076 error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, 1077 DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap); 1078 if (error) 1079 return (error); 1080 1081 if (zc->zc_prop_value[0] != '\0') { 1082 error = dmu_objset_open(zc->zc_prop_value, DMU_OST_ANY, 1083 DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap); 1084 if (error) { 1085 dmu_objset_close(tosnap); 1086 return (error); 1087 } 1088 } 1089 1090 fp = getf(zc->zc_cookie); 1091 if (fp == NULL) { 1092 dmu_objset_close(tosnap); 1093 if (fromsnap) 1094 dmu_objset_close(fromsnap); 1095 return (EBADF); 1096 } 1097 1098 error = dmu_sendbackup(tosnap, fromsnap, fp->f_vnode); 1099 1100 releasef(zc->zc_cookie); 1101 if (fromsnap) 1102 dmu_objset_close(fromsnap); 1103 dmu_objset_close(tosnap); 1104 return (error); 1105 } 1106 1107 static int 1108 zfs_ioc_inject_fault(zfs_cmd_t *zc) 1109 { 1110 int id, error; 1111 1112 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, 1113 &zc->zc_inject_record); 1114 1115 if (error == 0) 1116 zc->zc_guid = (uint64_t)id; 1117 1118 return (error); 1119 } 1120 1121 static int 1122 zfs_ioc_clear_fault(zfs_cmd_t *zc) 1123 { 1124 return (zio_clear_fault((int)zc->zc_guid)); 1125 } 1126 1127 static int 1128 zfs_ioc_inject_list_next(zfs_cmd_t *zc) 1129 { 1130 int id = (int)zc->zc_guid; 1131 int error; 1132 1133 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), 1134 &zc->zc_inject_record); 1135 1136 zc->zc_guid = id; 1137 1138 return (error); 1139 } 1140 1141 static int 1142 zfs_ioc_error_log(zfs_cmd_t *zc) 1143 { 1144 spa_t *spa; 1145 int error; 1146 size_t count = (size_t)zc->zc_config_dst_size; 1147 1148 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) 1149 return (error); 1150 1151 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_config_dst, 1152 &count); 1153 if (error == 0) 1154 zc->zc_config_dst_size = count; 1155 else 1156 zc->zc_config_dst_size = spa_get_errlog_size(spa); 1157 1158 spa_close(spa, FTAG); 1159 1160 return (error); 1161 } 1162 1163 static int 1164 zfs_ioc_clear(zfs_cmd_t *zc) 1165 { 1166 spa_t *spa; 1167 vdev_t *vd; 1168 int error; 1169 1170 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) 1171 return (error); 1172 1173 spa_config_enter(spa, RW_WRITER, FTAG); 1174 1175 if (zc->zc_prop_value[0] == '\0') 1176 vd = NULL; 1177 else if ((vd = spa_lookup_by_guid(spa, zc->zc_guid)) == NULL) { 1178 spa_config_exit(spa, FTAG); 1179 spa_close(spa, FTAG); 1180 return (ENODEV); 1181 } 1182 1183 vdev_clear(spa, vd); 1184 1185 spa_config_exit(spa, FTAG); 1186 1187 spa_close(spa, FTAG); 1188 1189 return (0); 1190 } 1191 1192 static int 1193 zfs_ioc_bookmark_name(zfs_cmd_t *zc) 1194 { 1195 spa_t *spa; 1196 int error; 1197 1198 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) 1199 return (error); 1200 1201 error = spa_bookmark_name(spa, &zc->zc_bookmark, 1202 zc->zc_prop_name, sizeof (zc->zc_prop_name), zc->zc_prop_value, 1203 sizeof (zc->zc_prop_value), zc->zc_filename, 1204 sizeof (zc->zc_filename)); 1205 1206 spa_close(spa, FTAG); 1207 1208 return (error); 1209 } 1210 1211 static int 1212 zfs_ioc_promote(zfs_cmd_t *zc) 1213 { 1214 char *cp; 1215 1216 /* 1217 * We don't need to unmount *all* the origin fs's snapshots, but 1218 * it's easier. 1219 */ 1220 cp = strchr(zc->zc_prop_value, '@'); 1221 if (cp) 1222 *cp = '\0'; 1223 (void) dmu_objset_find(zc->zc_prop_value, 1224 zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); 1225 return (dsl_dataset_promote(zc->zc_name)); 1226 } 1227 1228 static zfs_ioc_vec_t zfs_ioc_vec[] = { 1229 { zfs_ioc_pool_create, zfs_secpolicy_config, pool_name }, 1230 { zfs_ioc_pool_destroy, zfs_secpolicy_config, pool_name }, 1231 { zfs_ioc_pool_import, zfs_secpolicy_config, pool_name }, 1232 { zfs_ioc_pool_export, zfs_secpolicy_config, pool_name }, 1233 { zfs_ioc_pool_configs, zfs_secpolicy_none, no_name }, 1234 { zfs_ioc_pool_stats, zfs_secpolicy_read, pool_name }, 1235 { zfs_ioc_pool_tryimport, zfs_secpolicy_config, no_name }, 1236 { zfs_ioc_pool_scrub, zfs_secpolicy_config, pool_name }, 1237 { zfs_ioc_pool_freeze, zfs_secpolicy_config, no_name }, 1238 { zfs_ioc_pool_upgrade, zfs_secpolicy_config, pool_name }, 1239 { zfs_ioc_vdev_add, zfs_secpolicy_config, pool_name }, 1240 { zfs_ioc_vdev_remove, zfs_secpolicy_config, pool_name }, 1241 { zfs_ioc_vdev_online, zfs_secpolicy_config, pool_name }, 1242 { zfs_ioc_vdev_offline, zfs_secpolicy_config, pool_name }, 1243 { zfs_ioc_vdev_attach, zfs_secpolicy_config, pool_name }, 1244 { zfs_ioc_vdev_detach, zfs_secpolicy_config, pool_name }, 1245 { zfs_ioc_vdev_setpath, zfs_secpolicy_config, pool_name }, 1246 { zfs_ioc_objset_stats, zfs_secpolicy_read, dataset_name }, 1247 { zfs_ioc_dataset_list_next, zfs_secpolicy_read, dataset_name }, 1248 { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, dataset_name }, 1249 { zfs_ioc_set_prop, zfs_secpolicy_setprop, dataset_name }, 1250 { zfs_ioc_set_quota, zfs_secpolicy_quota, dataset_name }, 1251 { zfs_ioc_set_reservation, zfs_secpolicy_write, dataset_name }, 1252 { zfs_ioc_set_volsize, zfs_secpolicy_config, dataset_name }, 1253 { zfs_ioc_set_volblocksize, zfs_secpolicy_config, dataset_name }, 1254 { zfs_ioc_create_minor, zfs_secpolicy_config, dataset_name }, 1255 { zfs_ioc_remove_minor, zfs_secpolicy_config, dataset_name }, 1256 { zfs_ioc_create, zfs_secpolicy_parent, dataset_name }, 1257 { zfs_ioc_destroy, zfs_secpolicy_parent, dataset_name }, 1258 { zfs_ioc_rollback, zfs_secpolicy_write, dataset_name }, 1259 { zfs_ioc_rename, zfs_secpolicy_write, dataset_name }, 1260 { zfs_ioc_recvbackup, zfs_secpolicy_write, dataset_name }, 1261 { zfs_ioc_sendbackup, zfs_secpolicy_write, dataset_name }, 1262 { zfs_ioc_inject_fault, zfs_secpolicy_inject, no_name }, 1263 { zfs_ioc_clear_fault, zfs_secpolicy_inject, no_name }, 1264 { zfs_ioc_inject_list_next, zfs_secpolicy_inject, no_name }, 1265 { zfs_ioc_error_log, zfs_secpolicy_inject, pool_name }, 1266 { zfs_ioc_clear, zfs_secpolicy_config, pool_name }, 1267 { zfs_ioc_bookmark_name, zfs_secpolicy_inject, pool_name }, 1268 { zfs_ioc_promote, zfs_secpolicy_write, dataset_name }, 1269 { zfs_ioc_destroy_snaps, zfs_secpolicy_write, dataset_name }, 1270 { zfs_ioc_snapshot, zfs_secpolicy_write, dataset_name } 1271 }; 1272 1273 static int 1274 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1275 { 1276 zfs_cmd_t *zc; 1277 uint_t vec; 1278 int error, rc; 1279 1280 if (getminor(dev) != 0) 1281 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); 1282 1283 vec = cmd - ZFS_IOC; 1284 1285 if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) 1286 return (EINVAL); 1287 1288 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); 1289 1290 error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t)); 1291 1292 if (error == 0) { 1293 zc->zc_cred = (uintptr_t)cr; 1294 zc->zc_dev = dev; 1295 error = zfs_ioc_vec[vec].zvec_secpolicy(zc->zc_name, 1296 zc->zc_prop_name, cr); 1297 } 1298 1299 /* 1300 * Ensure that all pool/dataset names are valid before we pass down to 1301 * the lower layers. 1302 */ 1303 if (error == 0) { 1304 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; 1305 switch (zfs_ioc_vec[vec].zvec_namecheck) { 1306 case pool_name: 1307 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) 1308 error = EINVAL; 1309 break; 1310 1311 case dataset_name: 1312 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) 1313 error = EINVAL; 1314 break; 1315 } 1316 } 1317 1318 if (error == 0) 1319 error = zfs_ioc_vec[vec].zvec_func(zc); 1320 1321 rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t)); 1322 if (error == 0) 1323 error = rc; 1324 1325 kmem_free(zc, sizeof (zfs_cmd_t)); 1326 return (error); 1327 } 1328 1329 static int 1330 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1331 { 1332 if (cmd != DDI_ATTACH) 1333 return (DDI_FAILURE); 1334 1335 if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0, 1336 DDI_PSEUDO, 0) == DDI_FAILURE) 1337 return (DDI_FAILURE); 1338 1339 zfs_dip = dip; 1340 1341 ddi_report_dev(dip); 1342 1343 return (DDI_SUCCESS); 1344 } 1345 1346 static int 1347 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1348 { 1349 if (spa_busy() || zfs_busy() || zvol_busy()) 1350 return (DDI_FAILURE); 1351 1352 if (cmd != DDI_DETACH) 1353 return (DDI_FAILURE); 1354 1355 zfs_dip = NULL; 1356 1357 ddi_prop_remove_all(dip); 1358 ddi_remove_minor_node(dip, NULL); 1359 1360 return (DDI_SUCCESS); 1361 } 1362 1363 /*ARGSUSED*/ 1364 static int 1365 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1366 { 1367 switch (infocmd) { 1368 case DDI_INFO_DEVT2DEVINFO: 1369 *result = zfs_dip; 1370 return (DDI_SUCCESS); 1371 1372 case DDI_INFO_DEVT2INSTANCE: 1373 *result = (void *)0; 1374 return (DDI_SUCCESS); 1375 } 1376 1377 return (DDI_FAILURE); 1378 } 1379 1380 /* 1381 * OK, so this is a little weird. 1382 * 1383 * /dev/zfs is the control node, i.e. minor 0. 1384 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0. 1385 * 1386 * /dev/zfs has basically nothing to do except serve up ioctls, 1387 * so most of the standard driver entry points are in zvol.c. 1388 */ 1389 static struct cb_ops zfs_cb_ops = { 1390 zvol_open, /* open */ 1391 zvol_close, /* close */ 1392 zvol_strategy, /* strategy */ 1393 nodev, /* print */ 1394 nodev, /* dump */ 1395 zvol_read, /* read */ 1396 zvol_write, /* write */ 1397 zfsdev_ioctl, /* ioctl */ 1398 nodev, /* devmap */ 1399 nodev, /* mmap */ 1400 nodev, /* segmap */ 1401 nochpoll, /* poll */ 1402 ddi_prop_op, /* prop_op */ 1403 NULL, /* streamtab */ 1404 D_NEW | D_MP | D_64BIT, /* Driver compatibility flag */ 1405 CB_REV, /* version */ 1406 zvol_aread, /* async read */ 1407 zvol_awrite, /* async write */ 1408 }; 1409 1410 static struct dev_ops zfs_dev_ops = { 1411 DEVO_REV, /* version */ 1412 0, /* refcnt */ 1413 zfs_info, /* info */ 1414 nulldev, /* identify */ 1415 nulldev, /* probe */ 1416 zfs_attach, /* attach */ 1417 zfs_detach, /* detach */ 1418 nodev, /* reset */ 1419 &zfs_cb_ops, /* driver operations */ 1420 NULL /* no bus operations */ 1421 }; 1422 1423 static struct modldrv zfs_modldrv = { 1424 &mod_driverops, "ZFS storage pool version 1", &zfs_dev_ops 1425 }; 1426 1427 static struct modlinkage modlinkage = { 1428 MODREV_1, 1429 (void *)&zfs_modlfs, 1430 (void *)&zfs_modldrv, 1431 NULL 1432 }; 1433 1434 int 1435 _init(void) 1436 { 1437 int error; 1438 1439 spa_init(FREAD | FWRITE); 1440 zfs_init(); 1441 zvol_init(); 1442 1443 if ((error = mod_install(&modlinkage)) != 0) { 1444 zvol_fini(); 1445 zfs_fini(); 1446 spa_fini(); 1447 return (error); 1448 } 1449 1450 error = ldi_ident_from_mod(&modlinkage, &zfs_li); 1451 ASSERT(error == 0); 1452 1453 return (0); 1454 } 1455 1456 int 1457 _fini(void) 1458 { 1459 int error; 1460 1461 if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled) 1462 return (EBUSY); 1463 1464 if ((error = mod_remove(&modlinkage)) != 0) 1465 return (error); 1466 1467 zvol_fini(); 1468 zfs_fini(); 1469 spa_fini(); 1470 1471 ldi_ident_release(zfs_li); 1472 zfs_li = NULL; 1473 1474 return (error); 1475 } 1476 1477 int 1478 _info(struct modinfo *modinfop) 1479 { 1480 return (mod_info(&modlinkage, modinfop)); 1481 } 1482