1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/errno.h> 32 #include <sys/uio.h> 33 #include <sys/buf.h> 34 #include <sys/modctl.h> 35 #include <sys/open.h> 36 #include <sys/file.h> 37 #include <sys/kmem.h> 38 #include <sys/conf.h> 39 #include <sys/cmn_err.h> 40 #include <sys/stat.h> 41 #include <sys/zfs_ioctl.h> 42 #include <sys/zap.h> 43 #include <sys/spa.h> 44 #include <sys/vdev.h> 45 #include <sys/dmu.h> 46 #include <sys/dsl_dir.h> 47 #include <sys/dsl_dataset.h> 48 #include <sys/dsl_prop.h> 49 #include <sys/ddi.h> 50 #include <sys/sunddi.h> 51 #include <sys/sunldi.h> 52 #include <sys/policy.h> 53 #include <sys/zone.h> 54 #include <sys/nvpair.h> 55 #include <sys/pathname.h> 56 #include <sys/mount.h> 57 #include <sys/sdt.h> 58 #include <sys/fs/zfs.h> 59 #include <sys/zfs_ctldir.h> 60 61 #include "zfs_namecheck.h" 62 63 extern struct modlfs zfs_modlfs; 64 65 extern void zfs_init(void); 66 extern void zfs_fini(void); 67 68 ldi_ident_t zfs_li = NULL; 69 dev_info_t *zfs_dip; 70 71 typedef int zfs_ioc_func_t(zfs_cmd_t *); 72 typedef int zfs_secpolicy_func_t(const char *, const char *, cred_t *); 73 74 typedef struct zfs_ioc_vec { 75 zfs_ioc_func_t *zvec_func; 76 zfs_secpolicy_func_t *zvec_secpolicy; 77 enum { 78 no_name, 79 pool_name, 80 dataset_name 81 } zvec_namecheck; 82 } zfs_ioc_vec_t; 83 84 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */ 85 void 86 __dprintf(const char *file, const char *func, int line, const char *fmt, ...) 87 { 88 const char *newfile; 89 char buf[256]; 90 va_list adx; 91 92 /* 93 * Get rid of annoying "../common/" prefix to filename. 94 */ 95 newfile = strrchr(file, '/'); 96 if (newfile != NULL) { 97 newfile = newfile + 1; /* Get rid of leading / */ 98 } else { 99 newfile = file; 100 } 101 102 va_start(adx, fmt); 103 (void) vsnprintf(buf, sizeof (buf), fmt, adx); 104 va_end(adx); 105 106 /* 107 * To get this data, use the zfs-dprintf probe as so: 108 * dtrace -q -n 'zfs-dprintf \ 109 * /stringof(arg0) == "dbuf.c"/ \ 110 * {printf("%s: %s", stringof(arg1), stringof(arg3))}' 111 * arg0 = file name 112 * arg1 = function name 113 * arg2 = line number 114 * arg3 = message 115 */ 116 DTRACE_PROBE4(zfs__dprintf, 117 char *, newfile, char *, func, int, line, char *, buf); 118 } 119 120 /* 121 * Policy for top-level read operations (list pools). Requires no privileges, 122 * and can be used in the local zone, as there is no associated dataset. 123 */ 124 /* ARGSUSED */ 125 static int 126 zfs_secpolicy_none(const char *unused1, const char *unused2, cred_t *cr) 127 { 128 return (0); 129 } 130 131 /* 132 * Policy for dataset read operations (list children, get statistics). Requires 133 * no privileges, but must be visible in the local zone. 134 */ 135 /* ARGSUSED */ 136 static int 137 zfs_secpolicy_read(const char *dataset, const char *unused, cred_t *cr) 138 { 139 if (INGLOBALZONE(curproc) || 140 zone_dataset_visible(dataset, NULL)) 141 return (0); 142 143 return (ENOENT); 144 } 145 146 static int 147 zfs_dozonecheck(const char *dataset, cred_t *cr) 148 { 149 uint64_t zoned; 150 int writable = 1; 151 152 /* 153 * The dataset must be visible by this zone -- check this first 154 * so they don't see EPERM on something they shouldn't know about. 155 */ 156 if (!INGLOBALZONE(curproc) && 157 !zone_dataset_visible(dataset, &writable)) 158 return (ENOENT); 159 160 if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) 161 return (ENOENT); 162 163 if (INGLOBALZONE(curproc)) { 164 /* 165 * If the fs is zoned, only root can access it from the 166 * global zone. 167 */ 168 if (secpolicy_zfs(cr) && zoned) 169 return (EPERM); 170 } else { 171 /* 172 * If we are in a local zone, the 'zoned' property must be set. 173 */ 174 if (!zoned) 175 return (EPERM); 176 177 /* must be writable by this zone */ 178 if (!writable) 179 return (EPERM); 180 } 181 return (0); 182 } 183 184 /* 185 * Policy for dataset write operations (create children, set properties, etc). 186 * Requires SYS_MOUNT privilege, and must be writable in the local zone. 187 */ 188 /* ARGSUSED */ 189 int 190 zfs_secpolicy_write(const char *dataset, const char *unused, cred_t *cr) 191 { 192 int error; 193 194 if (error = zfs_dozonecheck(dataset, cr)) 195 return (error); 196 197 return (secpolicy_zfs(cr)); 198 } 199 200 /* 201 * Policy for operations that want to write a dataset's parent: 202 * create, destroy, snapshot, clone, restore. 203 */ 204 static int 205 zfs_secpolicy_parent(const char *dataset, const char *unused, cred_t *cr) 206 { 207 char parentname[MAXNAMELEN]; 208 char *cp; 209 210 /* 211 * Remove the @bla or /bla from the end of the name to get the parent. 212 */ 213 (void) strncpy(parentname, dataset, sizeof (parentname)); 214 cp = strrchr(parentname, '@'); 215 if (cp != NULL) { 216 cp[0] = '\0'; 217 } else { 218 cp = strrchr(parentname, '/'); 219 if (cp == NULL) 220 return (ENOENT); 221 cp[0] = '\0'; 222 223 } 224 225 return (zfs_secpolicy_write(parentname, unused, cr)); 226 } 227 228 /* 229 * Policy for dataset write operations (create children, set properties, etc). 230 * Requires SYS_MOUNT privilege, and must be writable in the local zone. 231 */ 232 static int 233 zfs_secpolicy_setprop(const char *dataset, const char *prop, cred_t *cr) 234 { 235 int error; 236 237 if (error = zfs_dozonecheck(dataset, cr)) 238 return (error); 239 240 if (strcmp(prop, "zoned") == 0) { 241 /* 242 * Disallow setting of 'zoned' from within a local zone. 243 */ 244 if (!INGLOBALZONE(curproc)) 245 return (EPERM); 246 } 247 248 return (secpolicy_zfs(cr)); 249 } 250 251 /* 252 * Security policy for setting the quota. This is the same as 253 * zfs_secpolicy_write, except that the local zone may not change the quota at 254 * the zone-property setpoint. 255 */ 256 /* ARGSUSED */ 257 static int 258 zfs_secpolicy_quota(const char *dataset, const char *unused, cred_t *cr) 259 { 260 int error; 261 262 if (error = zfs_dozonecheck(dataset, cr)) 263 return (error); 264 265 if (!INGLOBALZONE(curproc)) { 266 uint64_t zoned; 267 char setpoint[MAXNAMELEN]; 268 int dslen; 269 /* 270 * Unprivileged users are allowed to modify the quota 271 * on things *under* (ie. contained by) the thing they 272 * own. 273 */ 274 if (dsl_prop_get_integer(dataset, "zoned", &zoned, setpoint)) 275 return (EPERM); 276 if (!zoned) /* this shouldn't happen */ 277 return (EPERM); 278 dslen = strlen(dataset); 279 if (dslen <= strlen(setpoint)) 280 return (EPERM); 281 } 282 283 return (secpolicy_zfs(cr)); 284 } 285 286 /* 287 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires 288 * SYS_CONFIG privilege, which is not available in a local zone. 289 */ 290 /* ARGSUSED */ 291 static int 292 zfs_secpolicy_config(const char *unused, const char *unused2, cred_t *cr) 293 { 294 if (secpolicy_sys_config(cr, B_FALSE) != 0) 295 return (EPERM); 296 297 return (0); 298 } 299 300 /* 301 * Returns the nvlist as specified by the user in the zfs_cmd_t. 302 */ 303 static int 304 get_config(zfs_cmd_t *zc, nvlist_t **nvp) 305 { 306 char *packed; 307 size_t size; 308 int error; 309 nvlist_t *config = NULL; 310 311 /* 312 * Read in and unpack the user-supplied nvlist. By this point, we know 313 * that the user has the SYS_CONFIG privilege, so allocating arbitrary 314 * sized regions of memory should not be a problem. 315 */ 316 if ((size = zc->zc_config_src_size) == 0) 317 return (EINVAL); 318 319 packed = kmem_alloc(size, KM_SLEEP); 320 321 if ((error = xcopyin((void *)(uintptr_t)zc->zc_config_src, packed, 322 size)) != 0) { 323 kmem_free(packed, size); 324 return (error); 325 } 326 327 if ((error = nvlist_unpack(packed, size, &config, 0)) != 0) { 328 kmem_free(packed, size); 329 return (error); 330 } 331 332 kmem_free(packed, size); 333 334 *nvp = config; 335 return (0); 336 } 337 338 static int 339 zfs_ioc_pool_create(zfs_cmd_t *zc) 340 { 341 int error; 342 nvlist_t *config; 343 344 if ((error = get_config(zc, &config)) != 0) 345 return (error); 346 347 error = spa_create(zc->zc_name, config, zc->zc_root[0] == '\0' ? 348 NULL : zc->zc_root); 349 350 nvlist_free(config); 351 352 return (error); 353 } 354 355 static int 356 zfs_ioc_pool_destroy(zfs_cmd_t *zc) 357 { 358 return (spa_destroy(zc->zc_name)); 359 } 360 361 static int 362 zfs_ioc_pool_import(zfs_cmd_t *zc) 363 { 364 int error; 365 nvlist_t *config; 366 uint64_t guid; 367 368 if ((error = get_config(zc, &config)) != 0) 369 return (error); 370 371 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || 372 guid != zc->zc_pool_guid) 373 error = EINVAL; 374 else 375 error = spa_import(zc->zc_name, config, 376 zc->zc_root[0] == '\0' ? NULL : zc->zc_root); 377 378 nvlist_free(config); 379 380 return (error); 381 } 382 383 static int 384 zfs_ioc_pool_export(zfs_cmd_t *zc) 385 { 386 return (spa_export(zc->zc_name)); 387 } 388 389 static int 390 zfs_ioc_pool_configs(zfs_cmd_t *zc) 391 { 392 nvlist_t *configs; 393 char *packed = NULL; 394 size_t size = 0; 395 int error; 396 397 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) 398 return (EEXIST); 399 400 VERIFY(nvlist_pack(configs, &packed, &size, NV_ENCODE_NATIVE, 0) == 0); 401 402 if (size > zc->zc_config_dst_size) 403 error = ENOMEM; 404 else 405 error = xcopyout(packed, (void *)(uintptr_t)zc->zc_config_dst, 406 size); 407 408 zc->zc_config_dst_size = size; 409 410 kmem_free(packed, size); 411 nvlist_free(configs); 412 413 return (error); 414 } 415 416 static int 417 zfs_ioc_pool_guid(zfs_cmd_t *zc) 418 { 419 spa_t *spa; 420 int error; 421 422 error = spa_open(zc->zc_name, &spa, FTAG); 423 if (error == 0) { 424 zc->zc_pool_guid = spa_guid(spa); 425 spa_close(spa, FTAG); 426 } 427 return (error); 428 } 429 430 static int 431 zfs_ioc_pool_stats(zfs_cmd_t *zc) 432 { 433 nvlist_t *config; 434 char *packed = NULL; 435 size_t size = 0; 436 int error; 437 438 error = spa_get_stats(zc->zc_name, &config); 439 440 if (config != NULL) { 441 VERIFY(nvlist_pack(config, &packed, &size, 442 NV_ENCODE_NATIVE, 0) == 0); 443 444 if (size > zc->zc_config_dst_size) 445 error = ENOMEM; 446 else if (xcopyout(packed, (void *)(uintptr_t)zc->zc_config_dst, 447 size)) 448 error = EFAULT; 449 450 zc->zc_config_dst_size = size; 451 452 kmem_free(packed, size); 453 nvlist_free(config); 454 } else { 455 ASSERT(error != 0); 456 } 457 458 return (error); 459 } 460 461 /* 462 * Try to import the given pool, returning pool stats as appropriate so that 463 * user land knows which devices are available and overall pool health. 464 */ 465 static int 466 zfs_ioc_pool_tryimport(zfs_cmd_t *zc) 467 { 468 nvlist_t *tryconfig, *config; 469 char *packed = NULL; 470 size_t size = 0; 471 int error; 472 473 if ((error = get_config(zc, &tryconfig)) != 0) 474 return (error); 475 476 config = spa_tryimport(tryconfig); 477 478 nvlist_free(tryconfig); 479 480 if (config == NULL) 481 return (EINVAL); 482 483 VERIFY(nvlist_pack(config, &packed, &size, NV_ENCODE_NATIVE, 0) == 0); 484 485 if (size > zc->zc_config_dst_size) 486 error = ENOMEM; 487 else 488 error = xcopyout(packed, (void *)(uintptr_t)zc->zc_config_dst, 489 size); 490 491 zc->zc_config_dst_size = size; 492 493 kmem_free(packed, size); 494 nvlist_free(config); 495 496 return (error); 497 } 498 499 static int 500 zfs_ioc_pool_scrub(zfs_cmd_t *zc) 501 { 502 spa_t *spa; 503 int error; 504 505 error = spa_open(zc->zc_name, &spa, FTAG); 506 if (error == 0) { 507 error = spa_scrub(spa, zc->zc_cookie, B_FALSE); 508 spa_close(spa, FTAG); 509 } 510 return (error); 511 } 512 513 static int 514 zfs_ioc_pool_freeze(zfs_cmd_t *zc) 515 { 516 spa_t *spa; 517 int error; 518 519 error = spa_open(zc->zc_name, &spa, FTAG); 520 if (error == 0) { 521 spa_freeze(spa); 522 spa_close(spa, FTAG); 523 } 524 return (error); 525 } 526 527 static int 528 zfs_ioc_vdev_add(zfs_cmd_t *zc) 529 { 530 spa_t *spa; 531 int error; 532 nvlist_t *config; 533 534 error = spa_open(zc->zc_name, &spa, FTAG); 535 if (error != 0) 536 return (error); 537 538 if ((error = get_config(zc, &config)) == 0) { 539 error = spa_vdev_add(spa, config); 540 nvlist_free(config); 541 } 542 543 spa_close(spa, FTAG); 544 return (error); 545 } 546 547 /* ARGSUSED */ 548 static int 549 zfs_ioc_vdev_remove(zfs_cmd_t *zc) 550 { 551 return (ENOTSUP); 552 } 553 554 static int 555 zfs_ioc_vdev_online(zfs_cmd_t *zc) 556 { 557 spa_t *spa; 558 char *path = zc->zc_prop_value; 559 int error; 560 561 error = spa_open(zc->zc_name, &spa, FTAG); 562 if (error != 0) 563 return (error); 564 error = vdev_online(spa, path); 565 spa_close(spa, FTAG); 566 return (error); 567 } 568 569 static int 570 zfs_ioc_vdev_offline(zfs_cmd_t *zc) 571 { 572 spa_t *spa; 573 char *path = zc->zc_prop_value; 574 int error; 575 576 error = spa_open(zc->zc_name, &spa, FTAG); 577 if (error != 0) 578 return (error); 579 error = vdev_offline(spa, path); 580 spa_close(spa, FTAG); 581 return (error); 582 } 583 584 static int 585 zfs_ioc_vdev_attach(zfs_cmd_t *zc) 586 { 587 spa_t *spa; 588 char *path = zc->zc_prop_value; 589 int replacing = zc->zc_cookie; 590 nvlist_t *config; 591 int error; 592 593 error = spa_open(zc->zc_name, &spa, FTAG); 594 if (error != 0) 595 return (error); 596 597 if ((error = get_config(zc, &config)) == 0) { 598 error = spa_vdev_attach(spa, path, config, replacing); 599 nvlist_free(config); 600 } 601 602 spa_close(spa, FTAG); 603 return (error); 604 } 605 606 static int 607 zfs_ioc_vdev_detach(zfs_cmd_t *zc) 608 { 609 spa_t *spa; 610 char *path = zc->zc_prop_value; 611 int error; 612 613 error = spa_open(zc->zc_name, &spa, FTAG); 614 if (error != 0) 615 return (error); 616 617 error = spa_vdev_detach(spa, path, 0, B_FALSE); 618 619 spa_close(spa, FTAG); 620 return (error); 621 } 622 623 static int 624 zfs_get_stats(zfs_cmd_t *zc) 625 { 626 char *name = zc->zc_name; 627 zfs_stats_t *zs = &zc->zc_zfs_stats; 628 int error; 629 630 bzero(zs, sizeof (zfs_stats_t)); 631 632 if ((error = dsl_prop_get_integer(name, "atime", 633 &zs->zs_atime, zs->zs_atime_setpoint)) != 0 || 634 (error = dsl_prop_get_integer(name, "recordsize", 635 &zs->zs_recordsize, zs->zs_recordsize_setpoint)) != 0 || 636 (error = dsl_prop_get_integer(name, "readonly", 637 &zs->zs_readonly, zs->zs_readonly_setpoint)) != 0 || 638 (error = dsl_prop_get_integer(name, "devices", 639 &zs->zs_devices, zs->zs_devices_setpoint)) != 0 || 640 (error = dsl_prop_get_integer(name, "setuid", 641 &zs->zs_setuid, zs->zs_setuid_setpoint)) != 0 || 642 (error = dsl_prop_get_integer(name, "exec", 643 &zs->zs_exec, zs->zs_exec_setpoint)) != 0 || 644 (error = dsl_prop_get_string(name, "mountpoint", zs->zs_mountpoint, 645 sizeof (zs->zs_mountpoint), zs->zs_mountpoint_setpoint)) != 0 || 646 (error = dsl_prop_get_string(name, "sharenfs", zs->zs_sharenfs, 647 sizeof (zs->zs_sharenfs), zs->zs_sharenfs_setpoint)) != 0 || 648 (error = dsl_prop_get_integer(name, "aclmode", 649 &zs->zs_acl_mode, zs->zs_acl_mode_setpoint)) != 0 || 650 (error = dsl_prop_get_integer(name, "snapdir", 651 &zs->zs_snapdir, zs->zs_snapdir_setpoint)) != 0 || 652 (error = dsl_prop_get_integer(name, "aclinherit", 653 &zs->zs_acl_inherit, zs->zs_acl_inherit_setpoint)) != 0) 654 return (error); 655 656 return (0); 657 } 658 659 static int 660 zfs_ioc_objset_stats(zfs_cmd_t *zc) 661 { 662 objset_t *os = NULL; 663 int error; 664 665 retry: 666 error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, 667 DS_MODE_STANDARD | DS_MODE_READONLY, &os); 668 if (error != 0) { 669 /* 670 * This is ugly: dmu_objset_open() can return EBUSY if 671 * the objset is held exclusively. Fortunately this hold is 672 * only for a short while, so we retry here. 673 * This avoids user code having to handle EBUSY, 674 * for example for a "zfs list". 675 */ 676 if (error == EBUSY) { 677 delay(1); 678 goto retry; 679 } 680 return (error); 681 } 682 683 dmu_objset_stats(os, &zc->zc_objset_stats); 684 685 switch (zc->zc_objset_stats.dds_type) { 686 687 case DMU_OST_ZFS: 688 error = zfs_get_stats(zc); 689 break; 690 691 case DMU_OST_ZVOL: 692 error = zvol_get_stats(zc, os); 693 break; 694 } 695 696 dmu_objset_close(os); 697 return (error); 698 } 699 700 static int 701 zfs_ioc_dataset_list_next(zfs_cmd_t *zc) 702 { 703 dsl_dir_t *dd; 704 zap_cursor_t cursor; 705 zap_attribute_t attr; 706 int error; 707 char *p; 708 709 dd = dsl_dir_open(zc->zc_name, FTAG, NULL); 710 if (dd == NULL) 711 return (ESRCH); 712 713 if (dd->dd_phys->dd_child_dir_zapobj == 0) { 714 dsl_dir_close(dd, FTAG); 715 return (ESRCH); 716 } 717 718 p = strrchr(zc->zc_name, '/'); 719 if (p == NULL || p[1] != '\0') 720 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); 721 p = zc->zc_name + strlen(zc->zc_name); 722 723 do { 724 zap_cursor_init_serialized(&cursor, dd->dd_pool->dp_meta_objset, 725 dd->dd_phys->dd_child_dir_zapobj, zc->zc_cookie); 726 727 error = zap_cursor_retrieve(&cursor, &attr); 728 if (error == ENOENT) 729 error = ESRCH; 730 if (error != 0) { 731 dsl_dir_close(dd, FTAG); 732 *p = '\0'; 733 return (error); 734 } 735 736 (void) strlcpy(p, attr.za_name, sizeof (zc->zc_name) - 737 (p - zc->zc_name)); 738 739 zap_cursor_advance(&cursor); 740 zc->zc_cookie = zap_cursor_serialize(&cursor); 741 742 } while (!INGLOBALZONE(curproc) && 743 !zone_dataset_visible(zc->zc_name, NULL)); 744 745 dsl_dir_close(dd, FTAG); 746 747 /* 748 * If it's a hidden dataset, don't try to get stats for it. 749 * User land will skip over it. 750 */ 751 if (strchr(zc->zc_name, '$') != NULL) 752 return (0); 753 754 error = zfs_ioc_objset_stats(zc); /* will just fill in the stats */ 755 return (error); 756 } 757 758 static int 759 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) 760 { 761 zap_cursor_t cursor; 762 zap_attribute_t attr; 763 dsl_dataset_t *ds; 764 int error; 765 766 retry: 767 error = dsl_dataset_open(zc->zc_name, 768 DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds); 769 if (error) { 770 /* 771 * This is ugly: dsl_dataset_open() can return EBUSY if 772 * the objset is held exclusively. Fortunately this hold is 773 * only for a short while, so we retry here. 774 * This avoids user code having to handle EBUSY, 775 * for example for a "zfs list -s". 776 */ 777 if (error == EBUSY) { 778 delay(1); 779 goto retry; 780 } 781 if (error == ENOENT) 782 return (ESRCH); 783 return (error); 784 } 785 786 /* 787 * If ds_snapnames_zapobj is 0, someone is trying to iterate over 788 * snapshots of a snapshot. In this case, pretend that it has no 789 * snapshots; otherwise zap_cursor_retrieve() will blow up. 790 */ 791 if (ds->ds_phys->ds_snapnames_zapobj == 0) { 792 error = ESRCH; 793 goto out; 794 } 795 796 zap_cursor_init_serialized(&cursor, 797 ds->ds_dir->dd_pool->dp_meta_objset, 798 ds->ds_phys->ds_snapnames_zapobj, zc->zc_cookie); 799 800 error = zap_cursor_retrieve(&cursor, &attr); 801 if (error == ENOENT) 802 error = ESRCH; 803 if (error != 0) 804 goto out; 805 806 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= 807 sizeof (zc->zc_name) || 808 strlcat(zc->zc_name, attr.za_name, sizeof (zc->zc_name)) >= 809 sizeof (zc->zc_name)) { 810 error = ENAMETOOLONG; 811 goto out; 812 } 813 814 zap_cursor_advance(&cursor); 815 zc->zc_cookie = zap_cursor_serialize(&cursor); 816 817 error = zfs_ioc_objset_stats(zc); /* will just fill in the stats */ 818 819 out: 820 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 821 return (error); 822 } 823 824 static int 825 zfs_ioc_set_prop(zfs_cmd_t *zc) 826 { 827 return (dsl_prop_set(zc->zc_name, zc->zc_prop_name, 828 zc->zc_intsz, zc->zc_numints, zc->zc_prop_value)); 829 } 830 831 static int 832 zfs_ioc_set_quota(zfs_cmd_t *zc) 833 { 834 return (dsl_dir_set_quota(zc->zc_name, zc->zc_cookie)); 835 } 836 837 static int 838 zfs_ioc_set_reservation(zfs_cmd_t *zc) 839 { 840 return (dsl_dir_set_reservation(zc->zc_name, zc->zc_cookie)); 841 } 842 843 static int 844 zfs_ioc_set_volsize(zfs_cmd_t *zc) 845 { 846 return (zvol_set_volsize(zc)); 847 } 848 849 static int 850 zfs_ioc_set_volblocksize(zfs_cmd_t *zc) 851 { 852 return (zvol_set_volblocksize(zc)); 853 } 854 855 static int 856 zfs_ioc_create_minor(zfs_cmd_t *zc) 857 { 858 return (zvol_create_minor(zc)); 859 } 860 861 static int 862 zfs_ioc_remove_minor(zfs_cmd_t *zc) 863 { 864 return (zvol_remove_minor(zc)); 865 } 866 867 /* 868 * Search the vfs list for a specified resource. Returns a pointer to it 869 * or NULL if no suitable entry is found. The caller of this routine 870 * is responsible for releasing the returned vfs pointer. 871 */ 872 static vfs_t * 873 zfs_get_vfs(const char *resource) 874 { 875 struct vfs *vfsp; 876 struct vfs *vfs_found = NULL; 877 878 vfs_list_read_lock(); 879 vfsp = rootvfs; 880 do { 881 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) { 882 VFS_HOLD(vfsp); 883 vfs_found = vfsp; 884 break; 885 } 886 vfsp = vfsp->vfs_next; 887 } while (vfsp != rootvfs); 888 vfs_list_unlock(); 889 return (vfs_found); 890 } 891 892 static void 893 zfs_create_cb(objset_t *os, void *arg, dmu_tx_t *tx) 894 { 895 zfs_cmd_t *zc = arg; 896 zfs_create_fs(os, (cred_t *)(uintptr_t)zc->zc_cred, tx); 897 } 898 899 static int 900 zfs_ioc_create(zfs_cmd_t *zc) 901 { 902 objset_t *clone; 903 int error = 0; 904 void (*cbfunc)(objset_t *os, void *arg, dmu_tx_t *tx); 905 dmu_objset_type_t type = zc->zc_objset_type; 906 907 switch (type) { 908 909 case DMU_OST_ZFS: 910 cbfunc = zfs_create_cb; 911 break; 912 913 case DMU_OST_ZVOL: 914 cbfunc = zvol_create_cb; 915 break; 916 917 default: 918 return (EINVAL); 919 } 920 921 if (zc->zc_filename[0] != '\0') { 922 /* 923 * We're creating a clone of an existing snapshot. 924 */ 925 zc->zc_filename[sizeof (zc->zc_filename) - 1] = '\0'; 926 if (dataset_namecheck(zc->zc_filename, NULL, NULL) != 0) 927 return (EINVAL); 928 929 error = dmu_objset_open(zc->zc_filename, type, 930 DS_MODE_STANDARD | DS_MODE_READONLY, &clone); 931 if (error) 932 return (error); 933 error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL); 934 dmu_objset_close(clone); 935 } else if (strchr(zc->zc_name, '@') != 0) { 936 /* 937 * We're taking a snapshot of an existing dataset. 938 */ 939 error = dmu_objset_create(zc->zc_name, type, NULL, NULL, NULL); 940 } else { 941 /* 942 * We're creating a new dataset. 943 */ 944 if (type == DMU_OST_ZVOL) { 945 if ((error = zvol_check_volsize(zc)) != 0) 946 return (error); 947 if ((error = zvol_check_volblocksize(zc)) != 0) 948 return (error); 949 } 950 error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc, zc); 951 } 952 return (error); 953 } 954 955 static int 956 zfs_ioc_destroy(zfs_cmd_t *zc) 957 { 958 if (strchr(zc->zc_name, '@') != NULL && 959 zc->zc_objset_type == DMU_OST_ZFS) { 960 vfs_t *vfsp; 961 int err; 962 963 /* 964 * Snapshots under .zfs control must be unmounted 965 * before they can be destroyed. 966 */ 967 if ((vfsp = zfs_get_vfs(zc->zc_name)) != NULL) { 968 /* 969 * Always force the unmount for snapshots. 970 */ 971 int flag = MS_FORCE; 972 973 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { 974 VFS_RELE(vfsp); 975 return (err); 976 } 977 VFS_RELE(vfsp); 978 if ((err = dounmount(vfsp, flag, kcred)) != 0) 979 return (err); 980 } 981 } 982 983 return (dmu_objset_destroy(zc->zc_name)); 984 } 985 986 static int 987 zfs_ioc_rollback(zfs_cmd_t *zc) 988 { 989 return (dmu_objset_rollback(zc->zc_name)); 990 } 991 992 static int 993 zfs_ioc_rename(zfs_cmd_t *zc) 994 { 995 zc->zc_prop_value[sizeof (zc->zc_prop_value) - 1] = '\0'; 996 if (dataset_namecheck(zc->zc_prop_value, NULL, NULL) != 0) 997 return (EINVAL); 998 999 if (strchr(zc->zc_name, '@') != NULL && 1000 zc->zc_objset_type == DMU_OST_ZFS) { 1001 vfs_t *vfsp; 1002 int err; 1003 1004 /* 1005 * Snapshots under .zfs control must be unmounted 1006 * before they can be renamed. 1007 */ 1008 if ((vfsp = zfs_get_vfs(zc->zc_name)) != NULL) { 1009 /* 1010 * Always force the unmount for snapshots. 1011 */ 1012 int flag = MS_FORCE; 1013 1014 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) { 1015 VFS_RELE(vfsp); 1016 return (err); 1017 } 1018 VFS_RELE(vfsp); 1019 if ((err = dounmount(vfsp, flag, kcred)) != 0) 1020 return (err); 1021 } 1022 } 1023 1024 return (dmu_objset_rename(zc->zc_name, zc->zc_prop_value)); 1025 } 1026 1027 static int 1028 zfs_ioc_recvbackup(zfs_cmd_t *zc) 1029 { 1030 file_t *fp; 1031 int error, fd; 1032 1033 fd = zc->zc_cookie; 1034 fp = getf(fd); 1035 if (fp == NULL) 1036 return (EBADF); 1037 error = dmu_recvbackup(&zc->zc_begin_record, &zc->zc_cookie, 1038 fp->f_vnode, fp->f_offset); 1039 releasef(fd); 1040 return (error); 1041 } 1042 1043 static int 1044 zfs_ioc_sendbackup(zfs_cmd_t *zc) 1045 { 1046 objset_t *fromsnap = NULL; 1047 objset_t *tosnap; 1048 file_t *fp; 1049 int error; 1050 1051 error = dmu_objset_open(zc->zc_name, DMU_OST_ANY, 1052 DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap); 1053 if (error) 1054 return (error); 1055 1056 if (zc->zc_prop_value[0] != '\0') { 1057 error = dmu_objset_open(zc->zc_prop_value, DMU_OST_ANY, 1058 DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap); 1059 if (error) { 1060 dmu_objset_close(tosnap); 1061 return (error); 1062 } 1063 } 1064 1065 fp = getf(zc->zc_cookie); 1066 if (fp == NULL) { 1067 dmu_objset_close(tosnap); 1068 if (fromsnap) 1069 dmu_objset_close(fromsnap); 1070 return (EBADF); 1071 } 1072 1073 error = dmu_sendbackup(tosnap, fromsnap, fp->f_vnode); 1074 1075 releasef(zc->zc_cookie); 1076 if (fromsnap) 1077 dmu_objset_close(fromsnap); 1078 dmu_objset_close(tosnap); 1079 return (error); 1080 } 1081 1082 static zfs_ioc_vec_t zfs_ioc_vec[] = { 1083 { zfs_ioc_pool_create, zfs_secpolicy_config, pool_name }, 1084 { zfs_ioc_pool_destroy, zfs_secpolicy_config, pool_name }, 1085 { zfs_ioc_pool_import, zfs_secpolicy_config, pool_name }, 1086 { zfs_ioc_pool_export, zfs_secpolicy_config, pool_name }, 1087 { zfs_ioc_pool_configs, zfs_secpolicy_none, no_name }, 1088 { zfs_ioc_pool_guid, zfs_secpolicy_read, pool_name }, 1089 { zfs_ioc_pool_stats, zfs_secpolicy_read, pool_name }, 1090 { zfs_ioc_pool_tryimport, zfs_secpolicy_config, no_name }, 1091 { zfs_ioc_pool_scrub, zfs_secpolicy_config, pool_name }, 1092 { zfs_ioc_pool_freeze, zfs_secpolicy_config, no_name }, 1093 { zfs_ioc_vdev_add, zfs_secpolicy_config, pool_name }, 1094 { zfs_ioc_vdev_remove, zfs_secpolicy_config, pool_name }, 1095 { zfs_ioc_vdev_online, zfs_secpolicy_config, pool_name }, 1096 { zfs_ioc_vdev_offline, zfs_secpolicy_config, pool_name }, 1097 { zfs_ioc_vdev_attach, zfs_secpolicy_config, pool_name }, 1098 { zfs_ioc_vdev_detach, zfs_secpolicy_config, pool_name }, 1099 { zfs_ioc_objset_stats, zfs_secpolicy_read, dataset_name }, 1100 { zfs_ioc_dataset_list_next, zfs_secpolicy_read, dataset_name }, 1101 { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, dataset_name }, 1102 { zfs_ioc_set_prop, zfs_secpolicy_setprop, dataset_name }, 1103 { zfs_ioc_set_quota, zfs_secpolicy_quota, dataset_name }, 1104 { zfs_ioc_set_reservation, zfs_secpolicy_write, dataset_name }, 1105 { zfs_ioc_set_volsize, zfs_secpolicy_config, dataset_name }, 1106 { zfs_ioc_set_volblocksize, zfs_secpolicy_config, dataset_name }, 1107 { zfs_ioc_create_minor, zfs_secpolicy_config, dataset_name }, 1108 { zfs_ioc_remove_minor, zfs_secpolicy_config, dataset_name }, 1109 { zfs_ioc_create, zfs_secpolicy_parent, dataset_name }, 1110 { zfs_ioc_destroy, zfs_secpolicy_parent, dataset_name }, 1111 { zfs_ioc_rollback, zfs_secpolicy_write, dataset_name }, 1112 { zfs_ioc_rename, zfs_secpolicy_write, dataset_name }, 1113 { zfs_ioc_recvbackup, zfs_secpolicy_write, dataset_name }, 1114 { zfs_ioc_sendbackup, zfs_secpolicy_write, dataset_name }, 1115 }; 1116 1117 static int 1118 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp) 1119 { 1120 zfs_cmd_t *zc; 1121 uint_t vec; 1122 int error; 1123 1124 if (getminor(dev) != 0) 1125 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp)); 1126 1127 vec = cmd - ZFS_IOC; 1128 1129 if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) 1130 return (EINVAL); 1131 1132 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); 1133 1134 error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t)); 1135 1136 if (error == 0) { 1137 zc->zc_cred = (uintptr_t)cr; 1138 zc->zc_dev = dev; 1139 error = zfs_ioc_vec[vec].zvec_secpolicy(zc->zc_name, 1140 zc->zc_prop_name, cr); 1141 } 1142 1143 /* 1144 * Ensure that all pool/dataset names are valid before we pass down to 1145 * the lower layers. 1146 */ 1147 if (error == 0) { 1148 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; 1149 switch (zfs_ioc_vec[vec].zvec_namecheck) { 1150 case pool_name: 1151 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) 1152 error = EINVAL; 1153 break; 1154 1155 case dataset_name: 1156 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) 1157 error = EINVAL; 1158 break; 1159 } 1160 } 1161 1162 if (error == 0) 1163 error = zfs_ioc_vec[vec].zvec_func(zc); 1164 1165 if (error == 0 || error == ENOMEM) { 1166 int rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t)); 1167 if (error == 0) 1168 error = rc; 1169 } 1170 1171 kmem_free(zc, sizeof (zfs_cmd_t)); 1172 return (error); 1173 } 1174 1175 static int 1176 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1177 { 1178 if (cmd != DDI_ATTACH) 1179 return (DDI_FAILURE); 1180 1181 if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0, 1182 DDI_PSEUDO, 0) == DDI_FAILURE) 1183 return (DDI_FAILURE); 1184 1185 zfs_dip = dip; 1186 1187 ddi_report_dev(dip); 1188 1189 return (DDI_SUCCESS); 1190 } 1191 1192 static int 1193 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1194 { 1195 if (spa_busy() || zfs_busy() || zvol_busy()) 1196 return (DDI_FAILURE); 1197 1198 if (cmd != DDI_DETACH) 1199 return (DDI_FAILURE); 1200 1201 zfs_dip = NULL; 1202 1203 ddi_prop_remove_all(dip); 1204 ddi_remove_minor_node(dip, NULL); 1205 1206 return (DDI_SUCCESS); 1207 } 1208 1209 /*ARGSUSED*/ 1210 static int 1211 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 1212 { 1213 switch (infocmd) { 1214 case DDI_INFO_DEVT2DEVINFO: 1215 *result = zfs_dip; 1216 return (DDI_SUCCESS); 1217 1218 case DDI_INFO_DEVT2INSTANCE: 1219 *result = (void *)(uintptr_t)getminor((dev_t)arg); 1220 return (DDI_SUCCESS); 1221 } 1222 1223 return (DDI_FAILURE); 1224 } 1225 1226 /* 1227 * OK, so this is a little weird. 1228 * 1229 * /dev/zfs is the control node, i.e. minor 0. 1230 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0. 1231 * 1232 * /dev/zfs has basically nothing to do except serve up ioctls, 1233 * so most of the standard driver entry points are in zvol.c. 1234 */ 1235 static struct cb_ops zfs_cb_ops = { 1236 zvol_open, /* open */ 1237 zvol_close, /* close */ 1238 zvol_strategy, /* strategy */ 1239 nodev, /* print */ 1240 nodev, /* dump */ 1241 zvol_read, /* read */ 1242 zvol_write, /* write */ 1243 zfsdev_ioctl, /* ioctl */ 1244 nodev, /* devmap */ 1245 nodev, /* mmap */ 1246 nodev, /* segmap */ 1247 nochpoll, /* poll */ 1248 ddi_prop_op, /* prop_op */ 1249 NULL, /* streamtab */ 1250 D_NEW | D_MP | D_64BIT, /* Driver compatibility flag */ 1251 CB_REV, /* version */ 1252 zvol_aread, /* async read */ 1253 zvol_awrite, /* async write */ 1254 }; 1255 1256 static struct dev_ops zfs_dev_ops = { 1257 DEVO_REV, /* version */ 1258 0, /* refcnt */ 1259 zfs_info, /* info */ 1260 nulldev, /* identify */ 1261 nulldev, /* probe */ 1262 zfs_attach, /* attach */ 1263 zfs_detach, /* detach */ 1264 nodev, /* reset */ 1265 &zfs_cb_ops, /* driver operations */ 1266 NULL /* no bus operations */ 1267 }; 1268 1269 static struct modldrv zfs_modldrv = { 1270 &mod_driverops, "ZFS storage pool version 1", &zfs_dev_ops 1271 }; 1272 1273 static struct modlinkage modlinkage = { 1274 MODREV_1, 1275 (void *)&zfs_modlfs, 1276 (void *)&zfs_modldrv, 1277 NULL 1278 }; 1279 1280 int 1281 _init(void) 1282 { 1283 int error; 1284 1285 if ((error = mod_install(&modlinkage)) != 0) 1286 return (error); 1287 1288 error = ldi_ident_from_mod(&modlinkage, &zfs_li); 1289 ASSERT(error == 0); 1290 1291 spa_init(FREAD | FWRITE); 1292 zfs_init(); 1293 zvol_init(); 1294 1295 return (0); 1296 } 1297 1298 int 1299 _fini(void) 1300 { 1301 int error; 1302 1303 if (spa_busy() || zfs_busy() || zvol_busy()) 1304 return (EBUSY); 1305 1306 if ((error = mod_remove(&modlinkage)) != 0) 1307 return (error); 1308 1309 zvol_fini(); 1310 zfs_fini(); 1311 spa_fini(); 1312 1313 ldi_ident_release(zfs_li); 1314 zfs_li = NULL; 1315 1316 return (error); 1317 } 1318 1319 int 1320 _info(struct modinfo *modinfop) 1321 { 1322 return (mod_info(&modlinkage, modinfop)); 1323 } 1324