/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 Andrey Sokolov
 * Copyright 2019 Joyent, Inc.
 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2021 Toomas Soome <tsoome@me.com>
 * Copyright 2023 Oxide Computer Company
 */

/*
 * lofi (loopback file) driver - allows you to attach a file to a device,
 * which can then be accessed through that device. The simple model is that
 * you tell lofi to open a file, and then use the block device you get as
 * you would any block device. lofi translates access to the block device
 * into I/O on the underlying file. This is mostly useful for
 * mounting images of filesystems.
 *
 * lofi is controlled through /dev/lofictl - this is the only device exported
 * during attach, and is instance number 0. lofiadm communicates with lofi
 * through ioctls on this device. When a file is attached to lofi, block and
 * character devices are exported in /dev/lofi and /dev/rlofi. These devices
 * are identified by lofi instance number, and the instance number is also used
 * as the name in /dev/lofi.
 *
 * Virtual disks, or labeled lofi, implement virtual disk support for
 * partition tables and the related tools. Such mappings cause block and
 * character devices to be exported in the /dev/dsk and /dev/rdsk
 * directories.
 *
 * To support virtual disks, the instance number space is divided into two
 * parts: the upper part for the instance number and the lower part for the
 * minor number space identifying partitions and slices. The virtual disk
 * support is implemented by stacking the cmlb module. For virtual disks,
 * the partition-related ioctl calls are routed to the cmlb module.
 * Compression and encryption are not supported for virtual disks.
 *
 * Mapped devices are tracked with state structures handled with
 * ddi_soft_state(9F) for simplicity.
 *
 * A file attached to lofi is opened when attached and not closed until
 * explicitly detached from lofi. This seems more sensible than deferring
 * the open until the /dev/lofi device is opened, for a number of reasons.
 * One is that any failure is likely to be noticed by the person (or script)
 * running lofiadm. Another is that it would be a security problem if the
 * file was replaced by another one after being added but before being opened.
 *
 * The only hard part about lofi is the ioctls. In order to support things
 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
 * So it has to fake disk geometry and partition information. More may need
 * to be faked if your favorite utility doesn't work and you think it should
 * (fdformat doesn't work because it really wants to know the type of floppy
 * controller to talk to, and that didn't seem easy to fake. Or possibly even
 * necessary, since we have mkfs_pcfs now).
 *
 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
 * support simulation of hotplug events, an optional force flag is provided.
 * If a lofi device is open when a force detach is requested, then the
 * underlying file is closed and any subsequent operations return EIO. When the
 * device is closed for the last time, it will be cleaned up at that time. In
 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
 * detached but not removed.
 *
 * If a detach is requested and the lofi device is not open, we perform the
 * unmap and remove the lofi instance.
 *
 * If the lofi device is open and li_cleanup is set on the ioctl request,
 * we set the ls_cleanup flag to record that cleanup has been requested;
 * the last lofi_close() then performs the unmapping and this lofi instance
 * is removed.
 *
 * If the lofi device is open and li_force is set on the ioctl request,
 * we set the ls_cleanup flag to record that cleanup has been requested.
 * We also set ls_vp_closereq so that IO tasks return EIO on new IO requests,
 * and we wait for the in-process IO count to reach 0, indicating there are
 * no more IO requests. Since ls_cleanup is set, the last lofi_close() will
 * perform the unmap and this lofi instance will be removed.
 * See also lofi_unmap_file() for details.
 *
 * Once ls_cleanup is set for an instance, we do not allow lofi_open()
 * calls to succeed, so that the last lofi_close() can remove the instance.
 *
 * Known problems:
 *
 *	UFS logging. Mounting a UFS filesystem image with "logging"
 *	works for basic copy testing but wedges during a build of ON through
 *	that image. Some deadlock in lufs holding the log mutex and then
 *	getting stuck on a buf. So for now, don't do that.
 *
 *	Direct I/O. Since the filesystem data is being cached in the buffer
 *	cache, _and_ again in the underlying filesystem, it's tempting to
 *	enable direct I/O on the underlying file. Don't, because that
 *	deadlocks. I think to fix the cache-twice problem we might need
 *	filesystem support.
 *
 * Interesting things to do:
 *
 *	Allow multiple files for each device. A poor-man's metadisk,
 *	basically.
 *
 *	Pass-through ioctls on block devices. You can (though it's not
 *	documented) give lofi a block device as a file name. Then we
 *	shouldn't need to fake a geometry. However, it may be relevant if
 *	you're replacing metadisk, or using lofi to get crypto.
 *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
 *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
 *	In fact this even makes sense if you have lofi "above" metadisk.
 *
 * Encryption:
 *	Each lofi device can have its own symmetric key and cipher.
 *	They are passed to us by lofiadm(8) in the correct format for use
 *	with the misc/kcf crypto_* routines.
 *
 *	Each block has its own IV, that is calculated in lofi_blk_mech(),
 *	based on the "master" key held in the lsp and the block number of
 *	the buffer.
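 *
 *	An illustrative sketch (see lofi_blk_mech() below for the
 *	authoritative code), for the IVM_ENC_BLKNO IV method:
 *
 *		iv = zeros(ls_iv_len);
 *		iv[ls_iv_len - 8 .. ls_iv_len - 1] = lblkno;  (left-padded)
 *		iv = encrypt(ls_iv_mech, ls_key, iv);         (in place)
 *
 *	i.e. the 64-bit block number is written into a zeroed buffer of
 *	ls_iv_len bytes, padded on the left, and that buffer is encrypted
 *	in place with the master key; the ciphertext becomes the IV for
 *	that block.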
 */

#include <sys/types.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/aio_req.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/vnode.h>
#include <sys/lofi.h>
#include <sys/lofi_impl.h>	/* for cache structure */
#include <sys/fcntl.h>
#include <sys/pathname.h>
#include <sys/filio.h>
#include <sys/fdio.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <vm/seg_map.h>
#include <sys/ddi.h>
#include <sys/dkioc_free_util.h>
#include <sys/sunddi.h>
#include <sys/zmod.h>
#include <sys/id_space.h>
#include <sys/mkdev.h>
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <sys/rctl.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/scsi/impl/uscsi.h>
#include <sys/sysevent/dev.h>
#include <sys/efi_partition.h>
#include <LzmaDec.h>

#define	NBLOCKS_PROP_NAME	"Nblocks"
#define	SIZE_PROP_NAME		"Size"
#define	ZONE_PROP_NAME		"zone"

#define	SETUP_C_DATA(cd, buf, len)		\
	(cd).cd_format = CRYPTO_DATA_RAW;	\
	(cd).cd_offset = 0;			\
	(cd).cd_miscdata = NULL;		\
	(cd).cd_length = (len);			\
	(cd).cd_raw.iov_base = (buf);		\
	(cd).cd_raw.iov_len = (len);

#define	UIO_CHECK(uio)	\
	if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
	    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
		return (EINVAL); \
	}

#define	LOFI_TIMEOUT	120

int lofi_timeout = LOFI_TIMEOUT;
static void *lofi_statep;
static kmutex_t lofi_lock;		/* state lock */
static id_space_t *lofi_id;		/* lofi ID values */
static list_t lofi_list;
static zone_key_t lofi_zone_key;

/*
 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 * high. If we want to be assured that the underlying device is always busy,
 * we must be sure that the number of bytes enqueued when the number of
 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
 * set maxalloc to be the maximum throughput (in bytes per second) of the
 * underlying device divided by the minimum I/O size. We assume a realistic
 * maximum throughput of one hundred megabytes per second; we set maxalloc on
 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */

const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;

/*
 * To avoid decompressing data in a compressed segment multiple times
 * when accessing small parts of a segment's data, we cache and reuse
 * the uncompressed segment's data.
 *
 * A single cached segment is sufficient to avoid lots of duplicate
 * segment decompress operations. A small cache size also reduces the
 * memory footprint.
 *
 * lofi_max_comp_cache is the maximum number of decompressed data segments
 * cached for each compressed lofi image. It can be set to 0 to disable
 * caching.
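 *
 * This is a plain module global like the tunables above; assuming the
 * usual illumos mechanism for module globals, it could be adjusted via
 * /etc/system, e.g.:
 *
 *	set lofi:lofi_max_comp_cache = 4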
 */

uint32_t lofi_max_comp_cache = 1;

static int gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *destlen, int level);

static int lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level);

lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"}, /* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"},
	{lzma_decompress,	NULL,	0,	"lzma"}
};

static void lofi_strategy_task(void *);
static int lofi_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t,
    size_t, void *);
static int lofi_tg_getinfo(dev_info_t *, int, void *, void *);

struct cmlb_tg_ops lofi_tg_ops = {
	TG_DK_OPS_VERSION_1,
	lofi_tg_rdwr,
	lofi_tg_getinfo
};

typedef enum {
	RDWR_RAW,
	RDWR_BCOPY
} lofi_rdrw_method_t;

static void
*SzAlloc(void *p __unused, size_t size)
{
	return (kmem_alloc(size, KM_SLEEP));
}

static void
SzFree(void *p __unused, void *address, size_t size)
{
	kmem_free(address, size);
}

static ISzAlloc g_Alloc = { SzAlloc, SzFree };

/*
 * Free data referenced by the linked list of cached uncompressed
 * segments.
 */
static void
lofi_free_comp_cache(struct lofi_state *lsp)
{
	struct lofi_comp_cache *lc;

	while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) {
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}
	ASSERT(lsp->ls_comp_cache_count == 0);
}

static int
is_opened(struct lofi_state *lsp)
{
	int i;
	boolean_t last = B_TRUE;

	ASSERT(MUTEX_HELD(&lofi_lock));
	for (i = 0; i < LOFI_PART_MAX; i++) {
		if (lsp->ls_open_lyr[i]) {
			last = B_FALSE;
			break;
		}
	}

	for (i = 0; last && (i < OTYP_LYR); i++) {
		if (lsp->ls_open_reg[i]) {
			last = B_FALSE;
		}
	}

	return (!last);
}

static void
lofi_set_cleanup(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	lsp->ls_cleanup = B_TRUE;

	/* wake up any threads waiting on dkiocstate */
	cv_broadcast(&lsp->ls_vp_cv);
}

static void
lofi_free_crypto(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_crypto_enabled) {
		/*
		 * Clean up the crypto state so that it doesn't hang around
		 * in memory after we are done with it.
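		 * The key bytes are bzero()ed before being freed so that
		 * no plaintext copy of the key lingers in freed kmem.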
		 */
		if (lsp->ls_key.ck_data != NULL) {
			bzero(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			kmem_free(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			lsp->ls_key.ck_data = NULL;
			lsp->ls_key.ck_length = 0;
		}

		if (lsp->ls_mech.cm_param != NULL) {
			kmem_free(lsp->ls_mech.cm_param,
			    lsp->ls_mech.cm_param_len);
			lsp->ls_mech.cm_param = NULL;
			lsp->ls_mech.cm_param_len = 0;
		}

		if (lsp->ls_iv_mech.cm_param != NULL) {
			kmem_free(lsp->ls_iv_mech.cm_param,
			    lsp->ls_iv_mech.cm_param_len);
			lsp->ls_iv_mech.cm_param = NULL;
			lsp->ls_iv_mech.cm_param_len = 0;
		}

		mutex_destroy(&lsp->ls_crypto_lock);
	}
}

static int
lofi_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie __unused)
{
	struct lofi_state *lsp;
	buf_t	*bp;
	int	instance;
	int	rv = 0;

	instance = ddi_get_instance(dip);
	if (instance == 0)	/* control node does not have disk */
		return (ENXIO);

	lsp = ddi_get_soft_state(lofi_statep, instance);

	if (lsp == NULL)
		return (ENXIO);

	if (cmd != TG_READ && cmd != TG_WRITE)
		return (EINVAL);

	/*
	 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	while (lsp->ls_vp_ready == B_FALSE)
		cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
	mutex_exit(&lsp->ls_vp_lock);

	if (P2PHASE(length, (1U << lsp->ls_lbshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	bp = getrbuf(KM_SLEEP);

	if (cmd == TG_READ) {
		bp->b_flags = B_READ;
	} else {
		if (lsp->ls_readonly == B_TRUE) {
			freerbuf(bp);
			return (EROFS);
		}
		bp->b_flags = B_WRITE;
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	bp->b_lblkno = start;
	bp->b_private = NULL;
	bp->b_edev = lsp->ls_dev;

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	(void) biowait(bp);

	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

/*
 * Get device geometry info for cmlb.
 *
 * We have mapped the disk image as a virtual block device and have to
 * report the physical/virtual geometry to cmlb.
 *
 * So we have two principal cases:
 * 1. An uninitialised image without any existing labels;
 *    in this case we fabricate the data based on the mapped image.
 * 2. An image with existing label information.
 *    Since we have no information on how the image was created (it may be
 *    a dump from some physical device), we need to rely on the label
 *    information from the image, or we get "corrupted label" errors.
 *    NOTE: the label can be MBR, MBR+SMI, GPT
 */
static int
lofi_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie __unused)
{
	struct lofi_state *lsp;
	int instance;
	int ashift;

	instance = ddi_get_instance(dip);
	if (instance == 0)	/* control device has no storage */
		return (ENXIO);

	lsp = ddi_get_soft_state(lofi_statep, instance);

	if (lsp == NULL)
		return (ENXIO);

	/*
	 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
	 *
	 * When a mapping is created, a new lofi instance is created and
	 * lofi_attach() will call cmlb_attach() as part of the procedure
	 * to set the mapping up. This chain of events happens in the
	 * same thread.
	 * Since cmlb_attach() calls lofi_tg_getinfo() to get the capacity,
	 * we return an error on that call if the cookie is set; otherwise
	 * lofi_attach() would get stuck, as the mapping is not yet
	 * finalized and lofi is not yet ready.
	 * Note, such an error is not fatal for cmlb, as the label setup
	 * will be finalized when cmlb_validate() is called.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	if (tg_cookie != NULL && lsp->ls_vp_ready == B_FALSE) {
		mutex_exit(&lsp->ls_vp_lock);
		return (ENXIO);
	}
	while (lsp->ls_vp_ready == B_FALSE)
		cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
	mutex_exit(&lsp->ls_vp_lock);

	ashift = lsp->ls_lbshift;

	switch (cmd) {
	case TG_GETPHYGEOM: {
		cmlb_geom_t *geomp = arg;

		geomp->g_capacity =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> ashift;
		geomp->g_nsect = lsp->ls_dkg.dkg_nsect;
		geomp->g_nhead = lsp->ls_dkg.dkg_nhead;
		geomp->g_acyl = lsp->ls_dkg.dkg_acyl;
		geomp->g_ncyl = lsp->ls_dkg.dkg_ncyl;
		geomp->g_secsize = (1U << ashift);
		geomp->g_intrlv = lsp->ls_dkg.dkg_intrlv;
		geomp->g_rpm = lsp->ls_dkg.dkg_rpm;
		return (0);
	}

	case TG_GETCAPACITY:
		*(diskaddr_t *)arg =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> ashift;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << ashift);
		return (0);

	case TG_GETATTR: {
		tg_attribute_t *tgattr = arg;

		tgattr->media_is_writable = !lsp->ls_readonly;
		tgattr->media_is_solid_state = B_FALSE;
		tgattr->media_is_rotational = B_FALSE;
		return (0);
	}

	default:
		return (EINVAL);
	}
}

static void
lofi_teardown_task(void *arg)
{
	struct lofi_state *lsp = arg;
	int id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

	mutex_enter(&lofi_lock);
	while (ndi_devi_offline(lsp->ls_dip, NDI_DEVI_REMOVE) != NDI_SUCCESS) {
		mutex_exit(&lofi_lock);
		/* do a sleeping wait for one second */
		delay(drv_usectohz(MICROSEC));
		mutex_enter(&lofi_lock);
	}
	id_free(lofi_id, id);
	mutex_exit(&lofi_lock);
}

static void
lofi_destroy(struct lofi_state *lsp, cred_t *credp)
{
	int id = LOFI_MINOR2ID(getminor(lsp->ls_dev));
	int i;

	ASSERT(MUTEX_HELD(&lofi_lock));

	/*
	 * Before we can start to release the other resources,
	 * make sure we have all tasks completed and the taskq removed.
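	 * (taskq_destroy() waits for pending and running tasks to finish
	 * before returning, so no lofi_strategy_task() can still reference
	 * this lsp afterwards.)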
	 */
	if (lsp->ls_taskq != NULL) {
		taskq_destroy(lsp->ls_taskq);
		lsp->ls_taskq = NULL;
	}

	list_remove(&lofi_list, lsp);

	lofi_free_crypto(lsp);

	/*
	 * Free pre-allocated compressed buffers
	 */
	if (lsp->ls_comp_bufs != NULL) {
		for (i = 0; i < lofi_taskq_nthreads; i++) {
			if (lsp->ls_comp_bufs[i].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[i].buf,
				    lsp->ls_comp_bufs[i].bufsize);
		}
		kmem_free(lsp->ls_comp_bufs,
		    sizeof (struct compbuf) * lofi_taskq_nthreads);
	}

	if (lsp->ls_vp != NULL) {
		(void) VOP_PUTPAGE(lsp->ls_vp, 0, 0, B_FREE, credp, NULL);
		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
		    1, 0, credp, NULL);
		VN_RELE(lsp->ls_vp);
	}
	if (lsp->ls_stacked_vp != lsp->ls_vp)
		VN_RELE(lsp->ls_stacked_vp);
	lsp->ls_vp = lsp->ls_stacked_vp = NULL;

	if (lsp->ls_kstat != NULL) {
		kstat_delete(lsp->ls_kstat);
		lsp->ls_kstat = NULL;
	}

	/*
	 * Free cached decompressed segment data
	 */
	lofi_free_comp_cache(lsp);
	list_destroy(&lsp->ls_comp_cache);

	if (lsp->ls_uncomp_seg_sz > 0) {
		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
		lsp->ls_uncomp_seg_sz = 0;
	}

	rctl_decr_lofi(lsp->ls_zone.zref_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);

	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);
	cv_destroy(&lsp->ls_vp_cv);
	lsp->ls_vp_ready = B_FALSE;
	lsp->ls_vp_closereq = B_FALSE;

	ASSERT(ddi_get_soft_state(lofi_statep, id) == lsp);
	/*
	 * Instance state is allocated in lofi_attach() and freed in
	 * lofi_detach(). A new instance is created when we create a new
	 * mapping. Instance removal is performed by the unmap ioctl on
	 * the lofi control instance (0).
	 *
	 * If the unmap is performed while the instance is still in use,
	 * we either cancel the unmap with an error, or we can perform a
	 * delayed unmap by blocking all IO, waiting for the consumers to
	 * close their access to this instance, and completing the unmap
	 * once there are no more consumers.
	 *
	 * A delayed unmap will trigger instance removal on the last
	 * lofi_close(), but we cannot remove the device instance while
	 * lofi_close() is still running.
	 * Spawn a task to complete device instance offlining in a
	 * separate thread.
	 */
	(void) taskq_dispatch(system_taskq, lofi_teardown_task, lsp, KM_SLEEP);
}

static void
lofi_free_dev(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_cmlbhandle != NULL) {
		cmlb_invalidate(lsp->ls_cmlbhandle, 0);
		cmlb_detach(lsp->ls_cmlbhandle, 0);
		cmlb_free_handle(&lsp->ls_cmlbhandle);
		lsp->ls_cmlbhandle = NULL;
	}
	(void) ddi_prop_remove_all(lsp->ls_dip);
	ddi_remove_minor_node(lsp->ls_dip, NULL);
}

static void
lofi_zone_shutdown(zoneid_t zoneid, void *arg __unused)
{
	struct lofi_state *lsp;
	struct lofi_state *next;

	mutex_enter(&lofi_lock);

	for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) {

		/* lofi_destroy() frees lsp */
		next = list_next(&lofi_list, lsp);

		if (lsp->ls_zone.zref_zone->zone_id != zoneid)
			continue;

		/*
		 * No in-zone processes are running, but something has this
		 * open. It's either a global zone process, or a lofi
		 * mount.
		 * In either case we set ls_cleanup so the last
		 * user destroys the device.
		 */
		if (is_opened(lsp)) {
			lofi_set_cleanup(lsp);
		} else {
			lofi_free_dev(lsp);
			lofi_destroy(lsp, kcred);
		}
	}

	mutex_exit(&lofi_lock);
}

static int
lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp __unused)
{
	int id;
	minor_t	part;
	uint64_t mask;
	diskaddr_t nblks;
	diskaddr_t lba;
	boolean_t ndelay;

	struct lofi_state *lsp;

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * lofiadm -a /dev/lofi/1 gets us here.
	 */
	if (mutex_owner(&lofi_lock) == curthread)
		return (EINVAL);

	mutex_enter(&lofi_lock);

	id = LOFI_MINOR2ID(getminor(*devp));
	part = LOFI_PART(getminor(*devp));
	mask = (1U << part);

	/* master control device */
	if (id == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	/* otherwise, the mapping should already exist */
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (lsp->ls_cleanup == B_TRUE) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (lsp->ls_readonly && (flag & FWRITE)) {
		mutex_exit(&lofi_lock);
		return (EROFS);
	}

	if ((lsp->ls_open_excl) & (mask)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	if (flag & FEXCL) {
		if (lsp->ls_open_lyr[part]) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (lsp->ls_open_reg[i] & mask) {
				mutex_exit(&lofi_lock);
				return (EBUSY);
			}
		}
	}

	if (lsp->ls_cmlbhandle != NULL) {
		if (cmlb_validate(lsp->ls_cmlbhandle, 0, 0) != 0) {
			/*
			 * Non-blocking opens are allowed to succeed, to
			 * support format and fdisk creating the
			 * partitioning.
			 */
			if (!ndelay) {
				mutex_exit(&lofi_lock);
				return (ENXIO);
			}
		} else if (cmlb_partinfo(lsp->ls_cmlbhandle, part, &nblks, &lba,
		    NULL, NULL, 0) == 0) {
			if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
				mutex_exit(&lofi_lock);
				return (ENXIO);
			}
		} else if (!ndelay) {
			mutex_exit(&lofi_lock);
			return (ENXIO);
		}
	}

	if (otyp == OTYP_LYR) {
		lsp->ls_open_lyr[part]++;
	} else {
		lsp->ls_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		lsp->ls_open_excl |= mask;
	}

	mutex_exit(&lofi_lock);
	return (0);
}

static int
lofi_close(dev_t dev, int flag __unused, int otyp, struct cred *credp)
{
	minor_t	part;
	int id;
	uint64_t mask;
	struct lofi_state *lsp;

	id = LOFI_MINOR2ID(getminor(dev));
	part = LOFI_PART(getminor(dev));
	mask = (1U << part);

	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (id == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	if (lsp->ls_open_excl & mask)
		lsp->ls_open_excl &= ~mask;

	if (otyp == OTYP_LYR) {
		lsp->ls_open_lyr[part]--;
	} else {
		lsp->ls_open_reg[otyp] &= ~mask;
	}

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * out of the door.
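	 * (ls_vp == NULL means a force detach already closed the backing
	 * file; ls_cleanup means a deferred unmap was requested.)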
	 */
	if (!is_opened(lsp) &&
	    (lsp->ls_cleanup == B_TRUE || lsp->ls_vp == NULL)) {
		lofi_free_dev(lsp);
		lofi_destroy(lsp, credp);
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * Sets the mechanism's initialization vector (IV) if one is needed.
 * The IV is computed from the data block number.  lsp->ls_mech is
 * altered so that:
 *	lsp->ls_mech.cm_param_len is set to the IV len.
 *	lsp->ls_mech.cm_param is set to the IV.
 */
static int
lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
{
	int	ret;
	crypto_data_t cdata;
	char	*iv;
	size_t	iv_len;
	size_t	min;
	void	*data;
	size_t	datasz;

	if (lsp == NULL)
		return (CRYPTO_DEVICE_ERROR);

	ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock));

	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
	if (lsp->ls_iv_type == IVM_NONE) {
		return (CRYPTO_SUCCESS);
	}

	/*
	 * if kmem was already allocated from a previous call and it's the
	 * same size we need now, just recycle it; allocate new kmem only
	 * if we have to
	 */
	if (lsp->ls_mech.cm_param == NULL ||
	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
		iv_len = lsp->ls_iv_len;
		iv = kmem_zalloc(iv_len, KM_SLEEP);
	} else {
		iv_len = lsp->ls_mech.cm_param_len;
		iv = lsp->ls_mech.cm_param;
		bzero(iv, iv_len);
	}

	switch (lsp->ls_iv_type) {
	case IVM_ENC_BLKNO:
		/* iv is not static, lblkno changes each time */
		data = &lblkno;
		datasz = sizeof (lblkno);
		break;
	default:
		data = NULL;
		datasz = 0;
		break;
	}

	/*
	 * write blkno into the iv buffer padded on the left in case
	 * blkno ever grows bigger than its current longlong_t size
	 * or a variation other than blkno is used for the iv data
	 */
	min = MIN(datasz, iv_len);
	bcopy(data, iv + (iv_len - min), min);

	/* encrypt the data in-place to get the IV */
	SETUP_C_DATA(cdata, iv, iv_len);

	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
	    NULL, NULL, NULL);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
		    lblkno, ret);
		if (lsp->ls_mech.cm_param != iv)
			kmem_free(iv, iv_len);

		return (ret);
	}

	/* clean up the iv from the last computation */
	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);

	lsp->ls_mech.cm_param_len = iv_len;
	lsp->ls_mech.cm_param = iv;

	return (CRYPTO_SUCCESS);
}

/*
 * Performs encryption and decryption of a chunk of data of size "len",
 * one DEV_BSIZE block at a time.  "len" is assumed to be a multiple of
 * DEV_BSIZE.
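 *
 * For example, a single 4096-byte transfer starting at b_lblkno 10 is
 * performed as eight 512-byte (DEV_BSIZE) crypto operations, using the
 * per-block IVs for blocks 10 through 17.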
 */
static int
lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    caddr_t ciphertext, size_t len, boolean_t op_encrypt)
{
	crypto_data_t cdata;
	crypto_data_t wdata;
	int ret;
	longlong_t lblkno = bp->b_lblkno;

	mutex_enter(&lsp->ls_crypto_lock);

	/*
	 * though we could encrypt/decrypt entire "len" chunk of data, we need
	 * to break it into DEV_BSIZE pieces to capture blkno incrementing
	 */
	SETUP_C_DATA(cdata, plaintext, len);
	cdata.cd_length = DEV_BSIZE;
	if (ciphertext != NULL) {	/* not in-place crypto */
		SETUP_C_DATA(wdata, ciphertext, len);
		wdata.cd_length = DEV_BSIZE;
	}

	do {
		ret = lofi_blk_mech(lsp, lblkno);
		if (ret != CRYPTO_SUCCESS)
			continue;

		if (op_encrypt) {
			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		} else {
			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		}

		cdata.cd_offset += DEV_BSIZE;
		if (ciphertext != NULL)
			wdata.cd_offset += DEV_BSIZE;
		lblkno++;
	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);

	mutex_exit(&lsp->ls_crypto_lock);

	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)",
		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
		    lblkno, ret);
	}

	return (ret);
}

static int
lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp, size_t len, lofi_rdrw_method_t method,
    caddr_t bcopy_locn)
{
	ssize_t	resid;
	int	isread;
	int	error;

	/*
	 * Handles reads/writes for both plain and encrypted lofi
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */

	isread = bp->b_flags & B_READ;
	if (isread) {
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(bcopy_locn, bufaddr, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled && error == 0) {
			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
			    B_FALSE) != CRYPTO_SUCCESS) {
				/*
				 * XXX: original code didn't set residual
				 * back to len because no error was expected
				 * from bcopy() if encryption is not enabled
				 */
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				error = EIO;
			}
		}
		return (error);
	} else {
		void *iobuf = bufaddr;

		if (lsp->ls_crypto_enabled) {
			/* don't do in-place crypto to keep bufaddr intact */
			iobuf = kmem_alloc(len, KM_SLEEP);
			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
			    B_TRUE) != CRYPTO_SUCCESS) {
				kmem_free(iobuf, len);
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				return (EIO);
			}
		}
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(iobuf, bcopy_locn, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled) {
			kmem_free(iobuf, len);
		}
		return (error);
	}
}

static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp)
{
	int error;
	offset_t alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;
	int	save_error;

	/*
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */
	if (lsp->ls_crypto_enabled)
		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid
	 *
	 * Visually, where "|" represents page map boundaries:
	 *   alignedoffset (mapaddr begins at this segmap boundary)
	 *    |   offset (from beginning of file)
	 *    |    |	   len
	 *    v    v	    v
	 * ===|====X========|====...======|========X====|====
	 *	   /-------------...---------------/
	 *	    ^ bp->b_bcount/bp->b_resid at start
	 *	   /----/--------/----...------/--------/
	 *	    ^	 ^	  ^	       ^
	 *	    |	 |	  |	       nth xfersize (<= MAXBSIZE)
	 *	    |	 |	  2nd thru n-1st xfersize (= MAXBSIZE)
	 *	    |	 1st xfersize (<= MAXBSIZE)
	 *	    mapoffset (offset into 1st segmap, non-0 1st time,
	 *	    0 thereafter)
	 *
	 * Notes: "alignedoffset" is "offset" rounded down to nearest
	 * MAXBSIZE boundary.  "len" is next page boundary of size
	 * PAGESIZE after "alignedoffset".
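	 *
	 * A worked example with MAXBSIZE = 8192: for offset = 20992
	 * (0x5200), mapoffset = 20992 & 8191 = 4608 and alignedoffset =
	 * 16384, so the first chunk transfers at most 8192 - 4608 = 3584
	 * bytes; every following chunk starts at mapoffset 0.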
	 */
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		/* error may be non-zero for encrypted lofi */
		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
		    RDWR_BCOPY, mapaddr + mapoffset);
		if (error == 0) {
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
		}
		smflags = 0;
		if (isread) {
			smflags |= SM_FREE;
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == MAXBSIZE)
				smflags |= SM_DONTNEED;
		} else {
			/*
			 * Write back good pages, it is okay to
			 * always release asynchronous here as we'll
			 * follow with VOP_FSYNC for B_SYNC buffers.
			 */
			if (error == 0)
				smflags |= SM_WRITE | SM_ASYNC;
		}
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		save_error = segmap_release(segkmap, mapaddr, smflags);
		if (error == 0)
			error = save_error;
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}

/*
 * Check if segment seg_index is present in the decompressed segment
 * data cache.
 *
 * Returns a pointer to the decompressed segment data cache entry if
 * found, and NULL when decompressed data for this segment is not yet
 * cached.
 */
static struct lofi_comp_cache *
lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	for (lc = list_head(&lsp->ls_comp_cache); lc != NULL;
	    lc = list_next(&lsp->ls_comp_cache, lc)) {
		if (lc->lc_index == seg_index) {
			/*
			 * Decompressed segment data was found in the
			 * cache.
			 *
			 * The cache uses an LRU replacement strategy;
			 * move the entry to head of list.
			 */
			list_remove(&lsp->ls_comp_cache, lc);
			list_insert_head(&lsp->ls_comp_cache, lc);
			return (lc);
		}
	}
	return (NULL);
}

/*
 * Add the data for a decompressed segment at segment index
 * seg_index to the cache of the decompressed segments.
 *
 * Returns a pointer to the cache element structure in case
 * the data was added to the cache; returns NULL when the data
 * wasn't cached.
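 *
 * On success the cache takes ownership of "data": the caller must not
 * free it, since it will be freed along with the cache entry (see how
 * lofi_strategy_task() clears its uncompressed_seg pointer when this
 * function returns non-NULL).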
 */
static struct lofi_comp_cache *
lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index,
    uchar_t *data)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	while (lsp->ls_comp_cache_count > lofi_max_comp_cache) {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		ASSERT(lc != NULL);
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}

	/*
	 * Do not cache when disabled by tunable variable
	 */
	if (lofi_max_comp_cache == 0)
		return (NULL);

	/*
	 * When the cache has not yet reached the maximum allowed
	 * number of segments, allocate a new cache element.
	 * Otherwise the cache is full; reuse the last list element
	 * (LRU) for caching the decompressed segment data.
	 *
	 * The cache element for the new decompressed segment data is
	 * added to the head of the list.
	 */
	if (lsp->ls_comp_cache_count < lofi_max_comp_cache) {
		lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP);
		lc->lc_data = NULL;
		list_insert_head(&lsp->ls_comp_cache, lc);
		lsp->ls_comp_cache_count++;
	} else {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		if (lc == NULL)
			return (NULL);
		list_insert_head(&lsp->ls_comp_cache, lc);
	}

	/*
	 * Free old uncompressed segment data when reusing a cache
	 * entry.
	 */
	if (lc->lc_data != NULL)
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);

	lc->lc_data = data;
	lc->lc_index = seg_index;
	return (lc);
}

static int
gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level __unused)
{
	ASSERT(*dstlen >= srclen);

	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
		return (-1);
	return (0);
}

#define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
static int
lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level __unused)
{
	size_t insizepure;
	void *actual_src;
	ELzmaStatus status;

	insizepure = srclen - LZMA_HEADER_SIZE;
	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);

	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
	    (const Byte *)actual_src, &insizepure,
	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
	    &g_Alloc) != SZ_OK) {
		return (-1);
	}
	return (0);
}

static void
lofi_trim_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	diskaddr_t p_lba = (diskaddr_t)(uintptr_t)bp->b_private;
	struct lofi_state *lsp;
	off64_t start, length;
	int error;

	lsp = ddi_get_soft_state(lofi_statep,
	    LOFI_MINOR2ID(getminor(bp->b_edev)));

	if (lsp == NULL) {
		error = ENXIO;
		goto errout;
	}

	if (lsp->ls_kstat != NULL) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	start = (bp->b_lblkno + p_lba) << lsp->ls_lbshift;
	length = bp->b_bcount;

	if (lsp->ls_vp->v_type == VCHR || lsp->ls_vp->v_type == VBLK) {
		int rv;
		dkioc_free_list_t dfl = {
			.dfl_num_exts = 1,
			.dfl_offset = 0,
			.dfl_flags = 0,
			.dfl_exts = {
				{
					.dfle_start = start,
					.dfle_length = length
				}
			}
		};

		error = VOP_IOCTL(lsp->ls_vp, DKIOCFREE, (intptr_t)&dfl,
		    FKIOCTL, kcred, &rv, NULL);
	} else {
		struct flock64 flck = { 0 };

		flck.l_start = start;
		flck.l_len = length;
		flck.l_type = F_FREESP;
		flck.l_whence = 0;

		error = VOP_SPACE(lsp->ls_vp, F_FREESP, &flck, 0, 0, kcred,
		    NULL);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

errout:

	if (lsp != NULL && lsp->ls_kstat != NULL) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_runq_exit(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	bioerror(bp, error);
	biodone(bp);
}

/*
 * This is basically what strategy used to be before we found we
 * needed task queues.
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	diskaddr_t p_lba = (diskaddr_t)(uintptr_t)bp->b_private;
	int error;
	int syncflag = 0;
	struct lofi_state *lsp;
	offset_t offset;
	caddr_t	bufaddr;
	size_t	len;
	size_t	xfersize;
	boolean_t bufinited = B_FALSE;

	lsp = ddi_get_soft_state(lofi_statep,
	    LOFI_MINOR2ID(getminor(bp->b_edev)));

	if (lsp == NULL) {
		error = ENXIO;
		goto errout;
	}
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	/* offset within file */
	offset = (bp->b_lblkno + p_lba) << lsp->ls_lbshift;
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	len = bp->b_bcount;
	bufinited = B_TRUE;

	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	/*
	 * If we're writing and the buffer was not B_ASYNC
	 * we'll follow up with a VOP_FSYNC() to force any
	 * asynchronous I/O to stable storage.
	 */
	if (!(bp->b_flags & B_READ) && !(bp->b_flags & B_ASYNC))
		syncflag = FSYNC;

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mappable or it's a character device or it's an encrypted lofi.
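	 * (VNOMAP vnodes and character devices cannot be accessed via
	 * segmap, so those cases, like encrypted lofi, fall back to the
	 * vn_rdwr() path in lofi_rdwr() below.)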
	 */
	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
	    lsp->ls_crypto_enabled) {
		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
		    NULL);
	} else if (lsp->ls_uncomp_seg_sz == 0) {
		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
	} else {
		uchar_t *compressed_seg = NULL, *cmpbuf;
		uchar_t *uncompressed_seg = NULL;
		lofi_compress_info_t *li;
		size_t oblkcount;
		ulong_t seglen;
		uint64_t sblkno, eblkno, cmpbytes;
		uint64_t uncompressed_seg_index;
		struct lofi_comp_cache *lc;
		offset_t sblkoff, eblkoff;
		u_offset_t salign, ealign;
		u_offset_t sdiff;
		uint32_t comp_data_sz;
		uint64_t i;
		int j;

		/*
		 * From here on we're dealing primarily with compressed files
		 */
		ASSERT(!lsp->ls_crypto_enabled);

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) &
		    (lsp->ls_uncomp_seg_sz - 1);

		/*
		 * Check the decompressed segment cache.
		 *
		 * The cache is used only when the requested data
		 * is within a segment. Requests that cross
		 * segment boundaries bypass the cache.
		 */
		if (sblkno == eblkno ||
		    (sblkno + 1 == eblkno && eblkoff == 0)) {
			/*
			 * Request doesn't cross a segment boundary,
			 * now check the cache.
			 */
			mutex_enter(&lsp->ls_comp_cache_lock);
			lc = lofi_find_comp_data(lsp, sblkno);
			if (lc != NULL) {
				/*
				 * We've found the decompressed segment
				 * data in the cache; reuse it.
				 */
				bcopy(lc->lc_data + sblkoff, bufaddr,
				    bp->b_bcount);
				mutex_exit(&lsp->ls_comp_cache_lock);
				bp->b_resid = 0;
				error = 0;
				goto done;
			}
			mutex_exit(&lsp->ls_comp_cache_lock);
		}

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request parameters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Buffers to hold compressed segments are pre-allocated
		 * on a per-thread basis. Find a pre-allocated buffer
		 * that is not currently in use and mark it for use.
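		 * There are lofi_taskq_nthreads buffers and at most that
		 * many tasks can be running at once, so the search below
		 * must always find a free slot (hence the ASSERT after
		 * the loop).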
		 */
		mutex_enter(&lsp->ls_comp_bufs_lock);
		for (j = 0; j < lofi_taskq_nthreads; j++) {
			if (lsp->ls_comp_bufs[j].inuse == 0) {
				lsp->ls_comp_bufs[j].inuse = 1;
				break;
			}
		}

		mutex_exit(&lsp->ls_comp_bufs_lock);
		ASSERT(j < lofi_taskq_nthreads);

		/*
		 * If the pre-allocated buffer size does not match
		 * the size of the I/O request, re-allocate it with
		 * the appropriate size
		 */
		if (lsp->ls_comp_bufs[j].bufsize < bp->b_bcount) {
			if (lsp->ls_comp_bufs[j].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[j].buf,
				    lsp->ls_comp_bufs[j].bufsize);
			lsp->ls_comp_bufs[j].buf = kmem_alloc(bp->b_bcount,
			    KM_SLEEP);
			lsp->ls_comp_bufs[j].bufsize = bp->b_bcount;
		}
		compressed_seg = lsp->ls_comp_bufs[j].buf;

		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * decompress compressed blocks start
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i <= eblkno; i++) {
			ASSERT(i < lsp->ls_comp_index_sz - 1);
			uchar_t *useg;

			/*
			 * The last segment is special in that it is
			 * most likely not going to be the same
			 * (uncompressed) size as the other segments.
			 */
			if (i == (lsp->ls_comp_index_sz - 2)) {
				seglen = lsp->ls_uncomp_last_seg_sz;
			} else {
				seglen = lsp->ls_uncomp_seg_sz;
			}

			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
			    lsp->ls_comp_seg_index[i];

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all.
			 *
			 * The variable 'useg' is used (instead of
			 * uncompressed_seg) in this loop to keep a
			 * reference to the uncompressed segment.
			 *
			 * N.B. If 'useg' is replaced with uncompressed_seg,
			 * it leads to memory leaks and heap corruption in
			 * corner cases where compressed segments lie
			 * adjacent to uncompressed segments.
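			 *
			 * Hypothetical worked example: with
			 * ls_comp_seg_index[i] == 0x4000 and
			 * ls_comp_seg_index[i + 1] == 0x4860, segment i
			 * occupies cmpbytes == 0x860 bytes in the file:
			 * the SEGHDR flag, followed by cmpbytes - SEGHDR
			 * payload bytes that are handed to l_decompress()
			 * below.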
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				useg = cmpbuf + SEGHDR;
			} else {
				if (uncompressed_seg == NULL)
					uncompressed_seg =
					    kmem_alloc(lsp->ls_uncomp_seg_sz,
					    KM_SLEEP);
				useg = uncompressed_seg;
				uncompressed_seg_index = i;

				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno)
				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);

			bcopy((useg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		} /* decompress compressed blocks ends */

		/*
		 * Skip to done if there is no uncompressed data to cache
		 */
		if (uncompressed_seg == NULL)
			goto done;

		/*
		 * Add the data for the last decompressed segment to
		 * the cache.
		 *
		 * In case the uncompressed segment data was added to (and
		 * is referenced by) the cache, make sure we don't free it
		 * here.
		 */
		mutex_enter(&lsp->ls_comp_cache_lock);
		if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index,
		    uncompressed_seg)) != NULL) {
			uncompressed_seg = NULL;
		}
		mutex_exit(&lsp->ls_comp_cache_lock);

done:
		if (compressed_seg != NULL) {
			mutex_enter(&lsp->ls_comp_bufs_lock);
			lsp->ls_comp_bufs[j].inuse = 0;
			mutex_exit(&lsp->ls_comp_bufs_lock);
		}
		if (uncompressed_seg != NULL)
			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
	} /* end of handling compressed files */

	if ((error == 0) && (syncflag != 0))
		error = VOP_FSYNC(lsp->ls_vp, syncflag, kcred, NULL);

errout:
	if (bufinited && lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}

static int
lofi_strategy_backend(struct buf *bp, task_func_t taskfunc)
{
	struct lofi_state *lsp;
	offset_t	offset;
	minor_t		part;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	int		shift;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
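	 *
	 * The partition start (p_lba) is passed to the task function
	 * through bp->b_private, so that lofi_strategy_task() and
	 * lofi_trim_task() can reconstruct the offset within the file.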
	 */

	lsp = ddi_get_soft_state(lofi_statep,
	    LOFI_MINOR2ID(getminor(bp->b_edev)));
	part = LOFI_PART(getminor(bp->b_edev));

	if (lsp == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	/* Check if we are closing. */
	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		mutex_exit(&lsp->ls_vp_lock);
		bioerror(bp, EIO);
		biodone(bp);
		return (0);
	}
	mutex_exit(&lsp->ls_vp_lock);

	shift = lsp->ls_lbshift;
	p_lba = 0;
	p_nblks = lsp->ls_vp_size >> shift;

	if (lsp->ls_cmlbhandle != NULL) {
		if (cmlb_partinfo(lsp->ls_cmlbhandle, part, &p_nblks, &p_lba,
		    NULL, NULL, 0)) {
			bioerror(bp, ENXIO);
			biodone(bp);
			return (0);
		}
	}

	/* start block past partition end? */
	if (bp->b_lblkno > p_nblks) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	offset = (bp->b_lblkno + p_lba) << shift;	/* offset within file */

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}

	/* make sure we will not pass the file or partition size */
	if (offset == lsp->ls_vp_size ||
	    offset == (((p_lba + p_nblks) << shift) + lsp->ls_crypto_offset)) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	if ((offset > lsp->ls_vp_size) ||
	    (offset > (((p_lba + p_nblks) << shift) +
	    lsp->ls_crypto_offset)) ||
	    ((offset + bp->b_bcount) > ((p_lba + p_nblks) << shift))) {
		bioerror(bp, ENXIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}

	mutex_exit(&lsp->ls_vp_lock);

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp->b_private = (void *)(uintptr_t)p_lba;	/* partition start */
	(void) taskq_dispatch(lsp->ls_taskq, taskfunc, bp, KM_SLEEP);
	return (0);
}

static int
lofi_strategy(struct buf *bp)
{
	return (lofi_strategy_backend(bp, lofi_strategy_task));
}

static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp __unused)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}

static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp __unused)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}

static int
lofi_urw(struct lofi_state *lsp, uint16_t fmode, diskaddr_t off, size_t size,
    intptr_t arg, int flag, cred_t *credp)
{
	struct uio uio;
	iovec_t iov;

	/*
	 * 1024 * 1024 apes cmlb_tg_max_efi_xfer as a reasonable max.
	 */
	if (size == 0 || size > 1024 * 1024 ||
	    (size % (1 << lsp->ls_lbshift)) != 0)
		return (EINVAL);

	iov.iov_base = (void *)arg;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = off;
	uio.uio_segflg = (flag & FKIOCTL) ? UIO_SYSSPACE : UIO_USERSPACE;
UIO_SYSSPACE : UIO_USERSPACE; 1899 uio.uio_llimit = MAXOFFSET_T; 1900 uio.uio_resid = size; 1901 uio.uio_fmode = fmode; 1902 uio.uio_extflg = 0; 1903 1904 return (fmode == FREAD ? 1905 lofi_read(lsp->ls_dev, &uio, credp) : 1906 lofi_write(lsp->ls_dev, &uio, credp)); 1907 } 1908 1909 typedef struct { 1910 struct lofi_state *lcd_lsp; 1911 dev_t lcd_dev; 1912 } lofi_cb_data_t; 1913 1914 static int 1915 lofi_free_space_cb(dkioc_free_list_t *dfl, void *arg, int kmflag __unused) 1916 { 1917 dkioc_free_list_ext_t *ext; 1918 lofi_cb_data_t *cbd = arg; 1919 struct lofi_state *lsp = cbd->lcd_lsp; 1920 buf_t *bp = NULL; 1921 int error = 0; 1922 1923 bp = getrbuf(KM_SLEEP); 1924 1925 ext = dfl->dfl_exts; 1926 for (uint_t i = 0; i < dfl->dfl_num_exts; i++, ext++) { 1927 uint64_t start = dfl->dfl_offset + ext->dfle_start; 1928 uint64_t length = ext->dfle_length; 1929 1930 bp->b_edev = cbd->lcd_dev; 1931 bp->b_flags = B_WRITE; 1932 bp->b_un.b_addr = NULL; 1933 bp->b_resid = 0; 1934 bp->b_lblkno = start >> lsp->ls_lbshift; 1935 bp->b_bcount = length; 1936 1937 DTRACE_PROBE2(trim__issued, uint64_t, start, uint64_t, length); 1938 1939 error = lofi_strategy_backend(bp, lofi_trim_task); 1940 if (error != 0) 1941 break; 1942 (void) biowait(bp); 1943 } 1944 1945 freerbuf(bp); 1946 dfl_free(dfl); 1947 return (error); 1948 } 1949 1950 static int 1951 lofi_free_space(struct lofi_state *lsp, dkioc_free_list_t *dfl, dev_t dev) 1952 { 1953 dkioc_free_info_t dfi = { 1954 .dfi_bshift = lsp->ls_lbshift, 1955 .dfi_align = 1U << lsp->ls_lbshift, 1956 .dfi_max_bytes = 0, 1957 .dfi_max_ext = 0, 1958 .dfi_max_ext_bytes = 0 1959 }; 1960 1961 lofi_cb_data_t cbd = { 1962 .lcd_lsp = lsp, 1963 .lcd_dev = dev 1964 }; 1965 1966 return (dfl_iter(dfl, &dfi, lsp->ls_vp_size, lofi_free_space_cb, 1967 &cbd, KM_SLEEP)); 1968 } 1969 1970 static int 1971 lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp __unused) 1972 { 1973 if (getminor(dev) == 0) 1974 return (EINVAL); 1975 UIO_CHECK(aio->aio_uio); 1976 return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio)); 1977 } 1978 1979 static int 1980 lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp __unused) 1981 { 1982 if (getminor(dev) == 0) 1983 return (EINVAL); 1984 UIO_CHECK(aio->aio_uio); 1985 return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio)); 1986 } 1987 1988 static int 1989 lofi_info(dev_info_t *dip __unused, ddi_info_cmd_t infocmd, void *arg, 1990 void **result) 1991 { 1992 struct lofi_state *lsp; 1993 dev_t dev = (dev_t)arg; 1994 int instance; 1995 1996 instance = LOFI_MINOR2ID(getminor(dev)); 1997 switch (infocmd) { 1998 case DDI_INFO_DEVT2DEVINFO: 1999 lsp = ddi_get_soft_state(lofi_statep, instance); 2000 if (lsp == NULL) 2001 return (DDI_FAILURE); 2002 *result = lsp->ls_dip; 2003 return (DDI_SUCCESS); 2004 case DDI_INFO_DEVT2INSTANCE: 2005 *result = (void *) (intptr_t)instance; 2006 return (DDI_SUCCESS); 2007 } 2008 return (DDI_FAILURE); 2009 } 2010 2011 static int 2012 lofi_create_minor_nodes(struct lofi_state *lsp, boolean_t labeled) 2013 { 2014 int error = 0; 2015 int instance = ddi_get_instance(lsp->ls_dip); 2016 2017 if (labeled == B_TRUE) { 2018 cmlb_alloc_handle(&lsp->ls_cmlbhandle); 2019 error = cmlb_attach(lsp->ls_dip, &lofi_tg_ops, DTYPE_DIRECT, 2020 B_FALSE, B_FALSE, DDI_NT_BLOCK_CHAN, 2021 CMLB_CREATE_P0_MINOR_NODE, lsp->ls_cmlbhandle, (void *)1); 2022 2023 if (error != DDI_SUCCESS) { 2024 cmlb_free_handle(&lsp->ls_cmlbhandle); 2025 lsp->ls_cmlbhandle = NULL; 2026 error = ENXIO; 2027 } 2028 } else { 2029 /* 
create minor nodes */ 2030 error = ddi_create_minor_node(lsp->ls_dip, LOFI_BLOCK_NODE, 2031 S_IFBLK, LOFI_ID2MINOR(instance), DDI_PSEUDO, 0); 2032 if (error == DDI_SUCCESS) { 2033 error = ddi_create_minor_node(lsp->ls_dip, 2034 LOFI_CHAR_NODE, S_IFCHR, LOFI_ID2MINOR(instance), 2035 DDI_PSEUDO, 0); 2036 if (error != DDI_SUCCESS) { 2037 ddi_remove_minor_node(lsp->ls_dip, 2038 LOFI_BLOCK_NODE); 2039 error = ENXIO; 2040 } 2041 } else 2042 error = ENXIO; 2043 } 2044 return (error); 2045 } 2046 2047 static int 2048 lofi_zone_bind(struct lofi_state *lsp) 2049 { 2050 int error = 0; 2051 2052 mutex_enter(&curproc->p_lock); 2053 if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) { 2054 mutex_exit(&curproc->p_lock); 2055 return (error); 2056 } 2057 mutex_exit(&curproc->p_lock); 2058 2059 if (ddi_prop_update_string(DDI_DEV_T_NONE, lsp->ls_dip, ZONE_PROP_NAME, 2060 (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) { 2061 rctl_decr_lofi(curproc->p_zone, 1); 2062 error = EINVAL; 2063 } else { 2064 zone_init_ref(&lsp->ls_zone); 2065 zone_hold_ref(curzone, &lsp->ls_zone, ZONE_REF_LOFI); 2066 } 2067 return (error); 2068 } 2069 2070 static void 2071 lofi_zone_unbind(struct lofi_state *lsp) 2072 { 2073 (void) ddi_prop_remove(DDI_DEV_T_NONE, lsp->ls_dip, ZONE_PROP_NAME); 2074 rctl_decr_lofi(curproc->p_zone, 1); 2075 zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI); 2076 } 2077 2078 static int 2079 lofi_online_dev(dev_info_t *dip) 2080 { 2081 boolean_t labeled; 2082 int error; 2083 int instance = ddi_get_instance(dip); 2084 struct lofi_state *lsp; 2085 2086 labeled = B_FALSE; 2087 if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "labeled")) 2088 labeled = B_TRUE; 2089 2090 /* lsp alloc+init, soft state is freed in lofi_detach */ 2091 error = ddi_soft_state_zalloc(lofi_statep, instance); 2092 if (error == DDI_FAILURE) { 2093 return (ENOMEM); 2094 } 2095 2096 lsp = ddi_get_soft_state(lofi_statep, instance); 2097 lsp->ls_dip = dip; 2098 2099 if ((error = lofi_zone_bind(lsp)) != 0) 2100 goto err; 2101 2102 cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL); 2103 mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL); 2104 mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL); 2105 mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL); 2106 mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL); 2107 2108 if ((error = lofi_create_minor_nodes(lsp, labeled)) != 0) { 2109 lofi_zone_unbind(lsp); 2110 goto lerr; 2111 } 2112 2113 /* driver handles kernel-issued IOCTLs */ 2114 if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, 2115 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { 2116 error = DDI_FAILURE; 2117 goto merr; 2118 } 2119 2120 lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, instance, 2121 NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid()); 2122 if (lsp->ls_kstat == NULL) { 2123 (void) ddi_prop_remove(DDI_DEV_T_NONE, lsp->ls_dip, 2124 DDI_KERNEL_IOCTL); 2125 error = ENOMEM; 2126 goto merr; 2127 } 2128 2129 lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock; 2130 kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID); 2131 kstat_install(lsp->ls_kstat); 2132 return (DDI_SUCCESS); 2133 merr: 2134 if (lsp->ls_cmlbhandle != NULL) { 2135 cmlb_detach(lsp->ls_cmlbhandle, 0); 2136 cmlb_free_handle(&lsp->ls_cmlbhandle); 2137 } 2138 ddi_remove_minor_node(dip, NULL); 2139 lofi_zone_unbind(lsp); 2140 lerr: 2141 mutex_destroy(&lsp->ls_comp_cache_lock); 2142 mutex_destroy(&lsp->ls_comp_bufs_lock); 2143 mutex_destroy(&lsp->ls_kstat_lock); 2144 mutex_destroy(&lsp->ls_vp_lock); 2145 
cv_destroy(&lsp->ls_vp_cv);
2146 err:
2147 ddi_soft_state_free(lofi_statep, instance);
2148 return (error);
2149 }
2150
2151 static int
2152 lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2153 {
2154 int rv;
2155 int instance = ddi_get_instance(dip);
2156 struct lofi_state *lsp;
2157
2158 if (cmd != DDI_ATTACH)
2159 return (DDI_FAILURE);
2160
2161 /*
2162 * Instance 0 is the control instance; attaching the control instance
2163 * sets lofi up and ready.
2164 */
2165 if (instance == 0) {
2166 rv = ddi_soft_state_zalloc(lofi_statep, 0);
2167 if (rv == DDI_FAILURE) {
2168 return (DDI_FAILURE);
2169 }
2170 lsp = ddi_get_soft_state(lofi_statep, instance);
2171 rv = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
2172 DDI_PSEUDO, 0);
2173 if (rv == DDI_FAILURE) {
2174 ddi_soft_state_free(lofi_statep, 0);
2175 return (DDI_FAILURE);
2176 }
2177 /* driver handles kernel-issued IOCTLs */
2178 if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
2179 DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
2180 ddi_remove_minor_node(dip, NULL);
2181 ddi_soft_state_free(lofi_statep, 0);
2182 return (DDI_FAILURE);
2183 }
2184
2185 zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL);
2186
2187 lsp->ls_dip = dip;
2188 } else {
2189 if (lofi_online_dev(dip) != DDI_SUCCESS)
2190 return (DDI_FAILURE);
2191 }
2192
2193 ddi_report_dev(dip);
2194 return (DDI_SUCCESS);
2195 }
2196
2197 static int
2198 lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2199 {
2200 struct lofi_state *lsp;
2201 int instance = ddi_get_instance(dip);
2202
2203 if (cmd != DDI_DETACH)
2204 return (DDI_FAILURE);
2205
2206 /*
2207 * If the instance is not 0, release its state.
2208 * Instance 0 is the control device; we cannot detach it
2209 * before all other instances have been detached.
2210 */
2211 if (instance != 0) {
2212 lsp = ddi_get_soft_state(lofi_statep, instance);
2213 if (lsp != NULL && lsp->ls_vp_ready == B_FALSE) {
2214 ddi_soft_state_free(lofi_statep, instance);
2215 return (DDI_SUCCESS);
2216 } else
2217 return (DDI_FAILURE);
2218 }
2219 mutex_enter(&lofi_lock);
2220
2221 if (!list_is_empty(&lofi_list)) {
2222 mutex_exit(&lofi_lock);
2223 return (DDI_FAILURE);
2224 }
2225
2226 ddi_remove_minor_node(dip, NULL);
2227 ddi_prop_remove_all(dip);
2228
2229 mutex_exit(&lofi_lock);
2230
2231 if (zone_key_delete(lofi_zone_key) != 0)
2232 cmn_err(CE_WARN, "failed to delete zone key");
2233
2234 ddi_soft_state_free(lofi_statep, 0);
2235
2236 return (DDI_SUCCESS);
2237 }
2238
2239 /*
2240 * With the addition of encryption, we must be careful that the encryption key
2241 * is wiped before the kernel's data structures are freed so it cannot
2242 * accidentally slip out to userland through uninitialized data elsewhere.
2243 */
2244 static void
2245 free_lofi_ioctl(struct lofi_ioctl *klip)
2246 {
2247 /* Make sure this encryption key doesn't stick around */
2248 bzero(klip->li_key, sizeof (klip->li_key));
2249 kmem_free(klip, sizeof (struct lofi_ioctl));
2250 }
2251
2252 /*
2253 * These two functions simplify the rest of the ioctls that need to copyin/out
2254 * the lofi_ioctl structure.
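 *
 * On success, copy_in_lofi_ioctl() hands back a kmem-allocated copy
 * that the caller must eventually release with free_lofi_ioctl();
 * on failure the copy has already been freed here.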
2255 */ 2256 int 2257 copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp, 2258 int flag) 2259 { 2260 struct lofi_ioctl *klip; 2261 int error; 2262 2263 klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP); 2264 error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag); 2265 if (error) 2266 goto err; 2267 2268 /* ensure NULL termination */ 2269 klip->li_filename[MAXPATHLEN-1] = '\0'; 2270 klip->li_devpath[MAXPATHLEN-1] = '\0'; 2271 klip->li_algorithm[MAXALGLEN-1] = '\0'; 2272 klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; 2273 klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0'; 2274 2275 if (klip->li_id > L_MAXMIN32) { 2276 error = EINVAL; 2277 goto err; 2278 } 2279 2280 return (0); 2281 2282 err: 2283 free_lofi_ioctl(klip); 2284 return (error); 2285 } 2286 2287 int 2288 copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip, 2289 int flag) 2290 { 2291 int error; 2292 2293 /* 2294 * NOTE: Do NOT copy the crypto_key_t "back" to userland. 2295 * This ensures that an attacker can't trivially find the 2296 * key for a mapping just by issuing the ioctl. 2297 * 2298 * It can still be found by poking around in kmem with mdb(1), 2299 * but there is no point in making it easy when the info isn't 2300 * of any use in this direction anyway. 2301 * 2302 * Either way we don't actually have the raw key stored in 2303 * a form that we can get it anyway, since we just used it 2304 * to create a ctx template and didn't keep "the original". 2305 */ 2306 error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag); 2307 if (error) 2308 return (EFAULT); 2309 return (0); 2310 } 2311 2312 static int 2313 lofi_access(struct lofi_state *lsp) 2314 { 2315 ASSERT(MUTEX_HELD(&lofi_lock)); 2316 if (INGLOBALZONE(curproc) || lsp->ls_zone.zref_zone == curzone) 2317 return (0); 2318 return (EPERM); 2319 } 2320 2321 /* 2322 * Find the lofi state for the given filename. We compare by vnode to 2323 * allow the global zone visibility into NGZ lofi nodes. 2324 */ 2325 static int 2326 file_to_lofi_nocheck(char *filename, boolean_t readonly, 2327 struct lofi_state **lspp) 2328 { 2329 struct lofi_state *lsp; 2330 vnode_t *vp = NULL; 2331 int err = 0; 2332 int rdfiles = 0; 2333 2334 ASSERT(MUTEX_HELD(&lofi_lock)); 2335 2336 if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW, 2337 NULLVPP, &vp)) != 0) 2338 goto out; 2339 2340 if (vp->v_type == VREG) { 2341 vnode_t *realvp; 2342 if (VOP_REALVP(vp, &realvp, NULL) == 0) { 2343 VN_HOLD(realvp); 2344 VN_RELE(vp); 2345 vp = realvp; 2346 } 2347 } 2348 2349 for (lsp = list_head(&lofi_list); lsp != NULL; 2350 lsp = list_next(&lofi_list, lsp)) { 2351 if (lsp->ls_vp == vp) { 2352 if (lspp != NULL) 2353 *lspp = lsp; 2354 if (lsp->ls_readonly) { 2355 rdfiles++; 2356 /* Skip if '-r' is specified */ 2357 if (readonly) 2358 continue; 2359 } 2360 goto out; 2361 } 2362 } 2363 2364 err = ENOENT; 2365 2366 /* 2367 * If a filename is given as an argument for lofi_unmap, we shouldn't 2368 * allow unmap if there are multiple read-only lofi devices associated 2369 * with this file. 2370 */ 2371 if (lspp != NULL) { 2372 if (rdfiles == 1) 2373 err = 0; 2374 else if (rdfiles > 1) 2375 err = EBUSY; 2376 } 2377 2378 out: 2379 if (vp != NULL) 2380 VN_RELE(vp); 2381 return (err); 2382 } 2383 2384 /* 2385 * Find the minor for the given filename, checking the zone can access 2386 * it. 
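 * Callers must hold lofi_lock; on success *lspp points at the
 * matching lofi state.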
2387 */
2388 static int
2389 file_to_lofi(char *filename, boolean_t readonly, struct lofi_state **lspp)
2390 {
2391 int err = 0;
2392
2393 ASSERT(MUTEX_HELD(&lofi_lock));
2394
2395 if ((err = file_to_lofi_nocheck(filename, readonly, lspp)) != 0)
2396 return (err);
2397
2398 if ((err = lofi_access(*lspp)) != 0)
2399 return (err);
2400
2401 return (0);
2402 }
2403
2404 /*
2405 * Fakes up a disk geometry based on the size of the file. This is needed
2406 * to support newfs on the traditional lofi device, and it also provides
2407 * a geometry hint for cmlb.
2408 */
2409 static void
2410 fake_disk_geometry(struct lofi_state *lsp)
2411 {
2412 u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;
2413
2414 /* dk_geom - see dkio(4I) */
2415 /*
2416 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
2417 * of sectors), but that breaks programs like fdisk which want to
2418 * partition a disk by cylinder. With one cylinder, you can't create
2419 * an fdisk partition and put pcfs on it for testing (hard to pick
2420 * a number between one and one).
2421 *
2422 * The cheezy floppy test is an attempt to not have too few cylinders
2423 * for a small file, or so many on a big file that you waste space
2424 * for backup superblocks or cylinder group structures.
2425 */
2426 bzero(&lsp->ls_dkg, sizeof (lsp->ls_dkg));
2427 if (dsize < (2 * 1024 * 1024)) /* floppy? */
2428 lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
2429 else
2430 lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
2431 /* in case the file is < 100k */
2432 if (lsp->ls_dkg.dkg_ncyl == 0)
2433 lsp->ls_dkg.dkg_ncyl = 1;
2434
2435 lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl;
2436 lsp->ls_dkg.dkg_nhead = 1;
2437 lsp->ls_dkg.dkg_rpm = 7200;
2438
2439 lsp->ls_dkg.dkg_nsect = dsize /
2440 (lsp->ls_dkg.dkg_ncyl << lsp->ls_pbshift);
2441 }
2442
2443 /*
2444 * build vtoc - see dkio(4I)
2445 *
2446 * Fakes one big partition based on the size of the file. This is needed
2447 * because we allow newfs'ing the traditional lofi device and newfs will
2448 * do several disk ioctls to figure out the geometry and partition information.
2449 * It uses that information to determine the parameters to pass to mkfs.
2450 */
2451 static void
2452 fake_disk_vtoc(struct lofi_state *lsp, struct vtoc *vt)
2453 {
2454 bzero(vt, sizeof (struct vtoc));
2455 vt->v_sanity = VTOC_SANE;
2456 vt->v_version = V_VERSION;
2457 (void) strncpy(vt->v_volume, LOFI_DRIVER_NAME,
2458 sizeof (vt->v_volume));
2459 vt->v_sectorsz = 1 << lsp->ls_pbshift;
2460 vt->v_nparts = 1;
2461 vt->v_part[0].p_tag = V_UNASSIGNED;
2462
2463 /*
2464 * A compressed file is read-only; other files can
2465 * be read-write.
2466 */
2467 if (lsp->ls_uncomp_seg_sz > 0) {
2468 vt->v_part[0].p_flag = V_UNMNT | V_RONLY;
2469 } else {
2470 vt->v_part[0].p_flag = V_UNMNT;
2471 }
2472 vt->v_part[0].p_start = (daddr_t)0;
2473 /*
2474 * The partition size cannot just be the number of sectors, because
2475 * that might not end on a cylinder boundary. And if that's the case,
2476 * newfs/mkfs will print a scary warning. So just figure the size
2477 * based on the number of cylinders and sectors/cylinder.
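 *
 * A worked example, assuming 512-byte sectors: a 10 MB file yields
 * dkg_ncyl = dkg_pcyl = 34 (10 MB / 300 KB) and dkg_nsect = 602, so
 * p_size = 34 * 602 * 1 = 20468 sectors, slightly under the file
 * size but ending exactly on a cylinder boundary.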
2478 */ 2479 vt->v_part[0].p_size = lsp->ls_dkg.dkg_pcyl * 2480 lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead; 2481 } 2482 2483 /* 2484 * build dk_cinfo - see dkio(4I) 2485 */ 2486 static void 2487 fake_disk_info(dev_t dev, struct dk_cinfo *ci) 2488 { 2489 bzero(ci, sizeof (struct dk_cinfo)); 2490 (void) strlcpy(ci->dki_cname, LOFI_DRIVER_NAME, sizeof (ci->dki_cname)); 2491 ci->dki_ctype = DKC_SCSI_CCS; 2492 (void) strlcpy(ci->dki_dname, LOFI_DRIVER_NAME, sizeof (ci->dki_dname)); 2493 ci->dki_unit = LOFI_MINOR2ID(getminor(dev)); 2494 ci->dki_partition = LOFI_PART(getminor(dev)); 2495 /* 2496 * newfs uses this to set maxcontig. Must not be < 16, or it 2497 * will be 0 when newfs multiplies it by DEV_BSIZE and divides 2498 * it by the block size. Then tunefs doesn't work because 2499 * maxcontig is 0. 2500 */ 2501 ci->dki_maxtransfer = 16; 2502 } 2503 2504 /* 2505 * map in a compressed file 2506 * 2507 * Read in the header and the index that follows. 2508 * 2509 * The header is as follows - 2510 * 2511 * Signature (name of the compression algorithm) 2512 * Compression segment size (a multiple of 512) 2513 * Number of index entries 2514 * Size of the last block 2515 * The array containing the index entries 2516 * 2517 * The header information is always stored in 2518 * network byte order on disk. 2519 */ 2520 static int 2521 lofi_map_compressed_file(struct lofi_state *lsp, char *buf) 2522 { 2523 uint32_t index_sz, header_len, i; 2524 ssize_t resid; 2525 enum uio_rw rw; 2526 char *tbuf = buf; 2527 int error; 2528 2529 /* The signature has already been read */ 2530 tbuf += sizeof (lsp->ls_comp_algorithm); 2531 bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz)); 2532 lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz); 2533 2534 /* 2535 * The compressed segment size must be a power of 2 2536 */ 2537 if (lsp->ls_uncomp_seg_sz < DEV_BSIZE || 2538 !ISP2(lsp->ls_uncomp_seg_sz)) 2539 return (EINVAL); 2540 2541 for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++) 2542 ; 2543 2544 lsp->ls_comp_seg_shift = i; 2545 2546 tbuf += sizeof (lsp->ls_uncomp_seg_sz); 2547 bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz)); 2548 lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz); 2549 2550 tbuf += sizeof (lsp->ls_comp_index_sz); 2551 bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz), 2552 sizeof (lsp->ls_uncomp_last_seg_sz)); 2553 lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz); 2554 2555 /* 2556 * Compute the total size of the uncompressed data 2557 * for use in fake_disk_geometry and other calculations. 2558 * Disk geometry has to be faked with respect to the 2559 * actual uncompressed data size rather than the 2560 * compressed file size. 
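 *
 * To summarize, the on-disk layout parsed here is, informally:
 *
 *	signature[]			algorithm name (fixed width)
 *	uint32_t uncomp_seg_sz		segment size (network order)
 *	uint32_t comp_index_sz		number of index entries
 *	uint32_t uncomp_last_seg_sz	size of the final segment
 *	uint64_t index[comp_index_sz]	big-endian segment offsets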
2561 */ 2562 lsp->ls_vp_size = 2563 (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz 2564 + lsp->ls_uncomp_last_seg_sz; 2565 2566 /* 2567 * Index size is rounded up to DEV_BSIZE for ease 2568 * of segmapping 2569 */ 2570 index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz; 2571 header_len = sizeof (lsp->ls_comp_algorithm) + 2572 sizeof (lsp->ls_uncomp_seg_sz) + 2573 sizeof (lsp->ls_comp_index_sz) + 2574 sizeof (lsp->ls_uncomp_last_seg_sz); 2575 lsp->ls_comp_offbase = header_len + index_sz; 2576 2577 index_sz += header_len; 2578 index_sz = roundup(index_sz, DEV_BSIZE); 2579 2580 lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP); 2581 lsp->ls_comp_index_data_sz = index_sz; 2582 2583 /* 2584 * Read in the index -- this has a side-effect 2585 * of reading in the header as well 2586 */ 2587 rw = UIO_READ; 2588 error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz, 2589 0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 2590 2591 if (error != 0) 2592 return (error); 2593 2594 /* Skip the header, this is where the index really begins */ 2595 lsp->ls_comp_seg_index = 2596 (uint64_t *)(lsp->ls_comp_index_data + header_len); 2597 2598 /* 2599 * Now recompute offsets in the index to account for 2600 * the header length 2601 */ 2602 for (i = 0; i < lsp->ls_comp_index_sz; i++) { 2603 lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase + 2604 BE_64(lsp->ls_comp_seg_index[i]); 2605 } 2606 2607 return (error); 2608 } 2609 2610 static int 2611 lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip) 2612 { 2613 struct crypto_meta chead; 2614 char buf[DEV_BSIZE]; 2615 ssize_t resid; 2616 char *marker; 2617 int error; 2618 int ret; 2619 int i; 2620 2621 if (!klip->li_crypto_enabled) 2622 return (0); 2623 2624 /* 2625 * All current algorithms have a max of 448 bits. 2626 */ 2627 if (klip->li_iv_len > CRYPTO_BITS2BYTES(512)) 2628 return (EINVAL); 2629 2630 if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key)) 2631 return (EINVAL); 2632 2633 lsp->ls_crypto_enabled = klip->li_crypto_enabled; 2634 2635 mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL); 2636 2637 lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher); 2638 if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) { 2639 cmn_err(CE_WARN, "invalid cipher %s requested for %s", 2640 klip->li_cipher, klip->li_filename); 2641 return (EINVAL); 2642 } 2643 2644 /* this is just initialization here */ 2645 lsp->ls_mech.cm_param = NULL; 2646 lsp->ls_mech.cm_param_len = 0; 2647 2648 lsp->ls_iv_type = klip->li_iv_type; 2649 lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher); 2650 if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) { 2651 cmn_err(CE_WARN, "invalid iv cipher %s requested" 2652 " for %s", klip->li_iv_cipher, klip->li_filename); 2653 return (EINVAL); 2654 } 2655 2656 /* iv mech must itself take a null iv */ 2657 lsp->ls_iv_mech.cm_param = NULL; 2658 lsp->ls_iv_mech.cm_param_len = 0; 2659 lsp->ls_iv_len = klip->li_iv_len; 2660 2661 /* 2662 * Create ctx using li_cipher & the raw li_key after checking 2663 * that it isn't a weak key. 
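 * Note that li_key_len and ck_length are expressed in bits;
 * CRYPTO_BITS2BYTES() converts wherever byte counts are needed.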
2664 */ 2665 lsp->ls_key.ck_format = CRYPTO_KEY_RAW; 2666 lsp->ls_key.ck_length = klip->li_key_len; 2667 lsp->ls_key.ck_data = kmem_alloc( 2668 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP); 2669 bcopy(klip->li_key, lsp->ls_key.ck_data, 2670 CRYPTO_BITS2BYTES(lsp->ls_key.ck_length)); 2671 2672 ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key); 2673 if (ret != CRYPTO_SUCCESS) { 2674 cmn_err(CE_WARN, "weak key check failed for cipher " 2675 "%s on file %s (0x%x)", klip->li_cipher, 2676 klip->li_filename, ret); 2677 return (EINVAL); 2678 } 2679 2680 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 2681 CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 2682 if (error != 0) 2683 return (error); 2684 2685 /* 2686 * This is the case where the header in the lofi image is already 2687 * initialized to indicate it is encrypted. 2688 */ 2689 if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) { 2690 /* 2691 * The encryption header information is laid out this way: 2692 * 6 bytes: hex "CFLOFI" 2693 * 2 bytes: version = 0 ... for now 2694 * 96 bytes: reserved1 (not implemented yet) 2695 * 4 bytes: data_sector = 2 ... for now 2696 * more... not implemented yet 2697 */ 2698 2699 marker = buf; 2700 2701 /* copy the magic */ 2702 bcopy(marker, lsp->ls_crypto.magic, 2703 sizeof (lsp->ls_crypto.magic)); 2704 marker += sizeof (lsp->ls_crypto.magic); 2705 2706 /* read the encryption version number */ 2707 bcopy(marker, &(lsp->ls_crypto.version), 2708 sizeof (lsp->ls_crypto.version)); 2709 lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version); 2710 marker += sizeof (lsp->ls_crypto.version); 2711 2712 /* read a chunk of reserved data */ 2713 bcopy(marker, lsp->ls_crypto.reserved1, 2714 sizeof (lsp->ls_crypto.reserved1)); 2715 marker += sizeof (lsp->ls_crypto.reserved1); 2716 2717 /* read block number where encrypted data begins */ 2718 bcopy(marker, &(lsp->ls_crypto.data_sector), 2719 sizeof (lsp->ls_crypto.data_sector)); 2720 lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector); 2721 marker += sizeof (lsp->ls_crypto.data_sector); 2722 2723 /* and ignore the rest until it is implemented */ 2724 2725 lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; 2726 return (0); 2727 } 2728 2729 /* 2730 * We've requested encryption, but no magic was found, so it must be 2731 * a new image. 
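 * We only initialize the header if its area is still zeroed, so
 * that we never clobber existing data at CRYOFF.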
2732 */ 2733 2734 for (i = 0; i < sizeof (struct crypto_meta); i++) { 2735 if (buf[i] != '\0') 2736 return (EINVAL); 2737 } 2738 2739 marker = buf; 2740 bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic)); 2741 marker += sizeof (lofi_crypto_magic); 2742 chead.version = htons(LOFI_CRYPTO_VERSION); 2743 bcopy(&(chead.version), marker, sizeof (chead.version)); 2744 marker += sizeof (chead.version); 2745 marker += sizeof (chead.reserved1); 2746 chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR); 2747 bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector)); 2748 2749 /* write the header */ 2750 error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE, 2751 CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 2752 if (error != 0) 2753 return (error); 2754 2755 /* fix things up so it looks like we read this info */ 2756 bcopy(lofi_crypto_magic, lsp->ls_crypto.magic, 2757 sizeof (lofi_crypto_magic)); 2758 lsp->ls_crypto.version = LOFI_CRYPTO_VERSION; 2759 lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR; 2760 lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE; 2761 return (0); 2762 } 2763 2764 /* 2765 * Check to see if the passed in signature is a valid one. If it is 2766 * valid, return the index into lofi_compress_table. 2767 * 2768 * Return -1 if it is invalid 2769 */ 2770 static int 2771 lofi_compress_select(const char *signature) 2772 { 2773 int i; 2774 2775 for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) { 2776 if (strcmp(lofi_compress_table[i].l_name, signature) == 0) 2777 return (i); 2778 } 2779 2780 return (-1); 2781 } 2782 2783 static int 2784 lofi_init_compress(struct lofi_state *lsp) 2785 { 2786 char buf[DEV_BSIZE]; 2787 int compress_index; 2788 ssize_t resid; 2789 int error; 2790 2791 error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE, 2792 0, RLIM64_INFINITY, kcred, &resid); 2793 2794 if (error != 0) 2795 return (error); 2796 2797 if ((compress_index = lofi_compress_select(buf)) == -1) 2798 return (0); 2799 2800 /* compression and encryption are mutually exclusive */ 2801 if (lsp->ls_crypto_enabled) 2802 return (ENOTSUP); 2803 2804 /* initialize compression info for compressed lofi */ 2805 lsp->ls_comp_algorithm_index = compress_index; 2806 (void) strlcpy(lsp->ls_comp_algorithm, 2807 lofi_compress_table[compress_index].l_name, 2808 sizeof (lsp->ls_comp_algorithm)); 2809 2810 /* Finally setup per-thread pre-allocated buffers */ 2811 lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads * 2812 sizeof (struct compbuf), KM_SLEEP); 2813 2814 return (lofi_map_compressed_file(lsp, buf)); 2815 } 2816 2817 /* 2818 * Allocate new or proposed id from lofi_id. 2819 * 2820 * Special cases for proposed id: 2821 * 0: not allowed, 0 is id for control device. 2822 * -1: allocate first usable id from lofi_id. 2823 * any other value is proposed value from userland 2824 * 2825 * returns DDI_SUCCESS or errno. 
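 * On success the allocated id is passed back in *idp.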
2826 */ 2827 static int 2828 lofi_alloc_id(int *idp) 2829 { 2830 int id, error = DDI_SUCCESS; 2831 2832 if (*idp == -1) { 2833 id = id_allocff_nosleep(lofi_id); 2834 if (id == -1) { 2835 error = EAGAIN; 2836 goto err; 2837 } 2838 } else if (*idp == 0) { 2839 error = EINVAL; 2840 goto err; 2841 } else if (*idp > ((1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)) - 1)) { 2842 error = ERANGE; 2843 goto err; 2844 } else { 2845 if (ddi_get_soft_state(lofi_statep, *idp) != NULL) { 2846 error = EEXIST; 2847 goto err; 2848 } 2849 2850 id = id_alloc_specific_nosleep(lofi_id, *idp); 2851 if (id == -1) { 2852 error = EAGAIN; 2853 goto err; 2854 } 2855 } 2856 *idp = id; 2857 err: 2858 return (error); 2859 } 2860 2861 static int 2862 lofi_create_dev(struct lofi_ioctl *klip) 2863 { 2864 dev_info_t *parent, *child; 2865 struct lofi_state *lsp = NULL; 2866 char namebuf[MAXNAMELEN]; 2867 int error; 2868 2869 /* get control device */ 2870 lsp = ddi_get_soft_state(lofi_statep, 0); 2871 parent = ddi_get_parent(lsp->ls_dip); 2872 2873 if ((error = lofi_alloc_id((int *)&klip->li_id))) 2874 return (error); 2875 2876 (void) snprintf(namebuf, sizeof (namebuf), LOFI_DRIVER_NAME "@%d", 2877 klip->li_id); 2878 2879 ndi_devi_enter(parent); 2880 child = ndi_devi_findchild(parent, namebuf); 2881 ndi_devi_exit(parent); 2882 2883 if (child == NULL) { 2884 child = ddi_add_child(parent, LOFI_DRIVER_NAME, 2885 (pnode_t)DEVI_SID_NODEID, klip->li_id); 2886 if ((error = ddi_prop_update_int(DDI_DEV_T_NONE, child, 2887 "instance", klip->li_id)) != DDI_PROP_SUCCESS) 2888 goto err; 2889 2890 if (klip->li_labeled == B_TRUE) { 2891 if ((error = ddi_prop_create(DDI_DEV_T_NONE, child, 2892 DDI_PROP_CANSLEEP, "labeled", 0, 0)) 2893 != DDI_PROP_SUCCESS) 2894 goto err; 2895 } 2896 2897 if ((error = ndi_devi_online(child, NDI_ONLINE_ATTACH)) 2898 != NDI_SUCCESS) 2899 goto err; 2900 } else { 2901 id_free(lofi_id, klip->li_id); 2902 error = EEXIST; 2903 return (error); 2904 } 2905 2906 goto done; 2907 2908 err: 2909 ddi_prop_remove_all(child); 2910 (void) ndi_devi_offline(child, NDI_DEVI_REMOVE); 2911 id_free(lofi_id, klip->li_id); 2912 done: 2913 2914 return (error); 2915 } 2916 2917 static void 2918 lofi_create_inquiry(struct lofi_state *lsp, struct scsi_inquiry *inq) 2919 { 2920 char *p = NULL; 2921 2922 (void) strlcpy(inq->inq_vid, LOFI_DRIVER_NAME, sizeof (inq->inq_vid)); 2923 2924 mutex_enter(&lsp->ls_vp_lock); 2925 if (lsp->ls_vp != NULL) 2926 p = strrchr(lsp->ls_vp->v_path, '/'); 2927 if (p != NULL) 2928 (void) strncpy(inq->inq_pid, p + 1, sizeof (inq->inq_pid)); 2929 mutex_exit(&lsp->ls_vp_lock); 2930 (void) strlcpy(inq->inq_revision, "1.0", sizeof (inq->inq_revision)); 2931 } 2932 2933 /* 2934 * copy devlink name from event cache 2935 */ 2936 static void 2937 lofi_copy_devpath(struct lofi_ioctl *klip) 2938 { 2939 int error; 2940 char namebuf[MAXNAMELEN], *str; 2941 clock_t ticks; 2942 nvlist_t *nvl = NULL; 2943 2944 if (klip->li_labeled == B_TRUE) 2945 klip->li_devpath[0] = '\0'; 2946 else { 2947 /* no need to wait for messages */ 2948 (void) snprintf(klip->li_devpath, sizeof (klip->li_devpath), 2949 "/dev/" LOFI_CHAR_NAME "/%d", klip->li_id); 2950 return; 2951 } 2952 2953 (void) snprintf(namebuf, sizeof (namebuf), "%d", klip->li_id); 2954 2955 mutex_enter(&lofi_devlink_cache.ln_lock); 2956 for (;;) { 2957 error = nvlist_lookup_nvlist(lofi_devlink_cache.ln_data, 2958 namebuf, &nvl); 2959 2960 if (error == 0 && 2961 nvlist_lookup_string(nvl, DEV_NAME, &str) == 0 && 2962 strncmp(str, "/dev/" LOFI_CHAR_NAME, 2963 sizeof ("/dev/" LOFI_CHAR_NAME) 
- 1) != 0) {
2964 (void) strlcpy(klip->li_devpath, str,
2965 sizeof (klip->li_devpath));
2966 break;
2967 }
2968 /*
2969 * Either there is no data in the cache, or the
2970 * cache entry still has the wrong device name.
2971 */
2972 ticks = ddi_get_lbolt() + lofi_timeout * drv_usectohz(1000000);
2973 error = cv_timedwait(&lofi_devlink_cache.ln_cv,
2974 &lofi_devlink_cache.ln_lock, ticks);
2975 if (error == -1)
2976 break; /* timeout */
2977 }
2978 mutex_exit(&lofi_devlink_cache.ln_lock);
2979 }
2980
2981 /*
2982 * Map a file to a minor number. Return the minor number.
2983 */
2984 static int
2985 lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
2986 int *rvalp, struct cred *credp, int ioctl_flag)
2987 {
2988 int id = -1;
2989 struct lofi_state *lsp = NULL;
2990 struct lofi_ioctl *klip;
2991 int error, canfree;
2992 struct vnode *vp = NULL;
2993 vattr_t vattr;
2994 int flag = 0;
2995 char namebuf[MAXNAMELEN];
2996
2997 error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
2998 if (error != 0)
2999 return (error);
3000
3001 mutex_enter(&lofi_lock);
3002
3003 if (file_to_lofi_nocheck(klip->li_filename, klip->li_readonly,
3004 NULL) == 0) {
3005 error = EBUSY;
3006 goto err;
3007 }
3008
3009 flag = FREAD | FWRITE | FOFFMAX | FEXCL;
3010 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
3011 if (error) {
3012 /* try read-only */
3013 flag &= ~FWRITE;
3014 error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
3015 &vp, 0, 0);
3016 if (error)
3017 goto err;
3018 }
3019
3020 if (!V_ISLOFIABLE(vp->v_type)) {
3021 error = EINVAL;
3022 goto err;
3023 }
3024
3025 vattr.va_mask = AT_SIZE;
3026 error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
3027 if (error)
3028 goto err;
3029
3030 /* the file size needs to be a multiple of the block size */
3031 if ((vattr.va_size % DEV_BSIZE) != 0) {
3032 error = EINVAL;
3033 goto err;
3034 }
3035
3036 if (pickminor) {
3037 klip->li_id = (uint32_t)-1;
3038 }
3039 if ((error = lofi_create_dev(klip)) != 0)
3040 goto err;
3041
3042 id = klip->li_id;
3043 lsp = ddi_get_soft_state(lofi_statep, id);
3044 if (lsp == NULL) {
3045 error = ENXIO; goto err; }
3046
3047 /*
3048 * From this point on, lofi_destroy() is used to clean up on error,
3049 * so make sure the basic data is set.
3050 */
3051 list_insert_tail(&lofi_list, lsp);
3052 lsp->ls_dev = makedevice(getmajor(dev), LOFI_ID2MINOR(id));
3053
3054 list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
3055 offsetof(struct lofi_comp_cache, lc_list));
3056
3057 /*
3058 * Save the open mode so the file can be closed properly and the
3059 * vnode counts updated correctly.
3060 */
3061 lsp->ls_openflag = flag;
3062
3063 lsp->ls_vp = vp;
3064 lsp->ls_stacked_vp = vp;
3065
3066 lsp->ls_vp_size = vattr.va_size;
3067 lsp->ls_vp_comp_size = lsp->ls_vp_size;
3068
3069 /*
3070 * Try to handle stacked lofs vnodes.
3071 */
3072 if (vp->v_type == VREG) {
3073 vnode_t *realvp;
3074
3075 if (VOP_REALVP(vp, &realvp, NULL) == 0) {
3076 /*
3077 * We need to use the realvp for uniqueness
3078 * checking, but keep the stacked vp for
3079 * LOFI_GET_FILENAME display.
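 *
 * All I/O and uniqueness checks go through ls_vp (possibly the
 * realvp); ls_stacked_vp is kept only so the original name can be
 * reported.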
3080 */
3081 VN_HOLD(realvp);
3082 lsp->ls_vp = realvp;
3083 }
3084 }
3085
3086 lsp->ls_lbshift = highbit(DEV_BSIZE) - 1;
3087 lsp->ls_pbshift = lsp->ls_lbshift;
3088
3089 lsp->ls_readonly = klip->li_readonly;
3090 lsp->ls_uncomp_seg_sz = 0;
3091 lsp->ls_comp_algorithm[0] = '\0';
3092 lsp->ls_crypto_offset = 0;
3093
3094 (void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
3095 LOFI_DRIVER_NAME, id);
3096 lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
3097 minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);
3098
3099 if ((error = lofi_init_crypto(lsp, klip)) != 0)
3100 goto err;
3101
3102 if ((error = lofi_init_compress(lsp)) != 0)
3103 goto err;
3104
3105 fake_disk_geometry(lsp);
3106
3107 /* For unlabeled lofi add Nblocks and Size */
3108 if (klip->li_labeled == B_FALSE) {
3109 error = ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip,
3110 SIZE_PROP_NAME, lsp->ls_vp_size - lsp->ls_crypto_offset);
3111 if (error != DDI_PROP_SUCCESS) {
3112 error = EINVAL;
3113 goto err;
3114 }
3115 error = ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip,
3116 NBLOCKS_PROP_NAME,
3117 (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE);
3118 if (error != DDI_PROP_SUCCESS) {
3119 error = EINVAL;
3120 goto err;
3121 }
3122 }
3123
3124 /* Determine if the underlying device supports TRIM/DISCARD */
3125 if (lsp->ls_vp->v_type == VCHR || lsp->ls_vp->v_type == VBLK) {
3126 if (VOP_IOCTL(lsp->ls_vp, DKIOC_CANFREE, (intptr_t)&canfree,
3127 FKIOCTL, kcred, &error, NULL) != 0) {
3128 canfree = 0;
3129 }
3130 } else {
3131 /*
3132 * We don't have a way to discover if a file supports
3133 * the FREESP fcntl cmd (other than trying it).
3134 * However, since zfs, ufs, tmpfs, and udfs all have
3135 * support, and NFSv4 also forwards these requests to
3136 * the server, we always enable it for file-based
3137 * volumes. The individual commands may then silently
3138 * fail when they are actually issued.
3139 */
3140 canfree = 1;
3141 }
3142
3143 if (lsp->ls_readonly)
3144 canfree = 0;
3145
3146 lsp->ls_canfree = (canfree != 0);
3147
3148 /*
3149 * Notify that we are ready to rock. If we are a labeled device, we
3150 * must now ask cmlb to validate the label, which will finally create
3151 * the right minors for us. This matters because before ls_vp_ready
3152 * is set we cannot even report the geometry, which means a larger
3153 * device would otherwise end up with the wrong default label.
3154 */
3155
3156 mutex_enter(&lsp->ls_vp_lock);
3157 lsp->ls_vp_ready = B_TRUE;
3158 cv_broadcast(&lsp->ls_vp_cv);
3159 mutex_exit(&lsp->ls_vp_lock);
3160 if (lsp->ls_cmlbhandle != NULL) {
3161 (void) cmlb_validate(lsp->ls_cmlbhandle, 0, 0);
3162 }
3163 mutex_exit(&lofi_lock);
3164
3165 lofi_copy_devpath(klip);
3166
3167 if (rvalp)
3168 *rvalp = id;
3169 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
3170 free_lofi_ioctl(klip);
3171 return (0);
3172
3173 err:
3174 if (lsp != NULL) {
3175 lofi_destroy(lsp, credp);
3176 } else {
3177 if (vp != NULL) {
3178 (void) VOP_PUTPAGE(vp, 0, 0, B_FREE, credp, NULL);
3179 (void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
3180 VN_RELE(vp);
3181 }
3182 }
3183
3184 mutex_exit(&lofi_lock);
3185 free_lofi_ioctl(klip);
3186 return (error);
3187 }
3188
3189 /*
3190 * Unmap a file.
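 *
 * Returns 0 on success, EBUSY if the device is open and neither
 * li_cleanup nor li_force was set, or another errno if the mapping
 * cannot be found or accessed.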
3191 */ 3192 static int 3193 lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename, 3194 struct cred *credp, int ioctl_flag) 3195 { 3196 struct lofi_state *lsp; 3197 struct lofi_ioctl *klip; 3198 char namebuf[MAXNAMELEN]; 3199 int err; 3200 3201 err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); 3202 if (err != 0) 3203 return (err); 3204 3205 mutex_enter(&lofi_lock); 3206 if (byfilename) { 3207 if ((err = file_to_lofi(klip->li_filename, klip->li_readonly, 3208 &lsp)) != 0) { 3209 goto done; 3210 } 3211 } else if (klip->li_id == 0) { 3212 err = ENXIO; 3213 goto done; 3214 } else { 3215 lsp = ddi_get_soft_state(lofi_statep, klip->li_id); 3216 } 3217 3218 if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) { 3219 err = ENXIO; 3220 goto done; 3221 } 3222 3223 klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev)); 3224 (void) snprintf(namebuf, sizeof (namebuf), "%u", klip->li_id); 3225 3226 /* 3227 * If it's still held open, we'll do one of three things: 3228 * 3229 * If no flag is set, just return EBUSY. 3230 * 3231 * If the 'cleanup' flag is set, unmap and remove the device when 3232 * the last user finishes. 3233 * 3234 * If the 'force' flag is set, then we forcibly close the underlying 3235 * file. Subsequent operations will fail, and the DKIOCSTATE ioctl 3236 * will return DKIO_DEV_GONE. When the device is last closed, the 3237 * device will be cleaned up appropriately. 3238 * 3239 * This is complicated by the fact that we may have outstanding 3240 * dispatched I/Os. Rather than having a single mutex to serialize all 3241 * I/O, we keep a count of the number of outstanding I/O requests 3242 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os 3243 * should be dispatched (ls_vp_closereq). 3244 * 3245 * We set the flag, wait for the number of outstanding I/Os to reach 0, 3246 * and then close the underlying vnode. 3247 */ 3248 if (is_opened(lsp)) { 3249 if (klip->li_force) { 3250 /* Mark the device for cleanup. */ 3251 lofi_set_cleanup(lsp); 3252 mutex_enter(&lsp->ls_vp_lock); 3253 lsp->ls_vp_closereq = B_TRUE; 3254 /* Wake up any threads waiting on dkiocstate. */ 3255 cv_broadcast(&lsp->ls_vp_cv); 3256 while (lsp->ls_vp_iocount > 0) 3257 cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock); 3258 mutex_exit(&lsp->ls_vp_lock); 3259 } else if (klip->li_cleanup) { 3260 lofi_set_cleanup(lsp); 3261 } else { 3262 err = EBUSY; 3263 } 3264 } else { 3265 lofi_free_dev(lsp); 3266 lofi_destroy(lsp, credp); 3267 } 3268 3269 /* Remove name from devlink cache */ 3270 mutex_enter(&lofi_devlink_cache.ln_lock); 3271 (void) nvlist_remove_all(lofi_devlink_cache.ln_data, namebuf); 3272 cv_broadcast(&lofi_devlink_cache.ln_cv); 3273 mutex_exit(&lofi_devlink_cache.ln_lock); 3274 done: 3275 mutex_exit(&lofi_lock); 3276 if (err == 0) 3277 (void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 3278 free_lofi_ioctl(klip); 3279 return (err); 3280 } 3281 3282 /* 3283 * get the filename given the minor number, or the minor number given 3284 * the name. 
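 * 'which' selects among LOFI_GET_FILENAME, LOFI_GET_MINOR and
 * LOFI_CHECK_COMPRESSED.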
3285 */ 3286 static int 3287 lofi_get_info(dev_t dev __unused, struct lofi_ioctl *ulip, int which, 3288 struct cred *credp __unused, int ioctl_flag) 3289 { 3290 struct lofi_ioctl *klip; 3291 struct lofi_state *lsp; 3292 int error; 3293 3294 error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag); 3295 if (error != 0) 3296 return (error); 3297 3298 switch (which) { 3299 case LOFI_GET_FILENAME: 3300 if (klip->li_id == 0) { 3301 free_lofi_ioctl(klip); 3302 return (EINVAL); 3303 } 3304 3305 mutex_enter(&lofi_lock); 3306 lsp = ddi_get_soft_state(lofi_statep, klip->li_id); 3307 if (lsp == NULL || lofi_access(lsp) != 0) { 3308 mutex_exit(&lofi_lock); 3309 free_lofi_ioctl(klip); 3310 return (ENXIO); 3311 } 3312 3313 /* 3314 * This may fail if, for example, we're trying to look 3315 * up a zoned NFS path from the global zone. 3316 */ 3317 if (lsp->ls_stacked_vp == NULL || 3318 vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename, 3319 sizeof (klip->li_filename), CRED()) != 0) { 3320 (void) strlcpy(klip->li_filename, "?", 3321 sizeof (klip->li_filename)); 3322 } 3323 3324 klip->li_readonly = lsp->ls_readonly; 3325 klip->li_labeled = lsp->ls_cmlbhandle != NULL; 3326 3327 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 3328 sizeof (klip->li_algorithm)); 3329 klip->li_crypto_enabled = lsp->ls_crypto_enabled; 3330 mutex_exit(&lofi_lock); 3331 3332 lofi_copy_devpath(klip); 3333 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 3334 free_lofi_ioctl(klip); 3335 return (error); 3336 case LOFI_GET_MINOR: 3337 mutex_enter(&lofi_lock); 3338 error = file_to_lofi(klip->li_filename, 3339 klip->li_readonly, &lsp); 3340 if (error != 0) { 3341 mutex_exit(&lofi_lock); 3342 free_lofi_ioctl(klip); 3343 return (error); 3344 } 3345 klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev)); 3346 3347 klip->li_readonly = lsp->ls_readonly; 3348 klip->li_labeled = lsp->ls_cmlbhandle != NULL; 3349 mutex_exit(&lofi_lock); 3350 3351 lofi_copy_devpath(klip); 3352 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 3353 3354 free_lofi_ioctl(klip); 3355 return (error); 3356 case LOFI_CHECK_COMPRESSED: 3357 mutex_enter(&lofi_lock); 3358 error = file_to_lofi(klip->li_filename, 3359 klip->li_readonly, &lsp); 3360 if (error != 0) { 3361 mutex_exit(&lofi_lock); 3362 free_lofi_ioctl(klip); 3363 return (error); 3364 } 3365 3366 klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev)); 3367 (void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm, 3368 sizeof (klip->li_algorithm)); 3369 3370 mutex_exit(&lofi_lock); 3371 error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag); 3372 free_lofi_ioctl(klip); 3373 return (error); 3374 default: 3375 free_lofi_ioctl(klip); 3376 return (EINVAL); 3377 } 3378 } 3379 3380 static int 3381 uscsi_is_inquiry(intptr_t arg, int flag, union scsi_cdb *cdb, 3382 struct uscsi_cmd *uscmd) 3383 { 3384 int rval; 3385 3386 #ifdef _MULTI_DATAMODEL 3387 switch (ddi_model_convert_from(flag & FMODELS)) { 3388 case DDI_MODEL_ILP32: { 3389 struct uscsi_cmd32 ucmd32; 3390 3391 if (ddi_copyin((void *)arg, &ucmd32, sizeof (ucmd32), flag)) { 3392 rval = EFAULT; 3393 goto err; 3394 } 3395 uscsi_cmd32touscsi_cmd((&ucmd32), uscmd); 3396 break; 3397 } 3398 case DDI_MODEL_NONE: 3399 if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) { 3400 rval = EFAULT; 3401 goto err; 3402 } 3403 break; 3404 default: 3405 rval = EFAULT; 3406 goto err; 3407 } 3408 #else 3409 if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) { 3410 rval = EFAULT; 3411 goto err; 3412 } 3413 #endif /* _MULTI_DATAMODEL */ 3414 if 
(ddi_copyin(uscmd->uscsi_cdb, cdb, uscmd->uscsi_cdblen, flag)) {
3415 rval = EFAULT;
3416 goto err;
3417 }
3418 if (cdb->scc_cmd == SCMD_INQUIRY) {
3419 return (0);
3420 }
/* not an INQUIRY; return non-zero so the caller can inspect the cdb */
rval = EINVAL;
3421 err:
3422 return (rval);
3423 }
3424
3425 static int
3426 lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
3427 int *rvalp)
3428 {
3429 int error;
3430 enum dkio_state dkstate;
3431 struct lofi_state *lsp;
3432 dk_efi_t user_efi;
3433 int id;
3434
3435 id = LOFI_MINOR2ID(getminor(dev));
3436
3437 /* lofi ioctls only apply to the master device */
3438 if (id == 0) {
3439 struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;
3440
3441 /*
3442 * The query commands only need read access - i.e., normal
3443 * users are allowed to issue those on the ctl device as
3444 * long as they can open it read-only.
3445 */
3446 switch (cmd) {
3447 case LOFI_MAP_FILE:
3448 if ((flag & FWRITE) == 0)
3449 return (EPERM);
3450 return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
3451 case LOFI_MAP_FILE_MINOR:
3452 if ((flag & FWRITE) == 0)
3453 return (EPERM);
3454 return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
3455 case LOFI_UNMAP_FILE:
3456 if ((flag & FWRITE) == 0)
3457 return (EPERM);
3458 return (lofi_unmap_file(lip, 1, credp, flag));
3459 case LOFI_UNMAP_FILE_MINOR:
3460 if ((flag & FWRITE) == 0)
3461 return (EPERM);
3462 return (lofi_unmap_file(lip, 0, credp, flag));
3463 case LOFI_GET_FILENAME:
3464 return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
3465 credp, flag));
3466 case LOFI_GET_MINOR:
3467 return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
3468 credp, flag));
3469
3470 /*
3471 * This API made limited sense when this value was fixed
3472 * at LOFI_MAX_FILES. However, its use to iterate
3473 * across all possible devices in lofiadm means we don't
3474 * want to return L_MAXMIN, but the highest
3475 * *allocated* id.
3476 */
3477 case LOFI_GET_MAXMINOR:
3478 id = 0;
3479
3480 mutex_enter(&lofi_lock);
3481
3482 for (lsp = list_head(&lofi_list); lsp != NULL;
3483 lsp = list_next(&lofi_list, lsp)) {
3484 int i;
3485 if (lofi_access(lsp) != 0)
3486 continue;
3487
3488 i = ddi_get_instance(lsp->ls_dip);
3489 if (i > id)
3490 id = i;
3491 }
3492
3493 mutex_exit(&lofi_lock);
3494
3495 error = ddi_copyout(&id, &lip->li_id,
3496 sizeof (id), flag);
3497 if (error)
3498 return (EFAULT);
3499 return (0);
3500
3501 case LOFI_CHECK_COMPRESSED:
3502 return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
3503 credp, flag));
3504 default:
3505 return (EINVAL);
3506 }
3507 }
3508
3509 mutex_enter(&lofi_lock);
3510 lsp = ddi_get_soft_state(lofi_statep, id);
3511 if (lsp == NULL || lsp->ls_cleanup) {
3512 mutex_exit(&lofi_lock);
3513 return (ENXIO);
3514 }
3515 mutex_exit(&lofi_lock);
3516
3517 if (ddi_prop_exists(DDI_DEV_T_ANY, lsp->ls_dip, DDI_PROP_DONTPASS,
3518 "labeled") == 1) {
3519 error = cmlb_ioctl(lsp->ls_cmlbhandle, dev, cmd, arg, flag,
3520 credp, rvalp, 0);
3521 if (error != ENOTTY)
3522 return (error);
3523 }
3524
3525 /*
3526 * We explicitly allow DKIOCSTATE, but all other ioctls should fail with
3527 * EIO as if the device was no longer present.
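 * (This is the ioctl half of the force-detach semantics: after the
 * vnode has been closed, DKIOCSTATE is the one ioctl that can still
 * observe the DKIO_DEV_GONE state.)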
3528 */ 3529 if (lsp->ls_vp == NULL && cmd != DKIOCSTATE) 3530 return (EIO); 3531 3532 /* these are for faking out utilities like newfs */ 3533 switch (cmd) { 3534 case DKIOCGMEDIAINFO: 3535 case DKIOCGMEDIAINFOEXT: { 3536 struct dk_minfo_ext media_info; 3537 int shift = lsp->ls_lbshift; 3538 int size; 3539 3540 if (cmd == DKIOCGMEDIAINFOEXT) { 3541 media_info.dki_pbsize = 1U << lsp->ls_pbshift; 3542 switch (ddi_model_convert_from(flag & FMODELS)) { 3543 case DDI_MODEL_ILP32: 3544 size = sizeof (struct dk_minfo_ext32); 3545 break; 3546 default: 3547 size = sizeof (struct dk_minfo_ext); 3548 break; 3549 } 3550 } else { 3551 size = sizeof (struct dk_minfo); 3552 } 3553 3554 media_info.dki_media_type = DK_FIXED_DISK; 3555 media_info.dki_lbsize = 1U << shift; 3556 media_info.dki_capacity = 3557 (lsp->ls_vp_size - lsp->ls_crypto_offset) >> shift; 3558 3559 if (ddi_copyout(&media_info, (void *)arg, size, flag)) 3560 return (EFAULT); 3561 return (0); 3562 } 3563 case DKIOCREMOVABLE: { 3564 int i = 0; 3565 if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), flag)) 3566 return (EFAULT); 3567 return (0); 3568 } 3569 3570 case DKIOCGVTOC: { 3571 struct vtoc vt; 3572 fake_disk_vtoc(lsp, &vt); 3573 3574 switch (ddi_model_convert_from(flag & FMODELS)) { 3575 case DDI_MODEL_ILP32: { 3576 struct vtoc32 vtoc32; 3577 3578 vtoctovtoc32(vt, vtoc32); 3579 if (ddi_copyout(&vtoc32, (void *)arg, 3580 sizeof (struct vtoc32), flag)) 3581 return (EFAULT); 3582 break; 3583 } 3584 3585 case DDI_MODEL_NONE: 3586 if (ddi_copyout(&vt, (void *)arg, 3587 sizeof (struct vtoc), flag)) 3588 return (EFAULT); 3589 break; 3590 } 3591 return (0); 3592 } 3593 case DKIOCINFO: { 3594 struct dk_cinfo ci; 3595 fake_disk_info(dev, &ci); 3596 if (ddi_copyout(&ci, (void *)arg, sizeof (ci), flag)) 3597 return (EFAULT); 3598 return (0); 3599 } 3600 case DKIOCG_VIRTGEOM: 3601 case DKIOCG_PHYGEOM: 3602 case DKIOCGGEOM: 3603 error = ddi_copyout(&lsp->ls_dkg, (void *)arg, 3604 sizeof (struct dk_geom), flag); 3605 if (error) 3606 return (EFAULT); 3607 return (0); 3608 case DKIOCSTATE: 3609 /* 3610 * Normally, lofi devices are always in the INSERTED state. If 3611 * a device is forcefully unmapped, then the device transitions 3612 * to the DKIO_DEV_GONE state. 3613 */ 3614 if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate), 3615 flag) != 0) 3616 return (EFAULT); 3617 3618 mutex_enter(&lsp->ls_vp_lock); 3619 while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) || 3620 (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) && 3621 !lsp->ls_cleanup) { 3622 /* 3623 * By virtue of having the device open, we know that 3624 * 'lsp' will remain valid when we return. 3625 */ 3626 if (!cv_wait_sig(&lsp->ls_vp_cv, &lsp->ls_vp_lock)) { 3627 mutex_exit(&lsp->ls_vp_lock); 3628 return (EINTR); 3629 } 3630 } 3631 3632 dkstate = (!lsp->ls_cleanup && lsp->ls_vp != NULL ? 
3633 DKIO_INSERTED : DKIO_DEV_GONE); 3634 mutex_exit(&lsp->ls_vp_lock); 3635 3636 if (ddi_copyout(&dkstate, (void *)arg, 3637 sizeof (dkstate), flag) != 0) 3638 return (EFAULT); 3639 return (0); 3640 case USCSICMD: { 3641 struct uscsi_cmd uscmd; 3642 union scsi_cdb cdb; 3643 3644 if (uscsi_is_inquiry(arg, flag, &cdb, &uscmd) == 0) { 3645 struct scsi_inquiry inq = {0}; 3646 3647 lofi_create_inquiry(lsp, &inq); 3648 if (ddi_copyout(&inq, uscmd.uscsi_bufaddr, 3649 uscmd.uscsi_buflen, flag) != 0) 3650 return (EFAULT); 3651 return (0); 3652 } else if (cdb.scc_cmd == SCMD_READ_CAPACITY) { 3653 struct scsi_capacity capacity; 3654 3655 capacity.capacity = 3656 BE_32((lsp->ls_vp_size - lsp->ls_crypto_offset) >> 3657 lsp->ls_lbshift); 3658 capacity.lbasize = BE_32(1 << lsp->ls_lbshift); 3659 if (ddi_copyout(&capacity, uscmd.uscsi_bufaddr, 3660 uscmd.uscsi_buflen, flag) != 0) 3661 return (EFAULT); 3662 return (0); 3663 } 3664 3665 uscmd.uscsi_rqstatus = 0xff; 3666 #ifdef _MULTI_DATAMODEL 3667 switch (ddi_model_convert_from(flag & FMODELS)) { 3668 case DDI_MODEL_ILP32: { 3669 struct uscsi_cmd32 ucmd32; 3670 uscsi_cmdtouscsi_cmd32((&uscmd), (&ucmd32)); 3671 if (ddi_copyout(&ucmd32, (void *)arg, sizeof (ucmd32), 3672 flag) != 0) 3673 return (EFAULT); 3674 break; 3675 } 3676 case DDI_MODEL_NONE: 3677 if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd), 3678 flag) != 0) 3679 return (EFAULT); 3680 break; 3681 default: 3682 return (EFAULT); 3683 } 3684 #else 3685 if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd), flag) != 0) 3686 return (EFAULT); 3687 #endif /* _MULTI_DATAMODEL */ 3688 return (0); 3689 } 3690 3691 case DKIOCGMBOOT: 3692 return (lofi_urw(lsp, FREAD, 0, 1 << lsp->ls_lbshift, 3693 arg, flag, credp)); 3694 3695 case DKIOCSMBOOT: 3696 return (lofi_urw(lsp, FWRITE, 0, 1 << lsp->ls_lbshift, 3697 arg, flag, credp)); 3698 3699 case DKIOCGETEFI: 3700 if (ddi_copyin((void *)arg, &user_efi, 3701 sizeof (dk_efi_t), flag) != 0) 3702 return (EFAULT); 3703 3704 return (lofi_urw(lsp, FREAD, 3705 user_efi.dki_lba * (1 << lsp->ls_lbshift), 3706 user_efi.dki_length, (intptr_t)user_efi.dki_data, 3707 flag, credp)); 3708 3709 case DKIOCSETEFI: 3710 if (ddi_copyin((void *)arg, &user_efi, 3711 sizeof (dk_efi_t), flag) != 0) 3712 return (EFAULT); 3713 3714 return (lofi_urw(lsp, FWRITE, 3715 user_efi.dki_lba * (1 << lsp->ls_lbshift), 3716 user_efi.dki_length, (intptr_t)user_efi.dki_data, 3717 flag, credp)); 3718 3719 case DKIOC_CANFREE: { 3720 int canfree = lsp->ls_canfree ? 1 : 0; 3721 3722 if (ddi_copyout(&canfree, (void *)arg, sizeof (canfree), flag)) 3723 return (EFAULT); 3724 3725 return (0); 3726 } 3727 3728 case DKIOCFREE: { 3729 dkioc_free_list_t *dfl; 3730 3731 if (!lsp->ls_canfree) 3732 return (ENOTSUP); 3733 3734 error = dfl_copyin((void *)arg, &dfl, flag, KM_SLEEP); 3735 if (error != 0) 3736 return (error); 3737 3738 /* 3739 * lofi_free_space() calls dfl_iter() which consumes dfl; 3740 * there is no need to call dfl_free() here. 
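 * (lofi_free_space_cb() is the dfl_iter() callback and performs the
 * dfl_free() once it is finished with the list.)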
3741 */ 3742 return (lofi_free_space(lsp, dfl, dev)); 3743 } 3744 3745 default: 3746 #ifdef DEBUG 3747 cmn_err(CE_WARN, "lofi_ioctl: %d is not implemented\n", cmd); 3748 #endif /* DEBUG */ 3749 return (ENOTTY); 3750 } 3751 } 3752 3753 static int 3754 lofi_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, 3755 char *name, caddr_t valuep, int *lengthp) 3756 { 3757 struct lofi_state *lsp; 3758 int rc; 3759 3760 lsp = ddi_get_soft_state(lofi_statep, ddi_get_instance(dip)); 3761 if (lsp == NULL) { 3762 return (ddi_prop_op(dev, dip, prop_op, mod_flags, 3763 name, valuep, lengthp)); 3764 } 3765 3766 rc = cmlb_prop_op(lsp->ls_cmlbhandle, dev, dip, prop_op, mod_flags, 3767 name, valuep, lengthp, LOFI_PART(getminor(dev)), NULL); 3768 if (rc == DDI_PROP_SUCCESS) 3769 return (rc); 3770 3771 return (ddi_prop_op(DDI_DEV_T_ANY, dip, prop_op, mod_flags, 3772 name, valuep, lengthp)); 3773 } 3774 3775 static struct cb_ops lofi_cb_ops = { 3776 lofi_open, /* open */ 3777 lofi_close, /* close */ 3778 lofi_strategy, /* strategy */ 3779 nodev, /* print */ 3780 nodev, /* dump */ 3781 lofi_read, /* read */ 3782 lofi_write, /* write */ 3783 lofi_ioctl, /* ioctl */ 3784 nodev, /* devmap */ 3785 nodev, /* mmap */ 3786 nodev, /* segmap */ 3787 nochpoll, /* poll */ 3788 lofi_prop_op, /* prop_op */ 3789 0, /* streamtab */ 3790 D_64BIT | D_NEW | D_MP, /* Driver compatibility flag */ 3791 CB_REV, 3792 lofi_aread, 3793 lofi_awrite 3794 }; 3795 3796 static struct dev_ops lofi_ops = { 3797 DEVO_REV, /* devo_rev, */ 3798 0, /* refcnt */ 3799 lofi_info, /* info */ 3800 nulldev, /* identify */ 3801 nulldev, /* probe */ 3802 lofi_attach, /* attach */ 3803 lofi_detach, /* detach */ 3804 nodev, /* reset */ 3805 &lofi_cb_ops, /* driver operations */ 3806 NULL, /* no bus operations */ 3807 NULL, /* power */ 3808 ddi_quiesce_not_needed, /* quiesce */ 3809 }; 3810 3811 static struct modldrv modldrv = { 3812 &mod_driverops, 3813 "loopback file driver", 3814 &lofi_ops, 3815 }; 3816 3817 static struct modlinkage modlinkage = { 3818 MODREV_1, 3819 &modldrv, 3820 NULL 3821 }; 3822 3823 int 3824 _init(void) 3825 { 3826 int error; 3827 3828 list_create(&lofi_list, sizeof (struct lofi_state), 3829 offsetof(struct lofi_state, ls_list)); 3830 3831 error = ddi_soft_state_init((void **)&lofi_statep, 3832 sizeof (struct lofi_state), 0); 3833 if (error) { 3834 list_destroy(&lofi_list); 3835 return (error); 3836 } 3837 3838 /* 3839 * The minor number is stored as id << LOFI_CMLB_SHIFT as 3840 * we need to reserve space for cmlb minor numbers. 3841 * This will leave out 4096 id values on 32bit kernel, which should 3842 * still suffice. 
3843 */ 3844 lofi_id = id_space_create("lofi_id", 1, 3845 (1 << (L_BITSMINOR - LOFI_CMLB_SHIFT))); 3846 3847 if (lofi_id == NULL) { 3848 ddi_soft_state_fini((void **)&lofi_statep); 3849 list_destroy(&lofi_list); 3850 return (DDI_FAILURE); 3851 } 3852 3853 mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL); 3854 3855 error = mod_install(&modlinkage); 3856 3857 if (error) { 3858 id_space_destroy(lofi_id); 3859 mutex_destroy(&lofi_lock); 3860 ddi_soft_state_fini((void **)&lofi_statep); 3861 list_destroy(&lofi_list); 3862 } 3863 3864 return (error); 3865 } 3866 3867 int 3868 _fini(void) 3869 { 3870 int error; 3871 3872 mutex_enter(&lofi_lock); 3873 3874 if (!list_is_empty(&lofi_list)) { 3875 mutex_exit(&lofi_lock); 3876 return (EBUSY); 3877 } 3878 3879 mutex_exit(&lofi_lock); 3880 3881 error = mod_remove(&modlinkage); 3882 if (error) 3883 return (error); 3884 3885 mutex_destroy(&lofi_lock); 3886 id_space_destroy(lofi_id); 3887 ddi_soft_state_fini((void **)&lofi_statep); 3888 list_destroy(&lofi_list); 3889 3890 return (error); 3891 } 3892 3893 int 3894 _info(struct modinfo *modinfop) 3895 { 3896 return (mod_info(&modlinkage, modinfop)); 3897 } 3898