/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 *
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2016 Andrey Sokolov
 * Copyright 2016 Toomas Soome <tsoome@me.com>
 */

/*
 * lofi (loopback file) driver - allows you to attach a file to a device,
 * which can then be accessed through that device. The simple model is that
 * you tell lofi to open a file, and then use the block device you get as
 * you would any block device. lofi translates access to the block device
 * into I/O on the underlying file. This is mostly useful for
 * mounting images of filesystems.
 *
 * lofi is controlled through /dev/lofictl - this is the only device exported
 * during attach, and is instance number 0. lofiadm communicates with lofi
 * through ioctls on this device. When a file is attached to lofi, block and
 * character devices are exported in /dev/lofi and /dev/rlofi. These devices
 * are identified by lofi instance number, and the instance number is also used
 * as the name in /dev/lofi.
 *
 * Virtual disks, or labeled lofi, implement virtual disk support for
 * partition tables and related tools. Such mappings will cause
 * block and character devices to be exported in the /dev/dsk and /dev/rdsk
 * directories.
 *
 * To support virtual disks, the instance number space is divided into two
 * parts: the upper part for the instance number and the lower part for the
 * minor number space identifying partitions and slices. The virtual disk
 * support is implemented by stacking the cmlb module. For virtual disks,
 * the partition-related ioctl calls are routed to the cmlb module.
 * Compression and encryption are not supported for virtual disks.
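 *
 * (As an illustration of the split, conversions in this file always go
 * through the LOFI_ID2MINOR(), LOFI_MINOR2ID() and LOFI_PART() macros,
 * which pack and unpack an (instance id, partition) pair in a single
 * minor number; see sys/lofi.h for the authoritative encoding.)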
 *
 * Mapped devices are tracked with state structures handled with
 * ddi_soft_state(9F) for simplicity.
 *
 * A file attached to lofi is opened when attached and not closed until
 * explicitly detached from lofi. This seems more sensible than deferring
 * the open until the /dev/lofi device is opened, for a number of reasons.
 * One is that any failure is likely to be noticed by the person (or script)
 * running lofiadm. Another is that it would be a security problem if the
 * file was replaced by another one after being added but before being opened.
 *
 * The only hard part about lofi is the ioctls. In order to support things
 * like 'newfs' on a lofi device, it needs to support certain disk ioctls.
 * So it has to fake disk geometry and partition information. More may need
 * to be faked if your favorite utility doesn't work and you think it should
 * (fdformat doesn't work because it really wants to know the type of floppy
 * controller to talk to, and that didn't seem easy to fake. Or possibly even
 * necessary, since we have mkfs_pcfs now).
 *
 * Normally, a lofi device cannot be detached if it is open (i.e. busy). To
 * support simulation of hotplug events, an optional force flag is provided.
 * If a lofi device is open when a force detach is requested, then the
 * underlying file is closed and any subsequent operations return EIO. When the
 * device is closed for the last time, it will be cleaned up at that time. In
 * addition, the DKIOCSTATE ioctl will return DKIO_DEV_GONE when the device is
 * detached but not removed.
 *
 * If detach was requested and the lofi device is not open, we will perform
 * the unmap and remove the lofi instance.
 *
 * If the lofi device is open and li_cleanup is set on the ioctl request,
 * we set the ls_cleanup flag to note that cleanup is requested, and the
 * last lofi_close will perform the unmapping and this lofi instance will be
 * removed.
 *
 * If the lofi device is open and li_force is set on the ioctl request,
 * we set the ls_cleanup flag to note that cleanup is requested,
 * we also set ls_vp_closereq to notify I/O tasks to return EIO on new
 * I/O requests and wait for the in-process I/O count to become 0, indicating
 * there are no more I/O requests. Since ls_cleanup is set, the last
 * lofi_close will perform the unmap and this lofi instance will be removed.
 * See also lofi_unmap_file() for details.
 *
 * Once ls_cleanup is set for the instance, we do not allow lofi_open()
 * calls to succeed, letting the last lofi_close() remove the instance.
 *
 * Known problems:
 *
 *	UFS logging. Mounting a UFS filesystem image "logging"
 *	works for basic copy testing but wedges during a build of ON through
 *	that image. Some deadlock in lufs holding the log mutex and then
 *	getting stuck on a buf. So for now, don't do that.
 *
 *	Direct I/O. Since the filesystem data is being cached in the buffer
 *	cache, _and_ again in the underlying filesystem, it's tempting to
 *	enable direct I/O on the underlying file. Don't, because that deadlocks.
 *	I think to fix the cache-twice problem we might need filesystem support.
 *
 * Interesting things to do:
 *
 *	Allow multiple files for each device. A poor-man's metadisk, basically.
 *
 *	Pass-through ioctls on block devices. You can (though it's not
 *	documented) give lofi a block device as a file name. Then we shouldn't
 *	need to fake a geometry; however, it may be relevant if you're replacing
 *	metadisk, or using lofi to get crypto.
 *	It makes sense to do lofiadm -c aes -a /dev/dsk/c0t0d0s4 /dev/lofi/1
 *	and then in /etc/vfstab have an entry for /dev/lofi/1 as /export/home.
 *	In fact this even makes sense if you have lofi "above" metadisk.
 *
 * Encryption:
 *	Each lofi device can have its own symmetric key and cipher.
 *	They are passed to us by lofiadm(1M) in the correct format for use
 *	with the misc/kcf crypto_* routines.
 *
 *	Each block has its own IV, that is calculated in lofi_blk_mech(), based
 *	on the "master" key held in the lsp and the block number of the buffer.
 */
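
/*
 * Illustrative usage (not exhaustive; lofiadm(1M) is the authoritative
 * reference):
 *
 *	# lofiadm -a /export/images/cd.iso
 *	/dev/lofi/1
 *	# mount -F hsfs -o ro /dev/lofi/1 /mnt
 *
 * attaches an image file and mounts the filesystem it contains through
 * the resulting block device.
 */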

#include <sys/types.h>
#include <netinet/in.h>
#include <sys/sysmacros.h>
#include <sys/uio.h>
#include <sys/kmem.h>
#include <sys/cred.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/aio_req.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/vnode.h>
#include <sys/lofi.h>
#include <sys/lofi_impl.h>	/* for cache structure */
#include <sys/fcntl.h>
#include <sys/pathname.h>
#include <sys/filio.h>
#include <sys/fdio.h>
#include <sys/open.h>
#include <sys/disp.h>
#include <vm/seg_map.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/zmod.h>
#include <sys/id_space.h>
#include <sys/mkdev.h>
#include <sys/crypto/common.h>
#include <sys/crypto/api.h>
#include <sys/rctl.h>
#include <sys/vtoc.h>
#include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
#include <sys/scsi/impl/uscsi.h>
#include <sys/sysevent/dev.h>
#include <LzmaDec.h>

#define	NBLOCKS_PROP_NAME	"Nblocks"
#define	SIZE_PROP_NAME		"Size"
#define	ZONE_PROP_NAME		"zone"

#define	SETUP_C_DATA(cd, buf, len)		\
	(cd).cd_format = CRYPTO_DATA_RAW;	\
	(cd).cd_offset = 0;			\
	(cd).cd_miscdata = NULL;		\
	(cd).cd_length = (len);			\
	(cd).cd_raw.iov_base = (buf);		\
	(cd).cd_raw.iov_len = (len);

#define	UIO_CHECK(uio)	\
	if (((uio)->uio_loffset % DEV_BSIZE) != 0 || \
	    ((uio)->uio_resid % DEV_BSIZE) != 0) { \
		return (EINVAL); \
	}

#define	LOFI_TIMEOUT	30

static void *lofi_statep;
static kmutex_t lofi_lock;		/* state lock */
static id_space_t *lofi_id;		/* lofi ID values */
static list_t lofi_list;
static zone_key_t lofi_zone_key;

/*
 * Because lofi_taskq_nthreads limits the actual swamping of the device, the
 * maxalloc parameter (lofi_taskq_maxalloc) should be tuned conservatively
 * high. If we want to be assured that the underlying device is always busy,
 * we must be sure that the number of bytes enqueued when the number of
 * enqueued tasks exceeds maxalloc is sufficient to keep the device busy for
 * the duration of the sleep time in taskq_ent_alloc(). That is, lofi should
 * set maxalloc to be the maximum throughput (in bytes per second) of the
 * underlying device divided by the minimum I/O size. We assume a realistic
 * maximum throughput of one hundred megabytes per second; we set maxalloc on
 * the lofi task queue to be 104857600 divided by DEV_BSIZE.
 */
static int lofi_taskq_maxalloc = 104857600 / DEV_BSIZE;
static int lofi_taskq_nthreads = 4;	/* # of taskq threads per device */

const char lofi_crypto_magic[6] = LOFI_CRYPTO_MAGIC;

/*
 * To avoid decompressing data in a compressed segment multiple times
 * when accessing small parts of a segment's data, we cache and reuse
 * the uncompressed segment's data.
 *
 * A single cached segment is sufficient to avoid lots of duplicate
 * segment decompress operations. A small cache size also reduces the
 * memory footprint.
 *
 * lofi_max_comp_cache is the maximum number of decompressed data segments
 * cached for each compressed lofi image. It can be set to 0 to disable
 * caching.
 */

uint32_t lofi_max_comp_cache = 1;
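
/*
 * The tunables above can be adjusted without rebuilding the driver; for
 * example (illustrative /etc/system settings, variable names taken from
 * the definitions above):
 *
 *	set lofi:lofi_taskq_nthreads = 8
 *	set lofi:lofi_max_comp_cache = 4
 */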

static int gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *destlen, int level);

static int lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level);

lofi_compress_info_t lofi_compress_table[LOFI_COMPRESS_FUNCTIONS] = {
	{gzip_decompress,	NULL,	6,	"gzip"}, /* default */
	{gzip_decompress,	NULL,	6,	"gzip-6"},
	{gzip_decompress,	NULL,	9,	"gzip-9"},
	{lzma_decompress,	NULL,	0,	"lzma"}
};

static void lofi_strategy_task(void *);
static int lofi_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t,
    size_t, void *);
static int lofi_tg_getinfo(dev_info_t *, int, void *, void *);

struct cmlb_tg_ops lofi_tg_ops = {
	TG_DK_OPS_VERSION_1,
	lofi_tg_rdwr,
	lofi_tg_getinfo
};

/*ARGSUSED*/
static void
*SzAlloc(void *p, size_t size)
{
	return (kmem_alloc(size, KM_SLEEP));
}

/*ARGSUSED*/
static void
SzFree(void *p, void *address, size_t size)
{
	kmem_free(address, size);
}

static ISzAlloc g_Alloc = { SzAlloc, SzFree };

/*
 * Free data referenced by the linked list of cached uncompressed
 * segments.
 */
static void
lofi_free_comp_cache(struct lofi_state *lsp)
{
	struct lofi_comp_cache *lc;

	while ((lc = list_remove_head(&lsp->ls_comp_cache)) != NULL) {
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}
	ASSERT(lsp->ls_comp_cache_count == 0);
}

static int
is_opened(struct lofi_state *lsp)
{
	int i;
	boolean_t last = B_TRUE;

	ASSERT(MUTEX_HELD(&lofi_lock));
	for (i = 0; i < LOFI_PART_MAX; i++) {
		if (lsp->ls_open_lyr[i]) {
			last = B_FALSE;
			break;
		}
	}

	for (i = 0; last && (i < OTYP_LYR); i++) {
		if (lsp->ls_open_reg[i]) {
			last = B_FALSE;
		}
	}

	return (!last);
}

static void
lofi_set_cleanup(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	lsp->ls_cleanup = B_TRUE;

	/* wake up any threads waiting on dkiocstate */
	cv_broadcast(&lsp->ls_vp_cv);
}

static void
lofi_free_crypto(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_crypto_enabled) {
		/*
		 * Clean up the crypto state so that it doesn't hang around
		 * in memory after we are done with it.
		 */
		if (lsp->ls_key.ck_data != NULL) {
			bzero(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			kmem_free(lsp->ls_key.ck_data,
			    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));
			lsp->ls_key.ck_data = NULL;
			lsp->ls_key.ck_length = 0;
		}

		if (lsp->ls_mech.cm_param != NULL) {
			kmem_free(lsp->ls_mech.cm_param,
			    lsp->ls_mech.cm_param_len);
			lsp->ls_mech.cm_param = NULL;
			lsp->ls_mech.cm_param_len = 0;
		}

		if (lsp->ls_iv_mech.cm_param != NULL) {
			kmem_free(lsp->ls_iv_mech.cm_param,
			    lsp->ls_iv_mech.cm_param_len);
			lsp->ls_iv_mech.cm_param = NULL;
			lsp->ls_iv_mech.cm_param_len = 0;
		}

		mutex_destroy(&lsp->ls_crypto_lock);
	}
}

/* ARGSUSED */
static int
lofi_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	struct lofi_state *lsp;
	buf_t	*bp;
	int	instance;
	int	rv = 0;

	instance = ddi_get_instance(dip);
	if (instance == 0)	/* control node does not have disk */
		return (ENXIO);

	lsp = ddi_get_soft_state(lofi_statep, instance);

	if (lsp == NULL)
		return (ENXIO);

	if (cmd != TG_READ && cmd != TG_WRITE)
		return (EINVAL);

	/*
	 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	while (lsp->ls_vp_ready == B_FALSE)
		cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
	mutex_exit(&lsp->ls_vp_lock);

	if (P2PHASE(length, (1U << lsp->ls_lbshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	bp = getrbuf(KM_SLEEP);

	if (cmd == TG_READ) {
		bp->b_flags = B_READ;
	} else {
		if (lsp->ls_readonly == B_TRUE) {
			freerbuf(bp);
			return (EROFS);
		}
		bp->b_flags = B_WRITE;
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	bp->b_lblkno = start;
	bp->b_private = NULL;
	bp->b_edev = lsp->ls_dev;

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	(void) biowait(bp);

	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}

/*
 * Get device geometry info for cmlb.
 *
 * We have mapped the disk image as a virtual block device and have to
 * report the physical/virtual geometry to cmlb.
 *
 * So we have two principal cases:
 * 1. Uninitialised image without any existing labels,
 *    for this case we fabricate the data based on the mapped image.
 * 2. Image with existing label information.
 *    Since we have no information about how the image was created (it may
 *    be a dump from some physical device), we need to rely on the label
 *    information from the image, or we get "corrupted label" errors.
 *    NOTE: the label can be MBR, MBR+SMI, or GPT
 */
static int
lofi_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
	struct lofi_state *lsp;
	int instance;
	int ashift;

	_NOTE(ARGUNUSED(tg_cookie));
	instance = ddi_get_instance(dip);
	if (instance == 0)		/* control device has no storage */
		return (ENXIO);

	lsp = ddi_get_soft_state(lofi_statep, instance);

	if (lsp == NULL)
		return (ENXIO);

	/*
	 * Make sure the mapping is set up by checking lsp->ls_vp_ready.
	 *
	 * When a mapping is created, a new lofi instance is created and
	 * lofi_attach() will call cmlb_attach() as part of the procedure
	 * to set the mapping up. This chain of events happens in
	 * the same thread.
	 * Since cmlb_attach() calls lofi_tg_getinfo to get the
	 * capacity, we return an error on that call if the cookie is set;
	 * otherwise lofi_attach would get stuck, as the mapping is not yet
	 * finalized and lofi is not yet ready.
	 * Note, such an error is not fatal for cmlb, as the label setup
	 * will be finalized when cmlb_validate() is called.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	if (tg_cookie != NULL && lsp->ls_vp_ready == B_FALSE) {
		mutex_exit(&lsp->ls_vp_lock);
		return (ENXIO);
	}
	while (lsp->ls_vp_ready == B_FALSE)
		cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
	mutex_exit(&lsp->ls_vp_lock);

	ashift = lsp->ls_lbshift;

	switch (cmd) {
	case TG_GETPHYGEOM: {
		cmlb_geom_t *geomp = arg;

		geomp->g_capacity =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> ashift;
		geomp->g_nsect = lsp->ls_dkg.dkg_nsect;
		geomp->g_nhead = lsp->ls_dkg.dkg_nhead;
		geomp->g_acyl = lsp->ls_dkg.dkg_acyl;
		geomp->g_ncyl = lsp->ls_dkg.dkg_ncyl;
		geomp->g_secsize = (1U << ashift);
		geomp->g_intrlv = lsp->ls_dkg.dkg_intrlv;
		geomp->g_rpm = lsp->ls_dkg.dkg_rpm;
		return (0);
	}

	case TG_GETCAPACITY:
		*(diskaddr_t *)arg =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> ashift;
		return (0);

	case TG_GETBLOCKSIZE:
		*(uint32_t *)arg = (1U << ashift);
		return (0);

	case TG_GETATTR: {
		tg_attribute_t *tgattr = arg;

		tgattr->media_is_writable = !lsp->ls_readonly;
		tgattr->media_is_solid_state = B_FALSE;
		tgattr->media_is_rotational = B_FALSE;
		return (0);
	}

	default:
		return (EINVAL);
	}
}

static void
lofi_destroy(struct lofi_state *lsp, cred_t *credp)
{
	int id = LOFI_MINOR2ID(getminor(lsp->ls_dev));
	int i;

	ASSERT(MUTEX_HELD(&lofi_lock));

	/*
	 * Before we can start to release the other resources,
	 * make sure we have all tasks completed and the taskq removed.
	 */
	if (lsp->ls_taskq != NULL) {
		taskq_destroy(lsp->ls_taskq);
		lsp->ls_taskq = NULL;
	}

	list_remove(&lofi_list, lsp);

	lofi_free_crypto(lsp);

	/*
	 * Free pre-allocated compressed buffers
	 */
	if (lsp->ls_comp_bufs != NULL) {
		for (i = 0; i < lofi_taskq_nthreads; i++) {
			if (lsp->ls_comp_bufs[i].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[i].buf,
				    lsp->ls_comp_bufs[i].bufsize);
		}
		kmem_free(lsp->ls_comp_bufs,
		    sizeof (struct compbuf) * lofi_taskq_nthreads);
	}

	if (lsp->ls_vp != NULL) {
		(void) VOP_PUTPAGE(lsp->ls_vp, 0, 0, B_INVAL, credp, NULL);
		(void) VOP_CLOSE(lsp->ls_vp, lsp->ls_openflag,
		    1, 0, credp, NULL);
		VN_RELE(lsp->ls_vp);
	}
	if (lsp->ls_stacked_vp != lsp->ls_vp)
		VN_RELE(lsp->ls_stacked_vp);
	lsp->ls_vp = lsp->ls_stacked_vp = NULL;

	if (lsp->ls_kstat != NULL) {
		kstat_delete(lsp->ls_kstat);
		lsp->ls_kstat = NULL;
	}

	/*
	 * Free cached decompressed segment data
	 */
	lofi_free_comp_cache(lsp);
	list_destroy(&lsp->ls_comp_cache);

	if (lsp->ls_uncomp_seg_sz > 0) {
		kmem_free(lsp->ls_comp_index_data, lsp->ls_comp_index_data_sz);
		lsp->ls_uncomp_seg_sz = 0;
	}

	rctl_decr_lofi(lsp->ls_zone.zref_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);

	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);
	cv_destroy(&lsp->ls_vp_cv);
	lsp->ls_vp_ready = B_FALSE;
	lsp->ls_vp_closereq = B_FALSE;

	ASSERT(ddi_get_soft_state(lofi_statep, id) == lsp);
	(void) ndi_devi_offline(lsp->ls_dip, NDI_DEVI_REMOVE);
	id_free(lofi_id, id);
}

static void
lofi_free_dev(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));

	if (lsp->ls_cmlbhandle != NULL) {
		cmlb_invalidate(lsp->ls_cmlbhandle, 0);
		cmlb_detach(lsp->ls_cmlbhandle, 0);
		cmlb_free_handle(&lsp->ls_cmlbhandle);
		lsp->ls_cmlbhandle = NULL;
	}
	(void) ddi_prop_remove_all(lsp->ls_dip);
	ddi_remove_minor_node(lsp->ls_dip, NULL);
}

/*ARGSUSED*/
static void
lofi_zone_shutdown(zoneid_t zoneid, void *arg)
{
	struct lofi_state *lsp;
	struct lofi_state *next;

	mutex_enter(&lofi_lock);

	for (lsp = list_head(&lofi_list); lsp != NULL; lsp = next) {

		/* lofi_destroy() frees lsp */
		next = list_next(&lofi_list, lsp);

		if (lsp->ls_zone.zref_zone->zone_id != zoneid)
			continue;

		/*
		 * No in-zone processes are running, but something has this
		 * open. It's either a global zone process, or a lofi
		 * mount. In either case we set ls_cleanup so the last
		 * user destroys the device.
		 */
		if (is_opened(lsp)) {
			lofi_set_cleanup(lsp);
		} else {
			lofi_free_dev(lsp);
			lofi_destroy(lsp, kcred);
		}
	}

	mutex_exit(&lofi_lock);
}

/*ARGSUSED*/
static int
lofi_open(dev_t *devp, int flag, int otyp, struct cred *credp)
{
	int id;
	minor_t	part;
	uint64_t mask;
	diskaddr_t nblks;
	diskaddr_t lba;
	boolean_t ndelay;

	struct lofi_state *lsp;

	if (otyp >= OTYPCNT)
		return (EINVAL);

	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

	/*
	 * lofiadm -a /dev/lofi/1 gets us here.
	 */
	if (mutex_owner(&lofi_lock) == curthread)
		return (EINVAL);

	mutex_enter(&lofi_lock);

	id = LOFI_MINOR2ID(getminor(*devp));
	part = LOFI_PART(getminor(*devp));
	mask = (1U << part);

	/* master control device */
	if (id == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	/* otherwise, the mapping should already exist */
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (lsp->ls_cleanup == B_TRUE) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (lsp->ls_vp == NULL) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}

	if (lsp->ls_readonly && (flag & FWRITE)) {
		mutex_exit(&lofi_lock);
		return (EROFS);
	}

	if ((lsp->ls_open_excl) & (mask)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	if (flag & FEXCL) {
		if (lsp->ls_open_lyr[part]) {
			mutex_exit(&lofi_lock);
			return (EBUSY);
		}
		for (int i = 0; i < OTYP_LYR; i++) {
			if (lsp->ls_open_reg[i] & mask) {
				mutex_exit(&lofi_lock);
				return (EBUSY);
			}
		}
	}

	if (lsp->ls_cmlbhandle != NULL) {
		if (cmlb_validate(lsp->ls_cmlbhandle, 0, 0) != 0) {
			/*
			 * non-blocking opens are allowed to succeed, so
			 * that format and fdisk can create the
			 * partitioning.
			 */
			if (!ndelay) {
				mutex_exit(&lofi_lock);
				return (ENXIO);
			}
		} else if (cmlb_partinfo(lsp->ls_cmlbhandle, part, &nblks, &lba,
		    NULL, NULL, 0) == 0) {
			if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
				mutex_exit(&lofi_lock);
				return (ENXIO);
			}
		} else if (!ndelay) {
			mutex_exit(&lofi_lock);
			return (ENXIO);
		}
	}

	if (otyp == OTYP_LYR) {
		lsp->ls_open_lyr[part]++;
	} else {
		lsp->ls_open_reg[otyp] |= mask;
	}
	if (flag & FEXCL) {
		lsp->ls_open_excl |= mask;
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*ARGSUSED*/
static int
lofi_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
	minor_t	part;
	int id;
	uint64_t mask;
	struct lofi_state *lsp;

	id = LOFI_MINOR2ID(getminor(dev));
	part = LOFI_PART(getminor(dev));
	mask = (1U << part);

	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL) {
		mutex_exit(&lofi_lock);
		return (EINVAL);
	}

	if (id == 0) {
		mutex_exit(&lofi_lock);
		return (0);
	}

	if (lsp->ls_open_excl & mask)
		lsp->ls_open_excl &= ~mask;

	if (otyp == OTYP_LYR) {
		lsp->ls_open_lyr[part]--;
	} else {
		lsp->ls_open_reg[otyp] &= ~mask;
	}

	/*
	 * If we forcibly closed the underlying device (li_force), or
	 * asked for cleanup (li_cleanup), finish up if we're the last
	 * out of the door.
	 */
	if (!is_opened(lsp) &&
	    (lsp->ls_cleanup == B_TRUE || lsp->ls_vp == NULL)) {
		lofi_free_dev(lsp);
		lofi_destroy(lsp, credp);
	}

	mutex_exit(&lofi_lock);
	return (0);
}

/*
 * Sets the mechanism's initialization vector (IV) if one is needed.
 * The IV is computed from the data block number. lsp->ls_mech is
 * altered so that:
 *	lsp->ls_mech.cm_param_len is set to the IV len.
 *	lsp->ls_mech.cm_param is set to the IV.
 */
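/*
 * Illustration of the IVM_ENC_BLKNO scheme implemented below (not a
 * normative specification): for a 16-byte IV and block number 37, the
 * block number is stored right-justified in a zeroed 16-byte buffer,
 * and that buffer is then encrypted with the key using ls_iv_mech; the
 * resulting ciphertext becomes the per-block IV.
 */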
static int
lofi_blk_mech(struct lofi_state *lsp, longlong_t lblkno)
{
	int	ret;
	crypto_data_t cdata;
	char	*iv;
	size_t	iv_len;
	size_t	min;
	void	*data;
	size_t	datasz;

	ASSERT(MUTEX_HELD(&lsp->ls_crypto_lock));

	if (lsp == NULL)
		return (CRYPTO_DEVICE_ERROR);

	/* lsp->ls_mech.cm_param{_len} has already been set for static iv */
	if (lsp->ls_iv_type == IVM_NONE) {
		return (CRYPTO_SUCCESS);
	}

	/*
	 * if kmem already alloced from previous call and it's the same size
	 * we need now, just recycle it; allocate new kmem only if we have to
	 */
	if (lsp->ls_mech.cm_param == NULL ||
	    lsp->ls_mech.cm_param_len != lsp->ls_iv_len) {
		iv_len = lsp->ls_iv_len;
		iv = kmem_zalloc(iv_len, KM_SLEEP);
	} else {
		iv_len = lsp->ls_mech.cm_param_len;
		iv = lsp->ls_mech.cm_param;
		bzero(iv, iv_len);
	}

	switch (lsp->ls_iv_type) {
	case IVM_ENC_BLKNO:
		/* iv is not static, lblkno changes each time */
		data = &lblkno;
		datasz = sizeof (lblkno);
		break;
	default:
		data = 0;
		datasz = 0;
		break;
	}

	/*
	 * write blkno into the iv buffer padded on the left in case
	 * blkno ever grows bigger than its current longlong_t size
	 * or a variation other than blkno is used for the iv data
	 */
	min = MIN(datasz, iv_len);
	bcopy(data, iv + (iv_len - min), min);

	/* encrypt the data in-place to get the IV */
	SETUP_C_DATA(cdata, iv, iv_len);

	ret = crypto_encrypt(&lsp->ls_iv_mech, &cdata, &lsp->ls_key,
	    NULL, NULL, NULL);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "failed to create iv for block %lld: (0x%x)",
		    lblkno, ret);
		if (lsp->ls_mech.cm_param != iv)
			kmem_free(iv, iv_len);

		return (ret);
	}

	/* clean up the iv from the last computation */
	if (lsp->ls_mech.cm_param != NULL && lsp->ls_mech.cm_param != iv)
		kmem_free(lsp->ls_mech.cm_param, lsp->ls_mech.cm_param_len);

	lsp->ls_mech.cm_param_len = iv_len;
	lsp->ls_mech.cm_param = iv;

	return (CRYPTO_SUCCESS);
}

/*
 * Performs encryption and decryption of a chunk of data of size "len",
 * one DEV_BSIZE block at a time. "len" is assumed to be a multiple of
 * DEV_BSIZE.
 */
static int
lofi_crypto(struct lofi_state *lsp, struct buf *bp, caddr_t plaintext,
    caddr_t ciphertext, size_t len, boolean_t op_encrypt)
{
	crypto_data_t cdata;
	crypto_data_t wdata;
	int ret;
	longlong_t lblkno = bp->b_lblkno;

	mutex_enter(&lsp->ls_crypto_lock);

	/*
	 * though we could encrypt/decrypt the entire "len" chunk of data, we
	 * need to break it into DEV_BSIZE pieces to capture blkno incrementing
	 */
	SETUP_C_DATA(cdata, plaintext, len);
	cdata.cd_length = DEV_BSIZE;
	if (ciphertext != NULL) {	/* not in-place crypto */
		SETUP_C_DATA(wdata, ciphertext, len);
		wdata.cd_length = DEV_BSIZE;
	}

	do {
		ret = lofi_blk_mech(lsp, lblkno);
		if (ret != CRYPTO_SUCCESS)
			continue;

		if (op_encrypt) {
			ret = crypto_encrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		} else {
			ret = crypto_decrypt(&lsp->ls_mech, &cdata,
			    &lsp->ls_key, NULL,
			    ((ciphertext != NULL) ? &wdata : NULL), NULL);
		}

		cdata.cd_offset += DEV_BSIZE;
		if (ciphertext != NULL)
			wdata.cd_offset += DEV_BSIZE;
		lblkno++;
	} while (ret == CRYPTO_SUCCESS && cdata.cd_offset < len);

	mutex_exit(&lsp->ls_crypto_lock);

	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "%s failed for block %lld: (0x%x)",
		    op_encrypt ? "crypto_encrypt()" : "crypto_decrypt()",
		    lblkno, ret);
	}

	return (ret);
}

#define	RDWR_RAW	1
#define	RDWR_BCOPY	2

static int
lofi_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp, size_t len, int method, caddr_t bcopy_locn)
{
	ssize_t resid;
	int isread;
	int error;

	/*
	 * Handles reads/writes for both plain and encrypted lofi
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */

	isread = bp->b_flags & B_READ;
	if (isread) {
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(bcopy_locn, bufaddr, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_READ, lsp->ls_vp, bufaddr, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled && error == 0) {
			if (lofi_crypto(lsp, bp, bufaddr, NULL, len,
			    B_FALSE) != CRYPTO_SUCCESS) {
				/*
				 * XXX: original code didn't set residual
				 * back to len because no error was expected
				 * from bcopy() if encryption is not enabled
				 */
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				error = EIO;
			}
		}
		return (error);
	} else {
		void *iobuf = bufaddr;

		if (lsp->ls_crypto_enabled) {
			/* don't do in-place crypto to keep bufaddr intact */
			iobuf = kmem_alloc(len, KM_SLEEP);
			if (lofi_crypto(lsp, bp, bufaddr, iobuf, len,
			    B_TRUE) != CRYPTO_SUCCESS) {
				kmem_free(iobuf, len);
				if (method != RDWR_BCOPY)
					bp->b_resid = len;
				return (EIO);
			}
		}
		if (method == RDWR_BCOPY) {
			/* DO NOT update bp->b_resid for bcopy */
			bcopy(iobuf, bcopy_locn, len);
			error = 0;
		} else {		/* RDWR_RAW */
			error = vn_rdwr(UIO_WRITE, lsp->ls_vp, iobuf, len,
			    offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred,
			    &resid);
			bp->b_resid = resid;
		}
		if (lsp->ls_crypto_enabled) {
			kmem_free(iobuf, len);
		}
		return (error);
	}
}
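
/*
 * A worked example of the segmap offset arithmetic in lofi_mapped_rdwr()
 * below (illustrative numbers, assuming MAXBSIZE is 8K): for a transfer
 * starting at file offset 0x2300, mapoffset = 0x2300 & MAXBOFFSET = 0x300
 * and alignedoffset = 0x2000, so the first bcopy starts 0x300 bytes into
 * the first 8K segmap window; all subsequent windows start at offset 0.
 */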

static int
lofi_mapped_rdwr(caddr_t bufaddr, offset_t offset, struct buf *bp,
    struct lofi_state *lsp)
{
	int error;
	offset_t alignedoffset, mapoffset;
	size_t	xfersize;
	int	isread;
	int	smflags;
	caddr_t	mapaddr;
	size_t	len;
	enum seg_rw srw;
	int	save_error;

	/*
	 * Note: offset is already shifted by lsp->ls_crypto_offset
	 * when it gets here.
	 */
	if (lsp->ls_crypto_enabled)
		ASSERT(lsp->ls_vp_comp_size == lsp->ls_vp_size);

	/*
	 * segmap always gives us an 8K (MAXBSIZE) chunk, aligned on
	 * an 8K boundary, but the buf transfer address may not be
	 * aligned on more than a 512-byte boundary (we don't enforce
	 * that even though we could). This matters since the initial
	 * part of the transfer may not start at offset 0 within the
	 * segmap'd chunk. So we have to compensate for that with
	 * 'mapoffset'. Subsequent chunks always start off at the
	 * beginning, and the last is capped by b_resid
	 *
	 * Visually, where "|" represents page map boundaries:
	 *   alignedoffset (mapaddr begins at this segmap boundary)
	 *    |   offset (from beginning of file)
	 *    |    |   len
	 *    v    v    v
	 * ===|====X========|====...======|========X====|====
	 *         /-------------...---------------/
	 *    ^ bp->b_bcount/bp->b_resid at start
	 *    /----/--------/----...------/--------/
	 *    ^    ^        ^             ^        ^
	 *    |    |        |             |        nth xfersize (<= MAXBSIZE)
	 *    |    |        2nd thru n-1st xfersize (= MAXBSIZE)
	 *    |    1st xfersize (<= MAXBSIZE)
	 *    mapoffset (offset into 1st segmap, non-0 1st time, 0 thereafter)
	 *
	 * Notes: "alignedoffset" is "offset" rounded down to nearest
	 * MAXBSIZE boundary. "len" is next page boundary of size
	 * PAGESIZE after "alignedoffset".
	 */
	mapoffset = offset & MAXBOFFSET;
	alignedoffset = offset - mapoffset;
	bp->b_resid = bp->b_bcount;
	isread = bp->b_flags & B_READ;
	srw = isread ? S_READ : S_WRITE;
	do {
		xfersize = MIN(lsp->ls_vp_comp_size - offset,
		    MIN(MAXBSIZE - mapoffset, bp->b_resid));
		len = roundup(mapoffset + xfersize, PAGESIZE);
		mapaddr = segmap_getmapflt(segkmap, lsp->ls_vp,
		    alignedoffset, MAXBSIZE, 1, srw);
		/*
		 * Now fault in the pages. This lets us check
		 * for errors before we reference mapaddr and
		 * try to resolve the fault in bcopy (which would
		 * panic instead). And this can easily happen,
		 * particularly if you've lofi'd a file over NFS
		 * and someone deletes the file on the server.
		 */
		error = segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTLOCK, srw);
		if (error) {
			(void) segmap_release(segkmap, mapaddr, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		/* error may be non-zero for encrypted lofi */
		error = lofi_rdwr(bufaddr, 0, bp, lsp, xfersize,
		    RDWR_BCOPY, mapaddr + mapoffset);
		if (error == 0) {
			bp->b_resid -= xfersize;
			bufaddr += xfersize;
			offset += xfersize;
		}
		smflags = 0;
		if (isread) {
			smflags |= SM_FREE;
			/*
			 * If we're reading an entire page starting
			 * at a page boundary, there's a good chance
			 * we won't need it again. Put it on the
			 * head of the freelist.
			 */
			if (mapoffset == 0 && xfersize == MAXBSIZE)
				smflags |= SM_DONTNEED;
		} else {
			/*
			 * Write back good pages, it is okay to
			 * always release asynchronous here as we'll
			 * follow with VOP_FSYNC for B_SYNC buffers.
			 */
			if (error == 0)
				smflags |= SM_WRITE | SM_ASYNC;
		}
		(void) segmap_fault(kas.a_hat, segkmap, mapaddr,
		    len, F_SOFTUNLOCK, srw);
		save_error = segmap_release(segkmap, mapaddr, smflags);
		if (error == 0)
			error = save_error;
		/* only the first map may start partial */
		mapoffset = 0;
		alignedoffset += MAXBSIZE;
	} while ((error == 0) && (bp->b_resid > 0) &&
	    (offset < lsp->ls_vp_comp_size));

	return (error);
}

/*
 * Check if segment seg_index is present in the decompressed segment
 * data cache.
 *
 * Returns a pointer to the decompressed segment data cache entry if
 * found, and NULL when decompressed data for this segment is not yet
 * cached.
 */
static struct lofi_comp_cache *
lofi_find_comp_data(struct lofi_state *lsp, uint64_t seg_index)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	for (lc = list_head(&lsp->ls_comp_cache); lc != NULL;
	    lc = list_next(&lsp->ls_comp_cache, lc)) {
		if (lc->lc_index == seg_index) {
			/*
			 * Decompressed segment data was found in the
			 * cache.
			 *
			 * The cache uses an LRU replacement strategy;
			 * move the entry to head of list.
			 */
			list_remove(&lsp->ls_comp_cache, lc);
			list_insert_head(&lsp->ls_comp_cache, lc);
			return (lc);
		}
	}
	return (NULL);
}

/*
 * Add the data for a decompressed segment at segment index
 * seg_index to the cache of the decompressed segments.
 *
 * Returns a pointer to the cache element structure in case
 * the data was added to the cache; returns NULL when the data
 * wasn't cached.
 */
static struct lofi_comp_cache *
lofi_add_comp_data(struct lofi_state *lsp, uint64_t seg_index,
    uchar_t *data)
{
	struct lofi_comp_cache *lc;

	ASSERT(MUTEX_HELD(&lsp->ls_comp_cache_lock));

	while (lsp->ls_comp_cache_count > lofi_max_comp_cache) {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		ASSERT(lc != NULL);
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);
		kmem_free(lc, sizeof (struct lofi_comp_cache));
		lsp->ls_comp_cache_count--;
	}

	/*
	 * Do not cache when disabled by tunable variable
	 */
	if (lofi_max_comp_cache == 0)
		return (NULL);

	/*
	 * When the cache has not yet reached the maximum allowed
	 * number of segments, allocate a new cache element.
	 * Otherwise the cache is full; reuse the last list element
	 * (LRU) for caching the decompressed segment data.
	 *
	 * The cache element for the new decompressed segment data is
	 * added to the head of the list.
	 */
	if (lsp->ls_comp_cache_count < lofi_max_comp_cache) {
		lc = kmem_alloc(sizeof (struct lofi_comp_cache), KM_SLEEP);
		lc->lc_data = NULL;
		list_insert_head(&lsp->ls_comp_cache, lc);
		lsp->ls_comp_cache_count++;
	} else {
		lc = list_remove_tail(&lsp->ls_comp_cache);
		if (lc == NULL)
			return (NULL);
		list_insert_head(&lsp->ls_comp_cache, lc);
	}

	/*
	 * Free old uncompressed segment data when reusing a cache
	 * entry.
	 */
	if (lc->lc_data != NULL)
		kmem_free(lc->lc_data, lsp->ls_uncomp_seg_sz);

	lc->lc_data = data;
	lc->lc_index = seg_index;
	return (lc);
}
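
/*
 * Decompression callbacks for lofi_compress_table. For both callbacks
 * *dstlen is an in/out parameter: on entry the capacity of dst, on
 * successful return the number of uncompressed bytes produced (this
 * follows the conventions of the z_uncompress() and LzmaDecode()
 * routines they wrap).
 */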

/*ARGSUSED*/
static int
gzip_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	ASSERT(*dstlen >= srclen);

	if (z_uncompress(dst, dstlen, src, srclen) != Z_OK)
		return (-1);
	return (0);
}

#define	LZMA_HEADER_SIZE	(LZMA_PROPS_SIZE + 8)
/*ARGSUSED*/
static int
lzma_decompress(void *src, size_t srclen, void *dst,
    size_t *dstlen, int level)
{
	size_t insizepure;
	void *actual_src;
	ELzmaStatus status;

	insizepure = srclen - LZMA_HEADER_SIZE;
	actual_src = (void *)((Byte *)src + LZMA_HEADER_SIZE);

	if (LzmaDecode((Byte *)dst, (size_t *)dstlen,
	    (const Byte *)actual_src, &insizepure,
	    (const Byte *)src, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status,
	    &g_Alloc) != SZ_OK) {
		return (-1);
	}
	return (0);
}

/*
 * This is basically what strategy used to be before we found we
 * needed task queues.
 */
static void
lofi_strategy_task(void *arg)
{
	struct buf *bp = (struct buf *)arg;
	int error;
	int syncflag = 0;
	struct lofi_state *lsp;
	offset_t offset;
	caddr_t	bufaddr;
	size_t	len;
	size_t	xfersize;
	boolean_t bufinited = B_FALSE;

	lsp = ddi_get_soft_state(lofi_statep,
	    LOFI_MINOR2ID(getminor(bp->b_edev)));

	if (lsp == NULL) {
		error = ENXIO;
		goto errout;
	}
	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_to_runq(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_iocount++;
	mutex_exit(&lsp->ls_vp_lock);

	bp_mapin(bp);
	bufaddr = bp->b_un.b_addr;
	offset = (bp->b_lblkno + (diskaddr_t)(uintptr_t)bp->b_private)
	    << lsp->ls_lbshift;	/* offset within file */
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}
	len = bp->b_bcount;
	bufinited = B_TRUE;

	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		error = EIO;
		goto errout;
	}

	/*
	 * If we're writing and the buffer was not B_ASYNC
	 * we'll follow up with a VOP_FSYNC() to force any
	 * asynchronous I/O to stable storage.
	 */
	if (!(bp->b_flags & B_READ) && !(bp->b_flags & B_ASYNC))
		syncflag = FSYNC;

	/*
	 * We used to always use vn_rdwr here, but we cannot do that because
	 * we might decide to read or write from the underlying
	 * file during this call, which would be a deadlock because
	 * we have the rw_lock. So instead we page, unless it's not
	 * mappable or it's a character device or it's an encrypted lofi.
	 */
	if ((lsp->ls_vp->v_flag & VNOMAP) || (lsp->ls_vp->v_type == VCHR) ||
	    lsp->ls_crypto_enabled) {
		error = lofi_rdwr(bufaddr, offset, bp, lsp, len, RDWR_RAW,
		    NULL);
	} else if (lsp->ls_uncomp_seg_sz == 0) {
		error = lofi_mapped_rdwr(bufaddr, offset, bp, lsp);
	} else {
		uchar_t *compressed_seg = NULL, *cmpbuf;
		uchar_t *uncompressed_seg = NULL;
		lofi_compress_info_t *li;
		size_t oblkcount;
		ulong_t seglen;
		uint64_t sblkno, eblkno, cmpbytes;
		uint64_t uncompressed_seg_index;
		struct lofi_comp_cache *lc;
		offset_t sblkoff, eblkoff;
		u_offset_t salign, ealign;
		u_offset_t sdiff;
		uint32_t comp_data_sz;
		uint64_t i;
		int j;

		/*
		 * From here on we're dealing primarily with compressed files
		 */
		ASSERT(!lsp->ls_crypto_enabled);

		/*
		 * Compressed files can only be read from and
		 * not written to
		 */
		if (!(bp->b_flags & B_READ)) {
			bp->b_resid = bp->b_bcount;
			error = EROFS;
			goto done;
		}

		ASSERT(lsp->ls_comp_algorithm_index >= 0);
		li = &lofi_compress_table[lsp->ls_comp_algorithm_index];
		/*
		 * Compute starting and ending compressed segment numbers
		 * We use only bitwise operations avoiding division and
		 * modulus because we enforce the compression segment size
		 * to a power of 2
		 */
		sblkno = offset >> lsp->ls_comp_seg_shift;
		sblkoff = offset & (lsp->ls_uncomp_seg_sz - 1);
		eblkno = (offset + bp->b_bcount) >> lsp->ls_comp_seg_shift;
		eblkoff = (offset + bp->b_bcount) & (lsp->ls_uncomp_seg_sz - 1);

		/*
		 * Check the decompressed segment cache.
		 *
		 * The cache is used only when the requested data
		 * is within a segment. Requests that cross
		 * segment boundaries bypass the cache.
		 */
		if (sblkno == eblkno ||
		    (sblkno + 1 == eblkno && eblkoff == 0)) {
			/*
			 * Request doesn't cross a segment boundary,
			 * now check the cache.
			 */
			mutex_enter(&lsp->ls_comp_cache_lock);
			lc = lofi_find_comp_data(lsp, sblkno);
			if (lc != NULL) {
				/*
				 * We've found the decompressed segment
				 * data in the cache; reuse it.
				 */
				bcopy(lc->lc_data + sblkoff, bufaddr,
				    bp->b_bcount);
				mutex_exit(&lsp->ls_comp_cache_lock);
				bp->b_resid = 0;
				error = 0;
				goto done;
			}
			mutex_exit(&lsp->ls_comp_cache_lock);
		}

		/*
		 * Align start offset to block boundary for segmap
		 */
		salign = lsp->ls_comp_seg_index[sblkno];
		sdiff = salign & (DEV_BSIZE - 1);
		salign -= sdiff;
		if (eblkno >= (lsp->ls_comp_index_sz - 1)) {
			/*
			 * We're dealing with the last segment of
			 * the compressed file -- the size of this
			 * segment *may not* be the same as the
			 * segment size for the file
			 */
			eblkoff = (offset + bp->b_bcount) &
			    (lsp->ls_uncomp_last_seg_sz - 1);
			ealign = lsp->ls_vp_comp_size;
		} else {
			ealign = lsp->ls_comp_seg_index[eblkno + 1];
		}

		/*
		 * Preserve original request parameters
		 */
		oblkcount = bp->b_bcount;

		/*
		 * Assign the calculated parameters
		 */
		comp_data_sz = ealign - salign;
		bp->b_bcount = comp_data_sz;

		/*
		 * Buffers to hold compressed segments are pre-allocated
		 * on a per-thread basis. Find a pre-allocated buffer
		 * that is not currently in use and mark it for use.
		 */
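		/*
		 * Note: the taskq serving this routine is created with
		 * lofi_taskq_nthreads threads (see the tunable above), so
		 * at most that many tasks run concurrently and the search
		 * below must find a free buffer; the ASSERT after the loop
		 * relies on this.
		 */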
		mutex_enter(&lsp->ls_comp_bufs_lock);
		for (j = 0; j < lofi_taskq_nthreads; j++) {
			if (lsp->ls_comp_bufs[j].inuse == 0) {
				lsp->ls_comp_bufs[j].inuse = 1;
				break;
			}
		}

		mutex_exit(&lsp->ls_comp_bufs_lock);
		ASSERT(j < lofi_taskq_nthreads);

		/*
		 * If the pre-allocated buffer is too small for the
		 * I/O request, re-allocate it with the appropriate size
		 */
		if (lsp->ls_comp_bufs[j].bufsize < bp->b_bcount) {
			if (lsp->ls_comp_bufs[j].bufsize > 0)
				kmem_free(lsp->ls_comp_bufs[j].buf,
				    lsp->ls_comp_bufs[j].bufsize);
			lsp->ls_comp_bufs[j].buf = kmem_alloc(bp->b_bcount,
			    KM_SLEEP);
			lsp->ls_comp_bufs[j].bufsize = bp->b_bcount;
		}
		compressed_seg = lsp->ls_comp_bufs[j].buf;

		/*
		 * Map in the calculated number of blocks
		 */
		error = lofi_mapped_rdwr((caddr_t)compressed_seg, salign,
		    bp, lsp);

		bp->b_bcount = oblkcount;
		bp->b_resid = oblkcount;
		if (error != 0)
			goto done;

		/*
		 * decompress compressed blocks start
		 */
		cmpbuf = compressed_seg + sdiff;
		for (i = sblkno; i <= eblkno; i++) {
			ASSERT(i < lsp->ls_comp_index_sz - 1);
			uchar_t *useg;

			/*
			 * The last segment is special in that it is
			 * most likely not going to be the same
			 * (uncompressed) size as the other segments.
			 */
			if (i == (lsp->ls_comp_index_sz - 2)) {
				seglen = lsp->ls_uncomp_last_seg_sz;
			} else {
				seglen = lsp->ls_uncomp_seg_sz;
			}

			/*
			 * Each of the segment index entries contains
			 * the starting block number for that segment.
			 * The number of compressed bytes in a segment
			 * is thus the difference between the starting
			 * block number of this segment and the starting
			 * block number of the next segment.
			 */
			cmpbytes = lsp->ls_comp_seg_index[i + 1] -
			    lsp->ls_comp_seg_index[i];

			/*
			 * The first byte in a compressed segment is a flag
			 * that indicates whether this segment is compressed
			 * at all.
			 *
			 * The variable 'useg' is used (instead of
			 * uncompressed_seg) in this loop to keep a
			 * reference to the uncompressed segment.
			 *
			 * N.B. If 'useg' is replaced with uncompressed_seg,
			 * it leads to memory leaks and heap corruption in
			 * corner cases where compressed segments lie
			 * adjacent to uncompressed segments.
			 */
			if (*cmpbuf == UNCOMPRESSED) {
				useg = cmpbuf + SEGHDR;
			} else {
				if (uncompressed_seg == NULL)
					uncompressed_seg =
					    kmem_alloc(lsp->ls_uncomp_seg_sz,
					    KM_SLEEP);
				useg = uncompressed_seg;
				uncompressed_seg_index = i;

				if (li->l_decompress((cmpbuf + SEGHDR),
				    (cmpbytes - SEGHDR), uncompressed_seg,
				    &seglen, li->l_level) != 0) {
					error = EIO;
					goto done;
				}
			}

			/*
			 * Determine how much uncompressed data we
			 * have to copy and copy it
			 */
			xfersize = lsp->ls_uncomp_seg_sz - sblkoff;
			if (i == eblkno)
				xfersize -= (lsp->ls_uncomp_seg_sz - eblkoff);

			bcopy((useg + sblkoff), bufaddr, xfersize);

			cmpbuf += cmpbytes;
			bufaddr += xfersize;
			bp->b_resid -= xfersize;
			sblkoff = 0;

			if (bp->b_resid == 0)
				break;
		} /* decompress compressed blocks ends */

		/*
		 * Skip to done if there is no uncompressed data to cache
		 */
		if (uncompressed_seg == NULL)
			goto done;

		/*
		 * Add the data for the last decompressed segment to
		 * the cache.
		 *
		 * In case the uncompressed segment data was added to (and
		 * is referenced by) the cache, make sure we don't free it
		 * here.
		 */
		mutex_enter(&lsp->ls_comp_cache_lock);
		if ((lc = lofi_add_comp_data(lsp, uncompressed_seg_index,
		    uncompressed_seg)) != NULL) {
			uncompressed_seg = NULL;
		}
		mutex_exit(&lsp->ls_comp_cache_lock);

done:
		if (compressed_seg != NULL) {
			mutex_enter(&lsp->ls_comp_bufs_lock);
			lsp->ls_comp_bufs[j].inuse = 0;
			mutex_exit(&lsp->ls_comp_bufs_lock);
		}
		if (uncompressed_seg != NULL)
			kmem_free(uncompressed_seg, lsp->ls_uncomp_seg_sz);
	} /* end of handling compressed files */

	if ((error == 0) && (syncflag != 0))
		error = VOP_FSYNC(lsp->ls_vp, syncflag, kcred, NULL);

errout:
	if (bufinited && lsp->ls_kstat) {
		size_t n_done = bp->b_bcount - bp->b_resid;
		kstat_io_t *kioptr;

		mutex_enter(lsp->ls_kstat->ks_lock);
		kioptr = KSTAT_IO_PTR(lsp->ls_kstat);
		if (bp->b_flags & B_READ) {
			kioptr->nread += n_done;
			kioptr->reads++;
		} else {
			kioptr->nwritten += n_done;
			kioptr->writes++;
		}
		kstat_runq_exit(kioptr);
		mutex_exit(lsp->ls_kstat->ks_lock);
	}

	mutex_enter(&lsp->ls_vp_lock);
	if (--lsp->ls_vp_iocount == 0)
		cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);

	bioerror(bp, error);
	biodone(bp);
}

static int
lofi_strategy(struct buf *bp)
{
	struct lofi_state *lsp;
	offset_t offset;
	minor_t part;
	diskaddr_t p_lba;
	diskaddr_t p_nblks;
	int shift;

	/*
	 * We cannot just do I/O here, because the current thread
	 * _might_ end up back in here because the underlying filesystem
	 * wants a buffer, which eventually gets into bio_recycle and
	 * might call into lofi to write out a delayed-write buffer.
	 * This is bad if the filesystem above lofi is the same as below.
	 *
	 * We could come up with a complex strategy using threads to
	 * do the I/O asynchronously, or we could use task queues. task
	 * queues were incredibly easy so they win.
	 */

	lsp = ddi_get_soft_state(lofi_statep,
	    LOFI_MINOR2ID(getminor(bp->b_edev)));
	part = LOFI_PART(getminor(bp->b_edev));

	if (lsp == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	/* Check if we are closing. */
	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp == NULL || lsp->ls_vp_closereq) {
		mutex_exit(&lsp->ls_vp_lock);
		bioerror(bp, EIO);
		biodone(bp);
		return (0);
	}
	mutex_exit(&lsp->ls_vp_lock);

	shift = lsp->ls_lbshift;
	p_lba = 0;
	p_nblks = lsp->ls_vp_size >> shift;

	if (lsp->ls_cmlbhandle != NULL) {
		if (cmlb_partinfo(lsp->ls_cmlbhandle, part, &p_nblks, &p_lba,
		    NULL, NULL, 0)) {
			bioerror(bp, ENXIO);
			biodone(bp);
			return (0);
		}
	}

	/* start block past partition end? */
	if (bp->b_lblkno > p_nblks) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	offset = (bp->b_lblkno + p_lba) << shift;	/* offset within file */

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_crypto_enabled) {
		/* encrypted data really begins after crypto header */
		offset += lsp->ls_crypto_offset;
	}

	/* make sure we will not pass the file or partition size */
	if (offset == lsp->ls_vp_size ||
	    offset == (((p_lba + p_nblks) << shift) + lsp->ls_crypto_offset)) {
		/* EOF */
		if ((bp->b_flags & B_READ) != 0) {
			bp->b_resid = bp->b_bcount;
			bioerror(bp, 0);
		} else {
			/* writes should fail */
			bioerror(bp, ENXIO);
		}
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}
	if ((offset > lsp->ls_vp_size) ||
	    (offset > (((p_lba + p_nblks) << shift) + lsp->ls_crypto_offset)) ||
	    ((offset + bp->b_bcount) > ((p_lba + p_nblks) << shift))) {
		bioerror(bp, ENXIO);
		biodone(bp);
		mutex_exit(&lsp->ls_vp_lock);
		return (0);
	}

	mutex_exit(&lsp->ls_vp_lock);

	if (lsp->ls_kstat) {
		mutex_enter(lsp->ls_kstat->ks_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(lsp->ls_kstat));
		mutex_exit(lsp->ls_kstat->ks_lock);
	}
	bp->b_private = (void *)(uintptr_t)p_lba;	/* partition start */
	(void) taskq_dispatch(lsp->ls_taskq, lofi_strategy_task, bp, KM_SLEEP);
	return (0);
}
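
/*
 * Character-device read/write and async I/O entry points. All of them
 * enforce DEV_BSIZE alignment via UIO_CHECK() and then funnel the request
 * through lofi_strategy() using physio(9F)/aphysio.
 */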

/*ARGSUSED2*/
static int
lofi_read(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_READ, minphys, uio));
}

/*ARGSUSED2*/
static int
lofi_write(dev_t dev, struct uio *uio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(uio);
	return (physio(lofi_strategy, NULL, dev, B_WRITE, minphys, uio));
}

/*ARGSUSED2*/
static int
lofi_aread(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_READ, minphys, aio));
}

/*ARGSUSED2*/
static int
lofi_awrite(dev_t dev, struct aio_req *aio, struct cred *credp)
{
	if (getminor(dev) == 0)
		return (EINVAL);
	UIO_CHECK(aio->aio_uio);
	return (aphysio(lofi_strategy, anocancel, dev, B_WRITE, minphys, aio));
}

/*ARGSUSED*/
static int
lofi_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	struct lofi_state *lsp;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = LOFI_MINOR2ID(getminor(dev));
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		lsp = ddi_get_soft_state(lofi_statep, instance);
		if (lsp == NULL)
			return (DDI_FAILURE);
		*result = lsp->ls_dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *) (intptr_t)instance;
		return (DDI_SUCCESS);
	}
	return (DDI_FAILURE);
}

static int
lofi_create_minor_nodes(struct lofi_state *lsp, boolean_t labeled)
{
	int error = 0;
	int instance = ddi_get_instance(lsp->ls_dip);

	if (labeled == B_TRUE) {
		cmlb_alloc_handle(&lsp->ls_cmlbhandle);
		error = cmlb_attach(lsp->ls_dip, &lofi_tg_ops, DTYPE_DIRECT,
		    B_FALSE, B_FALSE, DDI_NT_BLOCK_CHAN,
		    CMLB_CREATE_P0_MINOR_NODE, lsp->ls_cmlbhandle, (void *)1);

		if (error != DDI_SUCCESS) {
			cmlb_free_handle(&lsp->ls_cmlbhandle);
			lsp->ls_cmlbhandle = NULL;
			error = ENXIO;
		}
	} else {
		/* create minor nodes */
		error = ddi_create_minor_node(lsp->ls_dip, LOFI_BLOCK_NODE,
		    S_IFBLK, LOFI_ID2MINOR(instance), DDI_PSEUDO, 0);
		if (error == DDI_SUCCESS) {
			error = ddi_create_minor_node(lsp->ls_dip,
			    LOFI_CHAR_NODE, S_IFCHR, LOFI_ID2MINOR(instance),
			    DDI_PSEUDO, 0);
			if (error != DDI_SUCCESS) {
				ddi_remove_minor_node(lsp->ls_dip,
				    LOFI_BLOCK_NODE);
				error = ENXIO;
			}
		} else
			error = ENXIO;
	}
	return (error);
}

static int
lofi_zone_bind(struct lofi_state *lsp)
{
	int error = 0;

	mutex_enter(&curproc->p_lock);
	if ((error = rctl_incr_lofi(curproc, curproc->p_zone, 1)) != 0) {
		mutex_exit(&curproc->p_lock);
		return (error);
	}
	mutex_exit(&curproc->p_lock);

	if (ddi_prop_update_string(DDI_DEV_T_NONE, lsp->ls_dip, ZONE_PROP_NAME,
	    (char *)curproc->p_zone->zone_name) != DDI_PROP_SUCCESS) {
		rctl_decr_lofi(curproc->p_zone, 1);
		error = EINVAL;
	} else {
		zone_init_ref(&lsp->ls_zone);
		zone_hold_ref(curzone, &lsp->ls_zone, ZONE_REF_LOFI);
	}
	return (error);
}

static void
lofi_zone_unbind(struct lofi_state *lsp)
{
	(void) ddi_prop_remove(DDI_DEV_T_NONE, lsp->ls_dip, ZONE_PROP_NAME);
	rctl_decr_lofi(curproc->p_zone, 1);
	zone_rele_ref(&lsp->ls_zone, ZONE_REF_LOFI);
}
static int
lofi_online_dev(dev_info_t *dip)
{
	boolean_t labeled;
	int	error;
	int	instance = ddi_get_instance(dip);
	struct lofi_state *lsp;

	labeled = B_FALSE;
	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "labeled"))
		labeled = B_TRUE;

	/* lsp alloc+init, soft state is freed in lofi_detach */
	error = ddi_soft_state_zalloc(lofi_statep, instance);
	if (error == DDI_FAILURE) {
		return (ENOMEM);
	}

	lsp = ddi_get_soft_state(lofi_statep, instance);
	lsp->ls_dip = dip;

	if ((error = lofi_zone_bind(lsp)) != 0)
		goto err;

	cv_init(&lsp->ls_vp_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_cache_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_comp_bufs_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_kstat_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&lsp->ls_vp_lock, NULL, MUTEX_DRIVER, NULL);

	if ((error = lofi_create_minor_nodes(lsp, labeled)) != 0) {
		lofi_zone_unbind(lsp);
		goto lerr;
	}

	/* driver handles kernel-issued IOCTLs */
	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
		error = DDI_FAILURE;
		goto merr;
	}

	lsp->ls_kstat = kstat_create_zone(LOFI_DRIVER_NAME, instance,
	    NULL, "disk", KSTAT_TYPE_IO, 1, 0, getzoneid());
	if (lsp->ls_kstat == NULL) {
		(void) ddi_prop_remove(DDI_DEV_T_NONE, lsp->ls_dip,
		    DDI_KERNEL_IOCTL);
		error = ENOMEM;
		goto merr;
	}

	lsp->ls_kstat->ks_lock = &lsp->ls_kstat_lock;
	kstat_zone_add(lsp->ls_kstat, GLOBAL_ZONEID);
	kstat_install(lsp->ls_kstat);
	return (DDI_SUCCESS);
merr:
	if (lsp->ls_cmlbhandle != NULL) {
		cmlb_detach(lsp->ls_cmlbhandle, 0);
		cmlb_free_handle(&lsp->ls_cmlbhandle);
	}
	ddi_remove_minor_node(dip, NULL);
	lofi_zone_unbind(lsp);
lerr:
	mutex_destroy(&lsp->ls_comp_cache_lock);
	mutex_destroy(&lsp->ls_comp_bufs_lock);
	mutex_destroy(&lsp->ls_kstat_lock);
	mutex_destroy(&lsp->ls_vp_lock);
	cv_destroy(&lsp->ls_vp_cv);
err:
	ddi_soft_state_free(lofi_statep, instance);
	return (error);
}

static int
lofi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	rv;
	int	instance = ddi_get_instance(dip);
	struct lofi_state *lsp;

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Instance 0 is the control instance; attaching it sets
	 * lofi up and ready.
	 */
	if (instance == 0) {
		rv = ddi_soft_state_zalloc(lofi_statep, 0);
		if (rv == DDI_FAILURE) {
			return (DDI_FAILURE);
		}
		lsp = ddi_get_soft_state(lofi_statep, instance);
		rv = ddi_create_minor_node(dip, LOFI_CTL_NODE, S_IFCHR, 0,
		    DDI_PSEUDO, 0);
		if (rv == DDI_FAILURE) {
			ddi_soft_state_free(lofi_statep, 0);
			return (DDI_FAILURE);
		}
		/* driver handles kernel-issued IOCTLs */
		if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
			ddi_remove_minor_node(dip, NULL);
			ddi_soft_state_free(lofi_statep, 0);
			return (DDI_FAILURE);
		}

		zone_key_create(&lofi_zone_key, NULL, lofi_zone_shutdown, NULL);

		lsp->ls_dip = dip;
	} else {
		if (lofi_online_dev(dip) == DDI_FAILURE)
			return (DDI_FAILURE);
	}

	ddi_report_dev(dip);
	return (DDI_SUCCESS);
}
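/*
 * Attach flow sketch (a summary of the two paths above, not normative):
 *
 *	attach(instance 0)	-> LOFI_CTL_NODE minor only; this is the
 *				   control device lofiadm talks to.
 *	attach(instance N)	-> lofi_online_dev(), whose
 *				   lofi_create_minor_nodes() either creates
 *				   LOFI_BLOCK_NODE/LOFI_CHAR_NODE directly
 *				   or delegates the minor nodes to cmlb for
 *				   labeled mappings.
 */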
static int
lofi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	struct lofi_state *lsp;
	int instance = ddi_get_instance(dip);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/*
	 * If the instance is not 0, release state.
	 * Instance 0 is the control device; we cannot detach it
	 * before the other instances are detached.
	 */
	if (instance != 0) {
		lsp = ddi_get_soft_state(lofi_statep, instance);
		if (lsp != NULL && lsp->ls_vp_ready == B_FALSE) {
			ddi_soft_state_free(lofi_statep, instance);
			return (DDI_SUCCESS);
		} else
			return (DDI_FAILURE);
	}
	mutex_enter(&lofi_lock);

	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(dip, NULL);
	ddi_prop_remove_all(dip);

	mutex_exit(&lofi_lock);

	if (zone_key_delete(lofi_zone_key) != 0)
		cmn_err(CE_WARN, "failed to delete zone key");

	ddi_soft_state_free(lofi_statep, 0);

	return (DDI_SUCCESS);
}

/*
 * With the addition of encryption, we must be careful that the encryption
 * key is wiped before the kernel's data structures are freed so it cannot
 * accidentally slip out to userland through uninitialized data elsewhere.
 */
static void
free_lofi_ioctl(struct lofi_ioctl *klip)
{
	/* Make sure this encryption key doesn't stick around */
	bzero(klip->li_key, sizeof (klip->li_key));
	kmem_free(klip, sizeof (struct lofi_ioctl));
}

/*
 * These two functions simplify the rest of the ioctls that need to copyin/out
 * the lofi_ioctl structure.
 */
int
copy_in_lofi_ioctl(const struct lofi_ioctl *ulip, struct lofi_ioctl **klipp,
    int flag)
{
	struct lofi_ioctl *klip;
	int	error;

	klip = *klipp = kmem_alloc(sizeof (struct lofi_ioctl), KM_SLEEP);
	error = ddi_copyin(ulip, klip, sizeof (struct lofi_ioctl), flag);
	if (error)
		goto err;

	/* ensure NUL termination */
	klip->li_filename[MAXPATHLEN-1] = '\0';
	klip->li_devpath[MAXPATHLEN-1] = '\0';
	klip->li_algorithm[MAXALGLEN-1] = '\0';
	klip->li_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';
	klip->li_iv_cipher[CRYPTO_MAX_MECH_NAME-1] = '\0';

	if (klip->li_id > L_MAXMIN32) {
		error = EINVAL;
		goto err;
	}

	return (0);

err:
	free_lofi_ioctl(klip);
	return (error);
}

int
copy_out_lofi_ioctl(const struct lofi_ioctl *klip, struct lofi_ioctl *ulip,
    int flag)
{
	int	error;

	/*
	 * NOTE: Do NOT copy the crypto_key_t "back" to userland.
	 * This ensures that an attacker can't trivially find the
	 * key for a mapping just by issuing the ioctl.
	 *
	 * It can still be found by poking around in kmem with mdb(1),
	 * but there is no point in making it easy when the info isn't
	 * of any use in this direction anyway.
	 *
	 * Either way we don't actually have the raw key stored in
	 * a form that we can get it anyway, since we just used it
	 * to create a ctx template and didn't keep "the original".
	 */
	error = ddi_copyout(klip, ulip, sizeof (struct lofi_ioctl), flag);
	if (error)
		return (EFAULT);
	return (0);
}

static int
lofi_access(struct lofi_state *lsp)
{
	ASSERT(MUTEX_HELD(&lofi_lock));
	if (INGLOBALZONE(curproc) || lsp->ls_zone.zref_zone == curzone)
		return (0);
	return (EPERM);
}
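/*
 * Illustration of why copy_in_lofi_ioctl() forces NUL termination above
 * (hypothetical userland caller, not part of the driver): without it, a
 * caller could hand in an unterminated li_filename and string consumers
 * such as lookupname() would read past the end of the buffer:
 *
 *	struct lofi_ioctl ul;
 *	(void) memset(ul.li_filename, 'A', sizeof (ul.li_filename));
 *	(void) ioctl(ctl_fd, LOFI_MAP_FILE, &ul);	no NUL anywhere
 */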
/*
 * Find the lofi state for the given filename. We compare by vnode to
 * allow the global zone visibility into NGZ lofi nodes.
 */
static int
file_to_lofi_nocheck(char *filename, boolean_t readonly,
    struct lofi_state **lspp)
{
	struct lofi_state *lsp;
	vnode_t	*vp = NULL;
	int	err = 0;
	int	rdfiles = 0;

	ASSERT(MUTEX_HELD(&lofi_lock));

	if ((err = lookupname(filename, UIO_SYSSPACE, FOLLOW,
	    NULLVPP, &vp)) != 0)
		goto out;

	if (vp->v_type == VREG) {
		vnode_t *realvp;
		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			VN_HOLD(realvp);
			VN_RELE(vp);
			vp = realvp;
		}
	}

	for (lsp = list_head(&lofi_list); lsp != NULL;
	    lsp = list_next(&lofi_list, lsp)) {
		if (lsp->ls_vp == vp) {
			if (lspp != NULL)
				*lspp = lsp;
			if (lsp->ls_readonly) {
				rdfiles++;
				/* Skip if '-r' is specified */
				if (readonly)
					continue;
			}
			goto out;
		}
	}

	err = ENOENT;

	/*
	 * If a filename is given as an argument for lofi_unmap, we shouldn't
	 * allow unmap if there are multiple read-only lofi devices associated
	 * with this file.
	 */
	if (lspp != NULL) {
		if (rdfiles == 1)
			err = 0;
		else if (rdfiles > 1)
			err = EBUSY;
	}

out:
	if (vp != NULL)
		VN_RELE(vp);
	return (err);
}

/*
 * Find the minor for the given filename, checking that the zone can
 * access it.
 */
static int
file_to_lofi(char *filename, boolean_t readonly, struct lofi_state **lspp)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lofi_lock));

	if ((err = file_to_lofi_nocheck(filename, readonly, lspp)) != 0)
		return (err);

	if ((err = lofi_access(*lspp)) != 0)
		return (err);

	return (0);
}

/*
 * Fakes up a disk geometry based on the size of the file. This is needed
 * to support newfs on a traditional lofi device, but it also provides the
 * geometry hint for cmlb.
 */
static void
fake_disk_geometry(struct lofi_state *lsp)
{
	u_offset_t dsize = lsp->ls_vp_size - lsp->ls_crypto_offset;

	/* dk_geom - see dkio(7I) */
	/*
	 * dkg_ncyl _could_ be set to one here (one big cylinder with gobs
	 * of sectors), but that breaks programs like fdisk which want to
	 * partition a disk by cylinder. With one cylinder, you can't create
	 * an fdisk partition and put pcfs on it for testing (hard to pick
	 * a number between one and one).
	 *
	 * The cheezy floppy test is an attempt to not have too few cylinders
	 * for a small file, or so many on a big file that you waste space
	 * for backup superblocks or cylinder group structures.
	 */
	bzero(&lsp->ls_dkg, sizeof (lsp->ls_dkg));
	if (dsize < (2 * 1024 * 1024)) /* floppy? */
		lsp->ls_dkg.dkg_ncyl = dsize / (100 * 1024);
	else
		lsp->ls_dkg.dkg_ncyl = dsize / (300 * 1024);
	/* in case the file is < 100k */
	if (lsp->ls_dkg.dkg_ncyl == 0)
		lsp->ls_dkg.dkg_ncyl = 1;

	lsp->ls_dkg.dkg_pcyl = lsp->ls_dkg.dkg_ncyl;
	lsp->ls_dkg.dkg_nhead = 1;
	lsp->ls_dkg.dkg_rpm = 7200;

	lsp->ls_dkg.dkg_nsect = dsize /
	    (lsp->ls_dkg.dkg_ncyl << lsp->ls_pbshift);
}
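/*
 * Worked example of the geometry faking above (assuming DEV_BSIZE is 512,
 * so ls_pbshift is 9): for a 100 MB file, dsize = 104857600, giving
 * dkg_ncyl = 104857600 / (300 * 1024) = 341 and
 * dkg_nsect = 104857600 / (341 << 9) = 600, for a usable capacity of
 * 341 * 600 * 1 * 512 = 104755200 bytes -- slightly under the file size,
 * since the last partial cylinder is dropped.
 */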
/*
 * build vtoc - see dkio(7I)
 *
 * Fakes one big partition based on the size of the file. This is needed
 * because we allow newfs'ing the traditional lofi device and newfs will
 * do several disk ioctls to figure out the geometry and partition information.
 * It uses that information to determine the parameters to pass to mkfs.
 */
static void
fake_disk_vtoc(struct lofi_state *lsp, struct vtoc *vt)
{
	bzero(vt, sizeof (struct vtoc));
	vt->v_sanity = VTOC_SANE;
	vt->v_version = V_VERSION;
	(void) strncpy(vt->v_volume, LOFI_DRIVER_NAME,
	    sizeof (vt->v_volume));
	vt->v_sectorsz = 1 << lsp->ls_pbshift;
	vt->v_nparts = 1;
	vt->v_part[0].p_tag = V_UNASSIGNED;

	/*
	 * A compressed file is read-only; other files can
	 * be read-write.
	 */
	if (lsp->ls_uncomp_seg_sz > 0) {
		vt->v_part[0].p_flag = V_UNMNT | V_RONLY;
	} else {
		vt->v_part[0].p_flag = V_UNMNT;
	}
	vt->v_part[0].p_start = (daddr_t)0;
	/*
	 * The partition size cannot just be the number of sectors, because
	 * that might not end on a cylinder boundary. And if that's the case,
	 * newfs/mkfs will print a scary warning. So just figure the size
	 * based on the number of cylinders and sectors/cylinder.
	 */
	vt->v_part[0].p_size = lsp->ls_dkg.dkg_pcyl *
	    lsp->ls_dkg.dkg_nsect * lsp->ls_dkg.dkg_nhead;
}

/*
 * build dk_cinfo - see dkio(7I)
 */
static void
fake_disk_info(dev_t dev, struct dk_cinfo *ci)
{
	bzero(ci, sizeof (struct dk_cinfo));
	(void) strlcpy(ci->dki_cname, LOFI_DRIVER_NAME, sizeof (ci->dki_cname));
	ci->dki_ctype = DKC_SCSI_CCS;
	(void) strlcpy(ci->dki_dname, LOFI_DRIVER_NAME, sizeof (ci->dki_dname));
	ci->dki_unit = LOFI_MINOR2ID(getminor(dev));
	ci->dki_partition = LOFI_PART(getminor(dev));
	/*
	 * newfs uses this to set maxcontig. Must not be < 16, or it
	 * will be 0 when newfs multiplies it by DEV_BSIZE and divides
	 * it by the block size. Then tunefs doesn't work because
	 * maxcontig is 0.
	 */
	ci->dki_maxtransfer = 16;
}
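/*
 * Worked example for the dki_maxtransfer comment above: newfs computes
 * maxcontig as dki_maxtransfer * DEV_BSIZE / bsize.  With the value 16 and
 * an assumed 8K filesystem block size that is 16 * 512 / 8192 = 1; a
 * smaller value such as 8 would truncate to 8 * 512 / 8192 = 0 and break
 * tunefs.  (Arithmetic illustration only; see newfs(1M) for the actual
 * logic.)
 */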
/*
 * map in a compressed file
 *
 * Read in the header and the index that follows.
 *
 * The header is as follows -
 *
 * Signature (name of the compression algorithm)
 * Compression segment size (a multiple of 512)
 * Number of index entries
 * Size of the last block
 * The array containing the index entries
 *
 * The header information is always stored in
 * network byte order on disk.
 */
static int
lofi_map_compressed_file(struct lofi_state *lsp, char *buf)
{
	uint32_t index_sz, header_len, i;
	ssize_t	resid;
	enum uio_rw rw;
	char	*tbuf = buf;
	int	error;

	/* The signature has already been read */
	tbuf += sizeof (lsp->ls_comp_algorithm);
	bcopy(tbuf, &(lsp->ls_uncomp_seg_sz), sizeof (lsp->ls_uncomp_seg_sz));
	lsp->ls_uncomp_seg_sz = ntohl(lsp->ls_uncomp_seg_sz);

	/*
	 * The uncompressed segment size must be at least DEV_BSIZE
	 * and a power of 2.
	 */
	if (lsp->ls_uncomp_seg_sz < DEV_BSIZE ||
	    !ISP2(lsp->ls_uncomp_seg_sz))
		return (EINVAL);

	for (i = 0; !((lsp->ls_uncomp_seg_sz >> i) & 1); i++)
		;

	lsp->ls_comp_seg_shift = i;

	tbuf += sizeof (lsp->ls_uncomp_seg_sz);
	bcopy(tbuf, &(lsp->ls_comp_index_sz), sizeof (lsp->ls_comp_index_sz));
	lsp->ls_comp_index_sz = ntohl(lsp->ls_comp_index_sz);

	tbuf += sizeof (lsp->ls_comp_index_sz);
	bcopy(tbuf, &(lsp->ls_uncomp_last_seg_sz),
	    sizeof (lsp->ls_uncomp_last_seg_sz));
	lsp->ls_uncomp_last_seg_sz = ntohl(lsp->ls_uncomp_last_seg_sz);

	/*
	 * Compute the total size of the uncompressed data
	 * for use in fake_disk_geometry and other calculations.
	 * Disk geometry has to be faked with respect to the
	 * actual uncompressed data size rather than the
	 * compressed file size.
	 */
	lsp->ls_vp_size =
	    (u_offset_t)(lsp->ls_comp_index_sz - 2) * lsp->ls_uncomp_seg_sz
	    + lsp->ls_uncomp_last_seg_sz;

	/*
	 * Index size is rounded up to DEV_BSIZE for ease
	 * of segmapping
	 */
	index_sz = sizeof (*lsp->ls_comp_seg_index) * lsp->ls_comp_index_sz;
	header_len = sizeof (lsp->ls_comp_algorithm) +
	    sizeof (lsp->ls_uncomp_seg_sz) +
	    sizeof (lsp->ls_comp_index_sz) +
	    sizeof (lsp->ls_uncomp_last_seg_sz);
	lsp->ls_comp_offbase = header_len + index_sz;

	index_sz += header_len;
	index_sz = roundup(index_sz, DEV_BSIZE);

	lsp->ls_comp_index_data = kmem_alloc(index_sz, KM_SLEEP);
	lsp->ls_comp_index_data_sz = index_sz;

	/*
	 * Read in the index -- this has a side-effect
	 * of reading in the header as well
	 */
	rw = UIO_READ;
	error = vn_rdwr(rw, lsp->ls_vp, lsp->ls_comp_index_data, index_sz,
	    0, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);

	if (error != 0)
		return (error);

	/* Skip the header, this is where the index really begins */
	lsp->ls_comp_seg_index =
	    /*LINTED*/
	    (uint64_t *)(lsp->ls_comp_index_data + header_len);

	/*
	 * Now recompute offsets in the index to account for
	 * the header length
	 */
	for (i = 0; i < lsp->ls_comp_index_sz; i++) {
		lsp->ls_comp_seg_index[i] = lsp->ls_comp_offbase +
		    BE_64(lsp->ls_comp_seg_index[i]);
	}

	return (error);
}
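/*
 * A sketch of the on-disk compressed-image header parsed above (field
 * widths follow the lofi_state members; all integers are big-endian):
 *
 *	char		signature[sizeof (ls_comp_algorithm)];
 *	uint32_t	uncomp_seg_sz;		segment size, power of two
 *	uint32_t	index_sz;		number of index entries
 *	uint32_t	uncomp_last_seg_sz;	size of the final segment
 *	uint64_t	index[index_sz];	per-segment file offsets
 *
 * This is an illustration derived from the parsing code, not a normative
 * format definition.
 */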
static int
lofi_init_crypto(struct lofi_state *lsp, struct lofi_ioctl *klip)
{
	struct crypto_meta chead;
	char	buf[DEV_BSIZE];
	ssize_t	resid;
	char	*marker;
	int	error;
	int	ret;
	int	i;

	if (!klip->li_crypto_enabled)
		return (0);

	/*
	 * All current algorithms have a max of 448 bits.
	 */
	if (klip->li_iv_len > CRYPTO_BITS2BYTES(512))
		return (EINVAL);

	if (CRYPTO_BITS2BYTES(klip->li_key_len) > sizeof (klip->li_key))
		return (EINVAL);

	lsp->ls_crypto_enabled = klip->li_crypto_enabled;

	mutex_init(&lsp->ls_crypto_lock, NULL, MUTEX_DRIVER, NULL);

	lsp->ls_mech.cm_type = crypto_mech2id(klip->li_cipher);
	if (lsp->ls_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid cipher %s requested for %s",
		    klip->li_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* this is just initialization here */
	lsp->ls_mech.cm_param = NULL;
	lsp->ls_mech.cm_param_len = 0;

	lsp->ls_iv_type = klip->li_iv_type;
	lsp->ls_iv_mech.cm_type = crypto_mech2id(klip->li_iv_cipher);
	if (lsp->ls_iv_mech.cm_type == CRYPTO_MECH_INVALID) {
		cmn_err(CE_WARN, "invalid iv cipher %s requested"
		    " for %s", klip->li_iv_cipher, klip->li_filename);
		return (EINVAL);
	}

	/* iv mech must itself take a null iv */
	lsp->ls_iv_mech.cm_param = NULL;
	lsp->ls_iv_mech.cm_param_len = 0;
	lsp->ls_iv_len = klip->li_iv_len;

	/*
	 * Create ctx using li_cipher & the raw li_key after checking
	 * that it isn't a weak key.
	 */
	lsp->ls_key.ck_format = CRYPTO_KEY_RAW;
	lsp->ls_key.ck_length = klip->li_key_len;
	lsp->ls_key.ck_data = kmem_alloc(
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length), KM_SLEEP);
	bcopy(klip->li_key, lsp->ls_key.ck_data,
	    CRYPTO_BITS2BYTES(lsp->ls_key.ck_length));

	ret = crypto_key_check(&lsp->ls_mech, &lsp->ls_key);
	if (ret != CRYPTO_SUCCESS) {
		cmn_err(CE_WARN, "weak key check failed for cipher "
		    "%s on file %s (0x%x)", klip->li_cipher,
		    klip->li_filename, ret);
		return (EINVAL);
	}

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/*
	 * This is the case where the header in the lofi image is already
	 * initialized to indicate it is encrypted.
	 */
	if (strncmp(buf, lofi_crypto_magic, sizeof (lofi_crypto_magic)) == 0) {
		/*
		 * The encryption header information is laid out this way:
		 *	6 bytes:	hex "CFLOFI"
		 *	2 bytes:	version = 0 ... for now
		 *	96 bytes:	reserved1 (not implemented yet)
		 *	4 bytes:	data_sector = 2 ... for now
		 *	more...		not implemented yet
		 */

		marker = buf;

		/* copy the magic */
		bcopy(marker, lsp->ls_crypto.magic,
		    sizeof (lsp->ls_crypto.magic));
		marker += sizeof (lsp->ls_crypto.magic);

		/* read the encryption version number */
		bcopy(marker, &(lsp->ls_crypto.version),
		    sizeof (lsp->ls_crypto.version));
		lsp->ls_crypto.version = ntohs(lsp->ls_crypto.version);
		marker += sizeof (lsp->ls_crypto.version);

		/* read a chunk of reserved data */
		bcopy(marker, lsp->ls_crypto.reserved1,
		    sizeof (lsp->ls_crypto.reserved1));
		marker += sizeof (lsp->ls_crypto.reserved1);

		/* read block number where encrypted data begins */
		bcopy(marker, &(lsp->ls_crypto.data_sector),
		    sizeof (lsp->ls_crypto.data_sector));
		lsp->ls_crypto.data_sector = ntohl(lsp->ls_crypto.data_sector);
		marker += sizeof (lsp->ls_crypto.data_sector);

		/* and ignore the rest until it is implemented */

		lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
		return (0);
	}

	/*
	 * We've requested encryption, but no magic was found, so it must be
	 * a new image.
	 */

	for (i = 0; i < sizeof (struct crypto_meta); i++) {
		if (buf[i] != '\0')
			return (EINVAL);
	}

	marker = buf;
	bcopy(lofi_crypto_magic, marker, sizeof (lofi_crypto_magic));
	marker += sizeof (lofi_crypto_magic);
	chead.version = htons(LOFI_CRYPTO_VERSION);
	bcopy(&(chead.version), marker, sizeof (chead.version));
	marker += sizeof (chead.version);
	marker += sizeof (chead.reserved1);
	chead.data_sector = htonl(LOFI_CRYPTO_DATA_SECTOR);
	bcopy(&(chead.data_sector), marker, sizeof (chead.data_sector));

	/* write the header */
	error = vn_rdwr(UIO_WRITE, lsp->ls_vp, buf, DEV_BSIZE,
	    CRYOFF, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
	if (error != 0)
		return (error);

	/* fix things up so it looks like we read this info */
	bcopy(lofi_crypto_magic, lsp->ls_crypto.magic,
	    sizeof (lofi_crypto_magic));
	lsp->ls_crypto.version = LOFI_CRYPTO_VERSION;
	lsp->ls_crypto.data_sector = LOFI_CRYPTO_DATA_SECTOR;
	lsp->ls_crypto_offset = lsp->ls_crypto.data_sector * DEV_BSIZE;
	return (0);
}
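/*
 * A sketch of the encryption header handled above, as implied by the
 * parsing and initialization code (illustrative, not a normative layout):
 *
 *	char		magic[6];	"CFLOFI"
 *	uint16_t	version;	network byte order, 0 for now
 *	uint8_t		reserved1[96];	not implemented yet
 *	uint32_t	data_sector;	network byte order, 2 for now
 *
 * The encrypted payload begins at data_sector * DEV_BSIZE, which is how
 * ls_crypto_offset is derived in both branches.
 */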
/*
 * Check to see if the passed in signature is a valid one. If it is
 * valid, return the index into lofi_compress_table.
 *
 * Return -1 if it is invalid
 */
static int
lofi_compress_select(const char *signature)
{
	int i;

	for (i = 0; i < LOFI_COMPRESS_FUNCTIONS; i++) {
		if (strcmp(lofi_compress_table[i].l_name, signature) == 0)
			return (i);
	}

	return (-1);
}

static int
lofi_init_compress(struct lofi_state *lsp)
{
	char	buf[DEV_BSIZE];
	int	compress_index;
	ssize_t	resid;
	int	error;

	error = vn_rdwr(UIO_READ, lsp->ls_vp, buf, DEV_BSIZE, 0, UIO_SYSSPACE,
	    0, RLIM64_INFINITY, kcred, &resid);

	if (error != 0)
		return (error);

	if ((compress_index = lofi_compress_select(buf)) == -1)
		return (0);

	/* compression and encryption are mutually exclusive */
	if (lsp->ls_crypto_enabled)
		return (ENOTSUP);

	/* initialize compression info for compressed lofi */
	lsp->ls_comp_algorithm_index = compress_index;
	(void) strlcpy(lsp->ls_comp_algorithm,
	    lofi_compress_table[compress_index].l_name,
	    sizeof (lsp->ls_comp_algorithm));

	/* Finally setup per-thread pre-allocated buffers */
	lsp->ls_comp_bufs = kmem_zalloc(lofi_taskq_nthreads *
	    sizeof (struct compbuf), KM_SLEEP);

	return (lofi_map_compressed_file(lsp, buf));
}

/*
 * Allocate new or proposed id from lofi_id.
 *
 * Special cases for proposed id:
 * 0: not allowed, 0 is id for control device.
 * -1: allocate first usable id from lofi_id.
 * any other value is proposed value from userland
 *
 * returns DDI_SUCCESS or errno.
 */
static int
lofi_alloc_id(int *idp)
{
	int id, error = DDI_SUCCESS;

	if (*idp == -1) {
		id = id_allocff_nosleep(lofi_id);
		if (id == -1) {
			error = EAGAIN;
			goto err;
		}
	} else if (*idp == 0) {
		error = EINVAL;
		goto err;
	} else if (*idp > ((1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)) - 1)) {
		error = ERANGE;
		goto err;
	} else {
		if (ddi_get_soft_state(lofi_statep, *idp) != NULL) {
			error = EEXIST;
			goto err;
		}

		id = id_alloc_specific_nosleep(lofi_id, *idp);
		if (id == -1) {
			error = EAGAIN;
			goto err;
		}
	}
	*idp = id;
err:
	return (error);
}
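/*
 * Example of the id negotiation above (hypothetical lofiadm invocations;
 * see lofiadm(1M) for the real interface): a plain "lofiadm -a file"
 * arrives here with li_id == -1 and takes the first free id, while a
 * request naming a specific device, e.g. /dev/lofi/5, proposes li_id == 5
 * and fails with EEXIST or EAGAIN if that id is already in use.
 */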
static int
lofi_create_dev(struct lofi_ioctl *klip)
{
	dev_info_t *parent, *child;
	struct lofi_state *lsp = NULL;
	char namebuf[MAXNAMELEN];
	int error, circ;

	/* get control device */
	lsp = ddi_get_soft_state(lofi_statep, 0);
	parent = ddi_get_parent(lsp->ls_dip);

	if ((error = lofi_alloc_id((int *)&klip->li_id)))
		return (error);

	(void) snprintf(namebuf, sizeof (namebuf), LOFI_DRIVER_NAME "@%d",
	    klip->li_id);

	ndi_devi_enter(parent, &circ);
	child = ndi_devi_findchild(parent, namebuf);
	ndi_devi_exit(parent, circ);

	if (child == NULL) {
		child = ddi_add_child(parent, LOFI_DRIVER_NAME,
		    (pnode_t)DEVI_SID_NODEID, klip->li_id);
		if ((error = ddi_prop_update_int(DDI_DEV_T_NONE, child,
		    "instance", klip->li_id)) != DDI_PROP_SUCCESS)
			goto err;

		if (klip->li_labeled == B_TRUE) {
			if ((error = ddi_prop_create(DDI_DEV_T_NONE, child,
			    DDI_PROP_CANSLEEP, "labeled", 0, 0))
			    != DDI_PROP_SUCCESS)
				goto err;
		}

		if ((error = ndi_devi_online(child, NDI_ONLINE_ATTACH))
		    != NDI_SUCCESS)
			goto err;
	} else {
		id_free(lofi_id, klip->li_id);
		error = EEXIST;
		return (error);
	}

	goto done;

err:
	ddi_prop_remove_all(child);
	(void) ndi_devi_offline(child, NDI_DEVI_REMOVE);
	id_free(lofi_id, klip->li_id);
done:

	return (error);
}

static void
lofi_create_inquiry(struct lofi_state *lsp, struct scsi_inquiry *inq)
{
	char *p = NULL;

	(void) strlcpy(inq->inq_vid, LOFI_DRIVER_NAME, sizeof (inq->inq_vid));

	mutex_enter(&lsp->ls_vp_lock);
	if (lsp->ls_vp != NULL)
		p = strrchr(lsp->ls_vp->v_path, '/');
	if (p != NULL)
		(void) strncpy(inq->inq_pid, p + 1, sizeof (inq->inq_pid));
	mutex_exit(&lsp->ls_vp_lock);
	(void) strlcpy(inq->inq_revision, "1.0", sizeof (inq->inq_revision));
}

/*
 * copy devlink name from event cache
 */
static void
lofi_copy_devpath(struct lofi_ioctl *klip)
{
	int	error;
	char	namebuf[MAXNAMELEN], *str;
	clock_t ticks;
	nvlist_t *nvl = NULL;

	if (klip->li_labeled == B_TRUE)
		klip->li_devpath[0] = '\0';
	else {
		/* no need to wait for messages */
		(void) snprintf(klip->li_devpath, sizeof (klip->li_devpath),
		    "/dev/" LOFI_CHAR_NAME "/%d", klip->li_id);
		return;
	}

	(void) snprintf(namebuf, sizeof (namebuf), "%d", klip->li_id);
	ticks = ddi_get_lbolt() + LOFI_TIMEOUT * drv_usectohz(1000000);

	mutex_enter(&lofi_devlink_cache.ln_lock);
	error = nvlist_lookup_nvlist(lofi_devlink_cache.ln_data, namebuf, &nvl);
	while (error != 0) {
		error = cv_timedwait(&lofi_devlink_cache.ln_cv,
		    &lofi_devlink_cache.ln_lock, ticks);
		if (error == -1)
			break;
		error = nvlist_lookup_nvlist(lofi_devlink_cache.ln_data,
		    namebuf, &nvl);
	}

	if (nvl != NULL) {
		if (nvlist_lookup_string(nvl, DEV_NAME, &str) == 0) {
			(void) strlcpy(klip->li_devpath, str,
			    sizeof (klip->li_devpath));
		}
	}
	mutex_exit(&lofi_devlink_cache.ln_lock);
}
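/*
 * Note on lofi_copy_devpath(): for labeled mappings the devlink name is
 * produced asynchronously by the devlink machinery, so the function above
 * waits on the devlink event cache (bounded by LOFI_TIMEOUT seconds;
 * cv_timedwait() returns -1 on timeout) rather than computing the path
 * itself.
 */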
/*
 * map a file to a minor number. Return the minor number.
 */
static int
lofi_map_file(dev_t dev, struct lofi_ioctl *ulip, int pickminor,
    int *rvalp, struct cred *credp, int ioctl_flag)
{
	int	id = -1;
	struct lofi_state *lsp = NULL;
	struct lofi_ioctl *klip;
	int	error;
	struct vnode *vp = NULL;
	vattr_t	vattr;
	int	flag;
	char	namebuf[MAXNAMELEN];

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	mutex_enter(&lofi_lock);

	if (file_to_lofi_nocheck(klip->li_filename, klip->li_readonly,
	    NULL) == 0) {
		error = EBUSY;
		goto err;
	}

	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
	error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0, &vp, 0, 0);
	if (error) {
		/* try read-only */
		flag &= ~FWRITE;
		error = vn_open(klip->li_filename, UIO_SYSSPACE, flag, 0,
		    &vp, 0, 0);
		if (error)
			goto err;
	}

	if (!V_ISLOFIABLE(vp->v_type)) {
		error = EINVAL;
		goto err;
	}

	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, credp, NULL);
	if (error)
		goto err;

	/* the file needs to be a multiple of the block size */
	if ((vattr.va_size % DEV_BSIZE) != 0) {
		error = EINVAL;
		goto err;
	}

	if (pickminor) {
		klip->li_id = (uint32_t)-1;
	}
	if ((error = lofi_create_dev(klip)) != 0)
		goto err;

	id = klip->li_id;
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL)
		goto err;

	/*
	 * from this point lofi_destroy() is used to clean up on error
	 * make sure the basic data is set
	 */
	list_insert_tail(&lofi_list, lsp);
	lsp->ls_dev = makedevice(getmajor(dev), LOFI_ID2MINOR(id));

	list_create(&lsp->ls_comp_cache, sizeof (struct lofi_comp_cache),
	    offsetof(struct lofi_comp_cache, lc_list));

	/*
	 * save open mode so file can be closed properly and vnode counts
	 * updated correctly.
	 */
	lsp->ls_openflag = flag;

	lsp->ls_vp = vp;
	lsp->ls_stacked_vp = vp;

	lsp->ls_vp_size = vattr.va_size;
	lsp->ls_vp_comp_size = lsp->ls_vp_size;

	/*
	 * Try to handle stacked lofs vnodes.
	 */
	if (vp->v_type == VREG) {
		vnode_t *realvp;

		if (VOP_REALVP(vp, &realvp, NULL) == 0) {
			/*
			 * We need to use the realvp for uniqueness
			 * checking, but keep the stacked vp for
			 * LOFI_GET_FILENAME display.
			 */
			VN_HOLD(realvp);
			lsp->ls_vp = realvp;
		}
	}

	lsp->ls_lbshift = highbit(DEV_BSIZE) - 1;
	lsp->ls_pbshift = lsp->ls_lbshift;

	lsp->ls_readonly = klip->li_readonly;
	lsp->ls_uncomp_seg_sz = 0;
	lsp->ls_comp_algorithm[0] = '\0';
	lsp->ls_crypto_offset = 0;

	(void) snprintf(namebuf, sizeof (namebuf), "%s_taskq_%d",
	    LOFI_DRIVER_NAME, id);
	lsp->ls_taskq = taskq_create_proc(namebuf, lofi_taskq_nthreads,
	    minclsyspri, 1, lofi_taskq_maxalloc, curzone->zone_zsched, 0);

	if ((error = lofi_init_crypto(lsp, klip)) != 0)
		goto err;

	if ((error = lofi_init_compress(lsp)) != 0)
		goto err;

	fake_disk_geometry(lsp);

	/* For unlabeled lofi add Nblocks and Size */
	if (klip->li_labeled == B_FALSE) {
		error = ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip,
		    SIZE_PROP_NAME, lsp->ls_vp_size - lsp->ls_crypto_offset);
		if (error != DDI_PROP_SUCCESS) {
			error = EINVAL;
			goto err;
		}
		error = ddi_prop_update_int64(lsp->ls_dev, lsp->ls_dip,
		    NBLOCKS_PROP_NAME,
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) / DEV_BSIZE);
		if (error != DDI_PROP_SUCCESS) {
			error = EINVAL;
			goto err;
		}
	}

	/*
	 * Notify we are ready to rock.
	 */
	mutex_enter(&lsp->ls_vp_lock);
	lsp->ls_vp_ready = B_TRUE;
	cv_broadcast(&lsp->ls_vp_cv);
	mutex_exit(&lsp->ls_vp_lock);
	mutex_exit(&lofi_lock);

	lofi_copy_devpath(klip);

	if (rvalp)
		*rvalp = id;
	(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (0);

err:
	if (lsp != NULL) {
		lofi_destroy(lsp, credp);
	} else {
		if (vp != NULL) {
			(void) VOP_PUTPAGE(vp, 0, 0, B_INVAL, credp, NULL);
			(void) VOP_CLOSE(vp, flag, 1, 0, credp, NULL);
			VN_RELE(vp);
		}
	}

	mutex_exit(&lofi_lock);
	free_lofi_ioctl(klip);
	return (error);
}
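/*
 * A minimal userland sketch of the mapping ioctl implemented above
 * (hypothetical code with a made-up image path, error handling omitted;
 * lofiadm(1M) is the supported interface):
 *
 *	struct lofi_ioctl li;
 *	int fd = open("/dev/lofictl", O_RDWR | O_EXCL);
 *
 *	bzero(&li, sizeof (li));
 *	(void) strlcpy(li.li_filename, "/export/disk.img",
 *	    sizeof (li.li_filename));
 *	if (ioctl(fd, LOFI_MAP_FILE, &li) != -1)
 *		(void) printf("%s\n", li.li_devpath);
 *
 * LOFI_MAP_FILE picks the id; the assigned id and device path come back
 * through the copied-out lofi_ioctl.
 */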
/*
 * unmap a file.
 */
static int
lofi_unmap_file(struct lofi_ioctl *ulip, int byfilename,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_state *lsp;
	struct lofi_ioctl *klip;
	int err;

	err = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (err != 0)
		return (err);

	mutex_enter(&lofi_lock);
	if (byfilename) {
		if ((err = file_to_lofi(klip->li_filename, klip->li_readonly,
		    &lsp)) != 0) {
			goto done;
		}
	} else if (klip->li_id == 0) {
		err = ENXIO;
		goto done;
	} else {
		lsp = ddi_get_soft_state(lofi_statep, klip->li_id);
	}

	if (lsp == NULL || lsp->ls_vp == NULL || lofi_access(lsp) != 0) {
		err = ENXIO;
		goto done;
	}

	klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

	/*
	 * If it's still held open, we'll do one of three things:
	 *
	 * If no flag is set, just return EBUSY.
	 *
	 * If the 'cleanup' flag is set, unmap and remove the device when
	 * the last user finishes.
	 *
	 * If the 'force' flag is set, then we forcibly close the underlying
	 * file.  Subsequent operations will fail, and the DKIOCSTATE ioctl
	 * will return DKIO_DEV_GONE.  When the device is last closed, the
	 * device will be cleaned up appropriately.
	 *
	 * This is complicated by the fact that we may have outstanding
	 * dispatched I/Os.  Rather than having a single mutex to serialize all
	 * I/O, we keep a count of the number of outstanding I/O requests
	 * (ls_vp_iocount), as well as a flag to indicate that no new I/Os
	 * should be dispatched (ls_vp_closereq).
	 *
	 * We set the flag, wait for the number of outstanding I/Os to reach 0,
	 * and then close the underlying vnode.
	 */
	if (is_opened(lsp)) {
		if (klip->li_force) {
			/* Mark the device for cleanup. */
			lofi_set_cleanup(lsp);
			mutex_enter(&lsp->ls_vp_lock);
			lsp->ls_vp_closereq = B_TRUE;
			/* Wake up any threads waiting on dkiocstate. */
			cv_broadcast(&lsp->ls_vp_cv);
			while (lsp->ls_vp_iocount > 0)
				cv_wait(&lsp->ls_vp_cv, &lsp->ls_vp_lock);
			mutex_exit(&lsp->ls_vp_lock);
		} else if (klip->li_cleanup) {
			lofi_set_cleanup(lsp);
		} else {
			err = EBUSY;
		}
	} else {
		lofi_free_dev(lsp);
		lofi_destroy(lsp, credp);
	}

done:
	mutex_exit(&lofi_lock);
	if (err == 0)
		(void) copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
	free_lofi_ioctl(klip);
	return (err);
}
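/*
 * Sequence sketch for a forced unmap of a busy device, as implemented
 * above: ls_cleanup and ls_vp_closereq are set, DKIOCSTATE waiters are
 * woken so they can report DKIO_DEV_GONE, in-flight I/O drains until
 * ls_vp_iocount reaches 0, and the final lofi_close() performs the
 * actual unmap and instance removal.
 */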
/*
 * get the filename given the minor number, or the minor number given
 * the name.
 */
/*ARGSUSED*/
static int
lofi_get_info(dev_t dev, struct lofi_ioctl *ulip, int which,
    struct cred *credp, int ioctl_flag)
{
	struct lofi_ioctl *klip;
	struct lofi_state *lsp;
	int	error;

	error = copy_in_lofi_ioctl(ulip, &klip, ioctl_flag);
	if (error != 0)
		return (error);

	switch (which) {
	case LOFI_GET_FILENAME:
		if (klip->li_id == 0) {
			free_lofi_ioctl(klip);
			return (EINVAL);
		}

		mutex_enter(&lofi_lock);
		lsp = ddi_get_soft_state(lofi_statep, klip->li_id);
		if (lsp == NULL || lofi_access(lsp) != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (ENXIO);
		}

		/*
		 * This may fail if, for example, we're trying to look
		 * up a zoned NFS path from the global zone.
		 */
		if (vnodetopath(NULL, lsp->ls_stacked_vp, klip->li_filename,
		    sizeof (klip->li_filename), CRED()) != 0) {
			(void) strlcpy(klip->li_filename, "?",
			    sizeof (klip->li_filename));
		}

		klip->li_readonly = lsp->ls_readonly;
		klip->li_labeled = lsp->ls_cmlbhandle != NULL;

		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));
		klip->li_crypto_enabled = lsp->ls_crypto_enabled;
		mutex_exit(&lofi_lock);

		lofi_copy_devpath(klip);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	case LOFI_GET_MINOR:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}
		klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));

		klip->li_readonly = lsp->ls_readonly;
		klip->li_labeled = lsp->ls_cmlbhandle != NULL;
		mutex_exit(&lofi_lock);

		lofi_copy_devpath(klip);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);

		free_lofi_ioctl(klip);
		return (error);
	case LOFI_CHECK_COMPRESSED:
		mutex_enter(&lofi_lock);
		error = file_to_lofi(klip->li_filename,
		    klip->li_readonly, &lsp);
		if (error != 0) {
			mutex_exit(&lofi_lock);
			free_lofi_ioctl(klip);
			return (error);
		}

		klip->li_id = LOFI_MINOR2ID(getminor(lsp->ls_dev));
		(void) strlcpy(klip->li_algorithm, lsp->ls_comp_algorithm,
		    sizeof (klip->li_algorithm));

		mutex_exit(&lofi_lock);
		error = copy_out_lofi_ioctl(klip, ulip, ioctl_flag);
		free_lofi_ioctl(klip);
		return (error);
	default:
		free_lofi_ioctl(klip);
		return (EINVAL);
	}
}

static int
uscsi_is_inquiry(intptr_t arg, int flag, union scsi_cdb *cdb,
    struct uscsi_cmd *uscmd)
{
	int rval = EINVAL;	/* non-INQUIRY commands fall through to err */

#ifdef	_MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32: {
		struct uscsi_cmd32 ucmd32;

		if (ddi_copyin((void *)arg, &ucmd32, sizeof (ucmd32), flag)) {
			rval = EFAULT;
			goto err;
		}
		uscsi_cmd32touscsi_cmd((&ucmd32), uscmd);
		break;
	}
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) {
			rval = EFAULT;
			goto err;
		}
		break;
	default:
		rval = EFAULT;
		goto err;
	}
#else
	if (ddi_copyin((void *)arg, uscmd, sizeof (*uscmd), flag)) {
		rval = EFAULT;
		goto err;
	}
#endif	/* _MULTI_DATAMODEL */
	if (ddi_copyin(uscmd->uscsi_cdb, cdb, uscmd->uscsi_cdblen, flag)) {
		rval = EFAULT;
		goto err;
	}
	if (cdb->scc_cmd == SCMD_INQUIRY) {
		return (0);
	}
err:
	return (rval);
}
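/*
 * uscsi_is_inquiry() above exists because lofi emulates only the small
 * subset of USCSICMD that disk utilities actually issue: INQUIRY
 * (answered via lofi_create_inquiry()) and READ CAPACITY; everything
 * else gets a failed request status.  See the USCSICMD case in
 * lofi_ioctl() below.
 */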
static int
lofi_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
    int *rvalp)
{
	int	error;
	enum dkio_state dkstate;
	struct lofi_state *lsp;
	int	id;

	id = LOFI_MINOR2ID(getminor(dev));

	/* lofi ioctls only apply to the master device */
	if (id == 0) {
		struct lofi_ioctl *lip = (struct lofi_ioctl *)arg;

		/*
		 * the query commands only need read-access - i.e., normal
		 * users are allowed to do those on the ctl device as
		 * long as they can open it read-only.
		 */
		switch (cmd) {
		case LOFI_MAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 1, rvalp, credp, flag));
		case LOFI_MAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_map_file(dev, lip, 0, rvalp, credp, flag));
		case LOFI_UNMAP_FILE:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 1, credp, flag));
		case LOFI_UNMAP_FILE_MINOR:
			if ((flag & FWRITE) == 0)
				return (EPERM);
			return (lofi_unmap_file(lip, 0, credp, flag));
		case LOFI_GET_FILENAME:
			return (lofi_get_info(dev, lip, LOFI_GET_FILENAME,
			    credp, flag));
		case LOFI_GET_MINOR:
			return (lofi_get_info(dev, lip, LOFI_GET_MINOR,
			    credp, flag));

		/*
		 * This API made limited sense when this value was fixed
		 * at LOFI_MAX_FILES.  However, its use to iterate
		 * across all possible devices in lofiadm means we don't
		 * want to return L_MAXMIN, but the highest
		 * *allocated* id.
		 */
		case LOFI_GET_MAXMINOR:
			id = 0;

			mutex_enter(&lofi_lock);

			for (lsp = list_head(&lofi_list); lsp != NULL;
			    lsp = list_next(&lofi_list, lsp)) {
				int i;
				if (lofi_access(lsp) != 0)
					continue;

				i = ddi_get_instance(lsp->ls_dip);
				if (i > id)
					id = i;
			}

			mutex_exit(&lofi_lock);

			error = ddi_copyout(&id, &lip->li_id,
			    sizeof (id), flag);
			if (error)
				return (EFAULT);
			return (0);

		case LOFI_CHECK_COMPRESSED:
			return (lofi_get_info(dev, lip, LOFI_CHECK_COMPRESSED,
			    credp, flag));
		default:
			return (EINVAL);
		}
	}

	mutex_enter(&lofi_lock);
	lsp = ddi_get_soft_state(lofi_statep, id);
	if (lsp == NULL || lsp->ls_cleanup) {
		mutex_exit(&lofi_lock);
		return (ENXIO);
	}
	mutex_exit(&lofi_lock);

	if (ddi_prop_exists(DDI_DEV_T_ANY, lsp->ls_dip, DDI_PROP_DONTPASS,
	    "labeled") == 1) {
		error = cmlb_ioctl(lsp->ls_cmlbhandle, dev, cmd, arg, flag,
		    credp, rvalp, 0);
		if (error != ENOTTY)
			return (error);
	}

	/*
	 * We explicitly allow DKIOCSTATE, but all other ioctls should fail
	 * with EIO as if the device was no longer present.
	 */
	if (lsp->ls_vp == NULL && cmd != DKIOCSTATE)
		return (EIO);

	/* these are for faking out utilities like newfs */
	switch (cmd) {
	case DKIOCGMEDIAINFO:
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext media_info;
		int shift = lsp->ls_lbshift;
		int size;

		if (cmd == DKIOCGMEDIAINFOEXT) {
			media_info.dki_pbsize = 1U << lsp->ls_pbshift;
			size = sizeof (struct dk_minfo_ext);
		} else {
			size = sizeof (struct dk_minfo);
		}

		media_info.dki_media_type = DK_FIXED_DISK;
		media_info.dki_lbsize = 1U << shift;
		media_info.dki_capacity =
		    (lsp->ls_vp_size - lsp->ls_crypto_offset) >> shift;

		if (ddi_copyout(&media_info, (void *)arg, size, flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i = 0;
		if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), flag))
			return (EFAULT);
		return (0);
	}

	case DKIOCGVTOC: {
		struct vtoc vt;
		fake_disk_vtoc(lsp, &vt);

		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct vtoc32 vtoc32;

			vtoctovtoc32(vt, vtoc32);
			if (ddi_copyout(&vtoc32, (void *)arg,
			    sizeof (struct vtoc32), flag))
				return (EFAULT);
			break;
		}

		case DDI_MODEL_NONE:
			if (ddi_copyout(&vt, (void *)arg,
			    sizeof (struct vtoc), flag))
				return (EFAULT);
			break;
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo ci;
		fake_disk_info(dev, &ci);
		if (ddi_copyout(&ci, (void *)arg, sizeof (ci), flag))
			return (EFAULT);
		return (0);
	}
	case DKIOCG_VIRTGEOM:
	case DKIOCG_PHYGEOM:
	case DKIOCGGEOM:
		error = ddi_copyout(&lsp->ls_dkg, (void *)arg,
		    sizeof (struct dk_geom), flag);
		if (error)
			return (EFAULT);
		return (0);
	case DKIOCSTATE:
		/*
		 * Normally, lofi devices are always in the INSERTED state. If
		 * a device is forcefully unmapped, then the device transitions
		 * to the DKIO_DEV_GONE state.
		 */
		if (ddi_copyin((void *)arg, &dkstate, sizeof (dkstate),
		    flag) != 0)
			return (EFAULT);

		mutex_enter(&lsp->ls_vp_lock);
		while (((dkstate == DKIO_INSERTED && lsp->ls_vp != NULL) ||
		    (dkstate == DKIO_DEV_GONE && lsp->ls_vp == NULL)) &&
		    !lsp->ls_cleanup) {
			/*
			 * By virtue of having the device open, we know that
			 * 'lsp' will remain valid when we return.
			 */
			if (!cv_wait_sig(&lsp->ls_vp_cv, &lsp->ls_vp_lock)) {
				mutex_exit(&lsp->ls_vp_lock);
				return (EINTR);
			}
		}

		dkstate = (!lsp->ls_cleanup && lsp->ls_vp != NULL ?
		    DKIO_INSERTED : DKIO_DEV_GONE);
		mutex_exit(&lsp->ls_vp_lock);

		if (ddi_copyout(&dkstate, (void *)arg,
		    sizeof (dkstate), flag) != 0)
			return (EFAULT);
		return (0);
	case USCSICMD: {
		struct uscsi_cmd uscmd;
		union scsi_cdb cdb;

		if (uscsi_is_inquiry(arg, flag, &cdb, &uscmd) == 0) {
			struct scsi_inquiry inq = {0};

			lofi_create_inquiry(lsp, &inq);
			if (ddi_copyout(&inq, uscmd.uscsi_bufaddr,
			    uscmd.uscsi_buflen, flag) != 0)
				return (EFAULT);
			return (0);
		} else if (cdb.scc_cmd == SCMD_READ_CAPACITY) {
			struct scsi_capacity capacity;

			capacity.capacity =
			    BE_32((lsp->ls_vp_size - lsp->ls_crypto_offset) >>
			    lsp->ls_lbshift);
			capacity.lbasize = BE_32(1 << lsp->ls_lbshift);
			if (ddi_copyout(&capacity, uscmd.uscsi_bufaddr,
			    uscmd.uscsi_buflen, flag) != 0)
				return (EFAULT);
			return (0);
		}

		uscmd.uscsi_rqstatus = 0xff;
#ifdef	_MULTI_DATAMODEL
		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32: {
			struct uscsi_cmd32 ucmd32;
			uscsi_cmdtouscsi_cmd32((&uscmd), (&ucmd32));
			if (ddi_copyout(&ucmd32, (void *)arg, sizeof (ucmd32),
			    flag) != 0)
				return (EFAULT);
			break;
		}
		case DDI_MODEL_NONE:
			if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd),
			    flag) != 0)
				return (EFAULT);
			break;
		default:
			return (EFAULT);
		}
#else
		if (ddi_copyout(&uscmd, (void *)arg, sizeof (uscmd), flag) != 0)
			return (EFAULT);
#endif	/* _MULTI_DATAMODEL */
		return (0);
	}
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "lofi_ioctl: %d is not implemented\n", cmd);
#endif	/* DEBUG */
		return (ENOTTY);
	}
}

static int
lofi_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
    char *name, caddr_t valuep, int *lengthp)
{
	struct lofi_state *lsp;
	int rc;

	lsp = ddi_get_soft_state(lofi_statep, ddi_get_instance(dip));
	if (lsp == NULL) {
		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
		    name, valuep, lengthp));
	}

	rc = cmlb_prop_op(lsp->ls_cmlbhandle, dev, dip, prop_op, mod_flags,
	    name, valuep, lengthp, LOFI_PART(getminor(dev)), NULL);
	if (rc == DDI_PROP_SUCCESS)
		return (rc);

	return (ddi_prop_op(DDI_DEV_T_ANY, dip, prop_op, mod_flags,
	    name, valuep, lengthp));
}

static struct cb_ops lofi_cb_ops = {
	lofi_open,		/* open */
	lofi_close,		/* close */
	lofi_strategy,		/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	lofi_read,		/* read */
	lofi_write,		/* write */
	lofi_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	lofi_prop_op,		/* prop_op */
	0,			/* streamtab */
	D_64BIT | D_NEW | D_MP,	/* Driver compatibility flag */
	CB_REV,
	lofi_aread,
	lofi_awrite
};

static struct dev_ops lofi_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt */
	lofi_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	lofi_attach,		/* attach */
	lofi_detach,		/* detach */
	nodev,			/* reset */
	&lofi_cb_ops,		/* driver operations */
	NULL,			/* no bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"loopback file driver",
	&lofi_ops,
};
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

int
_init(void)
{
	int error;

	list_create(&lofi_list, sizeof (struct lofi_state),
	    offsetof(struct lofi_state, ls_list));

	error = ddi_soft_state_init((void **)&lofi_statep,
	    sizeof (struct lofi_state), 0);
	if (error) {
		list_destroy(&lofi_list);
		return (error);
	}

	/*
	 * The minor number is stored as id << LOFI_CMLB_SHIFT as
	 * we need to reserve space for cmlb minor numbers.
	 * This will leave out 4096 id values on 32bit kernel, which should
	 * still suffice.
	 */
	lofi_id = id_space_create("lofi_id", 1,
	    (1 << (L_BITSMINOR - LOFI_CMLB_SHIFT)));

	if (lofi_id == NULL) {
		ddi_soft_state_fini((void **)&lofi_statep);
		list_destroy(&lofi_list);
		return (DDI_FAILURE);
	}

	mutex_init(&lofi_lock, NULL, MUTEX_DRIVER, NULL);

	error = mod_install(&modlinkage);

	if (error) {
		id_space_destroy(lofi_id);
		mutex_destroy(&lofi_lock);
		ddi_soft_state_fini((void **)&lofi_statep);
		list_destroy(&lofi_list);
	}

	return (error);
}

int
_fini(void)
{
	int	error;

	mutex_enter(&lofi_lock);

	if (!list_is_empty(&lofi_list)) {
		mutex_exit(&lofi_lock);
		return (EBUSY);
	}

	mutex_exit(&lofi_lock);

	error = mod_remove(&modlinkage);
	if (error)
		return (error);

	mutex_destroy(&lofi_lock);
	id_space_destroy(lofi_id);
	ddi_soft_state_fini((void **)&lofi_statep);
	list_destroy(&lofi_list);

	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}