1 /* 2 * linux/drivers/block/loop.c 3 * 4 * Written by Theodore Ts'o, 3/29/93 5 * 6 * Copyright 1993 by Theodore Ts'o. Redistribution of this file is 7 * permitted under the GNU General Public License. 8 * 9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 11 * 12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 14 * 15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997 16 * 17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998 18 * 19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998 20 * 21 * Loadable modules and other fixes by AK, 1998 22 * 23 * Make real block number available to downstream transfer functions, enables 24 * CBC (and relatives) mode encryption requiring unique IVs per data block. 25 * Reed H. Petty, rhp@draper.net 26 * 27 * Maximum number of loop devices now dynamic via max_loop module parameter. 28 * Russell Kroll <rkroll@exploits.org> 19990701 29 * 30 * Maximum number of loop devices when compiled-in now selectable by passing 31 * max_loop=<1-255> to the kernel on boot. 32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999 33 * 34 * Completely rewrite request handling to be make_request_fn style and 35 * non blocking, pushing work to a helper thread. Lots of fixes from 36 * Al Viro too. 37 * Jens Axboe <axboe@suse.de>, Nov 2000 38 * 39 * Support up to 256 loop devices 40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002 41 * 42 * Support for falling back on the write file operation when the address space 43 * operations write_begin is not available on the backing filesystem. 44 * Anton Altaparmakov, 16 Feb 2005 45 * 46 * Still To Fix: 47 * - Advisory locking is ignored here. 48 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN 49 * 50 */ 51 52 #include <linux/module.h> 53 #include <linux/moduleparam.h> 54 #include <linux/sched.h> 55 #include <linux/fs.h> 56 #include <linux/file.h> 57 #include <linux/stat.h> 58 #include <linux/errno.h> 59 #include <linux/major.h> 60 #include <linux/wait.h> 61 #include <linux/blkdev.h> 62 #include <linux/blkpg.h> 63 #include <linux/init.h> 64 #include <linux/swap.h> 65 #include <linux/slab.h> 66 #include <linux/loop.h> 67 #include <linux/compat.h> 68 #include <linux/suspend.h> 69 #include <linux/freezer.h> 70 #include <linux/mutex.h> 71 #include <linux/writeback.h> 72 #include <linux/buffer_head.h> /* for invalidate_bdev() */ 73 #include <linux/completion.h> 74 #include <linux/highmem.h> 75 #include <linux/kthread.h> 76 #include <linux/splice.h> 77 #include <linux/sysfs.h> 78 #include <linux/miscdevice.h> 79 #include <linux/falloc.h> 80 81 #include <asm/uaccess.h> 82 83 static DEFINE_IDR(loop_index_idr); 84 static DEFINE_MUTEX(loop_index_mutex); 85 86 static int max_part; 87 static int part_shift; 88 89 /* 90 * Transfer functions 91 */ 92 static int transfer_none(struct loop_device *lo, int cmd, 93 struct page *raw_page, unsigned raw_off, 94 struct page *loop_page, unsigned loop_off, 95 int size, sector_t real_block) 96 { 97 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 98 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 99 100 if (cmd == READ) 101 memcpy(loop_buf, raw_buf, size); 102 else 103 memcpy(raw_buf, loop_buf, size); 104 105 kunmap_atomic(loop_buf, KM_USER1); 106 kunmap_atomic(raw_buf, KM_USER0); 107 cond_resched(); 108 return 0; 109 } 110 111 static int transfer_xor(struct loop_device *lo, int cmd, 112 struct page *raw_page, unsigned raw_off, 113 struct page *loop_page, unsigned loop_off, 114 int size, sector_t real_block) 115 { 116 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off; 117 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off; 118 char *in, *out, *key; 119 int i, keysize; 120 121 if (cmd == READ) { 122 in = raw_buf; 123 out = loop_buf; 124 } else { 125 in = loop_buf; 126 out = raw_buf; 127 } 128 129 key = lo->lo_encrypt_key; 130 keysize = lo->lo_encrypt_key_size; 131 for (i = 0; i < size; i++) 132 *out++ = *in++ ^ key[(i & 511) % keysize]; 133 134 kunmap_atomic(loop_buf, KM_USER1); 135 kunmap_atomic(raw_buf, KM_USER0); 136 cond_resched(); 137 return 0; 138 } 139 140 static int xor_init(struct loop_device *lo, const struct loop_info64 *info) 141 { 142 if (unlikely(info->lo_encrypt_key_size <= 0)) 143 return -EINVAL; 144 return 0; 145 } 146 147 static struct loop_func_table none_funcs = { 148 .number = LO_CRYPT_NONE, 149 .transfer = transfer_none, 150 }; 151 152 static struct loop_func_table xor_funcs = { 153 .number = LO_CRYPT_XOR, 154 .transfer = transfer_xor, 155 .init = xor_init 156 }; 157 158 /* xfer_funcs[0] is special - its release function is never called */ 159 static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { 160 &none_funcs, 161 &xor_funcs 162 }; 163 164 static loff_t get_loop_size(struct loop_device *lo, struct file *file) 165 { 166 loff_t size, offset, loopsize; 167 168 /* Compute loopsize in bytes */ 169 size = i_size_read(file->f_mapping->host); 170 offset = lo->lo_offset; 171 loopsize = size - offset; 172 if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize) 173 loopsize = lo->lo_sizelimit; 174 175 /* 176 * Unfortunately, if we want to do I/O on the device, 177 * the number of 512-byte sectors has to fit into a sector_t. 178 */ 179 return loopsize >> 9; 180 } 181 182 static int 183 figure_loop_size(struct loop_device *lo) 184 { 185 loff_t size = get_loop_size(lo, lo->lo_backing_file); 186 sector_t x = (sector_t)size; 187 188 if (unlikely((loff_t)x != size)) 189 return -EFBIG; 190 191 set_capacity(lo->lo_disk, x); 192 return 0; 193 } 194 195 static inline int 196 lo_do_transfer(struct loop_device *lo, int cmd, 197 struct page *rpage, unsigned roffs, 198 struct page *lpage, unsigned loffs, 199 int size, sector_t rblock) 200 { 201 if (unlikely(!lo->transfer)) 202 return 0; 203 204 return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); 205 } 206 207 /** 208 * __do_lo_send_write - helper for writing data to a loop device 209 * 210 * This helper just factors out common code between do_lo_send_direct_write() 211 * and do_lo_send_write(). 212 */ 213 static int __do_lo_send_write(struct file *file, 214 u8 *buf, const int len, loff_t pos) 215 { 216 ssize_t bw; 217 mm_segment_t old_fs = get_fs(); 218 219 set_fs(get_ds()); 220 bw = file->f_op->write(file, buf, len, &pos); 221 set_fs(old_fs); 222 if (likely(bw == len)) 223 return 0; 224 printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", 225 (unsigned long long)pos, len); 226 if (bw >= 0) 227 bw = -EIO; 228 return bw; 229 } 230 231 /** 232 * do_lo_send_direct_write - helper for writing data to a loop device 233 * 234 * This is the fast, non-transforming version that does not need double 235 * buffering. 236 */ 237 static int do_lo_send_direct_write(struct loop_device *lo, 238 struct bio_vec *bvec, loff_t pos, struct page *page) 239 { 240 ssize_t bw = __do_lo_send_write(lo->lo_backing_file, 241 kmap(bvec->bv_page) + bvec->bv_offset, 242 bvec->bv_len, pos); 243 kunmap(bvec->bv_page); 244 cond_resched(); 245 return bw; 246 } 247 248 /** 249 * do_lo_send_write - helper for writing data to a loop device 250 * 251 * This is the slow, transforming version that needs to double buffer the 252 * data as it cannot do the transformations in place without having direct 253 * access to the destination pages of the backing file. 254 */ 255 static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, 256 loff_t pos, struct page *page) 257 { 258 int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, 259 bvec->bv_offset, bvec->bv_len, pos >> 9); 260 if (likely(!ret)) 261 return __do_lo_send_write(lo->lo_backing_file, 262 page_address(page), bvec->bv_len, 263 pos); 264 printk(KERN_ERR "loop: Transfer error at byte offset %llu, " 265 "length %i.\n", (unsigned long long)pos, bvec->bv_len); 266 if (ret > 0) 267 ret = -EIO; 268 return ret; 269 } 270 271 static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos) 272 { 273 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, 274 struct page *page); 275 struct bio_vec *bvec; 276 struct page *page = NULL; 277 int i, ret = 0; 278 279 if (lo->transfer != transfer_none) { 280 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 281 if (unlikely(!page)) 282 goto fail; 283 kmap(page); 284 do_lo_send = do_lo_send_write; 285 } else { 286 do_lo_send = do_lo_send_direct_write; 287 } 288 289 bio_for_each_segment(bvec, bio, i) { 290 ret = do_lo_send(lo, bvec, pos, page); 291 if (ret < 0) 292 break; 293 pos += bvec->bv_len; 294 } 295 if (page) { 296 kunmap(page); 297 __free_page(page); 298 } 299 out: 300 return ret; 301 fail: 302 printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); 303 ret = -ENOMEM; 304 goto out; 305 } 306 307 struct lo_read_data { 308 struct loop_device *lo; 309 struct page *page; 310 unsigned offset; 311 int bsize; 312 }; 313 314 static int 315 lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 316 struct splice_desc *sd) 317 { 318 struct lo_read_data *p = sd->u.data; 319 struct loop_device *lo = p->lo; 320 struct page *page = buf->page; 321 sector_t IV; 322 int size; 323 324 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + 325 (buf->offset >> 9); 326 size = sd->len; 327 if (size > p->bsize) 328 size = p->bsize; 329 330 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { 331 printk(KERN_ERR "loop: transfer error block %ld\n", 332 page->index); 333 size = -EINVAL; 334 } 335 336 flush_dcache_page(p->page); 337 338 if (size > 0) 339 p->offset += size; 340 341 return size; 342 } 343 344 static int 345 lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) 346 { 347 return __splice_from_pipe(pipe, sd, lo_splice_actor); 348 } 349 350 static int 351 do_lo_receive(struct loop_device *lo, 352 struct bio_vec *bvec, int bsize, loff_t pos) 353 { 354 struct lo_read_data cookie; 355 struct splice_desc sd; 356 struct file *file; 357 long retval; 358 359 cookie.lo = lo; 360 cookie.page = bvec->bv_page; 361 cookie.offset = bvec->bv_offset; 362 cookie.bsize = bsize; 363 364 sd.len = 0; 365 sd.total_len = bvec->bv_len; 366 sd.flags = 0; 367 sd.pos = pos; 368 sd.u.data = &cookie; 369 370 file = lo->lo_backing_file; 371 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); 372 373 if (retval < 0) 374 return retval; 375 376 return 0; 377 } 378 379 static int 380 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) 381 { 382 struct bio_vec *bvec; 383 int i, ret = 0; 384 385 bio_for_each_segment(bvec, bio, i) { 386 ret = do_lo_receive(lo, bvec, bsize, pos); 387 if (ret < 0) 388 break; 389 pos += bvec->bv_len; 390 } 391 return ret; 392 } 393 394 static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) 395 { 396 loff_t pos; 397 int ret; 398 399 pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; 400 401 if (bio_rw(bio) == WRITE) { 402 struct file *file = lo->lo_backing_file; 403 404 if (bio->bi_rw & REQ_FLUSH) { 405 ret = vfs_fsync(file, 0); 406 if (unlikely(ret && ret != -EINVAL)) { 407 ret = -EIO; 408 goto out; 409 } 410 } 411 412 /* 413 * We use punch hole to reclaim the free space used by the 414 * image a.k.a. discard. However we do support discard if 415 * encryption is enabled, because it may give an attacker 416 * useful information. 417 */ 418 if (bio->bi_rw & REQ_DISCARD) { 419 struct file *file = lo->lo_backing_file; 420 int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; 421 422 if ((!file->f_op->fallocate) || 423 lo->lo_encrypt_key_size) { 424 ret = -EOPNOTSUPP; 425 goto out; 426 } 427 ret = file->f_op->fallocate(file, mode, pos, 428 bio->bi_size); 429 if (unlikely(ret && ret != -EINVAL && 430 ret != -EOPNOTSUPP)) 431 ret = -EIO; 432 goto out; 433 } 434 435 ret = lo_send(lo, bio, pos); 436 437 if ((bio->bi_rw & REQ_FUA) && !ret) { 438 ret = vfs_fsync(file, 0); 439 if (unlikely(ret && ret != -EINVAL)) 440 ret = -EIO; 441 } 442 } else 443 ret = lo_receive(lo, bio, lo->lo_blocksize, pos); 444 445 out: 446 return ret; 447 } 448 449 /* 450 * Add bio to back of pending list 451 */ 452 static void loop_add_bio(struct loop_device *lo, struct bio *bio) 453 { 454 bio_list_add(&lo->lo_bio_list, bio); 455 } 456 457 /* 458 * Grab first pending buffer 459 */ 460 static struct bio *loop_get_bio(struct loop_device *lo) 461 { 462 return bio_list_pop(&lo->lo_bio_list); 463 } 464 465 static void loop_make_request(struct request_queue *q, struct bio *old_bio) 466 { 467 struct loop_device *lo = q->queuedata; 468 int rw = bio_rw(old_bio); 469 470 if (rw == READA) 471 rw = READ; 472 473 BUG_ON(!lo || (rw != READ && rw != WRITE)); 474 475 spin_lock_irq(&lo->lo_lock); 476 if (lo->lo_state != Lo_bound) 477 goto out; 478 if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) 479 goto out; 480 loop_add_bio(lo, old_bio); 481 wake_up(&lo->lo_event); 482 spin_unlock_irq(&lo->lo_lock); 483 return; 484 485 out: 486 spin_unlock_irq(&lo->lo_lock); 487 bio_io_error(old_bio); 488 } 489 490 struct switch_request { 491 struct file *file; 492 struct completion wait; 493 }; 494 495 static void do_loop_switch(struct loop_device *, struct switch_request *); 496 497 static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) 498 { 499 if (unlikely(!bio->bi_bdev)) { 500 do_loop_switch(lo, bio->bi_private); 501 bio_put(bio); 502 } else { 503 int ret = do_bio_filebacked(lo, bio); 504 bio_endio(bio, ret); 505 } 506 } 507 508 /* 509 * worker thread that handles reads/writes to file backed loop devices, 510 * to avoid blocking in our make_request_fn. it also does loop decrypting 511 * on reads for block backed loop, as that is too heavy to do from 512 * b_end_io context where irqs may be disabled. 513 * 514 * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before 515 * calling kthread_stop(). Therefore once kthread_should_stop() is 516 * true, make_request will not place any more requests. Therefore 517 * once kthread_should_stop() is true and lo_bio is NULL, we are 518 * done with the loop. 519 */ 520 static int loop_thread(void *data) 521 { 522 struct loop_device *lo = data; 523 struct bio *bio; 524 525 set_user_nice(current, -20); 526 527 while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { 528 529 wait_event_interruptible(lo->lo_event, 530 !bio_list_empty(&lo->lo_bio_list) || 531 kthread_should_stop()); 532 533 if (bio_list_empty(&lo->lo_bio_list)) 534 continue; 535 spin_lock_irq(&lo->lo_lock); 536 bio = loop_get_bio(lo); 537 spin_unlock_irq(&lo->lo_lock); 538 539 BUG_ON(!bio); 540 loop_handle_bio(lo, bio); 541 } 542 543 return 0; 544 } 545 546 /* 547 * loop_switch performs the hard work of switching a backing store. 548 * First it needs to flush existing IO, it does this by sending a magic 549 * BIO down the pipe. The completion of this BIO does the actual switch. 550 */ 551 static int loop_switch(struct loop_device *lo, struct file *file) 552 { 553 struct switch_request w; 554 struct bio *bio = bio_alloc(GFP_KERNEL, 0); 555 if (!bio) 556 return -ENOMEM; 557 init_completion(&w.wait); 558 w.file = file; 559 bio->bi_private = &w; 560 bio->bi_bdev = NULL; 561 loop_make_request(lo->lo_queue, bio); 562 wait_for_completion(&w.wait); 563 return 0; 564 } 565 566 /* 567 * Helper to flush the IOs in loop, but keeping loop thread running 568 */ 569 static int loop_flush(struct loop_device *lo) 570 { 571 /* loop not yet configured, no running thread, nothing to flush */ 572 if (!lo->lo_thread) 573 return 0; 574 575 return loop_switch(lo, NULL); 576 } 577 578 /* 579 * Do the actual switch; called from the BIO completion routine 580 */ 581 static void do_loop_switch(struct loop_device *lo, struct switch_request *p) 582 { 583 struct file *file = p->file; 584 struct file *old_file = lo->lo_backing_file; 585 struct address_space *mapping; 586 587 /* if no new file, only flush of queued bios requested */ 588 if (!file) 589 goto out; 590 591 mapping = file->f_mapping; 592 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); 593 lo->lo_backing_file = file; 594 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? 595 mapping->host->i_bdev->bd_block_size : PAGE_SIZE; 596 lo->old_gfp_mask = mapping_gfp_mask(mapping); 597 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 598 out: 599 complete(&p->wait); 600 } 601 602 603 /* 604 * loop_change_fd switched the backing store of a loopback device to 605 * a new file. This is useful for operating system installers to free up 606 * the original file and in High Availability environments to switch to 607 * an alternative location for the content in case of server meltdown. 608 * This can only work if the loop device is used read-only, and if the 609 * new backing store is the same size and type as the old backing store. 610 */ 611 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, 612 unsigned int arg) 613 { 614 struct file *file, *old_file; 615 struct inode *inode; 616 int error; 617 618 error = -ENXIO; 619 if (lo->lo_state != Lo_bound) 620 goto out; 621 622 /* the loop device has to be read-only */ 623 error = -EINVAL; 624 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) 625 goto out; 626 627 error = -EBADF; 628 file = fget(arg); 629 if (!file) 630 goto out; 631 632 inode = file->f_mapping->host; 633 old_file = lo->lo_backing_file; 634 635 error = -EINVAL; 636 637 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 638 goto out_putf; 639 640 /* size of the new backing store needs to be the same */ 641 if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) 642 goto out_putf; 643 644 /* and ... switch */ 645 error = loop_switch(lo, file); 646 if (error) 647 goto out_putf; 648 649 fput(old_file); 650 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 651 ioctl_by_bdev(bdev, BLKRRPART, 0); 652 return 0; 653 654 out_putf: 655 fput(file); 656 out: 657 return error; 658 } 659 660 static inline int is_loop_device(struct file *file) 661 { 662 struct inode *i = file->f_mapping->host; 663 664 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; 665 } 666 667 /* loop sysfs attributes */ 668 669 static ssize_t loop_attr_show(struct device *dev, char *page, 670 ssize_t (*callback)(struct loop_device *, char *)) 671 { 672 struct gendisk *disk = dev_to_disk(dev); 673 struct loop_device *lo = disk->private_data; 674 675 return callback(lo, page); 676 } 677 678 #define LOOP_ATTR_RO(_name) \ 679 static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \ 680 static ssize_t loop_attr_do_show_##_name(struct device *d, \ 681 struct device_attribute *attr, char *b) \ 682 { \ 683 return loop_attr_show(d, b, loop_attr_##_name##_show); \ 684 } \ 685 static struct device_attribute loop_attr_##_name = \ 686 __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL); 687 688 static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) 689 { 690 ssize_t ret; 691 char *p = NULL; 692 693 spin_lock_irq(&lo->lo_lock); 694 if (lo->lo_backing_file) 695 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); 696 spin_unlock_irq(&lo->lo_lock); 697 698 if (IS_ERR_OR_NULL(p)) 699 ret = PTR_ERR(p); 700 else { 701 ret = strlen(p); 702 memmove(buf, p, ret); 703 buf[ret++] = '\n'; 704 buf[ret] = 0; 705 } 706 707 return ret; 708 } 709 710 static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) 711 { 712 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); 713 } 714 715 static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) 716 { 717 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); 718 } 719 720 static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) 721 { 722 int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); 723 724 return sprintf(buf, "%s\n", autoclear ? "1" : "0"); 725 } 726 727 static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) 728 { 729 int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); 730 731 return sprintf(buf, "%s\n", partscan ? "1" : "0"); 732 } 733 734 LOOP_ATTR_RO(backing_file); 735 LOOP_ATTR_RO(offset); 736 LOOP_ATTR_RO(sizelimit); 737 LOOP_ATTR_RO(autoclear); 738 LOOP_ATTR_RO(partscan); 739 740 static struct attribute *loop_attrs[] = { 741 &loop_attr_backing_file.attr, 742 &loop_attr_offset.attr, 743 &loop_attr_sizelimit.attr, 744 &loop_attr_autoclear.attr, 745 &loop_attr_partscan.attr, 746 NULL, 747 }; 748 749 static struct attribute_group loop_attribute_group = { 750 .name = "loop", 751 .attrs= loop_attrs, 752 }; 753 754 static int loop_sysfs_init(struct loop_device *lo) 755 { 756 return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, 757 &loop_attribute_group); 758 } 759 760 static void loop_sysfs_exit(struct loop_device *lo) 761 { 762 sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, 763 &loop_attribute_group); 764 } 765 766 static void loop_config_discard(struct loop_device *lo) 767 { 768 struct file *file = lo->lo_backing_file; 769 struct inode *inode = file->f_mapping->host; 770 struct request_queue *q = lo->lo_queue; 771 772 /* 773 * We use punch hole to reclaim the free space used by the 774 * image a.k.a. discard. However we do support discard if 775 * encryption is enabled, because it may give an attacker 776 * useful information. 777 */ 778 if ((!file->f_op->fallocate) || 779 lo->lo_encrypt_key_size) { 780 q->limits.discard_granularity = 0; 781 q->limits.discard_alignment = 0; 782 q->limits.max_discard_sectors = 0; 783 q->limits.discard_zeroes_data = 0; 784 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); 785 return; 786 } 787 788 q->limits.discard_granularity = inode->i_sb->s_blocksize; 789 q->limits.discard_alignment = inode->i_sb->s_blocksize; 790 q->limits.max_discard_sectors = UINT_MAX >> 9; 791 q->limits.discard_zeroes_data = 1; 792 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); 793 } 794 795 static int loop_set_fd(struct loop_device *lo, fmode_t mode, 796 struct block_device *bdev, unsigned int arg) 797 { 798 struct file *file, *f; 799 struct inode *inode; 800 struct address_space *mapping; 801 unsigned lo_blocksize; 802 int lo_flags = 0; 803 int error; 804 loff_t size; 805 806 /* This is safe, since we have a reference from open(). */ 807 __module_get(THIS_MODULE); 808 809 error = -EBADF; 810 file = fget(arg); 811 if (!file) 812 goto out; 813 814 error = -EBUSY; 815 if (lo->lo_state != Lo_unbound) 816 goto out_putf; 817 818 /* Avoid recursion */ 819 f = file; 820 while (is_loop_device(f)) { 821 struct loop_device *l; 822 823 if (f->f_mapping->host->i_bdev == bdev) 824 goto out_putf; 825 826 l = f->f_mapping->host->i_bdev->bd_disk->private_data; 827 if (l->lo_state == Lo_unbound) { 828 error = -EINVAL; 829 goto out_putf; 830 } 831 f = l->lo_backing_file; 832 } 833 834 mapping = file->f_mapping; 835 inode = mapping->host; 836 837 error = -EINVAL; 838 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) 839 goto out_putf; 840 841 if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || 842 !file->f_op->write) 843 lo_flags |= LO_FLAGS_READ_ONLY; 844 845 lo_blocksize = S_ISBLK(inode->i_mode) ? 846 inode->i_bdev->bd_block_size : PAGE_SIZE; 847 848 error = -EFBIG; 849 size = get_loop_size(lo, file); 850 if ((loff_t)(sector_t)size != size) 851 goto out_putf; 852 853 error = 0; 854 855 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); 856 857 lo->lo_blocksize = lo_blocksize; 858 lo->lo_device = bdev; 859 lo->lo_flags = lo_flags; 860 lo->lo_backing_file = file; 861 lo->transfer = transfer_none; 862 lo->ioctl = NULL; 863 lo->lo_sizelimit = 0; 864 lo->old_gfp_mask = mapping_gfp_mask(mapping); 865 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 866 867 bio_list_init(&lo->lo_bio_list); 868 869 /* 870 * set queue make_request_fn, and add limits based on lower level 871 * device 872 */ 873 blk_queue_make_request(lo->lo_queue, loop_make_request); 874 lo->lo_queue->queuedata = lo; 875 876 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 877 blk_queue_flush(lo->lo_queue, REQ_FLUSH); 878 879 set_capacity(lo->lo_disk, size); 880 bd_set_size(bdev, size << 9); 881 loop_sysfs_init(lo); 882 /* let user-space know about the new size */ 883 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 884 885 set_blocksize(bdev, lo_blocksize); 886 887 lo->lo_thread = kthread_create(loop_thread, lo, "loop%d", 888 lo->lo_number); 889 if (IS_ERR(lo->lo_thread)) { 890 error = PTR_ERR(lo->lo_thread); 891 goto out_clr; 892 } 893 lo->lo_state = Lo_bound; 894 wake_up_process(lo->lo_thread); 895 if (part_shift) 896 lo->lo_flags |= LO_FLAGS_PARTSCAN; 897 if (lo->lo_flags & LO_FLAGS_PARTSCAN) 898 ioctl_by_bdev(bdev, BLKRRPART, 0); 899 return 0; 900 901 out_clr: 902 loop_sysfs_exit(lo); 903 lo->lo_thread = NULL; 904 lo->lo_device = NULL; 905 lo->lo_backing_file = NULL; 906 lo->lo_flags = 0; 907 set_capacity(lo->lo_disk, 0); 908 invalidate_bdev(bdev); 909 bd_set_size(bdev, 0); 910 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 911 mapping_set_gfp_mask(mapping, lo->old_gfp_mask); 912 lo->lo_state = Lo_unbound; 913 out_putf: 914 fput(file); 915 out: 916 /* This is safe: open() is still holding a reference. */ 917 module_put(THIS_MODULE); 918 return error; 919 } 920 921 static int 922 loop_release_xfer(struct loop_device *lo) 923 { 924 int err = 0; 925 struct loop_func_table *xfer = lo->lo_encryption; 926 927 if (xfer) { 928 if (xfer->release) 929 err = xfer->release(lo); 930 lo->transfer = NULL; 931 lo->lo_encryption = NULL; 932 module_put(xfer->owner); 933 } 934 return err; 935 } 936 937 static int 938 loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, 939 const struct loop_info64 *i) 940 { 941 int err = 0; 942 943 if (xfer) { 944 struct module *owner = xfer->owner; 945 946 if (!try_module_get(owner)) 947 return -EINVAL; 948 if (xfer->init) 949 err = xfer->init(lo, i); 950 if (err) 951 module_put(owner); 952 else 953 lo->lo_encryption = xfer; 954 } 955 return err; 956 } 957 958 static int loop_clr_fd(struct loop_device *lo) 959 { 960 struct file *filp = lo->lo_backing_file; 961 gfp_t gfp = lo->old_gfp_mask; 962 struct block_device *bdev = lo->lo_device; 963 964 if (lo->lo_state != Lo_bound) 965 return -ENXIO; 966 967 if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ 968 return -EBUSY; 969 970 if (filp == NULL) 971 return -EINVAL; 972 973 spin_lock_irq(&lo->lo_lock); 974 lo->lo_state = Lo_rundown; 975 spin_unlock_irq(&lo->lo_lock); 976 977 kthread_stop(lo->lo_thread); 978 979 spin_lock_irq(&lo->lo_lock); 980 lo->lo_backing_file = NULL; 981 spin_unlock_irq(&lo->lo_lock); 982 983 loop_release_xfer(lo); 984 lo->transfer = NULL; 985 lo->ioctl = NULL; 986 lo->lo_device = NULL; 987 lo->lo_encryption = NULL; 988 lo->lo_offset = 0; 989 lo->lo_sizelimit = 0; 990 lo->lo_encrypt_key_size = 0; 991 lo->lo_thread = NULL; 992 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); 993 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); 994 memset(lo->lo_file_name, 0, LO_NAME_SIZE); 995 if (bdev) 996 invalidate_bdev(bdev); 997 set_capacity(lo->lo_disk, 0); 998 loop_sysfs_exit(lo); 999 if (bdev) { 1000 bd_set_size(bdev, 0); 1001 /* let user-space know about this change */ 1002 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1003 } 1004 mapping_set_gfp_mask(filp->f_mapping, gfp); 1005 lo->lo_state = Lo_unbound; 1006 /* This is safe: open() is still holding a reference. */ 1007 module_put(THIS_MODULE); 1008 if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) 1009 ioctl_by_bdev(bdev, BLKRRPART, 0); 1010 lo->lo_flags = 0; 1011 if (!part_shift) 1012 lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; 1013 mutex_unlock(&lo->lo_ctl_mutex); 1014 /* 1015 * Need not hold lo_ctl_mutex to fput backing file. 1016 * Calling fput holding lo_ctl_mutex triggers a circular 1017 * lock dependency possibility warning as fput can take 1018 * bd_mutex which is usually taken before lo_ctl_mutex. 1019 */ 1020 fput(filp); 1021 return 0; 1022 } 1023 1024 static int 1025 loop_set_status(struct loop_device *lo, const struct loop_info64 *info) 1026 { 1027 int err; 1028 struct loop_func_table *xfer; 1029 uid_t uid = current_uid(); 1030 1031 if (lo->lo_encrypt_key_size && 1032 lo->lo_key_owner != uid && 1033 !capable(CAP_SYS_ADMIN)) 1034 return -EPERM; 1035 if (lo->lo_state != Lo_bound) 1036 return -ENXIO; 1037 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 1038 return -EINVAL; 1039 1040 err = loop_release_xfer(lo); 1041 if (err) 1042 return err; 1043 1044 if (info->lo_encrypt_type) { 1045 unsigned int type = info->lo_encrypt_type; 1046 1047 if (type >= MAX_LO_CRYPT) 1048 return -EINVAL; 1049 xfer = xfer_funcs[type]; 1050 if (xfer == NULL) 1051 return -EINVAL; 1052 } else 1053 xfer = NULL; 1054 1055 err = loop_init_xfer(lo, xfer, info); 1056 if (err) 1057 return err; 1058 1059 if (lo->lo_offset != info->lo_offset || 1060 lo->lo_sizelimit != info->lo_sizelimit) { 1061 lo->lo_offset = info->lo_offset; 1062 lo->lo_sizelimit = info->lo_sizelimit; 1063 if (figure_loop_size(lo)) 1064 return -EFBIG; 1065 } 1066 loop_config_discard(lo); 1067 1068 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1069 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 1070 lo->lo_file_name[LO_NAME_SIZE-1] = 0; 1071 lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; 1072 1073 if (!xfer) 1074 xfer = &none_funcs; 1075 lo->transfer = xfer->transfer; 1076 lo->ioctl = xfer->ioctl; 1077 1078 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != 1079 (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 1080 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 1081 1082 if ((info->lo_flags & LO_FLAGS_PARTSCAN) && 1083 !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { 1084 lo->lo_flags |= LO_FLAGS_PARTSCAN; 1085 lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; 1086 ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); 1087 } 1088 1089 lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1090 lo->lo_init[0] = info->lo_init[0]; 1091 lo->lo_init[1] = info->lo_init[1]; 1092 if (info->lo_encrypt_key_size) { 1093 memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, 1094 info->lo_encrypt_key_size); 1095 lo->lo_key_owner = uid; 1096 } 1097 1098 return 0; 1099 } 1100 1101 static int 1102 loop_get_status(struct loop_device *lo, struct loop_info64 *info) 1103 { 1104 struct file *file = lo->lo_backing_file; 1105 struct kstat stat; 1106 int error; 1107 1108 if (lo->lo_state != Lo_bound) 1109 return -ENXIO; 1110 error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat); 1111 if (error) 1112 return error; 1113 memset(info, 0, sizeof(*info)); 1114 info->lo_number = lo->lo_number; 1115 info->lo_device = huge_encode_dev(stat.dev); 1116 info->lo_inode = stat.ino; 1117 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); 1118 info->lo_offset = lo->lo_offset; 1119 info->lo_sizelimit = lo->lo_sizelimit; 1120 info->lo_flags = lo->lo_flags; 1121 memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); 1122 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE); 1123 info->lo_encrypt_type = 1124 lo->lo_encryption ? lo->lo_encryption->number : 0; 1125 if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) { 1126 info->lo_encrypt_key_size = lo->lo_encrypt_key_size; 1127 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, 1128 lo->lo_encrypt_key_size); 1129 } 1130 return 0; 1131 } 1132 1133 static void 1134 loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) 1135 { 1136 memset(info64, 0, sizeof(*info64)); 1137 info64->lo_number = info->lo_number; 1138 info64->lo_device = info->lo_device; 1139 info64->lo_inode = info->lo_inode; 1140 info64->lo_rdevice = info->lo_rdevice; 1141 info64->lo_offset = info->lo_offset; 1142 info64->lo_sizelimit = 0; 1143 info64->lo_encrypt_type = info->lo_encrypt_type; 1144 info64->lo_encrypt_key_size = info->lo_encrypt_key_size; 1145 info64->lo_flags = info->lo_flags; 1146 info64->lo_init[0] = info->lo_init[0]; 1147 info64->lo_init[1] = info->lo_init[1]; 1148 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1149 memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE); 1150 else 1151 memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); 1152 memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE); 1153 } 1154 1155 static int 1156 loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) 1157 { 1158 memset(info, 0, sizeof(*info)); 1159 info->lo_number = info64->lo_number; 1160 info->lo_device = info64->lo_device; 1161 info->lo_inode = info64->lo_inode; 1162 info->lo_rdevice = info64->lo_rdevice; 1163 info->lo_offset = info64->lo_offset; 1164 info->lo_encrypt_type = info64->lo_encrypt_type; 1165 info->lo_encrypt_key_size = info64->lo_encrypt_key_size; 1166 info->lo_flags = info64->lo_flags; 1167 info->lo_init[0] = info64->lo_init[0]; 1168 info->lo_init[1] = info64->lo_init[1]; 1169 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1170 memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1171 else 1172 memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); 1173 memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1174 1175 /* error in case values were truncated */ 1176 if (info->lo_device != info64->lo_device || 1177 info->lo_rdevice != info64->lo_rdevice || 1178 info->lo_inode != info64->lo_inode || 1179 info->lo_offset != info64->lo_offset) 1180 return -EOVERFLOW; 1181 1182 return 0; 1183 } 1184 1185 static int 1186 loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) 1187 { 1188 struct loop_info info; 1189 struct loop_info64 info64; 1190 1191 if (copy_from_user(&info, arg, sizeof (struct loop_info))) 1192 return -EFAULT; 1193 loop_info64_from_old(&info, &info64); 1194 return loop_set_status(lo, &info64); 1195 } 1196 1197 static int 1198 loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) 1199 { 1200 struct loop_info64 info64; 1201 1202 if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) 1203 return -EFAULT; 1204 return loop_set_status(lo, &info64); 1205 } 1206 1207 static int 1208 loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { 1209 struct loop_info info; 1210 struct loop_info64 info64; 1211 int err = 0; 1212 1213 if (!arg) 1214 err = -EINVAL; 1215 if (!err) 1216 err = loop_get_status(lo, &info64); 1217 if (!err) 1218 err = loop_info64_to_old(&info64, &info); 1219 if (!err && copy_to_user(arg, &info, sizeof(info))) 1220 err = -EFAULT; 1221 1222 return err; 1223 } 1224 1225 static int 1226 loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { 1227 struct loop_info64 info64; 1228 int err = 0; 1229 1230 if (!arg) 1231 err = -EINVAL; 1232 if (!err) 1233 err = loop_get_status(lo, &info64); 1234 if (!err && copy_to_user(arg, &info64, sizeof(info64))) 1235 err = -EFAULT; 1236 1237 return err; 1238 } 1239 1240 static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) 1241 { 1242 int err; 1243 sector_t sec; 1244 loff_t sz; 1245 1246 err = -ENXIO; 1247 if (unlikely(lo->lo_state != Lo_bound)) 1248 goto out; 1249 err = figure_loop_size(lo); 1250 if (unlikely(err)) 1251 goto out; 1252 sec = get_capacity(lo->lo_disk); 1253 /* the width of sector_t may be narrow for bit-shift */ 1254 sz = sec; 1255 sz <<= 9; 1256 mutex_lock(&bdev->bd_mutex); 1257 bd_set_size(bdev, sz); 1258 /* let user-space know about the new size */ 1259 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1260 mutex_unlock(&bdev->bd_mutex); 1261 1262 out: 1263 return err; 1264 } 1265 1266 static int lo_ioctl(struct block_device *bdev, fmode_t mode, 1267 unsigned int cmd, unsigned long arg) 1268 { 1269 struct loop_device *lo = bdev->bd_disk->private_data; 1270 int err; 1271 1272 mutex_lock_nested(&lo->lo_ctl_mutex, 1); 1273 switch (cmd) { 1274 case LOOP_SET_FD: 1275 err = loop_set_fd(lo, mode, bdev, arg); 1276 break; 1277 case LOOP_CHANGE_FD: 1278 err = loop_change_fd(lo, bdev, arg); 1279 break; 1280 case LOOP_CLR_FD: 1281 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ 1282 err = loop_clr_fd(lo); 1283 if (!err) 1284 goto out_unlocked; 1285 break; 1286 case LOOP_SET_STATUS: 1287 err = loop_set_status_old(lo, (struct loop_info __user *) arg); 1288 break; 1289 case LOOP_GET_STATUS: 1290 err = loop_get_status_old(lo, (struct loop_info __user *) arg); 1291 break; 1292 case LOOP_SET_STATUS64: 1293 err = loop_set_status64(lo, (struct loop_info64 __user *) arg); 1294 break; 1295 case LOOP_GET_STATUS64: 1296 err = loop_get_status64(lo, (struct loop_info64 __user *) arg); 1297 break; 1298 case LOOP_SET_CAPACITY: 1299 err = -EPERM; 1300 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) 1301 err = loop_set_capacity(lo, bdev); 1302 break; 1303 default: 1304 err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; 1305 } 1306 mutex_unlock(&lo->lo_ctl_mutex); 1307 1308 out_unlocked: 1309 return err; 1310 } 1311 1312 #ifdef CONFIG_COMPAT 1313 struct compat_loop_info { 1314 compat_int_t lo_number; /* ioctl r/o */ 1315 compat_dev_t lo_device; /* ioctl r/o */ 1316 compat_ulong_t lo_inode; /* ioctl r/o */ 1317 compat_dev_t lo_rdevice; /* ioctl r/o */ 1318 compat_int_t lo_offset; 1319 compat_int_t lo_encrypt_type; 1320 compat_int_t lo_encrypt_key_size; /* ioctl w/o */ 1321 compat_int_t lo_flags; /* ioctl r/o */ 1322 char lo_name[LO_NAME_SIZE]; 1323 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ 1324 compat_ulong_t lo_init[2]; 1325 char reserved[4]; 1326 }; 1327 1328 /* 1329 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info 1330 * - noinlined to reduce stack space usage in main part of driver 1331 */ 1332 static noinline int 1333 loop_info64_from_compat(const struct compat_loop_info __user *arg, 1334 struct loop_info64 *info64) 1335 { 1336 struct compat_loop_info info; 1337 1338 if (copy_from_user(&info, arg, sizeof(info))) 1339 return -EFAULT; 1340 1341 memset(info64, 0, sizeof(*info64)); 1342 info64->lo_number = info.lo_number; 1343 info64->lo_device = info.lo_device; 1344 info64->lo_inode = info.lo_inode; 1345 info64->lo_rdevice = info.lo_rdevice; 1346 info64->lo_offset = info.lo_offset; 1347 info64->lo_sizelimit = 0; 1348 info64->lo_encrypt_type = info.lo_encrypt_type; 1349 info64->lo_encrypt_key_size = info.lo_encrypt_key_size; 1350 info64->lo_flags = info.lo_flags; 1351 info64->lo_init[0] = info.lo_init[0]; 1352 info64->lo_init[1] = info.lo_init[1]; 1353 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1354 memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE); 1355 else 1356 memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); 1357 memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE); 1358 return 0; 1359 } 1360 1361 /* 1362 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace 1363 * - noinlined to reduce stack space usage in main part of driver 1364 */ 1365 static noinline int 1366 loop_info64_to_compat(const struct loop_info64 *info64, 1367 struct compat_loop_info __user *arg) 1368 { 1369 struct compat_loop_info info; 1370 1371 memset(&info, 0, sizeof(info)); 1372 info.lo_number = info64->lo_number; 1373 info.lo_device = info64->lo_device; 1374 info.lo_inode = info64->lo_inode; 1375 info.lo_rdevice = info64->lo_rdevice; 1376 info.lo_offset = info64->lo_offset; 1377 info.lo_encrypt_type = info64->lo_encrypt_type; 1378 info.lo_encrypt_key_size = info64->lo_encrypt_key_size; 1379 info.lo_flags = info64->lo_flags; 1380 info.lo_init[0] = info64->lo_init[0]; 1381 info.lo_init[1] = info64->lo_init[1]; 1382 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI) 1383 memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE); 1384 else 1385 memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); 1386 memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE); 1387 1388 /* error in case values were truncated */ 1389 if (info.lo_device != info64->lo_device || 1390 info.lo_rdevice != info64->lo_rdevice || 1391 info.lo_inode != info64->lo_inode || 1392 info.lo_offset != info64->lo_offset || 1393 info.lo_init[0] != info64->lo_init[0] || 1394 info.lo_init[1] != info64->lo_init[1]) 1395 return -EOVERFLOW; 1396 1397 if (copy_to_user(arg, &info, sizeof(info))) 1398 return -EFAULT; 1399 return 0; 1400 } 1401 1402 static int 1403 loop_set_status_compat(struct loop_device *lo, 1404 const struct compat_loop_info __user *arg) 1405 { 1406 struct loop_info64 info64; 1407 int ret; 1408 1409 ret = loop_info64_from_compat(arg, &info64); 1410 if (ret < 0) 1411 return ret; 1412 return loop_set_status(lo, &info64); 1413 } 1414 1415 static int 1416 loop_get_status_compat(struct loop_device *lo, 1417 struct compat_loop_info __user *arg) 1418 { 1419 struct loop_info64 info64; 1420 int err = 0; 1421 1422 if (!arg) 1423 err = -EINVAL; 1424 if (!err) 1425 err = loop_get_status(lo, &info64); 1426 if (!err) 1427 err = loop_info64_to_compat(&info64, arg); 1428 return err; 1429 } 1430 1431 static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, 1432 unsigned int cmd, unsigned long arg) 1433 { 1434 struct loop_device *lo = bdev->bd_disk->private_data; 1435 int err; 1436 1437 switch(cmd) { 1438 case LOOP_SET_STATUS: 1439 mutex_lock(&lo->lo_ctl_mutex); 1440 err = loop_set_status_compat( 1441 lo, (const struct compat_loop_info __user *) arg); 1442 mutex_unlock(&lo->lo_ctl_mutex); 1443 break; 1444 case LOOP_GET_STATUS: 1445 mutex_lock(&lo->lo_ctl_mutex); 1446 err = loop_get_status_compat( 1447 lo, (struct compat_loop_info __user *) arg); 1448 mutex_unlock(&lo->lo_ctl_mutex); 1449 break; 1450 case LOOP_SET_CAPACITY: 1451 case LOOP_CLR_FD: 1452 case LOOP_GET_STATUS64: 1453 case LOOP_SET_STATUS64: 1454 arg = (unsigned long) compat_ptr(arg); 1455 case LOOP_SET_FD: 1456 case LOOP_CHANGE_FD: 1457 err = lo_ioctl(bdev, mode, cmd, arg); 1458 break; 1459 default: 1460 err = -ENOIOCTLCMD; 1461 break; 1462 } 1463 return err; 1464 } 1465 #endif 1466 1467 static int lo_open(struct block_device *bdev, fmode_t mode) 1468 { 1469 struct loop_device *lo; 1470 int err = 0; 1471 1472 mutex_lock(&loop_index_mutex); 1473 lo = bdev->bd_disk->private_data; 1474 if (!lo) { 1475 err = -ENXIO; 1476 goto out; 1477 } 1478 1479 mutex_lock(&lo->lo_ctl_mutex); 1480 lo->lo_refcnt++; 1481 mutex_unlock(&lo->lo_ctl_mutex); 1482 out: 1483 mutex_unlock(&loop_index_mutex); 1484 return err; 1485 } 1486 1487 static int lo_release(struct gendisk *disk, fmode_t mode) 1488 { 1489 struct loop_device *lo = disk->private_data; 1490 int err; 1491 1492 mutex_lock(&lo->lo_ctl_mutex); 1493 1494 if (--lo->lo_refcnt) 1495 goto out; 1496 1497 if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) { 1498 /* 1499 * In autoclear mode, stop the loop thread 1500 * and remove configuration after last close. 1501 */ 1502 err = loop_clr_fd(lo); 1503 if (!err) 1504 goto out_unlocked; 1505 } else { 1506 /* 1507 * Otherwise keep thread (if running) and config, 1508 * but flush possible ongoing bios in thread. 1509 */ 1510 loop_flush(lo); 1511 } 1512 1513 out: 1514 mutex_unlock(&lo->lo_ctl_mutex); 1515 out_unlocked: 1516 return 0; 1517 } 1518 1519 static const struct block_device_operations lo_fops = { 1520 .owner = THIS_MODULE, 1521 .open = lo_open, 1522 .release = lo_release, 1523 .ioctl = lo_ioctl, 1524 #ifdef CONFIG_COMPAT 1525 .compat_ioctl = lo_compat_ioctl, 1526 #endif 1527 }; 1528 1529 /* 1530 * And now the modules code and kernel interface. 1531 */ 1532 static int max_loop; 1533 module_param(max_loop, int, S_IRUGO); 1534 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); 1535 module_param(max_part, int, S_IRUGO); 1536 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); 1537 MODULE_LICENSE("GPL"); 1538 MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); 1539 1540 int loop_register_transfer(struct loop_func_table *funcs) 1541 { 1542 unsigned int n = funcs->number; 1543 1544 if (n >= MAX_LO_CRYPT || xfer_funcs[n]) 1545 return -EINVAL; 1546 xfer_funcs[n] = funcs; 1547 return 0; 1548 } 1549 1550 static int unregister_transfer_cb(int id, void *ptr, void *data) 1551 { 1552 struct loop_device *lo = ptr; 1553 struct loop_func_table *xfer = data; 1554 1555 mutex_lock(&lo->lo_ctl_mutex); 1556 if (lo->lo_encryption == xfer) 1557 loop_release_xfer(lo); 1558 mutex_unlock(&lo->lo_ctl_mutex); 1559 return 0; 1560 } 1561 1562 int loop_unregister_transfer(int number) 1563 { 1564 unsigned int n = number; 1565 struct loop_func_table *xfer; 1566 1567 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) 1568 return -EINVAL; 1569 1570 xfer_funcs[n] = NULL; 1571 idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); 1572 return 0; 1573 } 1574 1575 EXPORT_SYMBOL(loop_register_transfer); 1576 EXPORT_SYMBOL(loop_unregister_transfer); 1577 1578 static int loop_add(struct loop_device **l, int i) 1579 { 1580 struct loop_device *lo; 1581 struct gendisk *disk; 1582 int err; 1583 1584 lo = kzalloc(sizeof(*lo), GFP_KERNEL); 1585 if (!lo) { 1586 err = -ENOMEM; 1587 goto out; 1588 } 1589 1590 err = idr_pre_get(&loop_index_idr, GFP_KERNEL); 1591 if (err < 0) 1592 goto out_free_dev; 1593 1594 if (i >= 0) { 1595 int m; 1596 1597 /* create specific i in the index */ 1598 err = idr_get_new_above(&loop_index_idr, lo, i, &m); 1599 if (err >= 0 && i != m) { 1600 idr_remove(&loop_index_idr, m); 1601 err = -EEXIST; 1602 } 1603 } else if (i == -1) { 1604 int m; 1605 1606 /* get next free nr */ 1607 err = idr_get_new(&loop_index_idr, lo, &m); 1608 if (err >= 0) 1609 i = m; 1610 } else { 1611 err = -EINVAL; 1612 } 1613 if (err < 0) 1614 goto out_free_dev; 1615 1616 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1617 if (!lo->lo_queue) 1618 goto out_free_dev; 1619 1620 disk = lo->lo_disk = alloc_disk(1 << part_shift); 1621 if (!disk) 1622 goto out_free_queue; 1623 1624 /* 1625 * Disable partition scanning by default. The in-kernel partition 1626 * scanning can be requested individually per-device during its 1627 * setup. Userspace can always add and remove partitions from all 1628 * devices. The needed partition minors are allocated from the 1629 * extended minor space, the main loop device numbers will continue 1630 * to match the loop minors, regardless of the number of partitions 1631 * used. 1632 * 1633 * If max_part is given, partition scanning is globally enabled for 1634 * all loop devices. The minors for the main loop devices will be 1635 * multiples of max_part. 1636 * 1637 * Note: Global-for-all-devices, set-only-at-init, read-only module 1638 * parameteters like 'max_loop' and 'max_part' make things needlessly 1639 * complicated, are too static, inflexible and may surprise 1640 * userspace tools. Parameters like this in general should be avoided. 1641 */ 1642 if (!part_shift) 1643 disk->flags |= GENHD_FL_NO_PART_SCAN; 1644 disk->flags |= GENHD_FL_EXT_DEVT; 1645 mutex_init(&lo->lo_ctl_mutex); 1646 lo->lo_number = i; 1647 lo->lo_thread = NULL; 1648 init_waitqueue_head(&lo->lo_event); 1649 spin_lock_init(&lo->lo_lock); 1650 disk->major = LOOP_MAJOR; 1651 disk->first_minor = i << part_shift; 1652 disk->fops = &lo_fops; 1653 disk->private_data = lo; 1654 disk->queue = lo->lo_queue; 1655 sprintf(disk->disk_name, "loop%d", i); 1656 add_disk(disk); 1657 *l = lo; 1658 return lo->lo_number; 1659 1660 out_free_queue: 1661 blk_cleanup_queue(lo->lo_queue); 1662 out_free_dev: 1663 kfree(lo); 1664 out: 1665 return err; 1666 } 1667 1668 static void loop_remove(struct loop_device *lo) 1669 { 1670 del_gendisk(lo->lo_disk); 1671 blk_cleanup_queue(lo->lo_queue); 1672 put_disk(lo->lo_disk); 1673 kfree(lo); 1674 } 1675 1676 static int find_free_cb(int id, void *ptr, void *data) 1677 { 1678 struct loop_device *lo = ptr; 1679 struct loop_device **l = data; 1680 1681 if (lo->lo_state == Lo_unbound) { 1682 *l = lo; 1683 return 1; 1684 } 1685 return 0; 1686 } 1687 1688 static int loop_lookup(struct loop_device **l, int i) 1689 { 1690 struct loop_device *lo; 1691 int ret = -ENODEV; 1692 1693 if (i < 0) { 1694 int err; 1695 1696 err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); 1697 if (err == 1) { 1698 *l = lo; 1699 ret = lo->lo_number; 1700 } 1701 goto out; 1702 } 1703 1704 /* lookup and return a specific i */ 1705 lo = idr_find(&loop_index_idr, i); 1706 if (lo) { 1707 *l = lo; 1708 ret = lo->lo_number; 1709 } 1710 out: 1711 return ret; 1712 } 1713 1714 static struct kobject *loop_probe(dev_t dev, int *part, void *data) 1715 { 1716 struct loop_device *lo; 1717 struct kobject *kobj; 1718 int err; 1719 1720 mutex_lock(&loop_index_mutex); 1721 err = loop_lookup(&lo, MINOR(dev) >> part_shift); 1722 if (err < 0) 1723 err = loop_add(&lo, MINOR(dev) >> part_shift); 1724 if (err < 0) 1725 kobj = ERR_PTR(err); 1726 else 1727 kobj = get_disk(lo->lo_disk); 1728 mutex_unlock(&loop_index_mutex); 1729 1730 *part = 0; 1731 return kobj; 1732 } 1733 1734 static long loop_control_ioctl(struct file *file, unsigned int cmd, 1735 unsigned long parm) 1736 { 1737 struct loop_device *lo; 1738 int ret = -ENOSYS; 1739 1740 mutex_lock(&loop_index_mutex); 1741 switch (cmd) { 1742 case LOOP_CTL_ADD: 1743 ret = loop_lookup(&lo, parm); 1744 if (ret >= 0) { 1745 ret = -EEXIST; 1746 break; 1747 } 1748 ret = loop_add(&lo, parm); 1749 break; 1750 case LOOP_CTL_REMOVE: 1751 ret = loop_lookup(&lo, parm); 1752 if (ret < 0) 1753 break; 1754 mutex_lock(&lo->lo_ctl_mutex); 1755 if (lo->lo_state != Lo_unbound) { 1756 ret = -EBUSY; 1757 mutex_unlock(&lo->lo_ctl_mutex); 1758 break; 1759 } 1760 if (lo->lo_refcnt > 0) { 1761 ret = -EBUSY; 1762 mutex_unlock(&lo->lo_ctl_mutex); 1763 break; 1764 } 1765 lo->lo_disk->private_data = NULL; 1766 mutex_unlock(&lo->lo_ctl_mutex); 1767 idr_remove(&loop_index_idr, lo->lo_number); 1768 loop_remove(lo); 1769 break; 1770 case LOOP_CTL_GET_FREE: 1771 ret = loop_lookup(&lo, -1); 1772 if (ret >= 0) 1773 break; 1774 ret = loop_add(&lo, -1); 1775 } 1776 mutex_unlock(&loop_index_mutex); 1777 1778 return ret; 1779 } 1780 1781 static const struct file_operations loop_ctl_fops = { 1782 .open = nonseekable_open, 1783 .unlocked_ioctl = loop_control_ioctl, 1784 .compat_ioctl = loop_control_ioctl, 1785 .owner = THIS_MODULE, 1786 .llseek = noop_llseek, 1787 }; 1788 1789 static struct miscdevice loop_misc = { 1790 .minor = LOOP_CTRL_MINOR, 1791 .name = "loop-control", 1792 .fops = &loop_ctl_fops, 1793 }; 1794 1795 MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); 1796 MODULE_ALIAS("devname:loop-control"); 1797 1798 static int __init loop_init(void) 1799 { 1800 int i, nr; 1801 unsigned long range; 1802 struct loop_device *lo; 1803 int err; 1804 1805 err = misc_register(&loop_misc); 1806 if (err < 0) 1807 return err; 1808 1809 part_shift = 0; 1810 if (max_part > 0) { 1811 part_shift = fls(max_part); 1812 1813 /* 1814 * Adjust max_part according to part_shift as it is exported 1815 * to user space so that user can decide correct minor number 1816 * if [s]he want to create more devices. 1817 * 1818 * Note that -1 is required because partition 0 is reserved 1819 * for the whole disk. 1820 */ 1821 max_part = (1UL << part_shift) - 1; 1822 } 1823 1824 if ((1UL << part_shift) > DISK_MAX_PARTS) 1825 return -EINVAL; 1826 1827 if (max_loop > 1UL << (MINORBITS - part_shift)) 1828 return -EINVAL; 1829 1830 /* 1831 * If max_loop is specified, create that many devices upfront. 1832 * This also becomes a hard limit. If max_loop is not specified, 1833 * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module 1834 * init time. Loop devices can be requested on-demand with the 1835 * /dev/loop-control interface, or be instantiated by accessing 1836 * a 'dead' device node. 1837 */ 1838 if (max_loop) { 1839 nr = max_loop; 1840 range = max_loop << part_shift; 1841 } else { 1842 nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; 1843 range = 1UL << MINORBITS; 1844 } 1845 1846 if (register_blkdev(LOOP_MAJOR, "loop")) 1847 return -EIO; 1848 1849 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1850 THIS_MODULE, loop_probe, NULL, NULL); 1851 1852 /* pre-create number of devices given by config or max_loop */ 1853 mutex_lock(&loop_index_mutex); 1854 for (i = 0; i < nr; i++) 1855 loop_add(&lo, i); 1856 mutex_unlock(&loop_index_mutex); 1857 1858 printk(KERN_INFO "loop: module loaded\n"); 1859 return 0; 1860 } 1861 1862 static int loop_exit_cb(int id, void *ptr, void *data) 1863 { 1864 struct loop_device *lo = ptr; 1865 1866 loop_remove(lo); 1867 return 0; 1868 } 1869 1870 static void __exit loop_exit(void) 1871 { 1872 unsigned long range; 1873 1874 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; 1875 1876 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); 1877 idr_remove_all(&loop_index_idr); 1878 idr_destroy(&loop_index_idr); 1879 1880 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1881 unregister_blkdev(LOOP_MAJOR, "loop"); 1882 1883 misc_deregister(&loop_misc); 1884 } 1885 1886 module_init(loop_init); 1887 module_exit(loop_exit); 1888 1889 #ifndef MODULE 1890 static int __init max_loop_setup(char *str) 1891 { 1892 max_loop = simple_strtol(str, NULL, 0); 1893 return 1; 1894 } 1895 1896 __setup("max_loop=", max_loop_setup); 1897 #endif 1898