1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <linux/blk-integrity.h> 17 #include <uapi/linux/blkdev.h> 18 #include "blk.h" 19 #include "blk-crypto-internal.h" 20 21 static int blkpg_do_ioctl(struct block_device *bdev, 22 struct blkpg_partition __user *upart, int op) 23 { 24 struct gendisk *disk = bdev->bd_disk; 25 struct blkpg_partition p; 26 sector_t start, length, capacity, end; 27 28 if (!capable(CAP_SYS_ADMIN)) 29 return -EACCES; 30 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 31 return -EFAULT; 32 if (bdev_is_partition(bdev)) 33 return -EINVAL; 34 35 if (p.pno <= 0) 36 return -EINVAL; 37 38 if (op == BLKPG_DEL_PARTITION) 39 return bdev_del_partition(disk, p.pno); 40 41 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 42 return -EINVAL; 43 /* Check that the partition is aligned to the block size */ 44 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 45 return -EINVAL; 46 47 start = p.start >> SECTOR_SHIFT; 48 length = p.length >> SECTOR_SHIFT; 49 capacity = get_capacity(disk); 50 51 if (check_add_overflow(start, length, &end)) 52 return -EINVAL; 53 54 if (start >= capacity || end > capacity) 55 return -EINVAL; 56 57 switch (op) { 58 case BLKPG_ADD_PARTITION: 59 return bdev_add_partition(disk, p.pno, start, length); 60 case BLKPG_RESIZE_PARTITION: 61 return bdev_resize_partition(disk, p.pno, start, length); 62 default: 63 return -EINVAL; 64 } 65 } 66 67 static int blkpg_ioctl(struct block_device *bdev, 68 struct blkpg_ioctl_arg __user *arg) 69 { 70 struct blkpg_partition __user *udata; 71 int op; 72 73 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 74 return -EFAULT; 75 76 return blkpg_do_ioctl(bdev, udata, op); 77 } 78 79 #ifdef CONFIG_COMPAT 80 struct compat_blkpg_ioctl_arg { 81 compat_int_t op; 82 compat_int_t flags; 83 compat_int_t datalen; 84 compat_caddr_t data; 85 }; 86 87 static int compat_blkpg_ioctl(struct block_device *bdev, 88 struct compat_blkpg_ioctl_arg __user *arg) 89 { 90 compat_caddr_t udata; 91 int op; 92 93 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 94 return -EFAULT; 95 96 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 97 } 98 #endif 99 100 /* 101 * Check that [start, start + len) is a valid range from the block device's 102 * perspective, including verifying that it can be correctly translated into 103 * logical block addresses. 104 */ 105 static int blk_validate_byte_range(struct block_device *bdev, 106 uint64_t start, uint64_t len) 107 { 108 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 109 uint64_t end; 110 111 if ((start | len) & bs_mask) 112 return -EINVAL; 113 if (!len) 114 return -EINVAL; 115 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 116 return -EINVAL; 117 118 return 0; 119 } 120 121 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 122 unsigned long arg) 123 { 124 uint64_t range[2], start, len; 125 struct bio *prev = NULL, *bio; 126 sector_t sector, nr_sects; 127 struct blk_plug plug; 128 int err; 129 130 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 131 return -EFAULT; 132 start = range[0]; 133 len = range[1]; 134 135 if (!bdev_max_discard_sectors(bdev)) 136 return -EOPNOTSUPP; 137 138 if (!(mode & BLK_OPEN_WRITE)) 139 return -EBADF; 140 if (bdev_read_only(bdev)) 141 return -EPERM; 142 err = blk_validate_byte_range(bdev, start, len); 143 if (err) 144 return err; 145 146 inode_lock(bdev->bd_mapping->host); 147 filemap_invalidate_lock(bdev->bd_mapping); 148 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 149 if (err) 150 goto fail; 151 152 sector = start >> SECTOR_SHIFT; 153 nr_sects = len >> SECTOR_SHIFT; 154 155 blk_start_plug(&plug); 156 while (!fatal_signal_pending(current)) { 157 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 158 GFP_KERNEL); 159 if (!bio) 160 break; 161 prev = bio_chain_and_submit(prev, bio); 162 } 163 if (prev) { 164 err = bio_submit_or_kill(prev, BLKDEV_ZERO_KILLABLE); 165 if (err == -EOPNOTSUPP) 166 err = 0; 167 bio_put(prev); 168 } 169 blk_finish_plug(&plug); 170 fail: 171 filemap_invalidate_unlock(bdev->bd_mapping); 172 inode_unlock(bdev->bd_mapping->host); 173 return err; 174 } 175 176 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 177 void __user *argp) 178 { 179 uint64_t start, len, end; 180 uint64_t range[2]; 181 int err; 182 183 if (!(mode & BLK_OPEN_WRITE)) 184 return -EBADF; 185 if (!bdev_max_secure_erase_sectors(bdev)) 186 return -EOPNOTSUPP; 187 if (copy_from_user(range, argp, sizeof(range))) 188 return -EFAULT; 189 190 start = range[0]; 191 len = range[1]; 192 if ((start & 511) || (len & 511)) 193 return -EINVAL; 194 if (check_add_overflow(start, len, &end) || 195 end > bdev_nr_bytes(bdev)) 196 return -EINVAL; 197 198 inode_lock(bdev->bd_mapping->host); 199 filemap_invalidate_lock(bdev->bd_mapping); 200 err = truncate_bdev_range(bdev, mode, start, end - 1); 201 if (!err) 202 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 203 GFP_KERNEL); 204 filemap_invalidate_unlock(bdev->bd_mapping); 205 inode_unlock(bdev->bd_mapping->host); 206 return err; 207 } 208 209 210 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 211 unsigned long arg) 212 { 213 uint64_t range[2]; 214 uint64_t start, end, len; 215 int err; 216 217 if (!(mode & BLK_OPEN_WRITE)) 218 return -EBADF; 219 220 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 221 return -EFAULT; 222 223 start = range[0]; 224 len = range[1]; 225 end = start + len - 1; 226 227 if (start & 511) 228 return -EINVAL; 229 if (len & 511) 230 return -EINVAL; 231 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 232 return -EINVAL; 233 if (end < start) 234 return -EINVAL; 235 236 /* Invalidate the page cache, including dirty pages */ 237 inode_lock(bdev->bd_mapping->host); 238 filemap_invalidate_lock(bdev->bd_mapping); 239 err = truncate_bdev_range(bdev, mode, start, end); 240 if (err) 241 goto fail; 242 243 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 244 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 245 246 fail: 247 filemap_invalidate_unlock(bdev->bd_mapping); 248 inode_unlock(bdev->bd_mapping->host); 249 return err; 250 } 251 252 static int put_ushort(unsigned short __user *argp, unsigned short val) 253 { 254 return put_user(val, argp); 255 } 256 257 static int put_int(int __user *argp, int val) 258 { 259 return put_user(val, argp); 260 } 261 262 static int put_uint(unsigned int __user *argp, unsigned int val) 263 { 264 return put_user(val, argp); 265 } 266 267 static int put_long(long __user *argp, long val) 268 { 269 return put_user(val, argp); 270 } 271 272 static int put_ulong(unsigned long __user *argp, unsigned long val) 273 { 274 return put_user(val, argp); 275 } 276 277 static int put_u64(u64 __user *argp, u64 val) 278 { 279 return put_user(val, argp); 280 } 281 282 #ifdef CONFIG_COMPAT 283 static int compat_put_long(compat_long_t __user *argp, long val) 284 { 285 return put_user(val, argp); 286 } 287 288 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 289 { 290 return put_user(val, argp); 291 } 292 #endif 293 294 #ifdef CONFIG_COMPAT 295 /* 296 * This is the equivalent of compat_ptr_ioctl(), to be used by block 297 * drivers that implement only commands that are completely compatible 298 * between 32-bit and 64-bit user space 299 */ 300 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 301 unsigned cmd, unsigned long arg) 302 { 303 struct gendisk *disk = bdev->bd_disk; 304 305 if (disk->fops->ioctl) 306 return disk->fops->ioctl(bdev, mode, cmd, 307 (unsigned long)compat_ptr(arg)); 308 309 return -ENOIOCTLCMD; 310 } 311 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 312 #endif 313 314 enum pr_direction { 315 PR_IN, /* read from device */ 316 PR_OUT, /* write to device */ 317 }; 318 319 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode, 320 enum pr_direction dir) 321 { 322 /* no sense to make reservations for partitions */ 323 if (bdev_is_partition(bdev)) 324 return false; 325 326 if (capable(CAP_SYS_ADMIN)) 327 return true; 328 329 /* 330 * Only allow unprivileged reservation _out_ commands if the file 331 * descriptor is open for writing. Allow reservation _in_ commands if 332 * the file descriptor is open for reading since they do not modify the 333 * device. 334 */ 335 if (dir == PR_IN) 336 return mode & BLK_OPEN_READ; 337 else 338 return mode & BLK_OPEN_WRITE; 339 } 340 341 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 342 struct pr_registration __user *arg) 343 { 344 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 345 struct pr_registration reg; 346 347 if (!blkdev_pr_allowed(bdev, mode, PR_OUT)) 348 return -EPERM; 349 if (!ops || !ops->pr_register) 350 return -EOPNOTSUPP; 351 if (copy_from_user(®, arg, sizeof(reg))) 352 return -EFAULT; 353 354 if (reg.flags & ~PR_FL_IGNORE_KEY) 355 return -EOPNOTSUPP; 356 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 357 } 358 359 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 360 struct pr_reservation __user *arg) 361 { 362 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 363 struct pr_reservation rsv; 364 365 if (!blkdev_pr_allowed(bdev, mode, PR_OUT)) 366 return -EPERM; 367 if (!ops || !ops->pr_reserve) 368 return -EOPNOTSUPP; 369 if (copy_from_user(&rsv, arg, sizeof(rsv))) 370 return -EFAULT; 371 372 if (rsv.flags & ~PR_FL_IGNORE_KEY) 373 return -EOPNOTSUPP; 374 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 375 } 376 377 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 378 struct pr_reservation __user *arg) 379 { 380 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 381 struct pr_reservation rsv; 382 383 if (!blkdev_pr_allowed(bdev, mode, PR_OUT)) 384 return -EPERM; 385 if (!ops || !ops->pr_release) 386 return -EOPNOTSUPP; 387 if (copy_from_user(&rsv, arg, sizeof(rsv))) 388 return -EFAULT; 389 390 if (rsv.flags) 391 return -EOPNOTSUPP; 392 return ops->pr_release(bdev, rsv.key, rsv.type); 393 } 394 395 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 396 struct pr_preempt __user *arg, bool abort) 397 { 398 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 399 struct pr_preempt p; 400 401 if (!blkdev_pr_allowed(bdev, mode, PR_OUT)) 402 return -EPERM; 403 if (!ops || !ops->pr_preempt) 404 return -EOPNOTSUPP; 405 if (copy_from_user(&p, arg, sizeof(p))) 406 return -EFAULT; 407 408 if (p.flags) 409 return -EOPNOTSUPP; 410 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 411 } 412 413 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 414 struct pr_clear __user *arg) 415 { 416 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 417 struct pr_clear c; 418 419 if (!blkdev_pr_allowed(bdev, mode, PR_OUT)) 420 return -EPERM; 421 if (!ops || !ops->pr_clear) 422 return -EOPNOTSUPP; 423 if (copy_from_user(&c, arg, sizeof(c))) 424 return -EFAULT; 425 426 if (c.flags) 427 return -EOPNOTSUPP; 428 return ops->pr_clear(bdev, c.key); 429 } 430 431 static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, 432 struct pr_read_keys __user *arg) 433 { 434 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 435 struct pr_keys *keys_info; 436 struct pr_read_keys read_keys; 437 u64 __user *keys_ptr; 438 size_t keys_info_len; 439 size_t keys_copy_len; 440 int ret; 441 442 if (!blkdev_pr_allowed(bdev, mode, PR_IN)) 443 return -EPERM; 444 if (!ops || !ops->pr_read_keys) 445 return -EOPNOTSUPP; 446 447 if (copy_from_user(&read_keys, arg, sizeof(read_keys))) 448 return -EFAULT; 449 450 if (read_keys.num_keys > PR_KEYS_MAX) 451 return -EINVAL; 452 453 keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); 454 455 keys_info = kvzalloc(keys_info_len, GFP_KERNEL); 456 if (!keys_info) 457 return -ENOMEM; 458 459 keys_info->num_keys = read_keys.num_keys; 460 461 ret = ops->pr_read_keys(bdev, keys_info); 462 if (ret) 463 goto out; 464 465 /* Copy out individual keys */ 466 keys_ptr = u64_to_user_ptr(read_keys.keys_ptr); 467 keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) * 468 sizeof(keys_info->keys[0]); 469 470 if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) { 471 ret = -EFAULT; 472 goto out; 473 } 474 475 /* Copy out the arg struct */ 476 read_keys.generation = keys_info->generation; 477 read_keys.num_keys = keys_info->num_keys; 478 479 if (copy_to_user(arg, &read_keys, sizeof(read_keys))) 480 ret = -EFAULT; 481 out: 482 kvfree(keys_info); 483 return ret; 484 } 485 486 static int blkdev_pr_read_reservation(struct block_device *bdev, 487 blk_mode_t mode, struct pr_read_reservation __user *arg) 488 { 489 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 490 struct pr_held_reservation rsv = {}; 491 struct pr_read_reservation out = {}; 492 int ret; 493 494 if (!blkdev_pr_allowed(bdev, mode, PR_IN)) 495 return -EPERM; 496 if (!ops || !ops->pr_read_reservation) 497 return -EOPNOTSUPP; 498 499 ret = ops->pr_read_reservation(bdev, &rsv); 500 if (ret) 501 return ret; 502 503 out.key = rsv.key; 504 out.generation = rsv.generation; 505 out.type = rsv.type; 506 507 if (copy_to_user(arg, &out, sizeof(out))) 508 return -EFAULT; 509 return 0; 510 } 511 512 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 513 unsigned long arg) 514 { 515 if (!capable(CAP_SYS_ADMIN)) 516 return -EACCES; 517 518 mutex_lock(&bdev->bd_holder_lock); 519 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 520 bdev->bd_holder_ops->sync(bdev); 521 else { 522 mutex_unlock(&bdev->bd_holder_lock); 523 sync_blockdev(bdev); 524 } 525 526 invalidate_bdev(bdev); 527 return 0; 528 } 529 530 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 531 unsigned long arg) 532 { 533 int ret, n; 534 535 if (!capable(CAP_SYS_ADMIN)) 536 return -EACCES; 537 538 if (get_user(n, (int __user *)arg)) 539 return -EFAULT; 540 if (bdev->bd_disk->fops->set_read_only) { 541 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 542 if (ret) 543 return ret; 544 } 545 if (n) 546 bdev_set_flag(bdev, BD_READ_ONLY); 547 else 548 bdev_clear_flag(bdev, BD_READ_ONLY); 549 return 0; 550 } 551 552 static int blkdev_getgeo(struct block_device *bdev, 553 struct hd_geometry __user *argp) 554 { 555 struct gendisk *disk = bdev->bd_disk; 556 struct hd_geometry geo; 557 int ret; 558 559 if (!argp) 560 return -EINVAL; 561 if (!disk->fops->getgeo) 562 return -ENOTTY; 563 564 /* 565 * We need to set the startsect first, the driver may 566 * want to override it. 567 */ 568 memset(&geo, 0, sizeof(geo)); 569 geo.start = get_start_sect(bdev); 570 ret = disk->fops->getgeo(disk, &geo); 571 if (ret) 572 return ret; 573 if (copy_to_user(argp, &geo, sizeof(geo))) 574 return -EFAULT; 575 return 0; 576 } 577 578 #ifdef CONFIG_COMPAT 579 struct compat_hd_geometry { 580 unsigned char heads; 581 unsigned char sectors; 582 unsigned short cylinders; 583 u32 start; 584 }; 585 586 static int compat_hdio_getgeo(struct block_device *bdev, 587 struct compat_hd_geometry __user *ugeo) 588 { 589 struct gendisk *disk = bdev->bd_disk; 590 struct hd_geometry geo; 591 int ret; 592 593 if (!ugeo) 594 return -EINVAL; 595 if (!disk->fops->getgeo) 596 return -ENOTTY; 597 598 memset(&geo, 0, sizeof(geo)); 599 /* 600 * We need to set the startsect first, the driver may 601 * want to override it. 602 */ 603 geo.start = get_start_sect(bdev); 604 ret = disk->fops->getgeo(disk, &geo); 605 if (ret) 606 return ret; 607 608 ret = copy_to_user(ugeo, &geo, 4); 609 ret |= put_user(geo.start, &ugeo->start); 610 if (ret) 611 ret = -EFAULT; 612 613 return ret; 614 } 615 #endif 616 617 /* set the logical block size */ 618 static int blkdev_bszset(struct file *file, blk_mode_t mode, 619 int __user *argp) 620 { 621 // this one might be file_inode(file)->i_rdev - a rare valid 622 // use of file_inode() for those. 623 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 624 struct file *excl_file; 625 int ret, n; 626 627 if (!capable(CAP_SYS_ADMIN)) 628 return -EACCES; 629 if (!argp) 630 return -EINVAL; 631 if (get_user(n, argp)) 632 return -EFAULT; 633 634 if (mode & BLK_OPEN_EXCL) 635 return set_blocksize(file, n); 636 637 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 638 if (IS_ERR(excl_file)) 639 return -EBUSY; 640 ret = set_blocksize(excl_file, n); 641 fput(excl_file); 642 return ret; 643 } 644 645 /* 646 * Common commands that are handled the same way on native and compat 647 * user space. Note the separate arg/argp parameters that are needed 648 * to deal with the compat_ptr() conversion. 649 */ 650 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 651 unsigned int cmd, unsigned long arg, 652 void __user *argp) 653 { 654 unsigned int max_sectors; 655 656 switch (cmd) { 657 case BLKFLSBUF: 658 return blkdev_flushbuf(bdev, cmd, arg); 659 case BLKROSET: 660 return blkdev_roset(bdev, cmd, arg); 661 case BLKDISCARD: 662 return blk_ioctl_discard(bdev, mode, arg); 663 case BLKSECDISCARD: 664 return blk_ioctl_secure_erase(bdev, mode, argp); 665 case BLKZEROOUT: 666 return blk_ioctl_zeroout(bdev, mode, arg); 667 case BLKGETDISKSEQ: 668 return put_u64(argp, bdev->bd_disk->diskseq); 669 case BLKREPORTZONE: 670 case BLKREPORTZONEV2: 671 return blkdev_report_zones_ioctl(bdev, cmd, arg); 672 case BLKRESETZONE: 673 case BLKOPENZONE: 674 case BLKCLOSEZONE: 675 case BLKFINISHZONE: 676 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 677 case BLKGETZONESZ: 678 return put_uint(argp, bdev_zone_sectors(bdev)); 679 case BLKGETNRZONES: 680 return put_uint(argp, bdev_nr_zones(bdev)); 681 case BLKROGET: 682 return put_int(argp, bdev_read_only(bdev) != 0); 683 case BLKSSZGET: /* get block device logical block size */ 684 return put_int(argp, bdev_logical_block_size(bdev)); 685 case BLKPBSZGET: /* get block device physical block size */ 686 return put_uint(argp, bdev_physical_block_size(bdev)); 687 case BLKIOMIN: 688 return put_uint(argp, bdev_io_min(bdev)); 689 case BLKIOOPT: 690 return put_uint(argp, bdev_io_opt(bdev)); 691 case BLKALIGNOFF: 692 return put_int(argp, bdev_alignment_offset(bdev)); 693 case BLKDISCARDZEROES: 694 return put_uint(argp, 0); 695 case BLKSECTGET: 696 max_sectors = min_t(unsigned int, USHRT_MAX, 697 queue_max_sectors(bdev_get_queue(bdev))); 698 return put_ushort(argp, max_sectors); 699 case BLKROTATIONAL: 700 return put_ushort(argp, bdev_rot(bdev)); 701 case BLKRASET: 702 case BLKFRASET: 703 if(!capable(CAP_SYS_ADMIN)) 704 return -EACCES; 705 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 706 return 0; 707 case BLKRRPART: 708 if (!capable(CAP_SYS_ADMIN)) 709 return -EACCES; 710 if (bdev_is_partition(bdev)) 711 return -EINVAL; 712 return disk_scan_partitions(bdev->bd_disk, 713 mode | BLK_OPEN_STRICT_SCAN); 714 case BLKTRACESTART: 715 case BLKTRACESTOP: 716 case BLKTRACETEARDOWN: 717 return blk_trace_ioctl(bdev, cmd, argp); 718 case BLKCRYPTOIMPORTKEY: 719 case BLKCRYPTOGENERATEKEY: 720 case BLKCRYPTOPREPAREKEY: 721 return blk_crypto_ioctl(bdev, cmd, argp); 722 case IOC_PR_REGISTER: 723 return blkdev_pr_register(bdev, mode, argp); 724 case IOC_PR_RESERVE: 725 return blkdev_pr_reserve(bdev, mode, argp); 726 case IOC_PR_RELEASE: 727 return blkdev_pr_release(bdev, mode, argp); 728 case IOC_PR_PREEMPT: 729 return blkdev_pr_preempt(bdev, mode, argp, false); 730 case IOC_PR_PREEMPT_ABORT: 731 return blkdev_pr_preempt(bdev, mode, argp, true); 732 case IOC_PR_CLEAR: 733 return blkdev_pr_clear(bdev, mode, argp); 734 case IOC_PR_READ_KEYS: 735 return blkdev_pr_read_keys(bdev, mode, argp); 736 case IOC_PR_READ_RESERVATION: 737 return blkdev_pr_read_reservation(bdev, mode, argp); 738 default: 739 return blk_get_meta_cap(bdev, cmd, argp); 740 } 741 } 742 743 /* 744 * Always keep this in sync with compat_blkdev_ioctl() 745 * to handle all incompatible commands in both functions. 746 * 747 * New commands must be compatible and go into blkdev_common_ioctl 748 */ 749 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 750 { 751 struct block_device *bdev = I_BDEV(file->f_mapping->host); 752 void __user *argp = (void __user *)arg; 753 blk_mode_t mode = file_to_blk_mode(file); 754 int ret; 755 756 switch (cmd) { 757 /* These need separate implementations for the data structure */ 758 case HDIO_GETGEO: 759 return blkdev_getgeo(bdev, argp); 760 case BLKPG: 761 return blkpg_ioctl(bdev, argp); 762 763 /* Compat mode returns 32-bit data instead of 'long' */ 764 case BLKRAGET: 765 case BLKFRAGET: 766 if (!argp) 767 return -EINVAL; 768 return put_long(argp, 769 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 770 case BLKGETSIZE: 771 if (bdev_nr_sectors(bdev) > ~0UL) 772 return -EFBIG; 773 return put_ulong(argp, bdev_nr_sectors(bdev)); 774 775 /* The data is compatible, but the command number is different */ 776 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 777 return put_int(argp, block_size(bdev)); 778 case BLKBSZSET: 779 return blkdev_bszset(file, mode, argp); 780 case BLKGETSIZE64: 781 return put_u64(argp, bdev_nr_bytes(bdev)); 782 783 /* Incompatible alignment on i386 */ 784 case BLKTRACESETUP: 785 case BLKTRACESETUP2: 786 return blk_trace_ioctl(bdev, cmd, argp); 787 default: 788 break; 789 } 790 791 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 792 if (ret != -ENOIOCTLCMD) 793 return ret; 794 795 if (!bdev->bd_disk->fops->ioctl) 796 return -ENOTTY; 797 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 798 } 799 800 #ifdef CONFIG_COMPAT 801 802 #define BLKBSZGET_32 _IOR(0x12, 112, int) 803 #define BLKBSZSET_32 _IOW(0x12, 113, int) 804 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 805 806 /* Most of the generic ioctls are handled in the normal fallback path. 807 This assumes the blkdev's low level compat_ioctl always returns 808 ENOIOCTLCMD for unknown ioctls. */ 809 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 810 { 811 int ret; 812 void __user *argp = compat_ptr(arg); 813 struct block_device *bdev = I_BDEV(file->f_mapping->host); 814 struct gendisk *disk = bdev->bd_disk; 815 blk_mode_t mode = file_to_blk_mode(file); 816 817 switch (cmd) { 818 /* These need separate implementations for the data structure */ 819 case HDIO_GETGEO: 820 return compat_hdio_getgeo(bdev, argp); 821 case BLKPG: 822 return compat_blkpg_ioctl(bdev, argp); 823 824 /* Compat mode returns 32-bit data instead of 'long' */ 825 case BLKRAGET: 826 case BLKFRAGET: 827 if (!argp) 828 return -EINVAL; 829 return compat_put_long(argp, 830 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 831 case BLKGETSIZE: 832 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 833 return -EFBIG; 834 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 835 836 /* The data is compatible, but the command number is different */ 837 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 838 return put_int(argp, bdev_logical_block_size(bdev)); 839 case BLKBSZSET_32: 840 return blkdev_bszset(file, mode, argp); 841 case BLKGETSIZE64_32: 842 return put_u64(argp, bdev_nr_bytes(bdev)); 843 844 /* Incompatible alignment on i386 */ 845 case BLKTRACESETUP32: 846 return blk_trace_ioctl(bdev, cmd, argp); 847 default: 848 break; 849 } 850 851 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 852 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 853 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 854 855 return ret; 856 } 857 #endif 858 859 struct blk_iou_cmd { 860 int res; 861 bool nowait; 862 }; 863 864 static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw) 865 { 866 struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); 867 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 868 869 if (bic->res == -EAGAIN && bic->nowait) 870 io_uring_cmd_issue_blocking(cmd); 871 else 872 io_uring_cmd_done(cmd, bic->res, 873 IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); 874 } 875 876 static void bio_cmd_bio_end_io(struct bio *bio) 877 { 878 struct io_uring_cmd *cmd = bio->bi_private; 879 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 880 881 if (unlikely(bio->bi_status) && !bic->res) 882 bic->res = blk_status_to_errno(bio->bi_status); 883 884 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 885 bio_put(bio); 886 } 887 888 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 889 struct block_device *bdev, 890 uint64_t start, uint64_t len, bool nowait) 891 { 892 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 893 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 894 sector_t sector = start >> SECTOR_SHIFT; 895 sector_t nr_sects = len >> SECTOR_SHIFT; 896 struct bio *prev = NULL, *bio; 897 int err; 898 899 if (!bdev_max_discard_sectors(bdev)) 900 return -EOPNOTSUPP; 901 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 902 return -EBADF; 903 if (bdev_read_only(bdev)) 904 return -EPERM; 905 err = blk_validate_byte_range(bdev, start, len); 906 if (err) 907 return err; 908 909 err = filemap_invalidate_pages(bdev->bd_mapping, start, 910 start + len - 1, nowait); 911 if (err) 912 return err; 913 914 while (true) { 915 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 916 if (!bio) 917 break; 918 if (nowait) { 919 /* 920 * Don't allow multi-bio non-blocking submissions as 921 * subsequent bios may fail but we won't get a direct 922 * indication of that. Normally, the caller should 923 * retry from a blocking context. 924 */ 925 if (unlikely(nr_sects)) { 926 bio_put(bio); 927 return -EAGAIN; 928 } 929 bio->bi_opf |= REQ_NOWAIT; 930 } 931 932 prev = bio_chain_and_submit(prev, bio); 933 } 934 if (unlikely(!prev)) 935 return -EAGAIN; 936 if (unlikely(nr_sects)) 937 bic->res = -EAGAIN; 938 939 prev->bi_private = cmd; 940 prev->bi_end_io = bio_cmd_bio_end_io; 941 submit_bio(prev); 942 return -EIOCBQUEUED; 943 } 944 945 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 946 { 947 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 948 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 949 const struct io_uring_sqe *sqe = cmd->sqe; 950 u32 cmd_op = cmd->cmd_op; 951 uint64_t start, len; 952 953 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 954 sqe->rw_flags || sqe->file_index)) 955 return -EINVAL; 956 957 bic->res = 0; 958 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 959 960 start = READ_ONCE(sqe->addr); 961 len = READ_ONCE(sqe->addr3); 962 963 switch (cmd_op) { 964 case BLOCK_URING_CMD_DISCARD: 965 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 966 } 967 return -EINVAL; 968 } 969