// SPDX-License-Identifier: GPL-2.0
#include <linux/capability.h>
#include <linux/compat.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/blkpg.h>
#include <linux/hdreg.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
#include <linux/pagemap.h>
#include <linux/io_uring/cmd.h>
#include <linux/blk-integrity.h>
#include <uapi/linux/blkdev.h>
#include "blk.h"
#include "blk-crypto-internal.h"

static int blkpg_do_ioctl(struct block_device *bdev,
                          struct blkpg_partition __user *upart, int op)
{
        struct gendisk *disk = bdev->bd_disk;
        struct blkpg_partition p;
        sector_t start, length, capacity, end;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&p, upart, sizeof(struct blkpg_partition)))
                return -EFAULT;
        if (bdev_is_partition(bdev))
                return -EINVAL;

        if (p.pno <= 0)
                return -EINVAL;

        if (op == BLKPG_DEL_PARTITION)
                return bdev_del_partition(disk, p.pno);

        if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start)
                return -EINVAL;
        /* Check that the partition is aligned to the block size */
        if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev)))
                return -EINVAL;

        start = p.start >> SECTOR_SHIFT;
        length = p.length >> SECTOR_SHIFT;
        capacity = get_capacity(disk);

        if (check_add_overflow(start, length, &end))
                return -EINVAL;

        if (start >= capacity || end > capacity)
                return -EINVAL;

        switch (op) {
        case BLKPG_ADD_PARTITION:
                return bdev_add_partition(disk, p.pno, start, length);
        case BLKPG_RESIZE_PARTITION:
                return bdev_resize_partition(disk, p.pno, start, length);
        default:
                return -EINVAL;
        }
}

static int blkpg_ioctl(struct block_device *bdev,
                       struct blkpg_ioctl_arg __user *arg)
{
        struct blkpg_partition __user *udata;
        int op;

        if (get_user(op, &arg->op) || get_user(udata, &arg->data))
                return -EFAULT;

        return blkpg_do_ioctl(bdev, udata, op);
}

#ifdef CONFIG_COMPAT
struct compat_blkpg_ioctl_arg {
        compat_int_t op;
        compat_int_t flags;
        compat_int_t datalen;
        compat_caddr_t data;
};

static int compat_blkpg_ioctl(struct block_device *bdev,
                              struct compat_blkpg_ioctl_arg __user *arg)
{
        compat_caddr_t udata;
        int op;

        if (get_user(op, &arg->op) || get_user(udata, &arg->data))
                return -EFAULT;

        return blkpg_do_ioctl(bdev, compat_ptr(udata), op);
}
#endif
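
/*
 * Hypothetical userspace sketch (not part of this file): adding a
 * partition through BLKPG.  Field values are illustrative; start and
 * length are byte offsets and must be aligned to the logical block
 * size, and the caller needs CAP_SYS_ADMIN on the whole-disk device.
 *
 *      struct blkpg_partition part = {
 *              .start = 1 << 20,       // byte offset of the new partition
 *              .length = 1 << 30,      // size in bytes
 *              .pno = 1,               // partition number to create
 *      };
 *      struct blkpg_ioctl_arg arg = {
 *              .op = BLKPG_ADD_PARTITION,
 *              .data = &part,
 *      };
 *
 *      if (ioctl(fd, BLKPG, &arg))
 *              perror("BLKPG");
 */
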
/*
 * Check that [start, start + len) is a valid range from the block device's
 * perspective, including verifying that it can be correctly translated into
 * logical block addresses.
 */
static int blk_validate_byte_range(struct block_device *bdev,
                                   uint64_t start, uint64_t len)
{
        unsigned int bs_mask = bdev_logical_block_size(bdev) - 1;
        uint64_t end;

        if ((start | len) & bs_mask)
                return -EINVAL;
        if (!len)
                return -EINVAL;
        if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev))
                return -EINVAL;

        return 0;
}

static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
                unsigned long arg)
{
        uint64_t range[2], start, len;
        struct bio *prev = NULL, *bio;
        sector_t sector, nr_sects;
        struct blk_plug plug;
        int err;

        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
                return -EFAULT;
        start = range[0];
        len = range[1];

        if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;

        if (!(mode & BLK_OPEN_WRITE))
                return -EBADF;
        if (bdev_read_only(bdev))
                return -EPERM;
        err = blk_validate_byte_range(bdev, start, len);
        if (err)
                return err;

        inode_lock(bdev->bd_mapping->host);
        filemap_invalidate_lock(bdev->bd_mapping);
        err = truncate_bdev_range(bdev, mode, start, start + len - 1);
        if (err)
                goto fail;

        sector = start >> SECTOR_SHIFT;
        nr_sects = len >> SECTOR_SHIFT;

        blk_start_plug(&plug);
        while (1) {
                if (fatal_signal_pending(current)) {
                        if (prev)
                                bio_await_chain(prev);
                        err = -EINTR;
                        goto out_unplug;
                }
                bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
                                GFP_KERNEL);
                if (!bio)
                        break;
                prev = bio_chain_and_submit(prev, bio);
        }
        if (prev) {
                err = submit_bio_wait(prev);
                if (err == -EOPNOTSUPP)
                        err = 0;
                bio_put(prev);
        }
out_unplug:
        blk_finish_plug(&plug);
fail:
        filemap_invalidate_unlock(bdev->bd_mapping);
        inode_unlock(bdev->bd_mapping->host);
        return err;
}
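
/*
 * Hypothetical userspace sketch (not part of this file): discarding the
 * first megabyte of a device.  The argument is a {start, length} pair of
 * byte offsets, both aligned to the logical block size; the device name
 * is illustrative.
 *
 *      uint64_t range[2] = { 0, 1 << 20 };
 *
 *      int fd = open("/dev/sdX", O_WRONLY);
 *      if (ioctl(fd, BLKDISCARD, &range))
 *              perror("BLKDISCARD");
 */
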
static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
                void __user *argp)
{
        uint64_t start, len, end;
        uint64_t range[2];
        int err;

        if (!(mode & BLK_OPEN_WRITE))
                return -EBADF;
        if (!bdev_max_secure_erase_sectors(bdev))
                return -EOPNOTSUPP;
        if (copy_from_user(range, argp, sizeof(range)))
                return -EFAULT;

        start = range[0];
        len = range[1];
        if ((start & 511) || (len & 511))
                return -EINVAL;
        if (check_add_overflow(start, len, &end) ||
            end > bdev_nr_bytes(bdev))
                return -EINVAL;

        inode_lock(bdev->bd_mapping->host);
        filemap_invalidate_lock(bdev->bd_mapping);
        err = truncate_bdev_range(bdev, mode, start, end - 1);
        if (!err)
                err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
                                                GFP_KERNEL);
        filemap_invalidate_unlock(bdev->bd_mapping);
        inode_unlock(bdev->bd_mapping->host);
        return err;
}

static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
                unsigned long arg)
{
        uint64_t range[2];
        uint64_t start, end, len;
        int err;

        if (!(mode & BLK_OPEN_WRITE))
                return -EBADF;

        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
                return -EFAULT;

        start = range[0];
        len = range[1];
        end = start + len - 1;

        if (start & 511)
                return -EINVAL;
        if (len & 511)
                return -EINVAL;
        if (end >= (uint64_t)bdev_nr_bytes(bdev))
                return -EINVAL;
        if (end < start)
                return -EINVAL;

        /* Invalidate the page cache, including dirty pages */
        inode_lock(bdev->bd_mapping->host);
        filemap_invalidate_lock(bdev->bd_mapping);
        err = truncate_bdev_range(bdev, mode, start, end);
        if (err)
                goto fail;

        err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
                                   BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE);
fail:
        filemap_invalidate_unlock(bdev->bd_mapping);
        inode_unlock(bdev->bd_mapping->host);
        return err;
}
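
/*
 * Hypothetical userspace sketch (not part of this file): BLKSECDISCARD
 * and BLKZEROOUT take the same {start, length} pair as BLKDISCARD, but
 * as the checks above show, they only require 512-byte alignment rather
 * than logical-block alignment.  Offsets are illustrative.
 *
 *      uint64_t range[2] = { 4096, 8192 };
 *
 *      if (ioctl(fd, BLKZEROOUT, &range))      // fd open for writing
 *              perror("BLKZEROOUT");
 */
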
static int put_ushort(unsigned short __user *argp, unsigned short val)
{
        return put_user(val, argp);
}

static int put_int(int __user *argp, int val)
{
        return put_user(val, argp);
}

static int put_uint(unsigned int __user *argp, unsigned int val)
{
        return put_user(val, argp);
}

static int put_long(long __user *argp, long val)
{
        return put_user(val, argp);
}

static int put_ulong(unsigned long __user *argp, unsigned long val)
{
        return put_user(val, argp);
}

static int put_u64(u64 __user *argp, u64 val)
{
        return put_user(val, argp);
}

#ifdef CONFIG_COMPAT
static int compat_put_long(compat_long_t __user *argp, long val)
{
        return put_user(val, argp);
}

static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val)
{
        return put_user(val, argp);
}
#endif

#ifdef CONFIG_COMPAT
/*
 * This is the equivalent of compat_ptr_ioctl(), to be used by block
 * drivers that implement only commands that are completely compatible
 * between 32-bit and 64-bit user space
 */
int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode,
                        unsigned cmd, unsigned long arg)
{
        struct gendisk *disk = bdev->bd_disk;

        if (disk->fops->ioctl)
                return disk->fops->ioctl(bdev, mode, cmd,
                                         (unsigned long)compat_ptr(arg));

        return -ENOIOCTLCMD;
}
EXPORT_SYMBOL(blkdev_compat_ptr_ioctl);
#endif

enum pr_direction {
        PR_IN,  /* read from device */
        PR_OUT, /* write to device */
};

static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode,
                              enum pr_direction dir)
{
        /* no sense to make reservations for partitions */
        if (bdev_is_partition(bdev))
                return false;

        if (capable(CAP_SYS_ADMIN))
                return true;

        /*
         * Only allow unprivileged reservation _out_ commands if the file
         * descriptor is open for writing. Allow reservation _in_ commands if
         * the file descriptor is open for reading since they do not modify
         * the device.
         */
        if (dir == PR_IN)
                return mode & BLK_OPEN_READ;
        else
                return mode & BLK_OPEN_WRITE;
}

static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode,
                struct pr_registration __user *arg)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_registration reg;

        if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
                return -EPERM;
        if (!ops || !ops->pr_register)
                return -EOPNOTSUPP;
        if (copy_from_user(&reg, arg, sizeof(reg)))
                return -EFAULT;

        if (reg.flags & ~PR_FL_IGNORE_KEY)
                return -EOPNOTSUPP;
        return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
}
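
/*
 * Hypothetical userspace sketch (not part of this file): registering a
 * persistent-reservation key.  struct pr_registration is the UAPI type
 * from <linux/pr.h>; the key value is illustrative.
 *
 *      struct pr_registration reg = {
 *              .new_key = 0xabcd,
 *              .flags = PR_FL_IGNORE_KEY,      // ignore any existing key
 *      };
 *
 *      if (ioctl(fd, IOC_PR_REGISTER, &reg))   // fd open for writing
 *              perror("IOC_PR_REGISTER");
 */
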
static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode,
                struct pr_reservation __user *arg)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_reservation rsv;

        if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
                return -EPERM;
        if (!ops || !ops->pr_reserve)
                return -EOPNOTSUPP;
        if (copy_from_user(&rsv, arg, sizeof(rsv)))
                return -EFAULT;

        if (rsv.flags & ~PR_FL_IGNORE_KEY)
                return -EOPNOTSUPP;
        return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
}

static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode,
                struct pr_reservation __user *arg)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_reservation rsv;

        if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
                return -EPERM;
        if (!ops || !ops->pr_release)
                return -EOPNOTSUPP;
        if (copy_from_user(&rsv, arg, sizeof(rsv)))
                return -EFAULT;

        if (rsv.flags)
                return -EOPNOTSUPP;
        return ops->pr_release(bdev, rsv.key, rsv.type);
}

static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode,
                struct pr_preempt __user *arg, bool abort)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_preempt p;

        if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
                return -EPERM;
        if (!ops || !ops->pr_preempt)
                return -EOPNOTSUPP;
        if (copy_from_user(&p, arg, sizeof(p)))
                return -EFAULT;

        if (p.flags)
                return -EOPNOTSUPP;
        return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
}

static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode,
                struct pr_clear __user *arg)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_clear c;

        if (!blkdev_pr_allowed(bdev, mode, PR_OUT))
                return -EPERM;
        if (!ops || !ops->pr_clear)
                return -EOPNOTSUPP;
        if (copy_from_user(&c, arg, sizeof(c)))
                return -EFAULT;

        if (c.flags)
                return -EOPNOTSUPP;
        return ops->pr_clear(bdev, c.key);
}

static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode,
                struct pr_read_keys __user *arg)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_keys *keys_info;
        struct pr_read_keys read_keys;
        u64 __user *keys_ptr;
        size_t keys_info_len;
        size_t keys_copy_len;
        int ret;

        if (!blkdev_pr_allowed(bdev, mode, PR_IN))
                return -EPERM;
        if (!ops || !ops->pr_read_keys)
                return -EOPNOTSUPP;

        if (copy_from_user(&read_keys, arg, sizeof(read_keys)))
                return -EFAULT;

        if (read_keys.num_keys > PR_KEYS_MAX)
                return -EINVAL;

        keys_info_len = struct_size(keys_info, keys, read_keys.num_keys);

        keys_info = kvzalloc(keys_info_len, GFP_KERNEL);
        if (!keys_info)
                return -ENOMEM;

        keys_info->num_keys = read_keys.num_keys;

        ret = ops->pr_read_keys(bdev, keys_info);
        if (ret)
                goto out;

        /* Copy out individual keys */
        keys_ptr = u64_to_user_ptr(read_keys.keys_ptr);
        keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) *
                        sizeof(keys_info->keys[0]);

        if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) {
                ret = -EFAULT;
                goto out;
        }

        /* Copy out the arg struct */
        read_keys.generation = keys_info->generation;
        read_keys.num_keys = keys_info->num_keys;

        if (copy_to_user(arg, &read_keys, sizeof(read_keys)))
                ret = -EFAULT;
out:
        kvfree(keys_info);
        return ret;
}

static int blkdev_pr_read_reservation(struct block_device *bdev,
                blk_mode_t mode, struct pr_read_reservation __user *arg)
{
        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
        struct pr_held_reservation rsv = {};
        struct pr_read_reservation out = {};
        int ret;

        if (!blkdev_pr_allowed(bdev, mode, PR_IN))
                return -EPERM;
        if (!ops || !ops->pr_read_reservation)
                return -EOPNOTSUPP;

        ret = ops->pr_read_reservation(bdev, &rsv);
        if (ret)
                return ret;

        out.key = rsv.key;
        out.generation = rsv.generation;
        out.type = rsv.type;

        if (copy_to_user(arg, &out, sizeof(out)))
                return -EFAULT;
        return 0;
}
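
/*
 * Hypothetical userspace sketch (not part of this file): since the
 * num_keys copied back out above reflects the count reported by the
 * driver, a caller can probe with num_keys = 0, size a buffer, then
 * call again.  This assumes the driver fills in the full key count
 * even when the supplied buffer is too small.
 *
 *      struct pr_read_keys hdr = { .num_keys = 0 };
 *
 *      ioctl(fd, IOC_PR_READ_KEYS, &hdr);      // learn how many keys exist
 *      uint64_t *keys = calloc(hdr.num_keys, sizeof(*keys));
 *      hdr.keys_ptr = (uintptr_t)keys;
 *      ioctl(fd, IOC_PR_READ_KEYS, &hdr);      // fetch the keys themselves
 */
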
static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
                unsigned long arg)
{
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        mutex_lock(&bdev->bd_holder_lock);
        if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
                bdev->bd_holder_ops->sync(bdev);
        else {
                mutex_unlock(&bdev->bd_holder_lock);
                sync_blockdev(bdev);
        }

        invalidate_bdev(bdev);
        return 0;
}

static int blkdev_roset(struct block_device *bdev, unsigned cmd,
                unsigned long arg)
{
        int ret, n;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (get_user(n, (int __user *)arg))
                return -EFAULT;
        if (bdev->bd_disk->fops->set_read_only) {
                ret = bdev->bd_disk->fops->set_read_only(bdev, n);
                if (ret)
                        return ret;
        }
        if (n)
                bdev_set_flag(bdev, BD_READ_ONLY);
        else
                bdev_clear_flag(bdev, BD_READ_ONLY);
        return 0;
}

static int blkdev_getgeo(struct block_device *bdev,
                struct hd_geometry __user *argp)
{
        struct gendisk *disk = bdev->bd_disk;
        struct hd_geometry geo;
        int ret;

        if (!argp)
                return -EINVAL;
        if (!disk->fops->getgeo)
                return -ENOTTY;

        /*
         * We need to set the startsect first, the driver may
         * want to override it.
         */
        memset(&geo, 0, sizeof(geo));
        geo.start = get_start_sect(bdev);
        ret = disk->fops->getgeo(disk, &geo);
        if (ret)
                return ret;
        if (copy_to_user(argp, &geo, sizeof(geo)))
                return -EFAULT;
        return 0;
}

#ifdef CONFIG_COMPAT
struct compat_hd_geometry {
        unsigned char heads;
        unsigned char sectors;
        unsigned short cylinders;
        u32 start;
};

static int compat_hdio_getgeo(struct block_device *bdev,
                              struct compat_hd_geometry __user *ugeo)
{
        struct gendisk *disk = bdev->bd_disk;
        struct hd_geometry geo;
        int ret;

        if (!ugeo)
                return -EINVAL;
        if (!disk->fops->getgeo)
                return -ENOTTY;

        memset(&geo, 0, sizeof(geo));
        /*
         * We need to set the startsect first, the driver may
         * want to override it.
         */
        geo.start = get_start_sect(bdev);
        ret = disk->fops->getgeo(disk, &geo);
        if (ret)
                return ret;

        /* the first four bytes are heads, sectors and cylinders */
        ret = copy_to_user(ugeo, &geo, 4);
        ret |= put_user(geo.start, &ugeo->start);
        if (ret)
                ret = -EFAULT;

        return ret;
}
#endif

/* set the logical block size */
static int blkdev_bszset(struct file *file, blk_mode_t mode,
                int __user *argp)
{
        // this one might be file_inode(file)->i_rdev - a rare valid
        // use of file_inode() for those.
        dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev;
        struct file *excl_file;
        int ret, n;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (!argp)
                return -EINVAL;
        if (get_user(n, argp))
                return -EFAULT;

        if (mode & BLK_OPEN_EXCL)
                return set_blocksize(file, n);

        excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL);
        if (IS_ERR(excl_file))
                return -EBUSY;
        ret = set_blocksize(excl_file, n);
        fput(excl_file);
        return ret;
}
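
/*
 * Hypothetical userspace sketch (not part of this file): setting the
 * soft block size.  When the caller's descriptor is not an exclusive
 * open, the code above briefly takes an exclusive reference itself,
 * so this fails with EBUSY while another exclusive holder exists.
 *
 *      int bs = 4096;
 *
 *      if (ioctl(fd, BLKBSZSET, &bs))
 *              perror("BLKBSZSET");
 */
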
/*
 * Common commands that are handled the same way on native and compat
 * user space. Note the separate arg/argp parameters that are needed
 * to deal with the compat_ptr() conversion.
 */
static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
                               unsigned int cmd, unsigned long arg,
                               void __user *argp)
{
        unsigned int max_sectors;

        switch (cmd) {
        case BLKFLSBUF:
                return blkdev_flushbuf(bdev, cmd, arg);
        case BLKROSET:
                return blkdev_roset(bdev, cmd, arg);
        case BLKDISCARD:
                return blk_ioctl_discard(bdev, mode, arg);
        case BLKSECDISCARD:
                return blk_ioctl_secure_erase(bdev, mode, argp);
        case BLKZEROOUT:
                return blk_ioctl_zeroout(bdev, mode, arg);
        case BLKGETDISKSEQ:
                return put_u64(argp, bdev->bd_disk->diskseq);
        case BLKREPORTZONE:
        case BLKREPORTZONEV2:
                return blkdev_report_zones_ioctl(bdev, cmd, arg);
        case BLKRESETZONE:
        case BLKOPENZONE:
        case BLKCLOSEZONE:
        case BLKFINISHZONE:
                return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
        case BLKGETZONESZ:
                return put_uint(argp, bdev_zone_sectors(bdev));
        case BLKGETNRZONES:
                return put_uint(argp, bdev_nr_zones(bdev));
        case BLKROGET:
                return put_int(argp, bdev_read_only(bdev) != 0);
        case BLKSSZGET: /* get block device logical block size */
                return put_int(argp, bdev_logical_block_size(bdev));
        case BLKPBSZGET: /* get block device physical block size */
                return put_uint(argp, bdev_physical_block_size(bdev));
        case BLKIOMIN:
                return put_uint(argp, bdev_io_min(bdev));
        case BLKIOOPT:
                return put_uint(argp, bdev_io_opt(bdev));
        case BLKALIGNOFF:
                return put_int(argp, bdev_alignment_offset(bdev));
        case BLKDISCARDZEROES:
                return put_uint(argp, 0);
        case BLKSECTGET:
                max_sectors = min_t(unsigned int, USHRT_MAX,
                                    queue_max_sectors(bdev_get_queue(bdev)));
                return put_ushort(argp, max_sectors);
        case BLKROTATIONAL:
                return put_ushort(argp, bdev_rot(bdev));
        case BLKRASET:
        case BLKFRASET:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE;
                return 0;
        case BLKRRPART:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                if (bdev_is_partition(bdev))
                        return -EINVAL;
                return disk_scan_partitions(bdev->bd_disk,
                                            mode | BLK_OPEN_STRICT_SCAN);
        case BLKTRACESTART:
        case BLKTRACESTOP:
        case BLKTRACETEARDOWN:
                return blk_trace_ioctl(bdev, cmd, argp);
        case BLKCRYPTOIMPORTKEY:
        case BLKCRYPTOGENERATEKEY:
        case BLKCRYPTOPREPAREKEY:
                return blk_crypto_ioctl(bdev, cmd, argp);
        case IOC_PR_REGISTER:
                return blkdev_pr_register(bdev, mode, argp);
        case IOC_PR_RESERVE:
                return blkdev_pr_reserve(bdev, mode, argp);
        case IOC_PR_RELEASE:
                return blkdev_pr_release(bdev, mode, argp);
        case IOC_PR_PREEMPT:
                return blkdev_pr_preempt(bdev, mode, argp, false);
        case IOC_PR_PREEMPT_ABORT:
                return blkdev_pr_preempt(bdev, mode, argp, true);
        case IOC_PR_CLEAR:
                return blkdev_pr_clear(bdev, mode, argp);
        case IOC_PR_READ_KEYS:
                return blkdev_pr_read_keys(bdev, mode, argp);
        case IOC_PR_READ_RESERVATION:
                return blkdev_pr_read_reservation(bdev, mode, argp);
        default:
                return blk_get_meta_cap(bdev, cmd, argp);
        }
}
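
/*
 * Hypothetical userspace sketch (not part of this file): querying the
 * queue limits exported above.  Note the differing argument types the
 * put_* helpers imply: BLKSSZGET stores an int, BLKPBSZGET an
 * unsigned int.
 *
 *      int lbs;
 *      unsigned int pbs;
 *
 *      ioctl(fd, BLKSSZGET, &lbs);     // logical block size
 *      ioctl(fd, BLKPBSZGET, &pbs);    // physical block size
 */
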
/*
 * Always keep this in sync with compat_blkdev_ioctl()
 * to handle all incompatible commands in both functions.
 *
 * New commands must be compatible and go into blkdev_common_ioctl
 */
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
        struct block_device *bdev = I_BDEV(file->f_mapping->host);
        void __user *argp = (void __user *)arg;
        blk_mode_t mode = file_to_blk_mode(file);
        int ret;

        switch (cmd) {
        /* These need separate implementations for the data structure */
        case HDIO_GETGEO:
                return blkdev_getgeo(bdev, argp);
        case BLKPG:
                return blkpg_ioctl(bdev, argp);

        /* Compat mode returns 32-bit data instead of 'long' */
        case BLKRAGET:
        case BLKFRAGET:
                if (!argp)
                        return -EINVAL;
                return put_long(argp,
                        (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
        case BLKGETSIZE:
                if (bdev_nr_sectors(bdev) > ~0UL)
                        return -EFBIG;
                return put_ulong(argp, bdev_nr_sectors(bdev));

        /* The data is compatible, but the command number is different */
        case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
                return put_int(argp, block_size(bdev));
        case BLKBSZSET:
                return blkdev_bszset(file, mode, argp);
        case BLKGETSIZE64:
                return put_u64(argp, bdev_nr_bytes(bdev));

        /* Incompatible alignment on i386 */
        case BLKTRACESETUP:
        case BLKTRACESETUP2:
                return blk_trace_ioctl(bdev, cmd, argp);
        default:
                break;
        }

        ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
        if (ret != -ENOIOCTLCMD)
                return ret;

        if (!bdev->bd_disk->fops->ioctl)
                return -ENOTTY;
        return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
}

#ifdef CONFIG_COMPAT

#define BLKBSZGET_32            _IOR(0x12, 112, int)
#define BLKBSZSET_32            _IOW(0x12, 113, int)
#define BLKGETSIZE64_32         _IOR(0x12, 114, int)

/* Most of the generic ioctls are handled in the normal fallback path.
   This assumes the blkdev's low level compat_ioctl always returns
   ENOIOCTLCMD for unknown ioctls. */
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
        int ret;
        void __user *argp = compat_ptr(arg);
        struct block_device *bdev = I_BDEV(file->f_mapping->host);
        struct gendisk *disk = bdev->bd_disk;
        blk_mode_t mode = file_to_blk_mode(file);

        switch (cmd) {
        /* These need separate implementations for the data structure */
        case HDIO_GETGEO:
                return compat_hdio_getgeo(bdev, argp);
        case BLKPG:
                return compat_blkpg_ioctl(bdev, argp);

        /* Compat mode returns 32-bit data instead of 'long' */
        case BLKRAGET:
        case BLKFRAGET:
                if (!argp)
                        return -EINVAL;
                return compat_put_long(argp,
                        (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512);
        case BLKGETSIZE:
                if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0)
                        return -EFBIG;
                return compat_put_ulong(argp, bdev_nr_sectors(bdev));

        /* The data is compatible, but the command number is different */
        case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */
                return put_int(argp, bdev_logical_block_size(bdev));
        case BLKBSZSET_32:
                return blkdev_bszset(file, mode, argp);
        case BLKGETSIZE64_32:
                return put_u64(argp, bdev_nr_bytes(bdev));

        /* Incompatible alignment on i386 */
        case BLKTRACESETUP32:
                return blk_trace_ioctl(bdev, cmd, argp);
        default:
                break;
        }

        ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
        if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl)
                ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);

        return ret;
}
#endif
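
/*
 * Hypothetical userspace sketch (not part of this file): the 64-bit
 * size query works identically for native and compat callers; only
 * the command number differs (BLKGETSIZE64 vs BLKGETSIZE64_32).
 *
 *      uint64_t bytes;
 *
 *      ioctl(fd, BLKGETSIZE64, &bytes);        // device capacity in bytes
 */
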
struct blk_iou_cmd {
        int res;
        bool nowait;
};

static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw)
{
        struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req);
        struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);

        if (bic->res == -EAGAIN && bic->nowait)
                io_uring_cmd_issue_blocking(cmd);
        else
                io_uring_cmd_done(cmd, bic->res,
                                  IO_URING_CMD_TASK_WORK_ISSUE_FLAGS);
}

static void bio_cmd_bio_end_io(struct bio *bio)
{
        struct io_uring_cmd *cmd = bio->bi_private;
        struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);

        if (unlikely(bio->bi_status) && !bic->res)
                bic->res = blk_status_to_errno(bio->bi_status);

        io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete);
        bio_put(bio);
}

static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
                              struct block_device *bdev,
                              uint64_t start, uint64_t len, bool nowait)
{
        struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
        gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL;
        sector_t sector = start >> SECTOR_SHIFT;
        sector_t nr_sects = len >> SECTOR_SHIFT;
        struct bio *prev = NULL, *bio;
        int err;

        if (!bdev_max_discard_sectors(bdev))
                return -EOPNOTSUPP;
        if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE))
                return -EBADF;
        if (bdev_read_only(bdev))
                return -EPERM;
        err = blk_validate_byte_range(bdev, start, len);
        if (err)
                return err;

        err = filemap_invalidate_pages(bdev->bd_mapping, start,
                                       start + len - 1, nowait);
        if (err)
                return err;

        while (true) {
                bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects, gfp);
                if (!bio)
                        break;
                if (nowait) {
                        /*
                         * Don't allow multi-bio non-blocking submissions as
                         * subsequent bios may fail but we won't get a direct
                         * indication of that. Normally, the caller should
                         * retry from a blocking context.
                         */
                        if (unlikely(nr_sects)) {
                                bio_put(bio);
                                return -EAGAIN;
                        }
                        bio->bi_opf |= REQ_NOWAIT;
                }

                prev = bio_chain_and_submit(prev, bio);
        }
        if (unlikely(!prev))
                return -EAGAIN;
        if (unlikely(nr_sects))
                bic->res = -EAGAIN;

        prev->bi_private = cmd;
        prev->bi_end_io = bio_cmd_bio_end_io;
        submit_bio(prev);
        return -EIOCBQUEUED;
}

int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
        struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host);
        struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd);
        const struct io_uring_sqe *sqe = cmd->sqe;
        u32 cmd_op = cmd->cmd_op;
        uint64_t start, len;

        if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len ||
                     sqe->rw_flags || sqe->file_index))
                return -EINVAL;

        bic->res = 0;
        bic->nowait = issue_flags & IO_URING_F_NONBLOCK;

        start = READ_ONCE(sqe->addr);
        len = READ_ONCE(sqe->addr3);

        switch (cmd_op) {
        case BLOCK_URING_CMD_DISCARD:
                return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait);
        }
        return -EINVAL;
}
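
/*
 * Hypothetical userspace sketch (not part of this file): issuing
 * BLOCK_URING_CMD_DISCARD through io_uring with liburing.  As the
 * handler above reads them, the byte range travels in sqe->addr
 * (start) and sqe->addr3 (length), while ioprio, len, rw_flags and
 * file_index must stay zero.
 *
 *      struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *      io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fd, NULL, 0, 0);
 *      sqe->cmd_op = BLOCK_URING_CMD_DISCARD;
 *      sqe->addr = 0;                  // start offset in bytes
 *      sqe->addr3 = 1 << 20;           // length in bytes
 *      io_uring_submit(&ring);
 */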