1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <linux/blk-integrity.h> 17 #include <uapi/linux/blkdev.h> 18 #include "blk.h" 19 #include "blk-crypto-internal.h" 20 21 static int blkpg_do_ioctl(struct block_device *bdev, 22 struct blkpg_partition __user *upart, int op) 23 { 24 struct gendisk *disk = bdev->bd_disk; 25 struct blkpg_partition p; 26 sector_t start, length, capacity, end; 27 28 if (!capable(CAP_SYS_ADMIN)) 29 return -EACCES; 30 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 31 return -EFAULT; 32 if (bdev_is_partition(bdev)) 33 return -EINVAL; 34 35 if (p.pno <= 0) 36 return -EINVAL; 37 38 if (op == BLKPG_DEL_PARTITION) 39 return bdev_del_partition(disk, p.pno); 40 41 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 42 return -EINVAL; 43 /* Check that the partition is aligned to the block size */ 44 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 45 return -EINVAL; 46 47 start = p.start >> SECTOR_SHIFT; 48 length = p.length >> SECTOR_SHIFT; 49 capacity = get_capacity(disk); 50 51 if (check_add_overflow(start, length, &end)) 52 return -EINVAL; 53 54 if (start >= capacity || end > capacity) 55 return -EINVAL; 56 57 switch (op) { 58 case BLKPG_ADD_PARTITION: 59 return bdev_add_partition(disk, p.pno, start, length); 60 case BLKPG_RESIZE_PARTITION: 61 return bdev_resize_partition(disk, p.pno, start, length); 62 default: 63 return -EINVAL; 64 } 65 } 66 67 static int blkpg_ioctl(struct block_device *bdev, 68 struct blkpg_ioctl_arg __user *arg) 69 { 70 struct blkpg_partition __user *udata; 71 int op; 72 73 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 74 return -EFAULT; 75 76 return blkpg_do_ioctl(bdev, udata, op); 77 } 78 79 #ifdef CONFIG_COMPAT 80 struct compat_blkpg_ioctl_arg { 81 compat_int_t op; 82 compat_int_t flags; 83 compat_int_t datalen; 84 compat_caddr_t data; 85 }; 86 87 static int compat_blkpg_ioctl(struct block_device *bdev, 88 struct compat_blkpg_ioctl_arg __user *arg) 89 { 90 compat_caddr_t udata; 91 int op; 92 93 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 94 return -EFAULT; 95 96 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 97 } 98 #endif 99 100 /* 101 * Check that [start, start + len) is a valid range from the block device's 102 * perspective, including verifying that it can be correctly translated into 103 * logical block addresses. 104 */ 105 static int blk_validate_byte_range(struct block_device *bdev, 106 uint64_t start, uint64_t len) 107 { 108 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 109 uint64_t end; 110 111 if ((start | len) & bs_mask) 112 return -EINVAL; 113 if (!len) 114 return -EINVAL; 115 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 116 return -EINVAL; 117 118 return 0; 119 } 120 121 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 122 unsigned long arg) 123 { 124 uint64_t range[2], start, len; 125 struct bio *prev = NULL, *bio; 126 sector_t sector, nr_sects; 127 struct blk_plug plug; 128 int err; 129 130 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 131 return -EFAULT; 132 start = range[0]; 133 len = range[1]; 134 135 if (!bdev_max_discard_sectors(bdev)) 136 return -EOPNOTSUPP; 137 138 if (!(mode & BLK_OPEN_WRITE)) 139 return -EBADF; 140 if (bdev_read_only(bdev)) 141 return -EPERM; 142 err = blk_validate_byte_range(bdev, start, len); 143 if (err) 144 return err; 145 146 inode_lock(bdev->bd_mapping->host); 147 filemap_invalidate_lock(bdev->bd_mapping); 148 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 149 if (err) 150 goto fail; 151 152 sector = start >> SECTOR_SHIFT; 153 nr_sects = len >> SECTOR_SHIFT; 154 155 blk_start_plug(&plug); 156 while (1) { 157 if (fatal_signal_pending(current)) { 158 if (prev) 159 bio_await_chain(prev); 160 err = -EINTR; 161 goto out_unplug; 162 } 163 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 164 GFP_KERNEL); 165 if (!bio) 166 break; 167 prev = bio_chain_and_submit(prev, bio); 168 } 169 if (prev) { 170 err = submit_bio_wait(prev); 171 if (err == -EOPNOTSUPP) 172 err = 0; 173 bio_put(prev); 174 } 175 out_unplug: 176 blk_finish_plug(&plug); 177 fail: 178 filemap_invalidate_unlock(bdev->bd_mapping); 179 inode_unlock(bdev->bd_mapping->host); 180 return err; 181 } 182 183 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 184 void __user *argp) 185 { 186 uint64_t start, len, end; 187 uint64_t range[2]; 188 int err; 189 190 if (!(mode & BLK_OPEN_WRITE)) 191 return -EBADF; 192 if (!bdev_max_secure_erase_sectors(bdev)) 193 return -EOPNOTSUPP; 194 if (copy_from_user(range, argp, sizeof(range))) 195 return -EFAULT; 196 197 start = range[0]; 198 len = range[1]; 199 if ((start & 511) || (len & 511)) 200 return -EINVAL; 201 if (check_add_overflow(start, len, &end) || 202 end > bdev_nr_bytes(bdev)) 203 return -EINVAL; 204 205 inode_lock(bdev->bd_mapping->host); 206 filemap_invalidate_lock(bdev->bd_mapping); 207 err = truncate_bdev_range(bdev, mode, start, end - 1); 208 if (!err) 209 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 210 GFP_KERNEL); 211 filemap_invalidate_unlock(bdev->bd_mapping); 212 inode_unlock(bdev->bd_mapping->host); 213 return err; 214 } 215 216 217 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 218 unsigned long arg) 219 { 220 uint64_t range[2]; 221 uint64_t start, end, len; 222 int err; 223 224 if (!(mode & BLK_OPEN_WRITE)) 225 return -EBADF; 226 227 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 228 return -EFAULT; 229 230 start = range[0]; 231 len = range[1]; 232 end = start + len - 1; 233 234 if (start & 511) 235 return -EINVAL; 236 if (len & 511) 237 return -EINVAL; 238 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 239 return -EINVAL; 240 if (end < start) 241 return -EINVAL; 242 243 /* Invalidate the page cache, including dirty pages */ 244 inode_lock(bdev->bd_mapping->host); 245 filemap_invalidate_lock(bdev->bd_mapping); 246 err = truncate_bdev_range(bdev, mode, start, end); 247 if (err) 248 goto fail; 249 250 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 251 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 252 253 fail: 254 filemap_invalidate_unlock(bdev->bd_mapping); 255 inode_unlock(bdev->bd_mapping->host); 256 return err; 257 } 258 259 static int put_ushort(unsigned short __user *argp, unsigned short val) 260 { 261 return put_user(val, argp); 262 } 263 264 static int put_int(int __user *argp, int val) 265 { 266 return put_user(val, argp); 267 } 268 269 static int put_uint(unsigned int __user *argp, unsigned int val) 270 { 271 return put_user(val, argp); 272 } 273 274 static int put_long(long __user *argp, long val) 275 { 276 return put_user(val, argp); 277 } 278 279 static int put_ulong(unsigned long __user *argp, unsigned long val) 280 { 281 return put_user(val, argp); 282 } 283 284 static int put_u64(u64 __user *argp, u64 val) 285 { 286 return put_user(val, argp); 287 } 288 289 #ifdef CONFIG_COMPAT 290 static int compat_put_long(compat_long_t __user *argp, long val) 291 { 292 return put_user(val, argp); 293 } 294 295 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 296 { 297 return put_user(val, argp); 298 } 299 #endif 300 301 #ifdef CONFIG_COMPAT 302 /* 303 * This is the equivalent of compat_ptr_ioctl(), to be used by block 304 * drivers that implement only commands that are completely compatible 305 * between 32-bit and 64-bit user space 306 */ 307 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 308 unsigned cmd, unsigned long arg) 309 { 310 struct gendisk *disk = bdev->bd_disk; 311 312 if (disk->fops->ioctl) 313 return disk->fops->ioctl(bdev, mode, cmd, 314 (unsigned long)compat_ptr(arg)); 315 316 return -ENOIOCTLCMD; 317 } 318 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 319 #endif 320 321 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) 322 { 323 /* no sense to make reservations for partitions */ 324 if (bdev_is_partition(bdev)) 325 return false; 326 327 if (capable(CAP_SYS_ADMIN)) 328 return true; 329 /* 330 * Only allow unprivileged reservations if the file descriptor is open 331 * for writing. 332 */ 333 return mode & BLK_OPEN_WRITE; 334 } 335 336 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 337 struct pr_registration __user *arg) 338 { 339 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 340 struct pr_registration reg; 341 342 if (!blkdev_pr_allowed(bdev, mode)) 343 return -EPERM; 344 if (!ops || !ops->pr_register) 345 return -EOPNOTSUPP; 346 if (copy_from_user(®, arg, sizeof(reg))) 347 return -EFAULT; 348 349 if (reg.flags & ~PR_FL_IGNORE_KEY) 350 return -EOPNOTSUPP; 351 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 352 } 353 354 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 355 struct pr_reservation __user *arg) 356 { 357 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 358 struct pr_reservation rsv; 359 360 if (!blkdev_pr_allowed(bdev, mode)) 361 return -EPERM; 362 if (!ops || !ops->pr_reserve) 363 return -EOPNOTSUPP; 364 if (copy_from_user(&rsv, arg, sizeof(rsv))) 365 return -EFAULT; 366 367 if (rsv.flags & ~PR_FL_IGNORE_KEY) 368 return -EOPNOTSUPP; 369 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 370 } 371 372 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 373 struct pr_reservation __user *arg) 374 { 375 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 376 struct pr_reservation rsv; 377 378 if (!blkdev_pr_allowed(bdev, mode)) 379 return -EPERM; 380 if (!ops || !ops->pr_release) 381 return -EOPNOTSUPP; 382 if (copy_from_user(&rsv, arg, sizeof(rsv))) 383 return -EFAULT; 384 385 if (rsv.flags) 386 return -EOPNOTSUPP; 387 return ops->pr_release(bdev, rsv.key, rsv.type); 388 } 389 390 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 391 struct pr_preempt __user *arg, bool abort) 392 { 393 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 394 struct pr_preempt p; 395 396 if (!blkdev_pr_allowed(bdev, mode)) 397 return -EPERM; 398 if (!ops || !ops->pr_preempt) 399 return -EOPNOTSUPP; 400 if (copy_from_user(&p, arg, sizeof(p))) 401 return -EFAULT; 402 403 if (p.flags) 404 return -EOPNOTSUPP; 405 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 406 } 407 408 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 409 struct pr_clear __user *arg) 410 { 411 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 412 struct pr_clear c; 413 414 if (!blkdev_pr_allowed(bdev, mode)) 415 return -EPERM; 416 if (!ops || !ops->pr_clear) 417 return -EOPNOTSUPP; 418 if (copy_from_user(&c, arg, sizeof(c))) 419 return -EFAULT; 420 421 if (c.flags) 422 return -EOPNOTSUPP; 423 return ops->pr_clear(bdev, c.key); 424 } 425 426 static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, 427 struct pr_read_keys __user *arg) 428 { 429 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 430 struct pr_keys *keys_info; 431 struct pr_read_keys read_keys; 432 u64 __user *keys_ptr; 433 size_t keys_info_len; 434 size_t keys_copy_len; 435 int ret; 436 437 if (!blkdev_pr_allowed(bdev, mode)) 438 return -EPERM; 439 if (!ops || !ops->pr_read_keys) 440 return -EOPNOTSUPP; 441 442 if (copy_from_user(&read_keys, arg, sizeof(read_keys))) 443 return -EFAULT; 444 445 if (read_keys.num_keys > PR_KEYS_MAX) 446 return -EINVAL; 447 448 keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); 449 450 keys_info = kvzalloc(keys_info_len, GFP_KERNEL); 451 if (!keys_info) 452 return -ENOMEM; 453 454 keys_info->num_keys = read_keys.num_keys; 455 456 ret = ops->pr_read_keys(bdev, keys_info); 457 if (ret) 458 goto out; 459 460 /* Copy out individual keys */ 461 keys_ptr = u64_to_user_ptr(read_keys.keys_ptr); 462 keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) * 463 sizeof(keys_info->keys[0]); 464 465 if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) { 466 ret = -EFAULT; 467 goto out; 468 } 469 470 /* Copy out the arg struct */ 471 read_keys.generation = keys_info->generation; 472 read_keys.num_keys = keys_info->num_keys; 473 474 if (copy_to_user(arg, &read_keys, sizeof(read_keys))) 475 ret = -EFAULT; 476 out: 477 kvfree(keys_info); 478 return ret; 479 } 480 481 static int blkdev_pr_read_reservation(struct block_device *bdev, 482 blk_mode_t mode, struct pr_read_reservation __user *arg) 483 { 484 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 485 struct pr_held_reservation rsv = {}; 486 struct pr_read_reservation out = {}; 487 int ret; 488 489 if (!blkdev_pr_allowed(bdev, mode)) 490 return -EPERM; 491 if (!ops || !ops->pr_read_reservation) 492 return -EOPNOTSUPP; 493 494 ret = ops->pr_read_reservation(bdev, &rsv); 495 if (ret) 496 return ret; 497 498 out.key = rsv.key; 499 out.generation = rsv.generation; 500 out.type = rsv.type; 501 502 if (copy_to_user(arg, &out, sizeof(out))) 503 return -EFAULT; 504 return 0; 505 } 506 507 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 508 unsigned long arg) 509 { 510 if (!capable(CAP_SYS_ADMIN)) 511 return -EACCES; 512 513 mutex_lock(&bdev->bd_holder_lock); 514 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 515 bdev->bd_holder_ops->sync(bdev); 516 else { 517 mutex_unlock(&bdev->bd_holder_lock); 518 sync_blockdev(bdev); 519 } 520 521 invalidate_bdev(bdev); 522 return 0; 523 } 524 525 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 526 unsigned long arg) 527 { 528 int ret, n; 529 530 if (!capable(CAP_SYS_ADMIN)) 531 return -EACCES; 532 533 if (get_user(n, (int __user *)arg)) 534 return -EFAULT; 535 if (bdev->bd_disk->fops->set_read_only) { 536 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 537 if (ret) 538 return ret; 539 } 540 if (n) 541 bdev_set_flag(bdev, BD_READ_ONLY); 542 else 543 bdev_clear_flag(bdev, BD_READ_ONLY); 544 return 0; 545 } 546 547 static int blkdev_getgeo(struct block_device *bdev, 548 struct hd_geometry __user *argp) 549 { 550 struct gendisk *disk = bdev->bd_disk; 551 struct hd_geometry geo; 552 int ret; 553 554 if (!argp) 555 return -EINVAL; 556 if (!disk->fops->getgeo) 557 return -ENOTTY; 558 559 /* 560 * We need to set the startsect first, the driver may 561 * want to override it. 562 */ 563 memset(&geo, 0, sizeof(geo)); 564 geo.start = get_start_sect(bdev); 565 ret = disk->fops->getgeo(disk, &geo); 566 if (ret) 567 return ret; 568 if (copy_to_user(argp, &geo, sizeof(geo))) 569 return -EFAULT; 570 return 0; 571 } 572 573 #ifdef CONFIG_COMPAT 574 struct compat_hd_geometry { 575 unsigned char heads; 576 unsigned char sectors; 577 unsigned short cylinders; 578 u32 start; 579 }; 580 581 static int compat_hdio_getgeo(struct block_device *bdev, 582 struct compat_hd_geometry __user *ugeo) 583 { 584 struct gendisk *disk = bdev->bd_disk; 585 struct hd_geometry geo; 586 int ret; 587 588 if (!ugeo) 589 return -EINVAL; 590 if (!disk->fops->getgeo) 591 return -ENOTTY; 592 593 memset(&geo, 0, sizeof(geo)); 594 /* 595 * We need to set the startsect first, the driver may 596 * want to override it. 597 */ 598 geo.start = get_start_sect(bdev); 599 ret = disk->fops->getgeo(disk, &geo); 600 if (ret) 601 return ret; 602 603 ret = copy_to_user(ugeo, &geo, 4); 604 ret |= put_user(geo.start, &ugeo->start); 605 if (ret) 606 ret = -EFAULT; 607 608 return ret; 609 } 610 #endif 611 612 /* set the logical block size */ 613 static int blkdev_bszset(struct file *file, blk_mode_t mode, 614 int __user *argp) 615 { 616 // this one might be file_inode(file)->i_rdev - a rare valid 617 // use of file_inode() for those. 618 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 619 struct file *excl_file; 620 int ret, n; 621 622 if (!capable(CAP_SYS_ADMIN)) 623 return -EACCES; 624 if (!argp) 625 return -EINVAL; 626 if (get_user(n, argp)) 627 return -EFAULT; 628 629 if (mode & BLK_OPEN_EXCL) 630 return set_blocksize(file, n); 631 632 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 633 if (IS_ERR(excl_file)) 634 return -EBUSY; 635 ret = set_blocksize(excl_file, n); 636 fput(excl_file); 637 return ret; 638 } 639 640 /* 641 * Common commands that are handled the same way on native and compat 642 * user space. Note the separate arg/argp parameters that are needed 643 * to deal with the compat_ptr() conversion. 644 */ 645 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 646 unsigned int cmd, unsigned long arg, 647 void __user *argp) 648 { 649 unsigned int max_sectors; 650 651 switch (cmd) { 652 case BLKFLSBUF: 653 return blkdev_flushbuf(bdev, cmd, arg); 654 case BLKROSET: 655 return blkdev_roset(bdev, cmd, arg); 656 case BLKDISCARD: 657 return blk_ioctl_discard(bdev, mode, arg); 658 case BLKSECDISCARD: 659 return blk_ioctl_secure_erase(bdev, mode, argp); 660 case BLKZEROOUT: 661 return blk_ioctl_zeroout(bdev, mode, arg); 662 case BLKGETDISKSEQ: 663 return put_u64(argp, bdev->bd_disk->diskseq); 664 case BLKREPORTZONE: 665 case BLKREPORTZONEV2: 666 return blkdev_report_zones_ioctl(bdev, cmd, arg); 667 case BLKRESETZONE: 668 case BLKOPENZONE: 669 case BLKCLOSEZONE: 670 case BLKFINISHZONE: 671 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 672 case BLKGETZONESZ: 673 return put_uint(argp, bdev_zone_sectors(bdev)); 674 case BLKGETNRZONES: 675 return put_uint(argp, bdev_nr_zones(bdev)); 676 case BLKROGET: 677 return put_int(argp, bdev_read_only(bdev) != 0); 678 case BLKSSZGET: /* get block device logical block size */ 679 return put_int(argp, bdev_logical_block_size(bdev)); 680 case BLKPBSZGET: /* get block device physical block size */ 681 return put_uint(argp, bdev_physical_block_size(bdev)); 682 case BLKIOMIN: 683 return put_uint(argp, bdev_io_min(bdev)); 684 case BLKIOOPT: 685 return put_uint(argp, bdev_io_opt(bdev)); 686 case BLKALIGNOFF: 687 return put_int(argp, bdev_alignment_offset(bdev)); 688 case BLKDISCARDZEROES: 689 return put_uint(argp, 0); 690 case BLKSECTGET: 691 max_sectors = min_t(unsigned int, USHRT_MAX, 692 queue_max_sectors(bdev_get_queue(bdev))); 693 return put_ushort(argp, max_sectors); 694 case BLKROTATIONAL: 695 return put_ushort(argp, !bdev_nonrot(bdev)); 696 case BLKRASET: 697 case BLKFRASET: 698 if(!capable(CAP_SYS_ADMIN)) 699 return -EACCES; 700 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 701 return 0; 702 case BLKRRPART: 703 if (!capable(CAP_SYS_ADMIN)) 704 return -EACCES; 705 if (bdev_is_partition(bdev)) 706 return -EINVAL; 707 return disk_scan_partitions(bdev->bd_disk, 708 mode | BLK_OPEN_STRICT_SCAN); 709 case BLKTRACESTART: 710 case BLKTRACESTOP: 711 case BLKTRACETEARDOWN: 712 return blk_trace_ioctl(bdev, cmd, argp); 713 case BLKCRYPTOIMPORTKEY: 714 case BLKCRYPTOGENERATEKEY: 715 case BLKCRYPTOPREPAREKEY: 716 return blk_crypto_ioctl(bdev, cmd, argp); 717 case IOC_PR_REGISTER: 718 return blkdev_pr_register(bdev, mode, argp); 719 case IOC_PR_RESERVE: 720 return blkdev_pr_reserve(bdev, mode, argp); 721 case IOC_PR_RELEASE: 722 return blkdev_pr_release(bdev, mode, argp); 723 case IOC_PR_PREEMPT: 724 return blkdev_pr_preempt(bdev, mode, argp, false); 725 case IOC_PR_PREEMPT_ABORT: 726 return blkdev_pr_preempt(bdev, mode, argp, true); 727 case IOC_PR_CLEAR: 728 return blkdev_pr_clear(bdev, mode, argp); 729 case IOC_PR_READ_KEYS: 730 return blkdev_pr_read_keys(bdev, mode, argp); 731 case IOC_PR_READ_RESERVATION: 732 return blkdev_pr_read_reservation(bdev, mode, argp); 733 default: 734 return blk_get_meta_cap(bdev, cmd, argp); 735 } 736 } 737 738 /* 739 * Always keep this in sync with compat_blkdev_ioctl() 740 * to handle all incompatible commands in both functions. 741 * 742 * New commands must be compatible and go into blkdev_common_ioctl 743 */ 744 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 745 { 746 struct block_device *bdev = I_BDEV(file->f_mapping->host); 747 void __user *argp = (void __user *)arg; 748 blk_mode_t mode = file_to_blk_mode(file); 749 int ret; 750 751 switch (cmd) { 752 /* These need separate implementations for the data structure */ 753 case HDIO_GETGEO: 754 return blkdev_getgeo(bdev, argp); 755 case BLKPG: 756 return blkpg_ioctl(bdev, argp); 757 758 /* Compat mode returns 32-bit data instead of 'long' */ 759 case BLKRAGET: 760 case BLKFRAGET: 761 if (!argp) 762 return -EINVAL; 763 return put_long(argp, 764 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 765 case BLKGETSIZE: 766 if (bdev_nr_sectors(bdev) > ~0UL) 767 return -EFBIG; 768 return put_ulong(argp, bdev_nr_sectors(bdev)); 769 770 /* The data is compatible, but the command number is different */ 771 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 772 return put_int(argp, block_size(bdev)); 773 case BLKBSZSET: 774 return blkdev_bszset(file, mode, argp); 775 case BLKGETSIZE64: 776 return put_u64(argp, bdev_nr_bytes(bdev)); 777 778 /* Incompatible alignment on i386 */ 779 case BLKTRACESETUP: 780 case BLKTRACESETUP2: 781 return blk_trace_ioctl(bdev, cmd, argp); 782 default: 783 break; 784 } 785 786 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 787 if (ret != -ENOIOCTLCMD) 788 return ret; 789 790 if (!bdev->bd_disk->fops->ioctl) 791 return -ENOTTY; 792 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 793 } 794 795 #ifdef CONFIG_COMPAT 796 797 #define BLKBSZGET_32 _IOR(0x12, 112, int) 798 #define BLKBSZSET_32 _IOW(0x12, 113, int) 799 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 800 801 /* Most of the generic ioctls are handled in the normal fallback path. 802 This assumes the blkdev's low level compat_ioctl always returns 803 ENOIOCTLCMD for unknown ioctls. */ 804 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 805 { 806 int ret; 807 void __user *argp = compat_ptr(arg); 808 struct block_device *bdev = I_BDEV(file->f_mapping->host); 809 struct gendisk *disk = bdev->bd_disk; 810 blk_mode_t mode = file_to_blk_mode(file); 811 812 switch (cmd) { 813 /* These need separate implementations for the data structure */ 814 case HDIO_GETGEO: 815 return compat_hdio_getgeo(bdev, argp); 816 case BLKPG: 817 return compat_blkpg_ioctl(bdev, argp); 818 819 /* Compat mode returns 32-bit data instead of 'long' */ 820 case BLKRAGET: 821 case BLKFRAGET: 822 if (!argp) 823 return -EINVAL; 824 return compat_put_long(argp, 825 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 826 case BLKGETSIZE: 827 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 828 return -EFBIG; 829 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 830 831 /* The data is compatible, but the command number is different */ 832 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 833 return put_int(argp, bdev_logical_block_size(bdev)); 834 case BLKBSZSET_32: 835 return blkdev_bszset(file, mode, argp); 836 case BLKGETSIZE64_32: 837 return put_u64(argp, bdev_nr_bytes(bdev)); 838 839 /* Incompatible alignment on i386 */ 840 case BLKTRACESETUP32: 841 return blk_trace_ioctl(bdev, cmd, argp); 842 default: 843 break; 844 } 845 846 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 847 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 848 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 849 850 return ret; 851 } 852 #endif 853 854 struct blk_iou_cmd { 855 int res; 856 bool nowait; 857 }; 858 859 static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw) 860 { 861 struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); 862 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 863 864 if (bic->res == -EAGAIN && bic->nowait) 865 io_uring_cmd_issue_blocking(cmd); 866 else 867 io_uring_cmd_done(cmd, bic->res, 868 IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); 869 } 870 871 static void bio_cmd_bio_end_io(struct bio *bio) 872 { 873 struct io_uring_cmd *cmd = bio->bi_private; 874 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 875 876 if (unlikely(bio->bi_status) && !bic->res) 877 bic->res = blk_status_to_errno(bio->bi_status); 878 879 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 880 bio_put(bio); 881 } 882 883 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 884 struct block_device *bdev, 885 uint64_t start, uint64_t len, bool nowait) 886 { 887 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 888 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 889 sector_t sector = start >> SECTOR_SHIFT; 890 sector_t nr_sects = len >> SECTOR_SHIFT; 891 struct bio *prev = NULL, *bio; 892 int err; 893 894 if (!bdev_max_discard_sectors(bdev)) 895 return -EOPNOTSUPP; 896 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 897 return -EBADF; 898 if (bdev_read_only(bdev)) 899 return -EPERM; 900 err = blk_validate_byte_range(bdev, start, len); 901 if (err) 902 return err; 903 904 err = filemap_invalidate_pages(bdev->bd_mapping, start, 905 start + len - 1, nowait); 906 if (err) 907 return err; 908 909 while (true) { 910 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 911 if (!bio) 912 break; 913 if (nowait) { 914 /* 915 * Don't allow multi-bio non-blocking submissions as 916 * subsequent bios may fail but we won't get a direct 917 * indication of that. Normally, the caller should 918 * retry from a blocking context. 919 */ 920 if (unlikely(nr_sects)) { 921 bio_put(bio); 922 return -EAGAIN; 923 } 924 bio->bi_opf |= REQ_NOWAIT; 925 } 926 927 prev = bio_chain_and_submit(prev, bio); 928 } 929 if (unlikely(!prev)) 930 return -EAGAIN; 931 if (unlikely(nr_sects)) 932 bic->res = -EAGAIN; 933 934 prev->bi_private = cmd; 935 prev->bi_end_io = bio_cmd_bio_end_io; 936 submit_bio(prev); 937 return -EIOCBQUEUED; 938 } 939 940 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 941 { 942 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 943 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 944 const struct io_uring_sqe *sqe = cmd->sqe; 945 u32 cmd_op = cmd->cmd_op; 946 uint64_t start, len; 947 948 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 949 sqe->rw_flags || sqe->file_index)) 950 return -EINVAL; 951 952 bic->res = 0; 953 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 954 955 start = READ_ONCE(sqe->addr); 956 len = READ_ONCE(sqe->addr3); 957 958 switch (cmd_op) { 959 case BLOCK_URING_CMD_DISCARD: 960 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 961 } 962 return -EINVAL; 963 } 964