1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <linux/blk-integrity.h> 17 #include <uapi/linux/blkdev.h> 18 #include "blk.h" 19 #include "blk-crypto-internal.h" 20 21 static int blkpg_do_ioctl(struct block_device *bdev, 22 struct blkpg_partition __user *upart, int op) 23 { 24 struct gendisk *disk = bdev->bd_disk; 25 struct blkpg_partition p; 26 sector_t start, length, capacity, end; 27 28 if (!capable(CAP_SYS_ADMIN)) 29 return -EACCES; 30 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 31 return -EFAULT; 32 if (bdev_is_partition(bdev)) 33 return -EINVAL; 34 35 if (p.pno <= 0) 36 return -EINVAL; 37 38 if (op == BLKPG_DEL_PARTITION) 39 return bdev_del_partition(disk, p.pno); 40 41 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 42 return -EINVAL; 43 /* Check that the partition is aligned to the block size */ 44 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 45 return -EINVAL; 46 47 start = p.start >> SECTOR_SHIFT; 48 length = p.length >> SECTOR_SHIFT; 49 capacity = get_capacity(disk); 50 51 if (check_add_overflow(start, length, &end)) 52 return -EINVAL; 53 54 if (start >= capacity || end > capacity) 55 return -EINVAL; 56 57 switch (op) { 58 case BLKPG_ADD_PARTITION: 59 return bdev_add_partition(disk, p.pno, start, length); 60 case BLKPG_RESIZE_PARTITION: 61 return bdev_resize_partition(disk, p.pno, start, length); 62 default: 63 return -EINVAL; 64 } 65 } 66 67 static int blkpg_ioctl(struct block_device *bdev, 68 struct blkpg_ioctl_arg __user *arg) 69 { 70 struct blkpg_partition __user *udata; 71 int op; 72 73 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 74 return -EFAULT; 75 76 return blkpg_do_ioctl(bdev, udata, op); 77 } 78 79 #ifdef CONFIG_COMPAT 80 struct compat_blkpg_ioctl_arg { 81 compat_int_t op; 82 compat_int_t flags; 83 compat_int_t datalen; 84 compat_caddr_t data; 85 }; 86 87 static int compat_blkpg_ioctl(struct block_device *bdev, 88 struct compat_blkpg_ioctl_arg __user *arg) 89 { 90 compat_caddr_t udata; 91 int op; 92 93 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 94 return -EFAULT; 95 96 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 97 } 98 #endif 99 100 /* 101 * Check that [start, start + len) is a valid range from the block device's 102 * perspective, including verifying that it can be correctly translated into 103 * logical block addresses. 104 */ 105 static int blk_validate_byte_range(struct block_device *bdev, 106 uint64_t start, uint64_t len) 107 { 108 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 109 uint64_t end; 110 111 if ((start | len) & bs_mask) 112 return -EINVAL; 113 if (!len) 114 return -EINVAL; 115 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 116 return -EINVAL; 117 118 return 0; 119 } 120 121 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 122 unsigned long arg) 123 { 124 uint64_t range[2], start, len; 125 struct bio *prev = NULL, *bio; 126 sector_t sector, nr_sects; 127 struct blk_plug plug; 128 int err; 129 130 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 131 return -EFAULT; 132 start = range[0]; 133 len = range[1]; 134 135 if (!bdev_max_discard_sectors(bdev)) 136 return -EOPNOTSUPP; 137 138 if (!(mode & BLK_OPEN_WRITE)) 139 return -EBADF; 140 if (bdev_read_only(bdev)) 141 return -EPERM; 142 err = blk_validate_byte_range(bdev, start, len); 143 if (err) 144 return err; 145 146 inode_lock(bdev->bd_mapping->host); 147 filemap_invalidate_lock(bdev->bd_mapping); 148 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 149 if (err) 150 goto fail; 151 152 sector = start >> SECTOR_SHIFT; 153 nr_sects = len >> SECTOR_SHIFT; 154 155 blk_start_plug(&plug); 156 while (1) { 157 if (fatal_signal_pending(current)) { 158 if (prev) 159 bio_await_chain(prev); 160 err = -EINTR; 161 goto out_unplug; 162 } 163 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 164 GFP_KERNEL); 165 if (!bio) 166 break; 167 prev = bio_chain_and_submit(prev, bio); 168 } 169 if (prev) { 170 err = submit_bio_wait(prev); 171 if (err == -EOPNOTSUPP) 172 err = 0; 173 bio_put(prev); 174 } 175 out_unplug: 176 blk_finish_plug(&plug); 177 fail: 178 filemap_invalidate_unlock(bdev->bd_mapping); 179 inode_unlock(bdev->bd_mapping->host); 180 return err; 181 } 182 183 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 184 void __user *argp) 185 { 186 uint64_t start, len, end; 187 uint64_t range[2]; 188 int err; 189 190 if (!(mode & BLK_OPEN_WRITE)) 191 return -EBADF; 192 if (!bdev_max_secure_erase_sectors(bdev)) 193 return -EOPNOTSUPP; 194 if (copy_from_user(range, argp, sizeof(range))) 195 return -EFAULT; 196 197 start = range[0]; 198 len = range[1]; 199 if ((start & 511) || (len & 511)) 200 return -EINVAL; 201 if (check_add_overflow(start, len, &end) || 202 end > bdev_nr_bytes(bdev)) 203 return -EINVAL; 204 205 inode_lock(bdev->bd_mapping->host); 206 filemap_invalidate_lock(bdev->bd_mapping); 207 err = truncate_bdev_range(bdev, mode, start, end - 1); 208 if (!err) 209 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 210 GFP_KERNEL); 211 filemap_invalidate_unlock(bdev->bd_mapping); 212 inode_unlock(bdev->bd_mapping->host); 213 return err; 214 } 215 216 217 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 218 unsigned long arg) 219 { 220 uint64_t range[2]; 221 uint64_t start, end, len; 222 int err; 223 224 if (!(mode & BLK_OPEN_WRITE)) 225 return -EBADF; 226 227 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 228 return -EFAULT; 229 230 start = range[0]; 231 len = range[1]; 232 end = start + len - 1; 233 234 if (start & 511) 235 return -EINVAL; 236 if (len & 511) 237 return -EINVAL; 238 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 239 return -EINVAL; 240 if (end < start) 241 return -EINVAL; 242 243 /* Invalidate the page cache, including dirty pages */ 244 inode_lock(bdev->bd_mapping->host); 245 filemap_invalidate_lock(bdev->bd_mapping); 246 err = truncate_bdev_range(bdev, mode, start, end); 247 if (err) 248 goto fail; 249 250 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 251 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 252 253 fail: 254 filemap_invalidate_unlock(bdev->bd_mapping); 255 inode_unlock(bdev->bd_mapping->host); 256 return err; 257 } 258 259 static int put_ushort(unsigned short __user *argp, unsigned short val) 260 { 261 return put_user(val, argp); 262 } 263 264 static int put_int(int __user *argp, int val) 265 { 266 return put_user(val, argp); 267 } 268 269 static int put_uint(unsigned int __user *argp, unsigned int val) 270 { 271 return put_user(val, argp); 272 } 273 274 static int put_long(long __user *argp, long val) 275 { 276 return put_user(val, argp); 277 } 278 279 static int put_ulong(unsigned long __user *argp, unsigned long val) 280 { 281 return put_user(val, argp); 282 } 283 284 static int put_u64(u64 __user *argp, u64 val) 285 { 286 return put_user(val, argp); 287 } 288 289 #ifdef CONFIG_COMPAT 290 static int compat_put_long(compat_long_t __user *argp, long val) 291 { 292 return put_user(val, argp); 293 } 294 295 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 296 { 297 return put_user(val, argp); 298 } 299 #endif 300 301 #ifdef CONFIG_COMPAT 302 /* 303 * This is the equivalent of compat_ptr_ioctl(), to be used by block 304 * drivers that implement only commands that are completely compatible 305 * between 32-bit and 64-bit user space 306 */ 307 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 308 unsigned cmd, unsigned long arg) 309 { 310 struct gendisk *disk = bdev->bd_disk; 311 312 if (disk->fops->ioctl) 313 return disk->fops->ioctl(bdev, mode, cmd, 314 (unsigned long)compat_ptr(arg)); 315 316 return -ENOIOCTLCMD; 317 } 318 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 319 #endif 320 321 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) 322 { 323 /* no sense to make reservations for partitions */ 324 if (bdev_is_partition(bdev)) 325 return false; 326 327 if (capable(CAP_SYS_ADMIN)) 328 return true; 329 /* 330 * Only allow unprivileged reservations if the file descriptor is open 331 * for writing. 332 */ 333 return mode & BLK_OPEN_WRITE; 334 } 335 336 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 337 struct pr_registration __user *arg) 338 { 339 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 340 struct pr_registration reg; 341 342 if (!blkdev_pr_allowed(bdev, mode)) 343 return -EPERM; 344 if (!ops || !ops->pr_register) 345 return -EOPNOTSUPP; 346 if (copy_from_user(®, arg, sizeof(reg))) 347 return -EFAULT; 348 349 if (reg.flags & ~PR_FL_IGNORE_KEY) 350 return -EOPNOTSUPP; 351 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 352 } 353 354 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 355 struct pr_reservation __user *arg) 356 { 357 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 358 struct pr_reservation rsv; 359 360 if (!blkdev_pr_allowed(bdev, mode)) 361 return -EPERM; 362 if (!ops || !ops->pr_reserve) 363 return -EOPNOTSUPP; 364 if (copy_from_user(&rsv, arg, sizeof(rsv))) 365 return -EFAULT; 366 367 if (rsv.flags & ~PR_FL_IGNORE_KEY) 368 return -EOPNOTSUPP; 369 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 370 } 371 372 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 373 struct pr_reservation __user *arg) 374 { 375 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 376 struct pr_reservation rsv; 377 378 if (!blkdev_pr_allowed(bdev, mode)) 379 return -EPERM; 380 if (!ops || !ops->pr_release) 381 return -EOPNOTSUPP; 382 if (copy_from_user(&rsv, arg, sizeof(rsv))) 383 return -EFAULT; 384 385 if (rsv.flags) 386 return -EOPNOTSUPP; 387 return ops->pr_release(bdev, rsv.key, rsv.type); 388 } 389 390 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 391 struct pr_preempt __user *arg, bool abort) 392 { 393 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 394 struct pr_preempt p; 395 396 if (!blkdev_pr_allowed(bdev, mode)) 397 return -EPERM; 398 if (!ops || !ops->pr_preempt) 399 return -EOPNOTSUPP; 400 if (copy_from_user(&p, arg, sizeof(p))) 401 return -EFAULT; 402 403 if (p.flags) 404 return -EOPNOTSUPP; 405 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 406 } 407 408 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 409 struct pr_clear __user *arg) 410 { 411 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 412 struct pr_clear c; 413 414 if (!blkdev_pr_allowed(bdev, mode)) 415 return -EPERM; 416 if (!ops || !ops->pr_clear) 417 return -EOPNOTSUPP; 418 if (copy_from_user(&c, arg, sizeof(c))) 419 return -EFAULT; 420 421 if (c.flags) 422 return -EOPNOTSUPP; 423 return ops->pr_clear(bdev, c.key); 424 } 425 426 static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, 427 struct pr_read_keys __user *arg) 428 { 429 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 430 struct pr_keys *keys_info; 431 struct pr_read_keys read_keys; 432 u64 __user *keys_ptr; 433 size_t keys_info_len; 434 size_t keys_copy_len; 435 int ret; 436 437 if (!blkdev_pr_allowed(bdev, mode)) 438 return -EPERM; 439 if (!ops || !ops->pr_read_keys) 440 return -EOPNOTSUPP; 441 442 if (copy_from_user(&read_keys, arg, sizeof(read_keys))) 443 return -EFAULT; 444 445 keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); 446 if (keys_info_len == SIZE_MAX) 447 return -EINVAL; 448 449 keys_info = kzalloc(keys_info_len, GFP_KERNEL); 450 if (!keys_info) 451 return -ENOMEM; 452 453 keys_info->num_keys = read_keys.num_keys; 454 455 ret = ops->pr_read_keys(bdev, keys_info); 456 if (ret) 457 goto out; 458 459 /* Copy out individual keys */ 460 keys_ptr = u64_to_user_ptr(read_keys.keys_ptr); 461 keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) * 462 sizeof(keys_info->keys[0]); 463 464 if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) { 465 ret = -EFAULT; 466 goto out; 467 } 468 469 /* Copy out the arg struct */ 470 read_keys.generation = keys_info->generation; 471 read_keys.num_keys = keys_info->num_keys; 472 473 if (copy_to_user(arg, &read_keys, sizeof(read_keys))) 474 ret = -EFAULT; 475 out: 476 kfree(keys_info); 477 return ret; 478 } 479 480 static int blkdev_pr_read_reservation(struct block_device *bdev, 481 blk_mode_t mode, struct pr_read_reservation __user *arg) 482 { 483 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 484 struct pr_held_reservation rsv = {}; 485 struct pr_read_reservation out = {}; 486 int ret; 487 488 if (!blkdev_pr_allowed(bdev, mode)) 489 return -EPERM; 490 if (!ops || !ops->pr_read_reservation) 491 return -EOPNOTSUPP; 492 493 ret = ops->pr_read_reservation(bdev, &rsv); 494 if (ret) 495 return ret; 496 497 out.key = rsv.key; 498 out.generation = rsv.generation; 499 out.type = rsv.type; 500 501 if (copy_to_user(arg, &out, sizeof(out))) 502 return -EFAULT; 503 return 0; 504 } 505 506 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 507 unsigned long arg) 508 { 509 if (!capable(CAP_SYS_ADMIN)) 510 return -EACCES; 511 512 mutex_lock(&bdev->bd_holder_lock); 513 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 514 bdev->bd_holder_ops->sync(bdev); 515 else { 516 mutex_unlock(&bdev->bd_holder_lock); 517 sync_blockdev(bdev); 518 } 519 520 invalidate_bdev(bdev); 521 return 0; 522 } 523 524 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 525 unsigned long arg) 526 { 527 int ret, n; 528 529 if (!capable(CAP_SYS_ADMIN)) 530 return -EACCES; 531 532 if (get_user(n, (int __user *)arg)) 533 return -EFAULT; 534 if (bdev->bd_disk->fops->set_read_only) { 535 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 536 if (ret) 537 return ret; 538 } 539 if (n) 540 bdev_set_flag(bdev, BD_READ_ONLY); 541 else 542 bdev_clear_flag(bdev, BD_READ_ONLY); 543 return 0; 544 } 545 546 static int blkdev_getgeo(struct block_device *bdev, 547 struct hd_geometry __user *argp) 548 { 549 struct gendisk *disk = bdev->bd_disk; 550 struct hd_geometry geo; 551 int ret; 552 553 if (!argp) 554 return -EINVAL; 555 if (!disk->fops->getgeo) 556 return -ENOTTY; 557 558 /* 559 * We need to set the startsect first, the driver may 560 * want to override it. 561 */ 562 memset(&geo, 0, sizeof(geo)); 563 geo.start = get_start_sect(bdev); 564 ret = disk->fops->getgeo(disk, &geo); 565 if (ret) 566 return ret; 567 if (copy_to_user(argp, &geo, sizeof(geo))) 568 return -EFAULT; 569 return 0; 570 } 571 572 #ifdef CONFIG_COMPAT 573 struct compat_hd_geometry { 574 unsigned char heads; 575 unsigned char sectors; 576 unsigned short cylinders; 577 u32 start; 578 }; 579 580 static int compat_hdio_getgeo(struct block_device *bdev, 581 struct compat_hd_geometry __user *ugeo) 582 { 583 struct gendisk *disk = bdev->bd_disk; 584 struct hd_geometry geo; 585 int ret; 586 587 if (!ugeo) 588 return -EINVAL; 589 if (!disk->fops->getgeo) 590 return -ENOTTY; 591 592 memset(&geo, 0, sizeof(geo)); 593 /* 594 * We need to set the startsect first, the driver may 595 * want to override it. 596 */ 597 geo.start = get_start_sect(bdev); 598 ret = disk->fops->getgeo(disk, &geo); 599 if (ret) 600 return ret; 601 602 ret = copy_to_user(ugeo, &geo, 4); 603 ret |= put_user(geo.start, &ugeo->start); 604 if (ret) 605 ret = -EFAULT; 606 607 return ret; 608 } 609 #endif 610 611 /* set the logical block size */ 612 static int blkdev_bszset(struct file *file, blk_mode_t mode, 613 int __user *argp) 614 { 615 // this one might be file_inode(file)->i_rdev - a rare valid 616 // use of file_inode() for those. 617 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 618 struct file *excl_file; 619 int ret, n; 620 621 if (!capable(CAP_SYS_ADMIN)) 622 return -EACCES; 623 if (!argp) 624 return -EINVAL; 625 if (get_user(n, argp)) 626 return -EFAULT; 627 628 if (mode & BLK_OPEN_EXCL) 629 return set_blocksize(file, n); 630 631 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 632 if (IS_ERR(excl_file)) 633 return -EBUSY; 634 ret = set_blocksize(excl_file, n); 635 fput(excl_file); 636 return ret; 637 } 638 639 /* 640 * Common commands that are handled the same way on native and compat 641 * user space. Note the separate arg/argp parameters that are needed 642 * to deal with the compat_ptr() conversion. 643 */ 644 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 645 unsigned int cmd, unsigned long arg, 646 void __user *argp) 647 { 648 unsigned int max_sectors; 649 650 switch (cmd) { 651 case BLKFLSBUF: 652 return blkdev_flushbuf(bdev, cmd, arg); 653 case BLKROSET: 654 return blkdev_roset(bdev, cmd, arg); 655 case BLKDISCARD: 656 return blk_ioctl_discard(bdev, mode, arg); 657 case BLKSECDISCARD: 658 return blk_ioctl_secure_erase(bdev, mode, argp); 659 case BLKZEROOUT: 660 return blk_ioctl_zeroout(bdev, mode, arg); 661 case BLKGETDISKSEQ: 662 return put_u64(argp, bdev->bd_disk->diskseq); 663 case BLKREPORTZONE: 664 case BLKREPORTZONEV2: 665 return blkdev_report_zones_ioctl(bdev, cmd, arg); 666 case BLKRESETZONE: 667 case BLKOPENZONE: 668 case BLKCLOSEZONE: 669 case BLKFINISHZONE: 670 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 671 case BLKGETZONESZ: 672 return put_uint(argp, bdev_zone_sectors(bdev)); 673 case BLKGETNRZONES: 674 return put_uint(argp, bdev_nr_zones(bdev)); 675 case BLKROGET: 676 return put_int(argp, bdev_read_only(bdev) != 0); 677 case BLKSSZGET: /* get block device logical block size */ 678 return put_int(argp, bdev_logical_block_size(bdev)); 679 case BLKPBSZGET: /* get block device physical block size */ 680 return put_uint(argp, bdev_physical_block_size(bdev)); 681 case BLKIOMIN: 682 return put_uint(argp, bdev_io_min(bdev)); 683 case BLKIOOPT: 684 return put_uint(argp, bdev_io_opt(bdev)); 685 case BLKALIGNOFF: 686 return put_int(argp, bdev_alignment_offset(bdev)); 687 case BLKDISCARDZEROES: 688 return put_uint(argp, 0); 689 case BLKSECTGET: 690 max_sectors = min_t(unsigned int, USHRT_MAX, 691 queue_max_sectors(bdev_get_queue(bdev))); 692 return put_ushort(argp, max_sectors); 693 case BLKROTATIONAL: 694 return put_ushort(argp, !bdev_nonrot(bdev)); 695 case BLKRASET: 696 case BLKFRASET: 697 if(!capable(CAP_SYS_ADMIN)) 698 return -EACCES; 699 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 700 return 0; 701 case BLKRRPART: 702 if (!capable(CAP_SYS_ADMIN)) 703 return -EACCES; 704 if (bdev_is_partition(bdev)) 705 return -EINVAL; 706 return disk_scan_partitions(bdev->bd_disk, 707 mode | BLK_OPEN_STRICT_SCAN); 708 case BLKTRACESTART: 709 case BLKTRACESTOP: 710 case BLKTRACETEARDOWN: 711 return blk_trace_ioctl(bdev, cmd, argp); 712 case BLKCRYPTOIMPORTKEY: 713 case BLKCRYPTOGENERATEKEY: 714 case BLKCRYPTOPREPAREKEY: 715 return blk_crypto_ioctl(bdev, cmd, argp); 716 case IOC_PR_REGISTER: 717 return blkdev_pr_register(bdev, mode, argp); 718 case IOC_PR_RESERVE: 719 return blkdev_pr_reserve(bdev, mode, argp); 720 case IOC_PR_RELEASE: 721 return blkdev_pr_release(bdev, mode, argp); 722 case IOC_PR_PREEMPT: 723 return blkdev_pr_preempt(bdev, mode, argp, false); 724 case IOC_PR_PREEMPT_ABORT: 725 return blkdev_pr_preempt(bdev, mode, argp, true); 726 case IOC_PR_CLEAR: 727 return blkdev_pr_clear(bdev, mode, argp); 728 case IOC_PR_READ_KEYS: 729 return blkdev_pr_read_keys(bdev, mode, argp); 730 case IOC_PR_READ_RESERVATION: 731 return blkdev_pr_read_reservation(bdev, mode, argp); 732 default: 733 return blk_get_meta_cap(bdev, cmd, argp); 734 } 735 } 736 737 /* 738 * Always keep this in sync with compat_blkdev_ioctl() 739 * to handle all incompatible commands in both functions. 740 * 741 * New commands must be compatible and go into blkdev_common_ioctl 742 */ 743 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 744 { 745 struct block_device *bdev = I_BDEV(file->f_mapping->host); 746 void __user *argp = (void __user *)arg; 747 blk_mode_t mode = file_to_blk_mode(file); 748 int ret; 749 750 switch (cmd) { 751 /* These need separate implementations for the data structure */ 752 case HDIO_GETGEO: 753 return blkdev_getgeo(bdev, argp); 754 case BLKPG: 755 return blkpg_ioctl(bdev, argp); 756 757 /* Compat mode returns 32-bit data instead of 'long' */ 758 case BLKRAGET: 759 case BLKFRAGET: 760 if (!argp) 761 return -EINVAL; 762 return put_long(argp, 763 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 764 case BLKGETSIZE: 765 if (bdev_nr_sectors(bdev) > ~0UL) 766 return -EFBIG; 767 return put_ulong(argp, bdev_nr_sectors(bdev)); 768 769 /* The data is compatible, but the command number is different */ 770 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 771 return put_int(argp, block_size(bdev)); 772 case BLKBSZSET: 773 return blkdev_bszset(file, mode, argp); 774 case BLKGETSIZE64: 775 return put_u64(argp, bdev_nr_bytes(bdev)); 776 777 /* Incompatible alignment on i386 */ 778 case BLKTRACESETUP: 779 case BLKTRACESETUP2: 780 return blk_trace_ioctl(bdev, cmd, argp); 781 default: 782 break; 783 } 784 785 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 786 if (ret != -ENOIOCTLCMD) 787 return ret; 788 789 if (!bdev->bd_disk->fops->ioctl) 790 return -ENOTTY; 791 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 792 } 793 794 #ifdef CONFIG_COMPAT 795 796 #define BLKBSZGET_32 _IOR(0x12, 112, int) 797 #define BLKBSZSET_32 _IOW(0x12, 113, int) 798 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 799 800 /* Most of the generic ioctls are handled in the normal fallback path. 801 This assumes the blkdev's low level compat_ioctl always returns 802 ENOIOCTLCMD for unknown ioctls. */ 803 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 804 { 805 int ret; 806 void __user *argp = compat_ptr(arg); 807 struct block_device *bdev = I_BDEV(file->f_mapping->host); 808 struct gendisk *disk = bdev->bd_disk; 809 blk_mode_t mode = file_to_blk_mode(file); 810 811 switch (cmd) { 812 /* These need separate implementations for the data structure */ 813 case HDIO_GETGEO: 814 return compat_hdio_getgeo(bdev, argp); 815 case BLKPG: 816 return compat_blkpg_ioctl(bdev, argp); 817 818 /* Compat mode returns 32-bit data instead of 'long' */ 819 case BLKRAGET: 820 case BLKFRAGET: 821 if (!argp) 822 return -EINVAL; 823 return compat_put_long(argp, 824 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 825 case BLKGETSIZE: 826 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 827 return -EFBIG; 828 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 829 830 /* The data is compatible, but the command number is different */ 831 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 832 return put_int(argp, bdev_logical_block_size(bdev)); 833 case BLKBSZSET_32: 834 return blkdev_bszset(file, mode, argp); 835 case BLKGETSIZE64_32: 836 return put_u64(argp, bdev_nr_bytes(bdev)); 837 838 /* Incompatible alignment on i386 */ 839 case BLKTRACESETUP32: 840 return blk_trace_ioctl(bdev, cmd, argp); 841 default: 842 break; 843 } 844 845 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 846 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 847 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 848 849 return ret; 850 } 851 #endif 852 853 struct blk_iou_cmd { 854 int res; 855 bool nowait; 856 }; 857 858 static void blk_cmd_complete(struct io_tw_req tw_req, io_tw_token_t tw) 859 { 860 struct io_uring_cmd *cmd = io_uring_cmd_from_tw(tw_req); 861 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 862 863 if (bic->res == -EAGAIN && bic->nowait) 864 io_uring_cmd_issue_blocking(cmd); 865 else 866 io_uring_cmd_done(cmd, bic->res, 867 IO_URING_CMD_TASK_WORK_ISSUE_FLAGS); 868 } 869 870 static void bio_cmd_bio_end_io(struct bio *bio) 871 { 872 struct io_uring_cmd *cmd = bio->bi_private; 873 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 874 875 if (unlikely(bio->bi_status) && !bic->res) 876 bic->res = blk_status_to_errno(bio->bi_status); 877 878 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 879 bio_put(bio); 880 } 881 882 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 883 struct block_device *bdev, 884 uint64_t start, uint64_t len, bool nowait) 885 { 886 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 887 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 888 sector_t sector = start >> SECTOR_SHIFT; 889 sector_t nr_sects = len >> SECTOR_SHIFT; 890 struct bio *prev = NULL, *bio; 891 int err; 892 893 if (!bdev_max_discard_sectors(bdev)) 894 return -EOPNOTSUPP; 895 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 896 return -EBADF; 897 if (bdev_read_only(bdev)) 898 return -EPERM; 899 err = blk_validate_byte_range(bdev, start, len); 900 if (err) 901 return err; 902 903 err = filemap_invalidate_pages(bdev->bd_mapping, start, 904 start + len - 1, nowait); 905 if (err) 906 return err; 907 908 while (true) { 909 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 910 if (!bio) 911 break; 912 if (nowait) { 913 /* 914 * Don't allow multi-bio non-blocking submissions as 915 * subsequent bios may fail but we won't get a direct 916 * indication of that. Normally, the caller should 917 * retry from a blocking context. 918 */ 919 if (unlikely(nr_sects)) { 920 bio_put(bio); 921 return -EAGAIN; 922 } 923 bio->bi_opf |= REQ_NOWAIT; 924 } 925 926 prev = bio_chain_and_submit(prev, bio); 927 } 928 if (unlikely(!prev)) 929 return -EAGAIN; 930 if (unlikely(nr_sects)) 931 bic->res = -EAGAIN; 932 933 prev->bi_private = cmd; 934 prev->bi_end_io = bio_cmd_bio_end_io; 935 submit_bio(prev); 936 return -EIOCBQUEUED; 937 } 938 939 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 940 { 941 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 942 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 943 const struct io_uring_sqe *sqe = cmd->sqe; 944 u32 cmd_op = cmd->cmd_op; 945 uint64_t start, len; 946 947 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 948 sqe->rw_flags || sqe->file_index)) 949 return -EINVAL; 950 951 bic->res = 0; 952 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 953 954 start = READ_ONCE(sqe->addr); 955 len = READ_ONCE(sqe->addr3); 956 957 switch (cmd_op) { 958 case BLOCK_URING_CMD_DISCARD: 959 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 960 } 961 return -EINVAL; 962 } 963