1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <uapi/linux/blkdev.h> 17 #include "blk.h" 18 #include "blk-crypto-internal.h" 19 20 static int blkpg_do_ioctl(struct block_device *bdev, 21 struct blkpg_partition __user *upart, int op) 22 { 23 struct gendisk *disk = bdev->bd_disk; 24 struct blkpg_partition p; 25 sector_t start, length, capacity, end; 26 27 if (!capable(CAP_SYS_ADMIN)) 28 return -EACCES; 29 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 30 return -EFAULT; 31 if (bdev_is_partition(bdev)) 32 return -EINVAL; 33 34 if (p.pno <= 0) 35 return -EINVAL; 36 37 if (op == BLKPG_DEL_PARTITION) 38 return bdev_del_partition(disk, p.pno); 39 40 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 41 return -EINVAL; 42 /* Check that the partition is aligned to the block size */ 43 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 44 return -EINVAL; 45 46 start = p.start >> SECTOR_SHIFT; 47 length = p.length >> SECTOR_SHIFT; 48 capacity = get_capacity(disk); 49 50 if (check_add_overflow(start, length, &end)) 51 return -EINVAL; 52 53 if (start >= capacity || end > capacity) 54 return -EINVAL; 55 56 switch (op) { 57 case BLKPG_ADD_PARTITION: 58 return bdev_add_partition(disk, p.pno, start, length); 59 case BLKPG_RESIZE_PARTITION: 60 return bdev_resize_partition(disk, p.pno, start, length); 61 default: 62 return -EINVAL; 63 } 64 } 65 66 static int blkpg_ioctl(struct block_device *bdev, 67 struct blkpg_ioctl_arg __user *arg) 68 { 69 struct blkpg_partition __user *udata; 70 int op; 71 72 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 73 return -EFAULT; 74 75 return blkpg_do_ioctl(bdev, udata, op); 76 } 77 78 #ifdef CONFIG_COMPAT 79 struct compat_blkpg_ioctl_arg { 80 compat_int_t op; 81 compat_int_t flags; 82 compat_int_t datalen; 83 compat_caddr_t data; 84 }; 85 86 static int compat_blkpg_ioctl(struct block_device *bdev, 87 struct compat_blkpg_ioctl_arg __user *arg) 88 { 89 compat_caddr_t udata; 90 int op; 91 92 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 93 return -EFAULT; 94 95 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 96 } 97 #endif 98 99 /* 100 * Check that [start, start + len) is a valid range from the block device's 101 * perspective, including verifying that it can be correctly translated into 102 * logical block addresses. 103 */ 104 static int blk_validate_byte_range(struct block_device *bdev, 105 uint64_t start, uint64_t len) 106 { 107 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 108 uint64_t end; 109 110 if ((start | len) & bs_mask) 111 return -EINVAL; 112 if (!len) 113 return -EINVAL; 114 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 115 return -EINVAL; 116 117 return 0; 118 } 119 120 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 121 unsigned long arg) 122 { 123 uint64_t range[2], start, len; 124 struct bio *prev = NULL, *bio; 125 sector_t sector, nr_sects; 126 struct blk_plug plug; 127 int err; 128 129 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 130 return -EFAULT; 131 start = range[0]; 132 len = range[1]; 133 134 if (!bdev_max_discard_sectors(bdev)) 135 return -EOPNOTSUPP; 136 137 if (!(mode & BLK_OPEN_WRITE)) 138 return -EBADF; 139 if (bdev_read_only(bdev)) 140 return -EPERM; 141 err = blk_validate_byte_range(bdev, start, len); 142 if (err) 143 return err; 144 145 filemap_invalidate_lock(bdev->bd_mapping); 146 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 147 if (err) 148 goto fail; 149 150 sector = start >> SECTOR_SHIFT; 151 nr_sects = len >> SECTOR_SHIFT; 152 153 blk_start_plug(&plug); 154 while (1) { 155 if (fatal_signal_pending(current)) { 156 if (prev) 157 bio_await_chain(prev); 158 err = -EINTR; 159 goto out_unplug; 160 } 161 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 162 GFP_KERNEL); 163 if (!bio) 164 break; 165 prev = bio_chain_and_submit(prev, bio); 166 } 167 if (prev) { 168 err = submit_bio_wait(prev); 169 if (err == -EOPNOTSUPP) 170 err = 0; 171 bio_put(prev); 172 } 173 out_unplug: 174 blk_finish_plug(&plug); 175 fail: 176 filemap_invalidate_unlock(bdev->bd_mapping); 177 return err; 178 } 179 180 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 181 void __user *argp) 182 { 183 uint64_t start, len, end; 184 uint64_t range[2]; 185 int err; 186 187 if (!(mode & BLK_OPEN_WRITE)) 188 return -EBADF; 189 if (!bdev_max_secure_erase_sectors(bdev)) 190 return -EOPNOTSUPP; 191 if (copy_from_user(range, argp, sizeof(range))) 192 return -EFAULT; 193 194 start = range[0]; 195 len = range[1]; 196 if ((start & 511) || (len & 511)) 197 return -EINVAL; 198 if (check_add_overflow(start, len, &end) || 199 end > bdev_nr_bytes(bdev)) 200 return -EINVAL; 201 202 filemap_invalidate_lock(bdev->bd_mapping); 203 err = truncate_bdev_range(bdev, mode, start, end - 1); 204 if (!err) 205 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 206 GFP_KERNEL); 207 filemap_invalidate_unlock(bdev->bd_mapping); 208 return err; 209 } 210 211 212 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 213 unsigned long arg) 214 { 215 uint64_t range[2]; 216 uint64_t start, end, len; 217 int err; 218 219 if (!(mode & BLK_OPEN_WRITE)) 220 return -EBADF; 221 222 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 223 return -EFAULT; 224 225 start = range[0]; 226 len = range[1]; 227 end = start + len - 1; 228 229 if (start & 511) 230 return -EINVAL; 231 if (len & 511) 232 return -EINVAL; 233 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 234 return -EINVAL; 235 if (end < start) 236 return -EINVAL; 237 238 /* Invalidate the page cache, including dirty pages */ 239 filemap_invalidate_lock(bdev->bd_mapping); 240 err = truncate_bdev_range(bdev, mode, start, end); 241 if (err) 242 goto fail; 243 244 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 245 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 246 247 fail: 248 filemap_invalidate_unlock(bdev->bd_mapping); 249 return err; 250 } 251 252 static int put_ushort(unsigned short __user *argp, unsigned short val) 253 { 254 return put_user(val, argp); 255 } 256 257 static int put_int(int __user *argp, int val) 258 { 259 return put_user(val, argp); 260 } 261 262 static int put_uint(unsigned int __user *argp, unsigned int val) 263 { 264 return put_user(val, argp); 265 } 266 267 static int put_long(long __user *argp, long val) 268 { 269 return put_user(val, argp); 270 } 271 272 static int put_ulong(unsigned long __user *argp, unsigned long val) 273 { 274 return put_user(val, argp); 275 } 276 277 static int put_u64(u64 __user *argp, u64 val) 278 { 279 return put_user(val, argp); 280 } 281 282 #ifdef CONFIG_COMPAT 283 static int compat_put_long(compat_long_t __user *argp, long val) 284 { 285 return put_user(val, argp); 286 } 287 288 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 289 { 290 return put_user(val, argp); 291 } 292 #endif 293 294 #ifdef CONFIG_COMPAT 295 /* 296 * This is the equivalent of compat_ptr_ioctl(), to be used by block 297 * drivers that implement only commands that are completely compatible 298 * between 32-bit and 64-bit user space 299 */ 300 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 301 unsigned cmd, unsigned long arg) 302 { 303 struct gendisk *disk = bdev->bd_disk; 304 305 if (disk->fops->ioctl) 306 return disk->fops->ioctl(bdev, mode, cmd, 307 (unsigned long)compat_ptr(arg)); 308 309 return -ENOIOCTLCMD; 310 } 311 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 312 #endif 313 314 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) 315 { 316 /* no sense to make reservations for partitions */ 317 if (bdev_is_partition(bdev)) 318 return false; 319 320 if (capable(CAP_SYS_ADMIN)) 321 return true; 322 /* 323 * Only allow unprivileged reservations if the file descriptor is open 324 * for writing. 325 */ 326 return mode & BLK_OPEN_WRITE; 327 } 328 329 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 330 struct pr_registration __user *arg) 331 { 332 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 333 struct pr_registration reg; 334 335 if (!blkdev_pr_allowed(bdev, mode)) 336 return -EPERM; 337 if (!ops || !ops->pr_register) 338 return -EOPNOTSUPP; 339 if (copy_from_user(®, arg, sizeof(reg))) 340 return -EFAULT; 341 342 if (reg.flags & ~PR_FL_IGNORE_KEY) 343 return -EOPNOTSUPP; 344 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 345 } 346 347 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 348 struct pr_reservation __user *arg) 349 { 350 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 351 struct pr_reservation rsv; 352 353 if (!blkdev_pr_allowed(bdev, mode)) 354 return -EPERM; 355 if (!ops || !ops->pr_reserve) 356 return -EOPNOTSUPP; 357 if (copy_from_user(&rsv, arg, sizeof(rsv))) 358 return -EFAULT; 359 360 if (rsv.flags & ~PR_FL_IGNORE_KEY) 361 return -EOPNOTSUPP; 362 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 363 } 364 365 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 366 struct pr_reservation __user *arg) 367 { 368 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 369 struct pr_reservation rsv; 370 371 if (!blkdev_pr_allowed(bdev, mode)) 372 return -EPERM; 373 if (!ops || !ops->pr_release) 374 return -EOPNOTSUPP; 375 if (copy_from_user(&rsv, arg, sizeof(rsv))) 376 return -EFAULT; 377 378 if (rsv.flags) 379 return -EOPNOTSUPP; 380 return ops->pr_release(bdev, rsv.key, rsv.type); 381 } 382 383 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 384 struct pr_preempt __user *arg, bool abort) 385 { 386 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 387 struct pr_preempt p; 388 389 if (!blkdev_pr_allowed(bdev, mode)) 390 return -EPERM; 391 if (!ops || !ops->pr_preempt) 392 return -EOPNOTSUPP; 393 if (copy_from_user(&p, arg, sizeof(p))) 394 return -EFAULT; 395 396 if (p.flags) 397 return -EOPNOTSUPP; 398 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 399 } 400 401 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 402 struct pr_clear __user *arg) 403 { 404 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 405 struct pr_clear c; 406 407 if (!blkdev_pr_allowed(bdev, mode)) 408 return -EPERM; 409 if (!ops || !ops->pr_clear) 410 return -EOPNOTSUPP; 411 if (copy_from_user(&c, arg, sizeof(c))) 412 return -EFAULT; 413 414 if (c.flags) 415 return -EOPNOTSUPP; 416 return ops->pr_clear(bdev, c.key); 417 } 418 419 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 420 unsigned long arg) 421 { 422 if (!capable(CAP_SYS_ADMIN)) 423 return -EACCES; 424 425 mutex_lock(&bdev->bd_holder_lock); 426 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 427 bdev->bd_holder_ops->sync(bdev); 428 else { 429 mutex_unlock(&bdev->bd_holder_lock); 430 sync_blockdev(bdev); 431 } 432 433 invalidate_bdev(bdev); 434 return 0; 435 } 436 437 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 438 unsigned long arg) 439 { 440 int ret, n; 441 442 if (!capable(CAP_SYS_ADMIN)) 443 return -EACCES; 444 445 if (get_user(n, (int __user *)arg)) 446 return -EFAULT; 447 if (bdev->bd_disk->fops->set_read_only) { 448 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 449 if (ret) 450 return ret; 451 } 452 if (n) 453 bdev_set_flag(bdev, BD_READ_ONLY); 454 else 455 bdev_clear_flag(bdev, BD_READ_ONLY); 456 return 0; 457 } 458 459 static int blkdev_getgeo(struct block_device *bdev, 460 struct hd_geometry __user *argp) 461 { 462 struct gendisk *disk = bdev->bd_disk; 463 struct hd_geometry geo; 464 int ret; 465 466 if (!argp) 467 return -EINVAL; 468 if (!disk->fops->getgeo) 469 return -ENOTTY; 470 471 /* 472 * We need to set the startsect first, the driver may 473 * want to override it. 474 */ 475 memset(&geo, 0, sizeof(geo)); 476 geo.start = get_start_sect(bdev); 477 ret = disk->fops->getgeo(bdev, &geo); 478 if (ret) 479 return ret; 480 if (copy_to_user(argp, &geo, sizeof(geo))) 481 return -EFAULT; 482 return 0; 483 } 484 485 #ifdef CONFIG_COMPAT 486 struct compat_hd_geometry { 487 unsigned char heads; 488 unsigned char sectors; 489 unsigned short cylinders; 490 u32 start; 491 }; 492 493 static int compat_hdio_getgeo(struct block_device *bdev, 494 struct compat_hd_geometry __user *ugeo) 495 { 496 struct gendisk *disk = bdev->bd_disk; 497 struct hd_geometry geo; 498 int ret; 499 500 if (!ugeo) 501 return -EINVAL; 502 if (!disk->fops->getgeo) 503 return -ENOTTY; 504 505 memset(&geo, 0, sizeof(geo)); 506 /* 507 * We need to set the startsect first, the driver may 508 * want to override it. 509 */ 510 geo.start = get_start_sect(bdev); 511 ret = disk->fops->getgeo(bdev, &geo); 512 if (ret) 513 return ret; 514 515 ret = copy_to_user(ugeo, &geo, 4); 516 ret |= put_user(geo.start, &ugeo->start); 517 if (ret) 518 ret = -EFAULT; 519 520 return ret; 521 } 522 #endif 523 524 /* set the logical block size */ 525 static int blkdev_bszset(struct file *file, blk_mode_t mode, 526 int __user *argp) 527 { 528 // this one might be file_inode(file)->i_rdev - a rare valid 529 // use of file_inode() for those. 530 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 531 struct file *excl_file; 532 int ret, n; 533 534 if (!capable(CAP_SYS_ADMIN)) 535 return -EACCES; 536 if (!argp) 537 return -EINVAL; 538 if (get_user(n, argp)) 539 return -EFAULT; 540 541 if (mode & BLK_OPEN_EXCL) 542 return set_blocksize(file, n); 543 544 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 545 if (IS_ERR(excl_file)) 546 return -EBUSY; 547 ret = set_blocksize(excl_file, n); 548 fput(excl_file); 549 return ret; 550 } 551 552 /* 553 * Common commands that are handled the same way on native and compat 554 * user space. Note the separate arg/argp parameters that are needed 555 * to deal with the compat_ptr() conversion. 556 */ 557 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 558 unsigned int cmd, unsigned long arg, 559 void __user *argp) 560 { 561 unsigned int max_sectors; 562 563 switch (cmd) { 564 case BLKFLSBUF: 565 return blkdev_flushbuf(bdev, cmd, arg); 566 case BLKROSET: 567 return blkdev_roset(bdev, cmd, arg); 568 case BLKDISCARD: 569 return blk_ioctl_discard(bdev, mode, arg); 570 case BLKSECDISCARD: 571 return blk_ioctl_secure_erase(bdev, mode, argp); 572 case BLKZEROOUT: 573 return blk_ioctl_zeroout(bdev, mode, arg); 574 case BLKGETDISKSEQ: 575 return put_u64(argp, bdev->bd_disk->diskseq); 576 case BLKREPORTZONE: 577 return blkdev_report_zones_ioctl(bdev, cmd, arg); 578 case BLKRESETZONE: 579 case BLKOPENZONE: 580 case BLKCLOSEZONE: 581 case BLKFINISHZONE: 582 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 583 case BLKGETZONESZ: 584 return put_uint(argp, bdev_zone_sectors(bdev)); 585 case BLKGETNRZONES: 586 return put_uint(argp, bdev_nr_zones(bdev)); 587 case BLKROGET: 588 return put_int(argp, bdev_read_only(bdev) != 0); 589 case BLKSSZGET: /* get block device logical block size */ 590 return put_int(argp, bdev_logical_block_size(bdev)); 591 case BLKPBSZGET: /* get block device physical block size */ 592 return put_uint(argp, bdev_physical_block_size(bdev)); 593 case BLKIOMIN: 594 return put_uint(argp, bdev_io_min(bdev)); 595 case BLKIOOPT: 596 return put_uint(argp, bdev_io_opt(bdev)); 597 case BLKALIGNOFF: 598 return put_int(argp, bdev_alignment_offset(bdev)); 599 case BLKDISCARDZEROES: 600 return put_uint(argp, 0); 601 case BLKSECTGET: 602 max_sectors = min_t(unsigned int, USHRT_MAX, 603 queue_max_sectors(bdev_get_queue(bdev))); 604 return put_ushort(argp, max_sectors); 605 case BLKROTATIONAL: 606 return put_ushort(argp, !bdev_nonrot(bdev)); 607 case BLKRASET: 608 case BLKFRASET: 609 if(!capable(CAP_SYS_ADMIN)) 610 return -EACCES; 611 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 612 return 0; 613 case BLKRRPART: 614 if (!capable(CAP_SYS_ADMIN)) 615 return -EACCES; 616 if (bdev_is_partition(bdev)) 617 return -EINVAL; 618 return disk_scan_partitions(bdev->bd_disk, 619 mode | BLK_OPEN_STRICT_SCAN); 620 case BLKTRACESTART: 621 case BLKTRACESTOP: 622 case BLKTRACETEARDOWN: 623 return blk_trace_ioctl(bdev, cmd, argp); 624 case BLKCRYPTOIMPORTKEY: 625 case BLKCRYPTOGENERATEKEY: 626 case BLKCRYPTOPREPAREKEY: 627 return blk_crypto_ioctl(bdev, cmd, argp); 628 case IOC_PR_REGISTER: 629 return blkdev_pr_register(bdev, mode, argp); 630 case IOC_PR_RESERVE: 631 return blkdev_pr_reserve(bdev, mode, argp); 632 case IOC_PR_RELEASE: 633 return blkdev_pr_release(bdev, mode, argp); 634 case IOC_PR_PREEMPT: 635 return blkdev_pr_preempt(bdev, mode, argp, false); 636 case IOC_PR_PREEMPT_ABORT: 637 return blkdev_pr_preempt(bdev, mode, argp, true); 638 case IOC_PR_CLEAR: 639 return blkdev_pr_clear(bdev, mode, argp); 640 default: 641 return -ENOIOCTLCMD; 642 } 643 } 644 645 /* 646 * Always keep this in sync with compat_blkdev_ioctl() 647 * to handle all incompatible commands in both functions. 648 * 649 * New commands must be compatible and go into blkdev_common_ioctl 650 */ 651 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 652 { 653 struct block_device *bdev = I_BDEV(file->f_mapping->host); 654 void __user *argp = (void __user *)arg; 655 blk_mode_t mode = file_to_blk_mode(file); 656 int ret; 657 658 switch (cmd) { 659 /* These need separate implementations for the data structure */ 660 case HDIO_GETGEO: 661 return blkdev_getgeo(bdev, argp); 662 case BLKPG: 663 return blkpg_ioctl(bdev, argp); 664 665 /* Compat mode returns 32-bit data instead of 'long' */ 666 case BLKRAGET: 667 case BLKFRAGET: 668 if (!argp) 669 return -EINVAL; 670 return put_long(argp, 671 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 672 case BLKGETSIZE: 673 if (bdev_nr_sectors(bdev) > ~0UL) 674 return -EFBIG; 675 return put_ulong(argp, bdev_nr_sectors(bdev)); 676 677 /* The data is compatible, but the command number is different */ 678 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 679 return put_int(argp, block_size(bdev)); 680 case BLKBSZSET: 681 return blkdev_bszset(file, mode, argp); 682 case BLKGETSIZE64: 683 return put_u64(argp, bdev_nr_bytes(bdev)); 684 685 /* Incompatible alignment on i386 */ 686 case BLKTRACESETUP: 687 return blk_trace_ioctl(bdev, cmd, argp); 688 default: 689 break; 690 } 691 692 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 693 if (ret != -ENOIOCTLCMD) 694 return ret; 695 696 if (!bdev->bd_disk->fops->ioctl) 697 return -ENOTTY; 698 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 699 } 700 701 #ifdef CONFIG_COMPAT 702 703 #define BLKBSZGET_32 _IOR(0x12, 112, int) 704 #define BLKBSZSET_32 _IOW(0x12, 113, int) 705 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 706 707 /* Most of the generic ioctls are handled in the normal fallback path. 708 This assumes the blkdev's low level compat_ioctl always returns 709 ENOIOCTLCMD for unknown ioctls. */ 710 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 711 { 712 int ret; 713 void __user *argp = compat_ptr(arg); 714 struct block_device *bdev = I_BDEV(file->f_mapping->host); 715 struct gendisk *disk = bdev->bd_disk; 716 blk_mode_t mode = file_to_blk_mode(file); 717 718 switch (cmd) { 719 /* These need separate implementations for the data structure */ 720 case HDIO_GETGEO: 721 return compat_hdio_getgeo(bdev, argp); 722 case BLKPG: 723 return compat_blkpg_ioctl(bdev, argp); 724 725 /* Compat mode returns 32-bit data instead of 'long' */ 726 case BLKRAGET: 727 case BLKFRAGET: 728 if (!argp) 729 return -EINVAL; 730 return compat_put_long(argp, 731 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 732 case BLKGETSIZE: 733 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 734 return -EFBIG; 735 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 736 737 /* The data is compatible, but the command number is different */ 738 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 739 return put_int(argp, bdev_logical_block_size(bdev)); 740 case BLKBSZSET_32: 741 return blkdev_bszset(file, mode, argp); 742 case BLKGETSIZE64_32: 743 return put_u64(argp, bdev_nr_bytes(bdev)); 744 745 /* Incompatible alignment on i386 */ 746 case BLKTRACESETUP32: 747 return blk_trace_ioctl(bdev, cmd, argp); 748 default: 749 break; 750 } 751 752 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 753 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 754 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 755 756 return ret; 757 } 758 #endif 759 760 struct blk_iou_cmd { 761 int res; 762 bool nowait; 763 }; 764 765 static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) 766 { 767 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 768 769 if (bic->res == -EAGAIN && bic->nowait) 770 io_uring_cmd_issue_blocking(cmd); 771 else 772 io_uring_cmd_done(cmd, bic->res, 0, issue_flags); 773 } 774 775 static void bio_cmd_bio_end_io(struct bio *bio) 776 { 777 struct io_uring_cmd *cmd = bio->bi_private; 778 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 779 780 if (unlikely(bio->bi_status) && !bic->res) 781 bic->res = blk_status_to_errno(bio->bi_status); 782 783 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 784 bio_put(bio); 785 } 786 787 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 788 struct block_device *bdev, 789 uint64_t start, uint64_t len, bool nowait) 790 { 791 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 792 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 793 sector_t sector = start >> SECTOR_SHIFT; 794 sector_t nr_sects = len >> SECTOR_SHIFT; 795 struct bio *prev = NULL, *bio; 796 int err; 797 798 if (!bdev_max_discard_sectors(bdev)) 799 return -EOPNOTSUPP; 800 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 801 return -EBADF; 802 if (bdev_read_only(bdev)) 803 return -EPERM; 804 err = blk_validate_byte_range(bdev, start, len); 805 if (err) 806 return err; 807 808 err = filemap_invalidate_pages(bdev->bd_mapping, start, 809 start + len - 1, nowait); 810 if (err) 811 return err; 812 813 while (true) { 814 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 815 if (!bio) 816 break; 817 if (nowait) { 818 /* 819 * Don't allow multi-bio non-blocking submissions as 820 * subsequent bios may fail but we won't get a direct 821 * indication of that. Normally, the caller should 822 * retry from a blocking context. 823 */ 824 if (unlikely(nr_sects)) { 825 bio_put(bio); 826 return -EAGAIN; 827 } 828 bio->bi_opf |= REQ_NOWAIT; 829 } 830 831 prev = bio_chain_and_submit(prev, bio); 832 } 833 if (unlikely(!prev)) 834 return -EAGAIN; 835 if (unlikely(nr_sects)) 836 bic->res = -EAGAIN; 837 838 prev->bi_private = cmd; 839 prev->bi_end_io = bio_cmd_bio_end_io; 840 submit_bio(prev); 841 return -EIOCBQUEUED; 842 } 843 844 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 845 { 846 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 847 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 848 const struct io_uring_sqe *sqe = cmd->sqe; 849 u32 cmd_op = cmd->cmd_op; 850 uint64_t start, len; 851 852 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 853 sqe->rw_flags || sqe->file_index)) 854 return -EINVAL; 855 856 bic->res = 0; 857 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 858 859 start = READ_ONCE(sqe->addr); 860 len = READ_ONCE(sqe->addr3); 861 862 switch (cmd_op) { 863 case BLOCK_URING_CMD_DISCARD: 864 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 865 } 866 return -EINVAL; 867 } 868