1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <uapi/linux/blkdev.h> 17 #include "blk.h" 18 19 static int blkpg_do_ioctl(struct block_device *bdev, 20 struct blkpg_partition __user *upart, int op) 21 { 22 struct gendisk *disk = bdev->bd_disk; 23 struct blkpg_partition p; 24 sector_t start, length, capacity, end; 25 26 if (!capable(CAP_SYS_ADMIN)) 27 return -EACCES; 28 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 29 return -EFAULT; 30 if (bdev_is_partition(bdev)) 31 return -EINVAL; 32 33 if (p.pno <= 0) 34 return -EINVAL; 35 36 if (op == BLKPG_DEL_PARTITION) 37 return bdev_del_partition(disk, p.pno); 38 39 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 40 return -EINVAL; 41 /* Check that the partition is aligned to the block size */ 42 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 43 return -EINVAL; 44 45 start = p.start >> SECTOR_SHIFT; 46 length = p.length >> SECTOR_SHIFT; 47 capacity = get_capacity(disk); 48 49 if (check_add_overflow(start, length, &end)) 50 return -EINVAL; 51 52 if (start >= capacity || end > capacity) 53 return -EINVAL; 54 55 switch (op) { 56 case BLKPG_ADD_PARTITION: 57 return bdev_add_partition(disk, p.pno, start, length); 58 case BLKPG_RESIZE_PARTITION: 59 return bdev_resize_partition(disk, p.pno, start, length); 60 default: 61 return -EINVAL; 62 } 63 } 64 65 static int blkpg_ioctl(struct block_device *bdev, 66 struct blkpg_ioctl_arg __user *arg) 67 { 68 struct blkpg_partition __user *udata; 69 int op; 70 71 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 72 return -EFAULT; 73 74 return blkpg_do_ioctl(bdev, udata, op); 75 } 76 77 #ifdef CONFIG_COMPAT 78 struct compat_blkpg_ioctl_arg { 79 compat_int_t op; 80 compat_int_t flags; 81 compat_int_t datalen; 82 compat_caddr_t data; 83 }; 84 85 static int compat_blkpg_ioctl(struct block_device *bdev, 86 struct compat_blkpg_ioctl_arg __user *arg) 87 { 88 compat_caddr_t udata; 89 int op; 90 91 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 92 return -EFAULT; 93 94 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 95 } 96 #endif 97 98 /* 99 * Check that [start, start + len) is a valid range from the block device's 100 * perspective, including verifying that it can be correctly translated into 101 * logical block addresses. 102 */ 103 static int blk_validate_byte_range(struct block_device *bdev, 104 uint64_t start, uint64_t len) 105 { 106 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 107 uint64_t end; 108 109 if ((start | len) & bs_mask) 110 return -EINVAL; 111 if (!len) 112 return -EINVAL; 113 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 114 return -EINVAL; 115 116 return 0; 117 } 118 119 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 120 unsigned long arg) 121 { 122 uint64_t range[2], start, len; 123 struct bio *prev = NULL, *bio; 124 sector_t sector, nr_sects; 125 struct blk_plug plug; 126 int err; 127 128 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 129 return -EFAULT; 130 start = range[0]; 131 len = range[1]; 132 133 if (!bdev_max_discard_sectors(bdev)) 134 return -EOPNOTSUPP; 135 136 if (!(mode & BLK_OPEN_WRITE)) 137 return -EBADF; 138 if (bdev_read_only(bdev)) 139 return -EPERM; 140 err = blk_validate_byte_range(bdev, start, len); 141 if (err) 142 return err; 143 144 filemap_invalidate_lock(bdev->bd_mapping); 145 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 146 if (err) 147 goto fail; 148 149 sector = start >> SECTOR_SHIFT; 150 nr_sects = len >> SECTOR_SHIFT; 151 152 blk_start_plug(&plug); 153 while (1) { 154 if (fatal_signal_pending(current)) { 155 if (prev) 156 bio_await_chain(prev); 157 err = -EINTR; 158 goto out_unplug; 159 } 160 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 161 GFP_KERNEL); 162 if (!bio) 163 break; 164 prev = bio_chain_and_submit(prev, bio); 165 } 166 if (prev) { 167 err = submit_bio_wait(prev); 168 if (err == -EOPNOTSUPP) 169 err = 0; 170 bio_put(prev); 171 } 172 out_unplug: 173 blk_finish_plug(&plug); 174 fail: 175 filemap_invalidate_unlock(bdev->bd_mapping); 176 return err; 177 } 178 179 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 180 void __user *argp) 181 { 182 uint64_t start, len, end; 183 uint64_t range[2]; 184 int err; 185 186 if (!(mode & BLK_OPEN_WRITE)) 187 return -EBADF; 188 if (!bdev_max_secure_erase_sectors(bdev)) 189 return -EOPNOTSUPP; 190 if (copy_from_user(range, argp, sizeof(range))) 191 return -EFAULT; 192 193 start = range[0]; 194 len = range[1]; 195 if ((start & 511) || (len & 511)) 196 return -EINVAL; 197 if (check_add_overflow(start, len, &end) || 198 end > bdev_nr_bytes(bdev)) 199 return -EINVAL; 200 201 filemap_invalidate_lock(bdev->bd_mapping); 202 err = truncate_bdev_range(bdev, mode, start, end - 1); 203 if (!err) 204 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 205 GFP_KERNEL); 206 filemap_invalidate_unlock(bdev->bd_mapping); 207 return err; 208 } 209 210 211 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 212 unsigned long arg) 213 { 214 uint64_t range[2]; 215 uint64_t start, end, len; 216 int err; 217 218 if (!(mode & BLK_OPEN_WRITE)) 219 return -EBADF; 220 221 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 222 return -EFAULT; 223 224 start = range[0]; 225 len = range[1]; 226 end = start + len - 1; 227 228 if (start & 511) 229 return -EINVAL; 230 if (len & 511) 231 return -EINVAL; 232 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 233 return -EINVAL; 234 if (end < start) 235 return -EINVAL; 236 237 /* Invalidate the page cache, including dirty pages */ 238 filemap_invalidate_lock(bdev->bd_mapping); 239 err = truncate_bdev_range(bdev, mode, start, end); 240 if (err) 241 goto fail; 242 243 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 244 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 245 246 fail: 247 filemap_invalidate_unlock(bdev->bd_mapping); 248 return err; 249 } 250 251 static int put_ushort(unsigned short __user *argp, unsigned short val) 252 { 253 return put_user(val, argp); 254 } 255 256 static int put_int(int __user *argp, int val) 257 { 258 return put_user(val, argp); 259 } 260 261 static int put_uint(unsigned int __user *argp, unsigned int val) 262 { 263 return put_user(val, argp); 264 } 265 266 static int put_long(long __user *argp, long val) 267 { 268 return put_user(val, argp); 269 } 270 271 static int put_ulong(unsigned long __user *argp, unsigned long val) 272 { 273 return put_user(val, argp); 274 } 275 276 static int put_u64(u64 __user *argp, u64 val) 277 { 278 return put_user(val, argp); 279 } 280 281 #ifdef CONFIG_COMPAT 282 static int compat_put_long(compat_long_t __user *argp, long val) 283 { 284 return put_user(val, argp); 285 } 286 287 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 288 { 289 return put_user(val, argp); 290 } 291 #endif 292 293 #ifdef CONFIG_COMPAT 294 /* 295 * This is the equivalent of compat_ptr_ioctl(), to be used by block 296 * drivers that implement only commands that are completely compatible 297 * between 32-bit and 64-bit user space 298 */ 299 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 300 unsigned cmd, unsigned long arg) 301 { 302 struct gendisk *disk = bdev->bd_disk; 303 304 if (disk->fops->ioctl) 305 return disk->fops->ioctl(bdev, mode, cmd, 306 (unsigned long)compat_ptr(arg)); 307 308 return -ENOIOCTLCMD; 309 } 310 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 311 #endif 312 313 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) 314 { 315 /* no sense to make reservations for partitions */ 316 if (bdev_is_partition(bdev)) 317 return false; 318 319 if (capable(CAP_SYS_ADMIN)) 320 return true; 321 /* 322 * Only allow unprivileged reservations if the file descriptor is open 323 * for writing. 324 */ 325 return mode & BLK_OPEN_WRITE; 326 } 327 328 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 329 struct pr_registration __user *arg) 330 { 331 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 332 struct pr_registration reg; 333 334 if (!blkdev_pr_allowed(bdev, mode)) 335 return -EPERM; 336 if (!ops || !ops->pr_register) 337 return -EOPNOTSUPP; 338 if (copy_from_user(®, arg, sizeof(reg))) 339 return -EFAULT; 340 341 if (reg.flags & ~PR_FL_IGNORE_KEY) 342 return -EOPNOTSUPP; 343 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 344 } 345 346 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 347 struct pr_reservation __user *arg) 348 { 349 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 350 struct pr_reservation rsv; 351 352 if (!blkdev_pr_allowed(bdev, mode)) 353 return -EPERM; 354 if (!ops || !ops->pr_reserve) 355 return -EOPNOTSUPP; 356 if (copy_from_user(&rsv, arg, sizeof(rsv))) 357 return -EFAULT; 358 359 if (rsv.flags & ~PR_FL_IGNORE_KEY) 360 return -EOPNOTSUPP; 361 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 362 } 363 364 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 365 struct pr_reservation __user *arg) 366 { 367 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 368 struct pr_reservation rsv; 369 370 if (!blkdev_pr_allowed(bdev, mode)) 371 return -EPERM; 372 if (!ops || !ops->pr_release) 373 return -EOPNOTSUPP; 374 if (copy_from_user(&rsv, arg, sizeof(rsv))) 375 return -EFAULT; 376 377 if (rsv.flags) 378 return -EOPNOTSUPP; 379 return ops->pr_release(bdev, rsv.key, rsv.type); 380 } 381 382 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 383 struct pr_preempt __user *arg, bool abort) 384 { 385 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 386 struct pr_preempt p; 387 388 if (!blkdev_pr_allowed(bdev, mode)) 389 return -EPERM; 390 if (!ops || !ops->pr_preempt) 391 return -EOPNOTSUPP; 392 if (copy_from_user(&p, arg, sizeof(p))) 393 return -EFAULT; 394 395 if (p.flags) 396 return -EOPNOTSUPP; 397 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 398 } 399 400 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 401 struct pr_clear __user *arg) 402 { 403 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 404 struct pr_clear c; 405 406 if (!blkdev_pr_allowed(bdev, mode)) 407 return -EPERM; 408 if (!ops || !ops->pr_clear) 409 return -EOPNOTSUPP; 410 if (copy_from_user(&c, arg, sizeof(c))) 411 return -EFAULT; 412 413 if (c.flags) 414 return -EOPNOTSUPP; 415 return ops->pr_clear(bdev, c.key); 416 } 417 418 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 419 unsigned long arg) 420 { 421 if (!capable(CAP_SYS_ADMIN)) 422 return -EACCES; 423 424 mutex_lock(&bdev->bd_holder_lock); 425 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 426 bdev->bd_holder_ops->sync(bdev); 427 else { 428 mutex_unlock(&bdev->bd_holder_lock); 429 sync_blockdev(bdev); 430 } 431 432 invalidate_bdev(bdev); 433 return 0; 434 } 435 436 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 437 unsigned long arg) 438 { 439 int ret, n; 440 441 if (!capable(CAP_SYS_ADMIN)) 442 return -EACCES; 443 444 if (get_user(n, (int __user *)arg)) 445 return -EFAULT; 446 if (bdev->bd_disk->fops->set_read_only) { 447 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 448 if (ret) 449 return ret; 450 } 451 if (n) 452 bdev_set_flag(bdev, BD_READ_ONLY); 453 else 454 bdev_clear_flag(bdev, BD_READ_ONLY); 455 return 0; 456 } 457 458 static int blkdev_getgeo(struct block_device *bdev, 459 struct hd_geometry __user *argp) 460 { 461 struct gendisk *disk = bdev->bd_disk; 462 struct hd_geometry geo; 463 int ret; 464 465 if (!argp) 466 return -EINVAL; 467 if (!disk->fops->getgeo) 468 return -ENOTTY; 469 470 /* 471 * We need to set the startsect first, the driver may 472 * want to override it. 473 */ 474 memset(&geo, 0, sizeof(geo)); 475 geo.start = get_start_sect(bdev); 476 ret = disk->fops->getgeo(bdev, &geo); 477 if (ret) 478 return ret; 479 if (copy_to_user(argp, &geo, sizeof(geo))) 480 return -EFAULT; 481 return 0; 482 } 483 484 #ifdef CONFIG_COMPAT 485 struct compat_hd_geometry { 486 unsigned char heads; 487 unsigned char sectors; 488 unsigned short cylinders; 489 u32 start; 490 }; 491 492 static int compat_hdio_getgeo(struct block_device *bdev, 493 struct compat_hd_geometry __user *ugeo) 494 { 495 struct gendisk *disk = bdev->bd_disk; 496 struct hd_geometry geo; 497 int ret; 498 499 if (!ugeo) 500 return -EINVAL; 501 if (!disk->fops->getgeo) 502 return -ENOTTY; 503 504 memset(&geo, 0, sizeof(geo)); 505 /* 506 * We need to set the startsect first, the driver may 507 * want to override it. 508 */ 509 geo.start = get_start_sect(bdev); 510 ret = disk->fops->getgeo(bdev, &geo); 511 if (ret) 512 return ret; 513 514 ret = copy_to_user(ugeo, &geo, 4); 515 ret |= put_user(geo.start, &ugeo->start); 516 if (ret) 517 ret = -EFAULT; 518 519 return ret; 520 } 521 #endif 522 523 /* set the logical block size */ 524 static int blkdev_bszset(struct file *file, blk_mode_t mode, 525 int __user *argp) 526 { 527 // this one might be file_inode(file)->i_rdev - a rare valid 528 // use of file_inode() for those. 529 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 530 struct file *excl_file; 531 int ret, n; 532 533 if (!capable(CAP_SYS_ADMIN)) 534 return -EACCES; 535 if (!argp) 536 return -EINVAL; 537 if (get_user(n, argp)) 538 return -EFAULT; 539 540 if (mode & BLK_OPEN_EXCL) 541 return set_blocksize(file, n); 542 543 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 544 if (IS_ERR(excl_file)) 545 return -EBUSY; 546 ret = set_blocksize(excl_file, n); 547 fput(excl_file); 548 return ret; 549 } 550 551 /* 552 * Common commands that are handled the same way on native and compat 553 * user space. Note the separate arg/argp parameters that are needed 554 * to deal with the compat_ptr() conversion. 555 */ 556 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 557 unsigned int cmd, unsigned long arg, 558 void __user *argp) 559 { 560 unsigned int max_sectors; 561 562 switch (cmd) { 563 case BLKFLSBUF: 564 return blkdev_flushbuf(bdev, cmd, arg); 565 case BLKROSET: 566 return blkdev_roset(bdev, cmd, arg); 567 case BLKDISCARD: 568 return blk_ioctl_discard(bdev, mode, arg); 569 case BLKSECDISCARD: 570 return blk_ioctl_secure_erase(bdev, mode, argp); 571 case BLKZEROOUT: 572 return blk_ioctl_zeroout(bdev, mode, arg); 573 case BLKGETDISKSEQ: 574 return put_u64(argp, bdev->bd_disk->diskseq); 575 case BLKREPORTZONE: 576 return blkdev_report_zones_ioctl(bdev, cmd, arg); 577 case BLKRESETZONE: 578 case BLKOPENZONE: 579 case BLKCLOSEZONE: 580 case BLKFINISHZONE: 581 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 582 case BLKGETZONESZ: 583 return put_uint(argp, bdev_zone_sectors(bdev)); 584 case BLKGETNRZONES: 585 return put_uint(argp, bdev_nr_zones(bdev)); 586 case BLKROGET: 587 return put_int(argp, bdev_read_only(bdev) != 0); 588 case BLKSSZGET: /* get block device logical block size */ 589 return put_int(argp, bdev_logical_block_size(bdev)); 590 case BLKPBSZGET: /* get block device physical block size */ 591 return put_uint(argp, bdev_physical_block_size(bdev)); 592 case BLKIOMIN: 593 return put_uint(argp, bdev_io_min(bdev)); 594 case BLKIOOPT: 595 return put_uint(argp, bdev_io_opt(bdev)); 596 case BLKALIGNOFF: 597 return put_int(argp, bdev_alignment_offset(bdev)); 598 case BLKDISCARDZEROES: 599 return put_uint(argp, 0); 600 case BLKSECTGET: 601 max_sectors = min_t(unsigned int, USHRT_MAX, 602 queue_max_sectors(bdev_get_queue(bdev))); 603 return put_ushort(argp, max_sectors); 604 case BLKROTATIONAL: 605 return put_ushort(argp, !bdev_nonrot(bdev)); 606 case BLKRASET: 607 case BLKFRASET: 608 if(!capable(CAP_SYS_ADMIN)) 609 return -EACCES; 610 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 611 return 0; 612 case BLKRRPART: 613 if (!capable(CAP_SYS_ADMIN)) 614 return -EACCES; 615 if (bdev_is_partition(bdev)) 616 return -EINVAL; 617 return disk_scan_partitions(bdev->bd_disk, 618 mode | BLK_OPEN_STRICT_SCAN); 619 case BLKTRACESTART: 620 case BLKTRACESTOP: 621 case BLKTRACETEARDOWN: 622 return blk_trace_ioctl(bdev, cmd, argp); 623 case IOC_PR_REGISTER: 624 return blkdev_pr_register(bdev, mode, argp); 625 case IOC_PR_RESERVE: 626 return blkdev_pr_reserve(bdev, mode, argp); 627 case IOC_PR_RELEASE: 628 return blkdev_pr_release(bdev, mode, argp); 629 case IOC_PR_PREEMPT: 630 return blkdev_pr_preempt(bdev, mode, argp, false); 631 case IOC_PR_PREEMPT_ABORT: 632 return blkdev_pr_preempt(bdev, mode, argp, true); 633 case IOC_PR_CLEAR: 634 return blkdev_pr_clear(bdev, mode, argp); 635 default: 636 return -ENOIOCTLCMD; 637 } 638 } 639 640 /* 641 * Always keep this in sync with compat_blkdev_ioctl() 642 * to handle all incompatible commands in both functions. 643 * 644 * New commands must be compatible and go into blkdev_common_ioctl 645 */ 646 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 647 { 648 struct block_device *bdev = I_BDEV(file->f_mapping->host); 649 void __user *argp = (void __user *)arg; 650 blk_mode_t mode = file_to_blk_mode(file); 651 int ret; 652 653 switch (cmd) { 654 /* These need separate implementations for the data structure */ 655 case HDIO_GETGEO: 656 return blkdev_getgeo(bdev, argp); 657 case BLKPG: 658 return blkpg_ioctl(bdev, argp); 659 660 /* Compat mode returns 32-bit data instead of 'long' */ 661 case BLKRAGET: 662 case BLKFRAGET: 663 if (!argp) 664 return -EINVAL; 665 return put_long(argp, 666 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 667 case BLKGETSIZE: 668 if (bdev_nr_sectors(bdev) > ~0UL) 669 return -EFBIG; 670 return put_ulong(argp, bdev_nr_sectors(bdev)); 671 672 /* The data is compatible, but the command number is different */ 673 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 674 return put_int(argp, block_size(bdev)); 675 case BLKBSZSET: 676 return blkdev_bszset(file, mode, argp); 677 case BLKGETSIZE64: 678 return put_u64(argp, bdev_nr_bytes(bdev)); 679 680 /* Incompatible alignment on i386 */ 681 case BLKTRACESETUP: 682 return blk_trace_ioctl(bdev, cmd, argp); 683 default: 684 break; 685 } 686 687 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 688 if (ret != -ENOIOCTLCMD) 689 return ret; 690 691 if (!bdev->bd_disk->fops->ioctl) 692 return -ENOTTY; 693 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 694 } 695 696 #ifdef CONFIG_COMPAT 697 698 #define BLKBSZGET_32 _IOR(0x12, 112, int) 699 #define BLKBSZSET_32 _IOW(0x12, 113, int) 700 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 701 702 /* Most of the generic ioctls are handled in the normal fallback path. 703 This assumes the blkdev's low level compat_ioctl always returns 704 ENOIOCTLCMD for unknown ioctls. */ 705 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 706 { 707 int ret; 708 void __user *argp = compat_ptr(arg); 709 struct block_device *bdev = I_BDEV(file->f_mapping->host); 710 struct gendisk *disk = bdev->bd_disk; 711 blk_mode_t mode = file_to_blk_mode(file); 712 713 switch (cmd) { 714 /* These need separate implementations for the data structure */ 715 case HDIO_GETGEO: 716 return compat_hdio_getgeo(bdev, argp); 717 case BLKPG: 718 return compat_blkpg_ioctl(bdev, argp); 719 720 /* Compat mode returns 32-bit data instead of 'long' */ 721 case BLKRAGET: 722 case BLKFRAGET: 723 if (!argp) 724 return -EINVAL; 725 return compat_put_long(argp, 726 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 727 case BLKGETSIZE: 728 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 729 return -EFBIG; 730 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 731 732 /* The data is compatible, but the command number is different */ 733 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 734 return put_int(argp, bdev_logical_block_size(bdev)); 735 case BLKBSZSET_32: 736 return blkdev_bszset(file, mode, argp); 737 case BLKGETSIZE64_32: 738 return put_u64(argp, bdev_nr_bytes(bdev)); 739 740 /* Incompatible alignment on i386 */ 741 case BLKTRACESETUP32: 742 return blk_trace_ioctl(bdev, cmd, argp); 743 default: 744 break; 745 } 746 747 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 748 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 749 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 750 751 return ret; 752 } 753 #endif 754 755 struct blk_iou_cmd { 756 int res; 757 bool nowait; 758 }; 759 760 static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) 761 { 762 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 763 764 if (bic->res == -EAGAIN && bic->nowait) 765 io_uring_cmd_issue_blocking(cmd); 766 else 767 io_uring_cmd_done(cmd, bic->res, 0, issue_flags); 768 } 769 770 static void bio_cmd_bio_end_io(struct bio *bio) 771 { 772 struct io_uring_cmd *cmd = bio->bi_private; 773 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 774 775 if (unlikely(bio->bi_status) && !bic->res) 776 bic->res = blk_status_to_errno(bio->bi_status); 777 778 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 779 bio_put(bio); 780 } 781 782 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 783 struct block_device *bdev, 784 uint64_t start, uint64_t len, bool nowait) 785 { 786 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 787 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 788 sector_t sector = start >> SECTOR_SHIFT; 789 sector_t nr_sects = len >> SECTOR_SHIFT; 790 struct bio *prev = NULL, *bio; 791 int err; 792 793 if (!bdev_max_discard_sectors(bdev)) 794 return -EOPNOTSUPP; 795 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 796 return -EBADF; 797 if (bdev_read_only(bdev)) 798 return -EPERM; 799 err = blk_validate_byte_range(bdev, start, len); 800 if (err) 801 return err; 802 803 err = filemap_invalidate_pages(bdev->bd_mapping, start, 804 start + len - 1, nowait); 805 if (err) 806 return err; 807 808 while (true) { 809 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 810 if (!bio) 811 break; 812 if (nowait) { 813 /* 814 * Don't allow multi-bio non-blocking submissions as 815 * subsequent bios may fail but we won't get a direct 816 * indication of that. Normally, the caller should 817 * retry from a blocking context. 818 */ 819 if (unlikely(nr_sects)) { 820 bio_put(bio); 821 return -EAGAIN; 822 } 823 bio->bi_opf |= REQ_NOWAIT; 824 } 825 826 prev = bio_chain_and_submit(prev, bio); 827 } 828 if (unlikely(!prev)) 829 return -EAGAIN; 830 if (unlikely(nr_sects)) 831 bic->res = -EAGAIN; 832 833 prev->bi_private = cmd; 834 prev->bi_end_io = bio_cmd_bio_end_io; 835 submit_bio(prev); 836 return -EIOCBQUEUED; 837 } 838 839 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 840 { 841 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 842 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 843 const struct io_uring_sqe *sqe = cmd->sqe; 844 u32 cmd_op = cmd->cmd_op; 845 uint64_t start, len; 846 847 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 848 sqe->rw_flags || sqe->file_index)) 849 return -EINVAL; 850 851 bic->res = 0; 852 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 853 854 start = READ_ONCE(sqe->addr); 855 len = READ_ONCE(sqe->addr3); 856 857 switch (cmd_op) { 858 case BLOCK_URING_CMD_DISCARD: 859 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 860 } 861 return -EINVAL; 862 } 863