1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <linux/blk-integrity.h> 17 #include <uapi/linux/blkdev.h> 18 #include "blk.h" 19 #include "blk-crypto-internal.h" 20 21 static int blkpg_do_ioctl(struct block_device *bdev, 22 struct blkpg_partition __user *upart, int op) 23 { 24 struct gendisk *disk = bdev->bd_disk; 25 struct blkpg_partition p; 26 sector_t start, length, capacity, end; 27 28 if (!capable(CAP_SYS_ADMIN)) 29 return -EACCES; 30 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 31 return -EFAULT; 32 if (bdev_is_partition(bdev)) 33 return -EINVAL; 34 35 if (p.pno <= 0) 36 return -EINVAL; 37 38 if (op == BLKPG_DEL_PARTITION) 39 return bdev_del_partition(disk, p.pno); 40 41 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 42 return -EINVAL; 43 /* Check that the partition is aligned to the block size */ 44 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 45 return -EINVAL; 46 47 start = p.start >> SECTOR_SHIFT; 48 length = p.length >> SECTOR_SHIFT; 49 capacity = get_capacity(disk); 50 51 if (check_add_overflow(start, length, &end)) 52 return -EINVAL; 53 54 if (start >= capacity || end > capacity) 55 return -EINVAL; 56 57 switch (op) { 58 case BLKPG_ADD_PARTITION: 59 return bdev_add_partition(disk, p.pno, start, length); 60 case BLKPG_RESIZE_PARTITION: 61 return bdev_resize_partition(disk, p.pno, start, length); 62 default: 63 return -EINVAL; 64 } 65 } 66 67 static int blkpg_ioctl(struct block_device *bdev, 68 struct blkpg_ioctl_arg __user *arg) 69 { 70 struct blkpg_partition __user *udata; 71 int op; 72 73 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 74 return -EFAULT; 75 76 return blkpg_do_ioctl(bdev, udata, op); 77 } 78 79 #ifdef CONFIG_COMPAT 80 struct compat_blkpg_ioctl_arg { 81 compat_int_t op; 82 compat_int_t flags; 83 compat_int_t datalen; 84 compat_caddr_t data; 85 }; 86 87 static int compat_blkpg_ioctl(struct block_device *bdev, 88 struct compat_blkpg_ioctl_arg __user *arg) 89 { 90 compat_caddr_t udata; 91 int op; 92 93 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 94 return -EFAULT; 95 96 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 97 } 98 #endif 99 100 /* 101 * Check that [start, start + len) is a valid range from the block device's 102 * perspective, including verifying that it can be correctly translated into 103 * logical block addresses. 104 */ 105 static int blk_validate_byte_range(struct block_device *bdev, 106 uint64_t start, uint64_t len) 107 { 108 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 109 uint64_t end; 110 111 if ((start | len) & bs_mask) 112 return -EINVAL; 113 if (!len) 114 return -EINVAL; 115 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 116 return -EINVAL; 117 118 return 0; 119 } 120 121 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 122 unsigned long arg) 123 { 124 uint64_t range[2], start, len; 125 struct bio *prev = NULL, *bio; 126 sector_t sector, nr_sects; 127 struct blk_plug plug; 128 int err; 129 130 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 131 return -EFAULT; 132 start = range[0]; 133 len = range[1]; 134 135 if (!bdev_max_discard_sectors(bdev)) 136 return -EOPNOTSUPP; 137 138 if (!(mode & BLK_OPEN_WRITE)) 139 return -EBADF; 140 if (bdev_read_only(bdev)) 141 return -EPERM; 142 err = blk_validate_byte_range(bdev, start, len); 143 if (err) 144 return err; 145 146 inode_lock(bdev->bd_mapping->host); 147 filemap_invalidate_lock(bdev->bd_mapping); 148 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 149 if (err) 150 goto fail; 151 152 sector = start >> SECTOR_SHIFT; 153 nr_sects = len >> SECTOR_SHIFT; 154 155 blk_start_plug(&plug); 156 while (1) { 157 if (fatal_signal_pending(current)) { 158 if (prev) 159 bio_await_chain(prev); 160 err = -EINTR; 161 goto out_unplug; 162 } 163 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 164 GFP_KERNEL); 165 if (!bio) 166 break; 167 prev = bio_chain_and_submit(prev, bio); 168 } 169 if (prev) { 170 err = submit_bio_wait(prev); 171 if (err == -EOPNOTSUPP) 172 err = 0; 173 bio_put(prev); 174 } 175 out_unplug: 176 blk_finish_plug(&plug); 177 fail: 178 filemap_invalidate_unlock(bdev->bd_mapping); 179 inode_unlock(bdev->bd_mapping->host); 180 return err; 181 } 182 183 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 184 void __user *argp) 185 { 186 uint64_t start, len, end; 187 uint64_t range[2]; 188 int err; 189 190 if (!(mode & BLK_OPEN_WRITE)) 191 return -EBADF; 192 if (!bdev_max_secure_erase_sectors(bdev)) 193 return -EOPNOTSUPP; 194 if (copy_from_user(range, argp, sizeof(range))) 195 return -EFAULT; 196 197 start = range[0]; 198 len = range[1]; 199 if ((start & 511) || (len & 511)) 200 return -EINVAL; 201 if (check_add_overflow(start, len, &end) || 202 end > bdev_nr_bytes(bdev)) 203 return -EINVAL; 204 205 inode_lock(bdev->bd_mapping->host); 206 filemap_invalidate_lock(bdev->bd_mapping); 207 err = truncate_bdev_range(bdev, mode, start, end - 1); 208 if (!err) 209 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 210 GFP_KERNEL); 211 filemap_invalidate_unlock(bdev->bd_mapping); 212 inode_unlock(bdev->bd_mapping->host); 213 return err; 214 } 215 216 217 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 218 unsigned long arg) 219 { 220 uint64_t range[2]; 221 uint64_t start, end, len; 222 int err; 223 224 if (!(mode & BLK_OPEN_WRITE)) 225 return -EBADF; 226 227 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 228 return -EFAULT; 229 230 start = range[0]; 231 len = range[1]; 232 end = start + len - 1; 233 234 if (start & 511) 235 return -EINVAL; 236 if (len & 511) 237 return -EINVAL; 238 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 239 return -EINVAL; 240 if (end < start) 241 return -EINVAL; 242 243 /* Invalidate the page cache, including dirty pages */ 244 inode_lock(bdev->bd_mapping->host); 245 filemap_invalidate_lock(bdev->bd_mapping); 246 err = truncate_bdev_range(bdev, mode, start, end); 247 if (err) 248 goto fail; 249 250 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 251 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 252 253 fail: 254 filemap_invalidate_unlock(bdev->bd_mapping); 255 inode_unlock(bdev->bd_mapping->host); 256 return err; 257 } 258 259 static int put_ushort(unsigned short __user *argp, unsigned short val) 260 { 261 return put_user(val, argp); 262 } 263 264 static int put_int(int __user *argp, int val) 265 { 266 return put_user(val, argp); 267 } 268 269 static int put_uint(unsigned int __user *argp, unsigned int val) 270 { 271 return put_user(val, argp); 272 } 273 274 static int put_long(long __user *argp, long val) 275 { 276 return put_user(val, argp); 277 } 278 279 static int put_ulong(unsigned long __user *argp, unsigned long val) 280 { 281 return put_user(val, argp); 282 } 283 284 static int put_u64(u64 __user *argp, u64 val) 285 { 286 return put_user(val, argp); 287 } 288 289 #ifdef CONFIG_COMPAT 290 static int compat_put_long(compat_long_t __user *argp, long val) 291 { 292 return put_user(val, argp); 293 } 294 295 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 296 { 297 return put_user(val, argp); 298 } 299 #endif 300 301 #ifdef CONFIG_COMPAT 302 /* 303 * This is the equivalent of compat_ptr_ioctl(), to be used by block 304 * drivers that implement only commands that are completely compatible 305 * between 32-bit and 64-bit user space 306 */ 307 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 308 unsigned cmd, unsigned long arg) 309 { 310 struct gendisk *disk = bdev->bd_disk; 311 312 if (disk->fops->ioctl) 313 return disk->fops->ioctl(bdev, mode, cmd, 314 (unsigned long)compat_ptr(arg)); 315 316 return -ENOIOCTLCMD; 317 } 318 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 319 #endif 320 321 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) 322 { 323 /* no sense to make reservations for partitions */ 324 if (bdev_is_partition(bdev)) 325 return false; 326 327 if (capable(CAP_SYS_ADMIN)) 328 return true; 329 /* 330 * Only allow unprivileged reservations if the file descriptor is open 331 * for writing. 332 */ 333 return mode & BLK_OPEN_WRITE; 334 } 335 336 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 337 struct pr_registration __user *arg) 338 { 339 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 340 struct pr_registration reg; 341 342 if (!blkdev_pr_allowed(bdev, mode)) 343 return -EPERM; 344 if (!ops || !ops->pr_register) 345 return -EOPNOTSUPP; 346 if (copy_from_user(®, arg, sizeof(reg))) 347 return -EFAULT; 348 349 if (reg.flags & ~PR_FL_IGNORE_KEY) 350 return -EOPNOTSUPP; 351 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 352 } 353 354 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 355 struct pr_reservation __user *arg) 356 { 357 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 358 struct pr_reservation rsv; 359 360 if (!blkdev_pr_allowed(bdev, mode)) 361 return -EPERM; 362 if (!ops || !ops->pr_reserve) 363 return -EOPNOTSUPP; 364 if (copy_from_user(&rsv, arg, sizeof(rsv))) 365 return -EFAULT; 366 367 if (rsv.flags & ~PR_FL_IGNORE_KEY) 368 return -EOPNOTSUPP; 369 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 370 } 371 372 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 373 struct pr_reservation __user *arg) 374 { 375 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 376 struct pr_reservation rsv; 377 378 if (!blkdev_pr_allowed(bdev, mode)) 379 return -EPERM; 380 if (!ops || !ops->pr_release) 381 return -EOPNOTSUPP; 382 if (copy_from_user(&rsv, arg, sizeof(rsv))) 383 return -EFAULT; 384 385 if (rsv.flags) 386 return -EOPNOTSUPP; 387 return ops->pr_release(bdev, rsv.key, rsv.type); 388 } 389 390 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 391 struct pr_preempt __user *arg, bool abort) 392 { 393 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 394 struct pr_preempt p; 395 396 if (!blkdev_pr_allowed(bdev, mode)) 397 return -EPERM; 398 if (!ops || !ops->pr_preempt) 399 return -EOPNOTSUPP; 400 if (copy_from_user(&p, arg, sizeof(p))) 401 return -EFAULT; 402 403 if (p.flags) 404 return -EOPNOTSUPP; 405 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 406 } 407 408 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 409 struct pr_clear __user *arg) 410 { 411 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 412 struct pr_clear c; 413 414 if (!blkdev_pr_allowed(bdev, mode)) 415 return -EPERM; 416 if (!ops || !ops->pr_clear) 417 return -EOPNOTSUPP; 418 if (copy_from_user(&c, arg, sizeof(c))) 419 return -EFAULT; 420 421 if (c.flags) 422 return -EOPNOTSUPP; 423 return ops->pr_clear(bdev, c.key); 424 } 425 426 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 427 unsigned long arg) 428 { 429 if (!capable(CAP_SYS_ADMIN)) 430 return -EACCES; 431 432 mutex_lock(&bdev->bd_holder_lock); 433 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 434 bdev->bd_holder_ops->sync(bdev); 435 else { 436 mutex_unlock(&bdev->bd_holder_lock); 437 sync_blockdev(bdev); 438 } 439 440 invalidate_bdev(bdev); 441 return 0; 442 } 443 444 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 445 unsigned long arg) 446 { 447 int ret, n; 448 449 if (!capable(CAP_SYS_ADMIN)) 450 return -EACCES; 451 452 if (get_user(n, (int __user *)arg)) 453 return -EFAULT; 454 if (bdev->bd_disk->fops->set_read_only) { 455 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 456 if (ret) 457 return ret; 458 } 459 if (n) 460 bdev_set_flag(bdev, BD_READ_ONLY); 461 else 462 bdev_clear_flag(bdev, BD_READ_ONLY); 463 return 0; 464 } 465 466 static int blkdev_getgeo(struct block_device *bdev, 467 struct hd_geometry __user *argp) 468 { 469 struct gendisk *disk = bdev->bd_disk; 470 struct hd_geometry geo; 471 int ret; 472 473 if (!argp) 474 return -EINVAL; 475 if (!disk->fops->getgeo) 476 return -ENOTTY; 477 478 /* 479 * We need to set the startsect first, the driver may 480 * want to override it. 481 */ 482 memset(&geo, 0, sizeof(geo)); 483 geo.start = get_start_sect(bdev); 484 ret = disk->fops->getgeo(bdev, &geo); 485 if (ret) 486 return ret; 487 if (copy_to_user(argp, &geo, sizeof(geo))) 488 return -EFAULT; 489 return 0; 490 } 491 492 #ifdef CONFIG_COMPAT 493 struct compat_hd_geometry { 494 unsigned char heads; 495 unsigned char sectors; 496 unsigned short cylinders; 497 u32 start; 498 }; 499 500 static int compat_hdio_getgeo(struct block_device *bdev, 501 struct compat_hd_geometry __user *ugeo) 502 { 503 struct gendisk *disk = bdev->bd_disk; 504 struct hd_geometry geo; 505 int ret; 506 507 if (!ugeo) 508 return -EINVAL; 509 if (!disk->fops->getgeo) 510 return -ENOTTY; 511 512 memset(&geo, 0, sizeof(geo)); 513 /* 514 * We need to set the startsect first, the driver may 515 * want to override it. 516 */ 517 geo.start = get_start_sect(bdev); 518 ret = disk->fops->getgeo(bdev, &geo); 519 if (ret) 520 return ret; 521 522 ret = copy_to_user(ugeo, &geo, 4); 523 ret |= put_user(geo.start, &ugeo->start); 524 if (ret) 525 ret = -EFAULT; 526 527 return ret; 528 } 529 #endif 530 531 /* set the logical block size */ 532 static int blkdev_bszset(struct file *file, blk_mode_t mode, 533 int __user *argp) 534 { 535 // this one might be file_inode(file)->i_rdev - a rare valid 536 // use of file_inode() for those. 537 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 538 struct file *excl_file; 539 int ret, n; 540 541 if (!capable(CAP_SYS_ADMIN)) 542 return -EACCES; 543 if (!argp) 544 return -EINVAL; 545 if (get_user(n, argp)) 546 return -EFAULT; 547 548 if (mode & BLK_OPEN_EXCL) 549 return set_blocksize(file, n); 550 551 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 552 if (IS_ERR(excl_file)) 553 return -EBUSY; 554 ret = set_blocksize(excl_file, n); 555 fput(excl_file); 556 return ret; 557 } 558 559 /* 560 * Common commands that are handled the same way on native and compat 561 * user space. Note the separate arg/argp parameters that are needed 562 * to deal with the compat_ptr() conversion. 563 */ 564 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 565 unsigned int cmd, unsigned long arg, 566 void __user *argp) 567 { 568 unsigned int max_sectors; 569 570 switch (cmd) { 571 case BLKFLSBUF: 572 return blkdev_flushbuf(bdev, cmd, arg); 573 case BLKROSET: 574 return blkdev_roset(bdev, cmd, arg); 575 case BLKDISCARD: 576 return blk_ioctl_discard(bdev, mode, arg); 577 case BLKSECDISCARD: 578 return blk_ioctl_secure_erase(bdev, mode, argp); 579 case BLKZEROOUT: 580 return blk_ioctl_zeroout(bdev, mode, arg); 581 case BLKGETDISKSEQ: 582 return put_u64(argp, bdev->bd_disk->diskseq); 583 case BLKREPORTZONE: 584 return blkdev_report_zones_ioctl(bdev, cmd, arg); 585 case BLKRESETZONE: 586 case BLKOPENZONE: 587 case BLKCLOSEZONE: 588 case BLKFINISHZONE: 589 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 590 case BLKGETZONESZ: 591 return put_uint(argp, bdev_zone_sectors(bdev)); 592 case BLKGETNRZONES: 593 return put_uint(argp, bdev_nr_zones(bdev)); 594 case BLKROGET: 595 return put_int(argp, bdev_read_only(bdev) != 0); 596 case BLKSSZGET: /* get block device logical block size */ 597 return put_int(argp, bdev_logical_block_size(bdev)); 598 case BLKPBSZGET: /* get block device physical block size */ 599 return put_uint(argp, bdev_physical_block_size(bdev)); 600 case BLKIOMIN: 601 return put_uint(argp, bdev_io_min(bdev)); 602 case BLKIOOPT: 603 return put_uint(argp, bdev_io_opt(bdev)); 604 case BLKALIGNOFF: 605 return put_int(argp, bdev_alignment_offset(bdev)); 606 case BLKDISCARDZEROES: 607 return put_uint(argp, 0); 608 case BLKSECTGET: 609 max_sectors = min_t(unsigned int, USHRT_MAX, 610 queue_max_sectors(bdev_get_queue(bdev))); 611 return put_ushort(argp, max_sectors); 612 case BLKROTATIONAL: 613 return put_ushort(argp, !bdev_nonrot(bdev)); 614 case BLKRASET: 615 case BLKFRASET: 616 if(!capable(CAP_SYS_ADMIN)) 617 return -EACCES; 618 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 619 return 0; 620 case BLKRRPART: 621 if (!capable(CAP_SYS_ADMIN)) 622 return -EACCES; 623 if (bdev_is_partition(bdev)) 624 return -EINVAL; 625 return disk_scan_partitions(bdev->bd_disk, 626 mode | BLK_OPEN_STRICT_SCAN); 627 case BLKTRACESTART: 628 case BLKTRACESTOP: 629 case BLKTRACETEARDOWN: 630 return blk_trace_ioctl(bdev, cmd, argp); 631 case BLKCRYPTOIMPORTKEY: 632 case BLKCRYPTOGENERATEKEY: 633 case BLKCRYPTOPREPAREKEY: 634 return blk_crypto_ioctl(bdev, cmd, argp); 635 case IOC_PR_REGISTER: 636 return blkdev_pr_register(bdev, mode, argp); 637 case IOC_PR_RESERVE: 638 return blkdev_pr_reserve(bdev, mode, argp); 639 case IOC_PR_RELEASE: 640 return blkdev_pr_release(bdev, mode, argp); 641 case IOC_PR_PREEMPT: 642 return blkdev_pr_preempt(bdev, mode, argp, false); 643 case IOC_PR_PREEMPT_ABORT: 644 return blkdev_pr_preempt(bdev, mode, argp, true); 645 case IOC_PR_CLEAR: 646 return blkdev_pr_clear(bdev, mode, argp); 647 default: 648 return blk_get_meta_cap(bdev, cmd, argp); 649 } 650 } 651 652 /* 653 * Always keep this in sync with compat_blkdev_ioctl() 654 * to handle all incompatible commands in both functions. 655 * 656 * New commands must be compatible and go into blkdev_common_ioctl 657 */ 658 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 659 { 660 struct block_device *bdev = I_BDEV(file->f_mapping->host); 661 void __user *argp = (void __user *)arg; 662 blk_mode_t mode = file_to_blk_mode(file); 663 int ret; 664 665 switch (cmd) { 666 /* These need separate implementations for the data structure */ 667 case HDIO_GETGEO: 668 return blkdev_getgeo(bdev, argp); 669 case BLKPG: 670 return blkpg_ioctl(bdev, argp); 671 672 /* Compat mode returns 32-bit data instead of 'long' */ 673 case BLKRAGET: 674 case BLKFRAGET: 675 if (!argp) 676 return -EINVAL; 677 return put_long(argp, 678 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 679 case BLKGETSIZE: 680 if (bdev_nr_sectors(bdev) > ~0UL) 681 return -EFBIG; 682 return put_ulong(argp, bdev_nr_sectors(bdev)); 683 684 /* The data is compatible, but the command number is different */ 685 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 686 return put_int(argp, block_size(bdev)); 687 case BLKBSZSET: 688 return blkdev_bszset(file, mode, argp); 689 case BLKGETSIZE64: 690 return put_u64(argp, bdev_nr_bytes(bdev)); 691 692 /* Incompatible alignment on i386 */ 693 case BLKTRACESETUP: 694 return blk_trace_ioctl(bdev, cmd, argp); 695 default: 696 break; 697 } 698 699 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 700 if (ret != -ENOIOCTLCMD) 701 return ret; 702 703 if (!bdev->bd_disk->fops->ioctl) 704 return -ENOTTY; 705 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 706 } 707 708 #ifdef CONFIG_COMPAT 709 710 #define BLKBSZGET_32 _IOR(0x12, 112, int) 711 #define BLKBSZSET_32 _IOW(0x12, 113, int) 712 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 713 714 /* Most of the generic ioctls are handled in the normal fallback path. 715 This assumes the blkdev's low level compat_ioctl always returns 716 ENOIOCTLCMD for unknown ioctls. */ 717 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 718 { 719 int ret; 720 void __user *argp = compat_ptr(arg); 721 struct block_device *bdev = I_BDEV(file->f_mapping->host); 722 struct gendisk *disk = bdev->bd_disk; 723 blk_mode_t mode = file_to_blk_mode(file); 724 725 switch (cmd) { 726 /* These need separate implementations for the data structure */ 727 case HDIO_GETGEO: 728 return compat_hdio_getgeo(bdev, argp); 729 case BLKPG: 730 return compat_blkpg_ioctl(bdev, argp); 731 732 /* Compat mode returns 32-bit data instead of 'long' */ 733 case BLKRAGET: 734 case BLKFRAGET: 735 if (!argp) 736 return -EINVAL; 737 return compat_put_long(argp, 738 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 739 case BLKGETSIZE: 740 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 741 return -EFBIG; 742 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 743 744 /* The data is compatible, but the command number is different */ 745 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 746 return put_int(argp, bdev_logical_block_size(bdev)); 747 case BLKBSZSET_32: 748 return blkdev_bszset(file, mode, argp); 749 case BLKGETSIZE64_32: 750 return put_u64(argp, bdev_nr_bytes(bdev)); 751 752 /* Incompatible alignment on i386 */ 753 case BLKTRACESETUP32: 754 return blk_trace_ioctl(bdev, cmd, argp); 755 default: 756 break; 757 } 758 759 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 760 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 761 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 762 763 return ret; 764 } 765 #endif 766 767 struct blk_iou_cmd { 768 int res; 769 bool nowait; 770 }; 771 772 static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) 773 { 774 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 775 776 if (bic->res == -EAGAIN && bic->nowait) 777 io_uring_cmd_issue_blocking(cmd); 778 else 779 io_uring_cmd_done(cmd, bic->res, 0, issue_flags); 780 } 781 782 static void bio_cmd_bio_end_io(struct bio *bio) 783 { 784 struct io_uring_cmd *cmd = bio->bi_private; 785 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 786 787 if (unlikely(bio->bi_status) && !bic->res) 788 bic->res = blk_status_to_errno(bio->bi_status); 789 790 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 791 bio_put(bio); 792 } 793 794 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 795 struct block_device *bdev, 796 uint64_t start, uint64_t len, bool nowait) 797 { 798 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 799 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 800 sector_t sector = start >> SECTOR_SHIFT; 801 sector_t nr_sects = len >> SECTOR_SHIFT; 802 struct bio *prev = NULL, *bio; 803 int err; 804 805 if (!bdev_max_discard_sectors(bdev)) 806 return -EOPNOTSUPP; 807 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 808 return -EBADF; 809 if (bdev_read_only(bdev)) 810 return -EPERM; 811 err = blk_validate_byte_range(bdev, start, len); 812 if (err) 813 return err; 814 815 err = filemap_invalidate_pages(bdev->bd_mapping, start, 816 start + len - 1, nowait); 817 if (err) 818 return err; 819 820 while (true) { 821 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 822 if (!bio) 823 break; 824 if (nowait) { 825 /* 826 * Don't allow multi-bio non-blocking submissions as 827 * subsequent bios may fail but we won't get a direct 828 * indication of that. Normally, the caller should 829 * retry from a blocking context. 830 */ 831 if (unlikely(nr_sects)) { 832 bio_put(bio); 833 return -EAGAIN; 834 } 835 bio->bi_opf |= REQ_NOWAIT; 836 } 837 838 prev = bio_chain_and_submit(prev, bio); 839 } 840 if (unlikely(!prev)) 841 return -EAGAIN; 842 if (unlikely(nr_sects)) 843 bic->res = -EAGAIN; 844 845 prev->bi_private = cmd; 846 prev->bi_end_io = bio_cmd_bio_end_io; 847 submit_bio(prev); 848 return -EIOCBQUEUED; 849 } 850 851 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 852 { 853 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 854 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 855 const struct io_uring_sqe *sqe = cmd->sqe; 856 u32 cmd_op = cmd->cmd_op; 857 uint64_t start, len; 858 859 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 860 sqe->rw_flags || sqe->file_index)) 861 return -EINVAL; 862 863 bic->res = 0; 864 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 865 866 start = READ_ONCE(sqe->addr); 867 len = READ_ONCE(sqe->addr3); 868 869 switch (cmd_op) { 870 case BLOCK_URING_CMD_DISCARD: 871 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 872 } 873 return -EINVAL; 874 } 875