1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/capability.h> 3 #include <linux/compat.h> 4 #include <linux/blkdev.h> 5 #include <linux/export.h> 6 #include <linux/gfp.h> 7 #include <linux/blkpg.h> 8 #include <linux/hdreg.h> 9 #include <linux/backing-dev.h> 10 #include <linux/fs.h> 11 #include <linux/blktrace_api.h> 12 #include <linux/pr.h> 13 #include <linux/uaccess.h> 14 #include <linux/pagemap.h> 15 #include <linux/io_uring/cmd.h> 16 #include <uapi/linux/blkdev.h> 17 #include "blk.h" 18 #include "blk-crypto-internal.h" 19 20 static int blkpg_do_ioctl(struct block_device *bdev, 21 struct blkpg_partition __user *upart, int op) 22 { 23 struct gendisk *disk = bdev->bd_disk; 24 struct blkpg_partition p; 25 sector_t start, length, capacity, end; 26 27 if (!capable(CAP_SYS_ADMIN)) 28 return -EACCES; 29 if (copy_from_user(&p, upart, sizeof(struct blkpg_partition))) 30 return -EFAULT; 31 if (bdev_is_partition(bdev)) 32 return -EINVAL; 33 34 if (p.pno <= 0) 35 return -EINVAL; 36 37 if (op == BLKPG_DEL_PARTITION) 38 return bdev_del_partition(disk, p.pno); 39 40 if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) 41 return -EINVAL; 42 /* Check that the partition is aligned to the block size */ 43 if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) 44 return -EINVAL; 45 46 start = p.start >> SECTOR_SHIFT; 47 length = p.length >> SECTOR_SHIFT; 48 capacity = get_capacity(disk); 49 50 if (check_add_overflow(start, length, &end)) 51 return -EINVAL; 52 53 if (start >= capacity || end > capacity) 54 return -EINVAL; 55 56 switch (op) { 57 case BLKPG_ADD_PARTITION: 58 return bdev_add_partition(disk, p.pno, start, length); 59 case BLKPG_RESIZE_PARTITION: 60 return bdev_resize_partition(disk, p.pno, start, length); 61 default: 62 return -EINVAL; 63 } 64 } 65 66 static int blkpg_ioctl(struct block_device *bdev, 67 struct blkpg_ioctl_arg __user *arg) 68 { 69 struct blkpg_partition __user *udata; 70 int op; 71 72 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 73 return -EFAULT; 74 75 return blkpg_do_ioctl(bdev, udata, op); 76 } 77 78 #ifdef CONFIG_COMPAT 79 struct compat_blkpg_ioctl_arg { 80 compat_int_t op; 81 compat_int_t flags; 82 compat_int_t datalen; 83 compat_caddr_t data; 84 }; 85 86 static int compat_blkpg_ioctl(struct block_device *bdev, 87 struct compat_blkpg_ioctl_arg __user *arg) 88 { 89 compat_caddr_t udata; 90 int op; 91 92 if (get_user(op, &arg->op) || get_user(udata, &arg->data)) 93 return -EFAULT; 94 95 return blkpg_do_ioctl(bdev, compat_ptr(udata), op); 96 } 97 #endif 98 99 /* 100 * Check that [start, start + len) is a valid range from the block device's 101 * perspective, including verifying that it can be correctly translated into 102 * logical block addresses. 103 */ 104 static int blk_validate_byte_range(struct block_device *bdev, 105 uint64_t start, uint64_t len) 106 { 107 unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; 108 uint64_t end; 109 110 if ((start | len) & bs_mask) 111 return -EINVAL; 112 if (!len) 113 return -EINVAL; 114 if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) 115 return -EINVAL; 116 117 return 0; 118 } 119 120 static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, 121 unsigned long arg) 122 { 123 uint64_t range[2], start, len; 124 struct bio *prev = NULL, *bio; 125 sector_t sector, nr_sects; 126 struct blk_plug plug; 127 int err; 128 129 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 130 return -EFAULT; 131 start = range[0]; 132 len = range[1]; 133 134 if (!bdev_max_discard_sectors(bdev)) 135 return -EOPNOTSUPP; 136 137 if (!(mode & BLK_OPEN_WRITE)) 138 return -EBADF; 139 if (bdev_read_only(bdev)) 140 return -EPERM; 141 err = blk_validate_byte_range(bdev, start, len); 142 if (err) 143 return err; 144 145 inode_lock(bdev->bd_mapping->host); 146 filemap_invalidate_lock(bdev->bd_mapping); 147 err = truncate_bdev_range(bdev, mode, start, start + len - 1); 148 if (err) 149 goto fail; 150 151 sector = start >> SECTOR_SHIFT; 152 nr_sects = len >> SECTOR_SHIFT; 153 154 blk_start_plug(&plug); 155 while (1) { 156 if (fatal_signal_pending(current)) { 157 if (prev) 158 bio_await_chain(prev); 159 err = -EINTR; 160 goto out_unplug; 161 } 162 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, 163 GFP_KERNEL); 164 if (!bio) 165 break; 166 prev = bio_chain_and_submit(prev, bio); 167 } 168 if (prev) { 169 err = submit_bio_wait(prev); 170 if (err == -EOPNOTSUPP) 171 err = 0; 172 bio_put(prev); 173 } 174 out_unplug: 175 blk_finish_plug(&plug); 176 fail: 177 filemap_invalidate_unlock(bdev->bd_mapping); 178 inode_unlock(bdev->bd_mapping->host); 179 return err; 180 } 181 182 static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, 183 void __user *argp) 184 { 185 uint64_t start, len, end; 186 uint64_t range[2]; 187 int err; 188 189 if (!(mode & BLK_OPEN_WRITE)) 190 return -EBADF; 191 if (!bdev_max_secure_erase_sectors(bdev)) 192 return -EOPNOTSUPP; 193 if (copy_from_user(range, argp, sizeof(range))) 194 return -EFAULT; 195 196 start = range[0]; 197 len = range[1]; 198 if ((start & 511) || (len & 511)) 199 return -EINVAL; 200 if (check_add_overflow(start, len, &end) || 201 end > bdev_nr_bytes(bdev)) 202 return -EINVAL; 203 204 inode_lock(bdev->bd_mapping->host); 205 filemap_invalidate_lock(bdev->bd_mapping); 206 err = truncate_bdev_range(bdev, mode, start, end - 1); 207 if (!err) 208 err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, 209 GFP_KERNEL); 210 filemap_invalidate_unlock(bdev->bd_mapping); 211 inode_unlock(bdev->bd_mapping->host); 212 return err; 213 } 214 215 216 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, 217 unsigned long arg) 218 { 219 uint64_t range[2]; 220 uint64_t start, end, len; 221 int err; 222 223 if (!(mode & BLK_OPEN_WRITE)) 224 return -EBADF; 225 226 if (copy_from_user(range, (void __user *)arg, sizeof(range))) 227 return -EFAULT; 228 229 start = range[0]; 230 len = range[1]; 231 end = start + len - 1; 232 233 if (start & 511) 234 return -EINVAL; 235 if (len & 511) 236 return -EINVAL; 237 if (end >= (uint64_t)bdev_nr_bytes(bdev)) 238 return -EINVAL; 239 if (end < start) 240 return -EINVAL; 241 242 /* Invalidate the page cache, including dirty pages */ 243 inode_lock(bdev->bd_mapping->host); 244 filemap_invalidate_lock(bdev->bd_mapping); 245 err = truncate_bdev_range(bdev, mode, start, end); 246 if (err) 247 goto fail; 248 249 err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, 250 BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); 251 252 fail: 253 filemap_invalidate_unlock(bdev->bd_mapping); 254 inode_unlock(bdev->bd_mapping->host); 255 return err; 256 } 257 258 static int put_ushort(unsigned short __user *argp, unsigned short val) 259 { 260 return put_user(val, argp); 261 } 262 263 static int put_int(int __user *argp, int val) 264 { 265 return put_user(val, argp); 266 } 267 268 static int put_uint(unsigned int __user *argp, unsigned int val) 269 { 270 return put_user(val, argp); 271 } 272 273 static int put_long(long __user *argp, long val) 274 { 275 return put_user(val, argp); 276 } 277 278 static int put_ulong(unsigned long __user *argp, unsigned long val) 279 { 280 return put_user(val, argp); 281 } 282 283 static int put_u64(u64 __user *argp, u64 val) 284 { 285 return put_user(val, argp); 286 } 287 288 #ifdef CONFIG_COMPAT 289 static int compat_put_long(compat_long_t __user *argp, long val) 290 { 291 return put_user(val, argp); 292 } 293 294 static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) 295 { 296 return put_user(val, argp); 297 } 298 #endif 299 300 #ifdef CONFIG_COMPAT 301 /* 302 * This is the equivalent of compat_ptr_ioctl(), to be used by block 303 * drivers that implement only commands that are completely compatible 304 * between 32-bit and 64-bit user space 305 */ 306 int blkdev_compat_ptr_ioctl(struct block_device *bdev, blk_mode_t mode, 307 unsigned cmd, unsigned long arg) 308 { 309 struct gendisk *disk = bdev->bd_disk; 310 311 if (disk->fops->ioctl) 312 return disk->fops->ioctl(bdev, mode, cmd, 313 (unsigned long)compat_ptr(arg)); 314 315 return -ENOIOCTLCMD; 316 } 317 EXPORT_SYMBOL(blkdev_compat_ptr_ioctl); 318 #endif 319 320 static bool blkdev_pr_allowed(struct block_device *bdev, blk_mode_t mode) 321 { 322 /* no sense to make reservations for partitions */ 323 if (bdev_is_partition(bdev)) 324 return false; 325 326 if (capable(CAP_SYS_ADMIN)) 327 return true; 328 /* 329 * Only allow unprivileged reservations if the file descriptor is open 330 * for writing. 331 */ 332 return mode & BLK_OPEN_WRITE; 333 } 334 335 static int blkdev_pr_register(struct block_device *bdev, blk_mode_t mode, 336 struct pr_registration __user *arg) 337 { 338 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 339 struct pr_registration reg; 340 341 if (!blkdev_pr_allowed(bdev, mode)) 342 return -EPERM; 343 if (!ops || !ops->pr_register) 344 return -EOPNOTSUPP; 345 if (copy_from_user(®, arg, sizeof(reg))) 346 return -EFAULT; 347 348 if (reg.flags & ~PR_FL_IGNORE_KEY) 349 return -EOPNOTSUPP; 350 return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); 351 } 352 353 static int blkdev_pr_reserve(struct block_device *bdev, blk_mode_t mode, 354 struct pr_reservation __user *arg) 355 { 356 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 357 struct pr_reservation rsv; 358 359 if (!blkdev_pr_allowed(bdev, mode)) 360 return -EPERM; 361 if (!ops || !ops->pr_reserve) 362 return -EOPNOTSUPP; 363 if (copy_from_user(&rsv, arg, sizeof(rsv))) 364 return -EFAULT; 365 366 if (rsv.flags & ~PR_FL_IGNORE_KEY) 367 return -EOPNOTSUPP; 368 return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); 369 } 370 371 static int blkdev_pr_release(struct block_device *bdev, blk_mode_t mode, 372 struct pr_reservation __user *arg) 373 { 374 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 375 struct pr_reservation rsv; 376 377 if (!blkdev_pr_allowed(bdev, mode)) 378 return -EPERM; 379 if (!ops || !ops->pr_release) 380 return -EOPNOTSUPP; 381 if (copy_from_user(&rsv, arg, sizeof(rsv))) 382 return -EFAULT; 383 384 if (rsv.flags) 385 return -EOPNOTSUPP; 386 return ops->pr_release(bdev, rsv.key, rsv.type); 387 } 388 389 static int blkdev_pr_preempt(struct block_device *bdev, blk_mode_t mode, 390 struct pr_preempt __user *arg, bool abort) 391 { 392 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 393 struct pr_preempt p; 394 395 if (!blkdev_pr_allowed(bdev, mode)) 396 return -EPERM; 397 if (!ops || !ops->pr_preempt) 398 return -EOPNOTSUPP; 399 if (copy_from_user(&p, arg, sizeof(p))) 400 return -EFAULT; 401 402 if (p.flags) 403 return -EOPNOTSUPP; 404 return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); 405 } 406 407 static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, 408 struct pr_clear __user *arg) 409 { 410 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 411 struct pr_clear c; 412 413 if (!blkdev_pr_allowed(bdev, mode)) 414 return -EPERM; 415 if (!ops || !ops->pr_clear) 416 return -EOPNOTSUPP; 417 if (copy_from_user(&c, arg, sizeof(c))) 418 return -EFAULT; 419 420 if (c.flags) 421 return -EOPNOTSUPP; 422 return ops->pr_clear(bdev, c.key); 423 } 424 425 static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, 426 unsigned long arg) 427 { 428 if (!capable(CAP_SYS_ADMIN)) 429 return -EACCES; 430 431 mutex_lock(&bdev->bd_holder_lock); 432 if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync) 433 bdev->bd_holder_ops->sync(bdev); 434 else { 435 mutex_unlock(&bdev->bd_holder_lock); 436 sync_blockdev(bdev); 437 } 438 439 invalidate_bdev(bdev); 440 return 0; 441 } 442 443 static int blkdev_roset(struct block_device *bdev, unsigned cmd, 444 unsigned long arg) 445 { 446 int ret, n; 447 448 if (!capable(CAP_SYS_ADMIN)) 449 return -EACCES; 450 451 if (get_user(n, (int __user *)arg)) 452 return -EFAULT; 453 if (bdev->bd_disk->fops->set_read_only) { 454 ret = bdev->bd_disk->fops->set_read_only(bdev, n); 455 if (ret) 456 return ret; 457 } 458 if (n) 459 bdev_set_flag(bdev, BD_READ_ONLY); 460 else 461 bdev_clear_flag(bdev, BD_READ_ONLY); 462 return 0; 463 } 464 465 static int blkdev_getgeo(struct block_device *bdev, 466 struct hd_geometry __user *argp) 467 { 468 struct gendisk *disk = bdev->bd_disk; 469 struct hd_geometry geo; 470 int ret; 471 472 if (!argp) 473 return -EINVAL; 474 if (!disk->fops->getgeo) 475 return -ENOTTY; 476 477 /* 478 * We need to set the startsect first, the driver may 479 * want to override it. 480 */ 481 memset(&geo, 0, sizeof(geo)); 482 geo.start = get_start_sect(bdev); 483 ret = disk->fops->getgeo(bdev, &geo); 484 if (ret) 485 return ret; 486 if (copy_to_user(argp, &geo, sizeof(geo))) 487 return -EFAULT; 488 return 0; 489 } 490 491 #ifdef CONFIG_COMPAT 492 struct compat_hd_geometry { 493 unsigned char heads; 494 unsigned char sectors; 495 unsigned short cylinders; 496 u32 start; 497 }; 498 499 static int compat_hdio_getgeo(struct block_device *bdev, 500 struct compat_hd_geometry __user *ugeo) 501 { 502 struct gendisk *disk = bdev->bd_disk; 503 struct hd_geometry geo; 504 int ret; 505 506 if (!ugeo) 507 return -EINVAL; 508 if (!disk->fops->getgeo) 509 return -ENOTTY; 510 511 memset(&geo, 0, sizeof(geo)); 512 /* 513 * We need to set the startsect first, the driver may 514 * want to override it. 515 */ 516 geo.start = get_start_sect(bdev); 517 ret = disk->fops->getgeo(bdev, &geo); 518 if (ret) 519 return ret; 520 521 ret = copy_to_user(ugeo, &geo, 4); 522 ret |= put_user(geo.start, &ugeo->start); 523 if (ret) 524 ret = -EFAULT; 525 526 return ret; 527 } 528 #endif 529 530 /* set the logical block size */ 531 static int blkdev_bszset(struct file *file, blk_mode_t mode, 532 int __user *argp) 533 { 534 // this one might be file_inode(file)->i_rdev - a rare valid 535 // use of file_inode() for those. 536 dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 537 struct file *excl_file; 538 int ret, n; 539 540 if (!capable(CAP_SYS_ADMIN)) 541 return -EACCES; 542 if (!argp) 543 return -EINVAL; 544 if (get_user(n, argp)) 545 return -EFAULT; 546 547 if (mode & BLK_OPEN_EXCL) 548 return set_blocksize(file, n); 549 550 excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 551 if (IS_ERR(excl_file)) 552 return -EBUSY; 553 ret = set_blocksize(excl_file, n); 554 fput(excl_file); 555 return ret; 556 } 557 558 /* 559 * Common commands that are handled the same way on native and compat 560 * user space. Note the separate arg/argp parameters that are needed 561 * to deal with the compat_ptr() conversion. 562 */ 563 static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, 564 unsigned int cmd, unsigned long arg, 565 void __user *argp) 566 { 567 unsigned int max_sectors; 568 569 switch (cmd) { 570 case BLKFLSBUF: 571 return blkdev_flushbuf(bdev, cmd, arg); 572 case BLKROSET: 573 return blkdev_roset(bdev, cmd, arg); 574 case BLKDISCARD: 575 return blk_ioctl_discard(bdev, mode, arg); 576 case BLKSECDISCARD: 577 return blk_ioctl_secure_erase(bdev, mode, argp); 578 case BLKZEROOUT: 579 return blk_ioctl_zeroout(bdev, mode, arg); 580 case BLKGETDISKSEQ: 581 return put_u64(argp, bdev->bd_disk->diskseq); 582 case BLKREPORTZONE: 583 return blkdev_report_zones_ioctl(bdev, cmd, arg); 584 case BLKRESETZONE: 585 case BLKOPENZONE: 586 case BLKCLOSEZONE: 587 case BLKFINISHZONE: 588 return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); 589 case BLKGETZONESZ: 590 return put_uint(argp, bdev_zone_sectors(bdev)); 591 case BLKGETNRZONES: 592 return put_uint(argp, bdev_nr_zones(bdev)); 593 case BLKROGET: 594 return put_int(argp, bdev_read_only(bdev) != 0); 595 case BLKSSZGET: /* get block device logical block size */ 596 return put_int(argp, bdev_logical_block_size(bdev)); 597 case BLKPBSZGET: /* get block device physical block size */ 598 return put_uint(argp, bdev_physical_block_size(bdev)); 599 case BLKIOMIN: 600 return put_uint(argp, bdev_io_min(bdev)); 601 case BLKIOOPT: 602 return put_uint(argp, bdev_io_opt(bdev)); 603 case BLKALIGNOFF: 604 return put_int(argp, bdev_alignment_offset(bdev)); 605 case BLKDISCARDZEROES: 606 return put_uint(argp, 0); 607 case BLKSECTGET: 608 max_sectors = min_t(unsigned int, USHRT_MAX, 609 queue_max_sectors(bdev_get_queue(bdev))); 610 return put_ushort(argp, max_sectors); 611 case BLKROTATIONAL: 612 return put_ushort(argp, !bdev_nonrot(bdev)); 613 case BLKRASET: 614 case BLKFRASET: 615 if(!capable(CAP_SYS_ADMIN)) 616 return -EACCES; 617 bdev->bd_disk->bdi->ra_pages = (arg * 512) / PAGE_SIZE; 618 return 0; 619 case BLKRRPART: 620 if (!capable(CAP_SYS_ADMIN)) 621 return -EACCES; 622 if (bdev_is_partition(bdev)) 623 return -EINVAL; 624 return disk_scan_partitions(bdev->bd_disk, 625 mode | BLK_OPEN_STRICT_SCAN); 626 case BLKTRACESTART: 627 case BLKTRACESTOP: 628 case BLKTRACETEARDOWN: 629 return blk_trace_ioctl(bdev, cmd, argp); 630 case BLKCRYPTOIMPORTKEY: 631 case BLKCRYPTOGENERATEKEY: 632 case BLKCRYPTOPREPAREKEY: 633 return blk_crypto_ioctl(bdev, cmd, argp); 634 case IOC_PR_REGISTER: 635 return blkdev_pr_register(bdev, mode, argp); 636 case IOC_PR_RESERVE: 637 return blkdev_pr_reserve(bdev, mode, argp); 638 case IOC_PR_RELEASE: 639 return blkdev_pr_release(bdev, mode, argp); 640 case IOC_PR_PREEMPT: 641 return blkdev_pr_preempt(bdev, mode, argp, false); 642 case IOC_PR_PREEMPT_ABORT: 643 return blkdev_pr_preempt(bdev, mode, argp, true); 644 case IOC_PR_CLEAR: 645 return blkdev_pr_clear(bdev, mode, argp); 646 default: 647 return -ENOIOCTLCMD; 648 } 649 } 650 651 /* 652 * Always keep this in sync with compat_blkdev_ioctl() 653 * to handle all incompatible commands in both functions. 654 * 655 * New commands must be compatible and go into blkdev_common_ioctl 656 */ 657 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 658 { 659 struct block_device *bdev = I_BDEV(file->f_mapping->host); 660 void __user *argp = (void __user *)arg; 661 blk_mode_t mode = file_to_blk_mode(file); 662 int ret; 663 664 switch (cmd) { 665 /* These need separate implementations for the data structure */ 666 case HDIO_GETGEO: 667 return blkdev_getgeo(bdev, argp); 668 case BLKPG: 669 return blkpg_ioctl(bdev, argp); 670 671 /* Compat mode returns 32-bit data instead of 'long' */ 672 case BLKRAGET: 673 case BLKFRAGET: 674 if (!argp) 675 return -EINVAL; 676 return put_long(argp, 677 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 678 case BLKGETSIZE: 679 if (bdev_nr_sectors(bdev) > ~0UL) 680 return -EFBIG; 681 return put_ulong(argp, bdev_nr_sectors(bdev)); 682 683 /* The data is compatible, but the command number is different */ 684 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 685 return put_int(argp, block_size(bdev)); 686 case BLKBSZSET: 687 return blkdev_bszset(file, mode, argp); 688 case BLKGETSIZE64: 689 return put_u64(argp, bdev_nr_bytes(bdev)); 690 691 /* Incompatible alignment on i386 */ 692 case BLKTRACESETUP: 693 return blk_trace_ioctl(bdev, cmd, argp); 694 default: 695 break; 696 } 697 698 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 699 if (ret != -ENOIOCTLCMD) 700 return ret; 701 702 if (!bdev->bd_disk->fops->ioctl) 703 return -ENOTTY; 704 return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 705 } 706 707 #ifdef CONFIG_COMPAT 708 709 #define BLKBSZGET_32 _IOR(0x12, 112, int) 710 #define BLKBSZSET_32 _IOW(0x12, 113, int) 711 #define BLKGETSIZE64_32 _IOR(0x12, 114, int) 712 713 /* Most of the generic ioctls are handled in the normal fallback path. 714 This assumes the blkdev's low level compat_ioctl always returns 715 ENOIOCTLCMD for unknown ioctls. */ 716 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 717 { 718 int ret; 719 void __user *argp = compat_ptr(arg); 720 struct block_device *bdev = I_BDEV(file->f_mapping->host); 721 struct gendisk *disk = bdev->bd_disk; 722 blk_mode_t mode = file_to_blk_mode(file); 723 724 switch (cmd) { 725 /* These need separate implementations for the data structure */ 726 case HDIO_GETGEO: 727 return compat_hdio_getgeo(bdev, argp); 728 case BLKPG: 729 return compat_blkpg_ioctl(bdev, argp); 730 731 /* Compat mode returns 32-bit data instead of 'long' */ 732 case BLKRAGET: 733 case BLKFRAGET: 734 if (!argp) 735 return -EINVAL; 736 return compat_put_long(argp, 737 (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); 738 case BLKGETSIZE: 739 if (bdev_nr_sectors(bdev) > ~(compat_ulong_t)0) 740 return -EFBIG; 741 return compat_put_ulong(argp, bdev_nr_sectors(bdev)); 742 743 /* The data is compatible, but the command number is different */ 744 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 745 return put_int(argp, bdev_logical_block_size(bdev)); 746 case BLKBSZSET_32: 747 return blkdev_bszset(file, mode, argp); 748 case BLKGETSIZE64_32: 749 return put_u64(argp, bdev_nr_bytes(bdev)); 750 751 /* Incompatible alignment on i386 */ 752 case BLKTRACESETUP32: 753 return blk_trace_ioctl(bdev, cmd, argp); 754 default: 755 break; 756 } 757 758 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 759 if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl) 760 ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); 761 762 return ret; 763 } 764 #endif 765 766 struct blk_iou_cmd { 767 int res; 768 bool nowait; 769 }; 770 771 static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) 772 { 773 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 774 775 if (bic->res == -EAGAIN && bic->nowait) 776 io_uring_cmd_issue_blocking(cmd); 777 else 778 io_uring_cmd_done(cmd, bic->res, 0, issue_flags); 779 } 780 781 static void bio_cmd_bio_end_io(struct bio *bio) 782 { 783 struct io_uring_cmd *cmd = bio->bi_private; 784 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 785 786 if (unlikely(bio->bi_status) && !bic->res) 787 bic->res = blk_status_to_errno(bio->bi_status); 788 789 io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); 790 bio_put(bio); 791 } 792 793 static int blkdev_cmd_discard(struct io_uring_cmd *cmd, 794 struct block_device *bdev, 795 uint64_t start, uint64_t len, bool nowait) 796 { 797 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 798 gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; 799 sector_t sector = start >> SECTOR_SHIFT; 800 sector_t nr_sects = len >> SECTOR_SHIFT; 801 struct bio *prev = NULL, *bio; 802 int err; 803 804 if (!bdev_max_discard_sectors(bdev)) 805 return -EOPNOTSUPP; 806 if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) 807 return -EBADF; 808 if (bdev_read_only(bdev)) 809 return -EPERM; 810 err = blk_validate_byte_range(bdev, start, len); 811 if (err) 812 return err; 813 814 err = filemap_invalidate_pages(bdev->bd_mapping, start, 815 start + len - 1, nowait); 816 if (err) 817 return err; 818 819 while (true) { 820 bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); 821 if (!bio) 822 break; 823 if (nowait) { 824 /* 825 * Don't allow multi-bio non-blocking submissions as 826 * subsequent bios may fail but we won't get a direct 827 * indication of that. Normally, the caller should 828 * retry from a blocking context. 829 */ 830 if (unlikely(nr_sects)) { 831 bio_put(bio); 832 return -EAGAIN; 833 } 834 bio->bi_opf |= REQ_NOWAIT; 835 } 836 837 prev = bio_chain_and_submit(prev, bio); 838 } 839 if (unlikely(!prev)) 840 return -EAGAIN; 841 if (unlikely(nr_sects)) 842 bic->res = -EAGAIN; 843 844 prev->bi_private = cmd; 845 prev->bi_end_io = bio_cmd_bio_end_io; 846 submit_bio(prev); 847 return -EIOCBQUEUED; 848 } 849 850 int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) 851 { 852 struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); 853 struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); 854 const struct io_uring_sqe *sqe = cmd->sqe; 855 u32 cmd_op = cmd->cmd_op; 856 uint64_t start, len; 857 858 if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || 859 sqe->rw_flags || sqe->file_index)) 860 return -EINVAL; 861 862 bic->res = 0; 863 bic->nowait = issue_flags & IO_URING_F_NONBLOCK; 864 865 start = READ_ONCE(sqe->addr); 866 len = READ_ONCE(sqe->addr3); 867 868 switch (cmd_op) { 869 case BLOCK_URING_CMD_DISCARD: 870 return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); 871 } 872 return -EINVAL; 873 } 874