// SPDX-License-Identifier: GPL-2.0-only
/*
 * SCSI Zoned Block commands
 *
 * Copyright (C) 2014-2015 SUSE Linux GmbH
 * Written by: Hannes Reinecke <hare@suse.de>
 * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>

#include <linux/unaligned.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

#include "sd.h"

#define CREATE_TRACE_POINTS
#include "sd_trace.h"

/* Whether or not a SCSI zone descriptor describes a gap zone. */
static bool sd_zbc_is_gap_zone(const u8 buf[64])
{
        return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
}

/**
 * sd_zbc_parse_report - Parse a SCSI zone descriptor
 * @sdkp: SCSI disk pointer.
 * @buf: SCSI zone descriptor.
 * @idx: Index of the zone relative to the first zone reported by the current
 *       sd_zbc_report_zones() call.
 * @args: report zones arguments (callback, etc)
 *
 * Return: Value returned by disk_report_zone().
 *
 * Convert a SCSI zone descriptor into struct blk_zone format and report the
 * converted zone with disk_report_zone().
 */
static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
                               unsigned int idx,
                               struct blk_report_zones_args *args)
{
        struct scsi_device *sdp = sdkp->device;
        struct blk_zone zone = { 0 };
        sector_t start_lba, gran;

        if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
                return -EINVAL;

        zone.type = buf[0] & 0x0f;
        zone.cond = (buf[1] >> 4) & 0xf;
        if (buf[1] & 0x01)
                zone.reset = 1;
        if (buf[1] & 0x02)
                zone.non_seq = 1;

        start_lba = get_unaligned_be64(&buf[16]);
        zone.start = logical_to_sectors(sdp, start_lba);
        zone.capacity = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
        zone.len = zone.capacity;
        if (sdkp->zone_starting_lba_gran) {
                gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
                if (zone.len > gran) {
                        sd_printk(KERN_ERR, sdkp,
                                  "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n",
                                  start_lba,
                                  sectors_to_logical(sdp, zone.capacity),
                                  sectors_to_logical(sdp, zone.len),
                                  sectors_to_logical(sdp, gran));
                        return -EINVAL;
                }
                /*
                 * Use the starting LBA granularity instead of the zone length
                 * obtained from the REPORT ZONES command.
                 */
                zone.len = gran;
        }
        if (zone.cond == ZBC_ZONE_COND_FULL)
                zone.wp = zone.start + zone.len;
        else
                zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));

        return disk_report_zone(sdkp->disk, &zone, idx, args);
}
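
/*
 * For reference, the REPORT ZONES reply layout consumed in this file, with
 * the offsets as used by sd_zbc_parse_report() above and the callers below
 * (see the ZBC/ZAC specifications for the authoritative definition):
 *
 *   Header (64 B):          bytes 0-3   zone list length
 *                           bytes 8-15  maximum LBA
 *   Descriptor (64 B each): byte 0, bits 0-3   zone type
 *                           byte 1, bits 4-7   zone condition
 *                           byte 1, bit 1      NON_SEQ
 *                           byte 1, bit 0      RESET recommended
 *                           bytes 8-15         zone length
 *                           bytes 16-23        zone start LBA
 *                           bytes 24-31        write pointer LBA
 */
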
/**
 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
 * @sdkp: The target disk
 * @buf: vmalloc-ed buffer to use for the reply
 * @buflen: the buffer size
 * @lba: Start LBA of the report
 * @partial: Do partial report
 *
 * For internal use during device validation.
 * Using partial=true can significantly speed up execution of a report zones
 * command because the disk does not have to count all possible report matching
 * zones and will only report the count of zones fitting in the command reply
 * buffer.
 */
static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
                                  unsigned int buflen, sector_t lba,
                                  bool partial)
{
        struct scsi_device *sdp = sdkp->device;
        const int timeout = sdp->request_queue->rq_timeout;
        struct scsi_sense_hdr sshdr;
        const struct scsi_exec_args exec_args = {
                .sshdr = &sshdr,
        };
        unsigned char cmd[16];
        unsigned int rep_len;
        int result;

        memset(cmd, 0, 16);
        cmd[0] = ZBC_IN;
        cmd[1] = ZI_REPORT_ZONES;
        put_unaligned_be64(lba, &cmd[2]);
        put_unaligned_be32(buflen, &cmd[10]);
        if (partial)
                cmd[14] = ZBC_REPORT_ZONE_PARTIAL;

        result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buf, buflen,
                                  timeout, SD_MAX_RETRIES, &exec_args);
        if (result) {
                sd_printk(KERN_ERR, sdkp,
                          "REPORT ZONES start lba %llu failed\n", lba);
                sd_print_result(sdkp, "REPORT ZONES", result);
                if (result > 0 && scsi_sense_valid(&sshdr))
                        sd_print_sense_hdr(sdkp, &sshdr);
                return -EIO;
        }

        rep_len = get_unaligned_be32(&buf[0]);
        if (rep_len < 64) {
                sd_printk(KERN_ERR, sdkp,
                          "REPORT ZONES report invalid length %u\n",
                          rep_len);
                return -EIO;
        }

        return 0;
}

/**
 * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
 * @sdkp: The target disk
 * @nr_zones: Maximum number of zones to report
 * @buflen: Size of the buffer allocated
 *
 * Try to allocate a reply buffer for the number of requested zones.
 * The size of the buffer allocated may be smaller than requested to
 * satisfy the device constraints (max_hw_sectors, max_segments, etc).
 *
 * Return the address of the allocated buffer and update @buflen with
 * the size of the allocated buffer.
 */
static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
                                        unsigned int nr_zones, size_t *buflen)
{
        struct request_queue *q = sdkp->disk->queue;
        unsigned int max_segments;
        size_t bufsize;
        void *buf;

        /*
         * Report zone buffer size should be at most 64B times the number of
         * zones requested plus the 64B reply header, but should be aligned
         * to SECTOR_SIZE for ATA devices.
         * Make sure that this size does not exceed the hardware capabilities.
         * Furthermore, since the report zone command cannot be split, make
         * sure that the allocated buffer can always be mapped by limiting the
         * number of pages allocated to the HBA max segments limit.
         * Since max segments can be larger than the max inline bio vectors,
         * further limit the allocated buffer to BIO_MAX_INLINE_VECS.
         */
        nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
        bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
        bufsize = min_t(size_t, bufsize,
                        queue_max_hw_sectors(q) << SECTOR_SHIFT);
        max_segments = min(BIO_MAX_INLINE_VECS, queue_max_segments(q));
        bufsize = min_t(size_t, bufsize, max_segments << PAGE_SHIFT);

        while (bufsize >= SECTOR_SIZE) {
                buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
                if (buf) {
                        *buflen = bufsize;
                        return buf;
                }
                bufsize = rounddown(bufsize >> 1, SECTOR_SIZE);
        }

        return NULL;
}
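
/*
 * Example of the sizing above (hypothetical numbers): a request for 8 zones
 * starts from roundup((8 + 1) * 64, SECTOR_SIZE) = 1024 bytes, which is then
 * clamped by the max_hw_sectors, max_segments and BIO_MAX_INLINE_VECS limits
 * and halved on allocation failure until it drops below SECTOR_SIZE.
 */
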
/**
 * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
 * @sdkp: The target disk
 */
static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
{
        return logical_to_sectors(sdkp->device, sdkp->zone_info.zone_blocks);
}

/**
 * sd_zbc_report_zones - SCSI .report_zones() callback.
 * @disk: Disk to report zones for.
 * @sector: Start sector.
 * @nr_zones: Maximum number of zones to report.
 * @args: Callback arguments.
 *
 * Called by the block layer to iterate over zone information. See also the
 * disk->fops->report_zones() calls in block/blk-zoned.c.
 */
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
                        unsigned int nr_zones,
                        struct blk_report_zones_args *args)
{
        struct scsi_disk *sdkp = scsi_disk(disk);
        sector_t lba = sectors_to_logical(sdkp->device, sector);
        unsigned int nr, i;
        unsigned char *buf;
        u64 zone_length, start_lba;
        size_t offset, buflen = 0;
        int zone_idx = 0;
        int ret;

        if (sdkp->device->type != TYPE_ZBC)
                /* Not a zoned device */
                return -EOPNOTSUPP;

        if (!sdkp->capacity)
                /* Device gone or invalid */
                return -ENODEV;

        buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
        if (!buf)
                return -ENOMEM;

        while (zone_idx < nr_zones && lba < sdkp->capacity) {
                ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, true);
                if (ret)
                        goto out;

                offset = 0;
                nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
                if (!nr)
                        break;

                for (i = 0; i < nr && zone_idx < nr_zones; i++) {
                        offset += 64;
                        start_lba = get_unaligned_be64(&buf[offset + 16]);
                        zone_length = get_unaligned_be64(&buf[offset + 8]);
                        if ((zone_idx == 0 &&
                             (lba < start_lba ||
                              lba >= start_lba + zone_length)) ||
                            (zone_idx > 0 && start_lba != lba) ||
                            start_lba + zone_length < start_lba) {
                                sd_printk(KERN_ERR, sdkp,
                                          "Zone %d at LBA %llu is invalid: %llu + %llu\n",
                                          zone_idx, lba, start_lba, zone_length);
                                ret = -EINVAL;
                                goto out;
                        }
                        lba = start_lba + zone_length;
                        if (sd_zbc_is_gap_zone(&buf[offset])) {
                                if (sdkp->zone_starting_lba_gran)
                                        continue;
                                sd_printk(KERN_ERR, sdkp,
                                          "Gap zone without constant LBA offsets\n");
                                ret = -EINVAL;
                                goto out;
                        }

                        ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
                                                  args);
                        if (ret)
                                goto out;

                        zone_idx++;
                }
        }

        ret = zone_idx;
out:
        kvfree(buf);
        return ret;
}

static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
{
        struct request *rq = scsi_cmd_to_rq(cmd);
        struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
        sector_t sector = blk_rq_pos(rq);

        if (sdkp->device->type != TYPE_ZBC)
                /* Not a zoned device */
                return BLK_STS_IOERR;

        if (sdkp->device->changed)
                return BLK_STS_IOERR;

        if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
                /* Unaligned request */
                return BLK_STS_IOERR;

        return BLK_STS_OK;
}
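
/*
 * Note: the bitmask alignment test in sd_zbc_cmnd_checks() above relies on
 * the zone size being a power of two, which sd_zbc_check_capacity() enforces
 * before the device is accepted.
 */
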
/**
 * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
 *                      can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
 * @cmd: the command to setup
 * @op: Operation to be performed
 * @all: All zones control
 *
 * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
 * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
 */
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
                                         unsigned char op, bool all)
{
        struct request *rq = scsi_cmd_to_rq(cmd);
        sector_t sector = blk_rq_pos(rq);
        struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
        sector_t block = sectors_to_logical(sdkp->device, sector);
        blk_status_t ret;

        ret = sd_zbc_cmnd_checks(cmd);
        if (ret != BLK_STS_OK)
                return ret;

        cmd->cmd_len = 16;
        memset(cmd->cmnd, 0, cmd->cmd_len);
        cmd->cmnd[0] = ZBC_OUT;
        cmd->cmnd[1] = op;
        if (all)
                cmd->cmnd[14] = 0x1;
        else
                put_unaligned_be64(block, &cmd->cmnd[2]);

        rq->timeout = SD_TIMEOUT;
        cmd->sc_data_direction = DMA_NONE;
        cmd->transfersize = 0;
        cmd->allowed = 0;

        return BLK_STS_OK;
}

/**
 * sd_zbc_complete - ZBC command post processing.
 * @cmd: Completed command
 * @good_bytes: Command reply bytes
 * @sshdr: command sense header
 *
 * Called from sd_done() to handle zone command errors and updates to the
 * device queue zone write pointer offset cache.
 */
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
                             struct scsi_sense_hdr *sshdr)
{
        int result = cmd->result;
        struct request *rq = scsi_cmd_to_rq(cmd);

        if (op_is_zone_mgmt(req_op(rq)) &&
            result &&
            sshdr->sense_key == ILLEGAL_REQUEST &&
            sshdr->asc == 0x24) {
                /*
                 * INVALID FIELD IN CDB error: a zone management command was
                 * attempted on a conventional zone. Nothing to worry about,
                 * so be quiet about the error.
                 */
                rq->rq_flags |= RQF_QUIET;
        }

        return good_bytes;
}
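
/*
 * For reference, the Zoned Block Device Characteristics VPD page (B6h) fields
 * read below, with the offsets as consumed by
 * sd_zbc_check_zoned_characteristics() (see ZBC/ZAC for the authoritative
 * definition):
 *
 *   byte 4, bit 0      URSWRZ (unrestricted reads in sequential write
 *                      required zones)
 *   bytes 8-11         optimal number of open sequential write preferred zones
 *   bytes 12-15        optimal number of non-sequentially written sequential
 *                      write preferred zones
 *   bytes 16-19        maximum number of open sequential write required zones
 *   byte 23, bits 0-3  zoned alignment method
 *   bytes 24-31        zone starting LBA granularity
 */
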
/**
 * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
 * @sdkp: Target disk
 * @buf: Buffer where to store the VPD page data
 *
 * Read VPD page B6, get information and check that reads are unconstrained.
 */
static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
                                              unsigned char *buf)
{
        u64 zone_starting_lba_gran;

        if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
                sd_printk(KERN_NOTICE, sdkp,
                          "Read zoned characteristics VPD page failed\n");
                return -ENODEV;
        }

        if (sdkp->device->type != TYPE_ZBC) {
                /* Host-aware */
                sdkp->urswrz = 1;
                sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]);
                sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]);
                sdkp->zones_max_open = 0;
                return 0;
        }

        /* Host-managed */
        sdkp->urswrz = buf[4] & 1;
        sdkp->zones_optimal_open = 0;
        sdkp->zones_optimal_nonseq = 0;
        sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
        /* Check zone alignment method */
        switch (buf[23] & 0xf) {
        case 0:
        case ZBC_CONSTANT_ZONE_LENGTH:
                /* Use zone length */
                break;
        case ZBC_CONSTANT_ZONE_START_OFFSET:
                zone_starting_lba_gran = get_unaligned_be64(&buf[24]);
                if (zone_starting_lba_gran == 0 ||
                    !is_power_of_2(zone_starting_lba_gran) ||
                    logical_to_sectors(sdkp->device, zone_starting_lba_gran) >
                    UINT_MAX) {
                        sd_printk(KERN_ERR, sdkp,
                                  "Invalid zone starting LBA granularity %llu\n",
                                  zone_starting_lba_gran);
                        return -ENODEV;
                }
                sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
                break;
        default:
                sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n");
                return -ENODEV;
        }

        /*
         * Check for unconstrained reads: host-managed devices with
         * constrained reads (drives failing read after write pointer)
         * are not supported.
         */
        if (!sdkp->urswrz) {
                if (sdkp->first_scan)
                        sd_printk(KERN_NOTICE, sdkp,
                                  "constrained reads devices are not supported\n");
                return -ENODEV;
        }

        return 0;
}
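
/*
 * Note: when the device reports the "constant zone starting LBA offset"
 * alignment method, sdkp->zone_starting_lba_gran is set above and is then
 * used instead of the first zone length, both by sd_zbc_check_capacity()
 * below and when parsing zone descriptors in sd_zbc_parse_report().
 */
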
/**
 * sd_zbc_check_capacity - Check the device capacity
 * @sdkp: Target disk
 * @buf: command buffer
 * @zblocks: zone size in logical blocks
 *
 * Get the device zone size and check that the device capacity as reported
 * by READ CAPACITY matches the max_lba value (plus one) of the report zones
 * command reply for devices with RC_BASIS == 0.
 *
 * Return: 0 upon success or an error code upon failure.
 */
static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
                                 u32 *zblocks)
{
        u64 zone_blocks;
        sector_t max_lba;
        unsigned char *rec;
        int ret;

        /* Do a report zone to get max_lba and the size of the first zone */
        ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
        if (ret)
                return ret;

        if (sdkp->rc_basis == 0) {
                /* The max_lba field is the capacity of this device */
                max_lba = get_unaligned_be64(&buf[8]);
                if (sdkp->capacity != max_lba + 1) {
                        if (sdkp->first_scan)
                                sd_printk(KERN_WARNING, sdkp,
                                          "Changing capacity from %llu to max LBA+1 %llu\n",
                                          (unsigned long long)sdkp->capacity,
                                          (unsigned long long)max_lba + 1);
                        sdkp->capacity = max_lba + 1;
                }
        }

        if (sdkp->zone_starting_lba_gran == 0) {
                /* Get the size of the first reported zone */
                rec = buf + 64;
                zone_blocks = get_unaligned_be64(&rec[8]);
                if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
                        if (sdkp->first_scan)
                                sd_printk(KERN_NOTICE, sdkp,
                                          "Zone size too large\n");
                        return -EFBIG;
                }
        } else {
                zone_blocks = sdkp->zone_starting_lba_gran;
        }

        if (!is_power_of_2(zone_blocks)) {
                sd_printk(KERN_ERR, sdkp,
                          "Zone size %llu is not a power of two.\n",
                          zone_blocks);
                return -EINVAL;
        }

        *zblocks = zone_blocks;

        return 0;
}

static void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
        if (sdkp->device->type != TYPE_ZBC || !sdkp->capacity)
                return;

        if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
                sd_printk(KERN_NOTICE, sdkp,
                          "%u zones of %u logical blocks + 1 runt zone\n",
                          sdkp->zone_info.nr_zones - 1,
                          sdkp->zone_info.zone_blocks);
        else
                sd_printk(KERN_NOTICE, sdkp,
                          "%u zones of %u logical blocks\n",
                          sdkp->zone_info.nr_zones,
                          sdkp->zone_info.zone_blocks);
}

/*
 * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
 * changed that make it necessary to call that function. Called by
 * sd_revalidate_disk() after the gendisk capacity has been set.
 */
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
{
        struct gendisk *disk = sdkp->disk;
        struct request_queue *q = disk->queue;
        u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
        unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
        unsigned int flags;
        int ret;

        /*
         * There is nothing to do for regular disks, including host-aware disks
         * that have partitions.
         */
        if (!blk_queue_is_zoned(q))
                return 0;

        if (sdkp->zone_info.zone_blocks == zone_blocks &&
            sdkp->zone_info.nr_zones == nr_zones &&
            disk->nr_zones == nr_zones)
                return 0;

        sdkp->zone_info.zone_blocks = zone_blocks;
        sdkp->zone_info.nr_zones = nr_zones;

        flags = memalloc_noio_save();
        ret = blk_revalidate_disk_zones(disk);
        memalloc_noio_restore(flags);
        if (ret) {
                sdkp->zone_info = (struct zoned_disk_info){ };
                sdkp->capacity = 0;
                return ret;
        }

        sd_zbc_print_zones(sdkp);

        return 0;
}
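
/*
 * Example of the zone count computation done in sd_zbc_read_zones() below
 * (hypothetical numbers): a capacity of 1000 logical blocks with a zone size
 * of 256 blocks gives round_up(1000, 256) >> ilog2(256) = 1024 >> 8 = 4 zones,
 * the last one being a smaller "runt" zone reported by sd_zbc_print_zones().
 */
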
/**
 * sd_zbc_read_zones - Read zone information and update the request queue
 * @sdkp: SCSI disk pointer.
 * @lim: queue limits to read into
 * @buf: 512 byte buffer used for storing SCSI command output.
 *
 * Read zone information and update the request queue zone characteristics and
 * also the zoned device information in *sdkp. Called by sd_revalidate_disk()
 * before the gendisk capacity has been set.
 */
int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
                      u8 buf[SD_BUF_SIZE])
{
        unsigned int nr_zones;
        u32 zone_blocks = 0;
        int ret;

        if (sdkp->device->type != TYPE_ZBC)
                return 0;

        lim->features |= BLK_FEAT_ZONED;

        /*
         * Per ZBC and ZAC specifications, writes in sequential write required
         * zones of host-managed devices must be aligned to the device physical
         * block size.
         */
        lim->zone_write_granularity = sdkp->physical_block_size;

        /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
        sdkp->device->use_16_for_rw = 1;
        sdkp->device->use_10_for_rw = 0;
        sdkp->device->use_16_for_sync = 1;

        /* Check zoned block device characteristics (unconstrained reads) */
        ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
        if (ret)
                goto err;

        /* Check the device capacity reported by report zones */
        ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks);
        if (ret != 0)
                goto err;

        nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
        sdkp->early_zone_info.nr_zones = nr_zones;
        sdkp->early_zone_info.zone_blocks = zone_blocks;

        /* The drive satisfies the kernel restrictions: set it up */
        if (sdkp->zones_max_open == U32_MAX)
                lim->max_open_zones = 0;
        else
                lim->max_open_zones = sdkp->zones_max_open;
        lim->max_active_zones = 0;
        lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);

        return 0;

err:
        sdkp->capacity = 0;

        return ret;
}