// SPDX-License-Identifier: GPL-2.0-only
/*
 * SCSI Zoned Block commands
 *
 * Copyright (C) 2014-2015 SUSE Linux GmbH
 * Written by: Hannes Reinecke <hare@suse.de>
 * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>

#include <asm/unaligned.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

#include "sd.h"

#define CREATE_TRACE_POINTS
#include "sd_trace.h"

/* Whether or not a SCSI zone descriptor describes a gap zone. */
static bool sd_zbc_is_gap_zone(const u8 buf[64])
{
	return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
}

/**
 * sd_zbc_parse_report - Parse a SCSI zone descriptor
 * @sdkp: SCSI disk pointer.
 * @buf: SCSI zone descriptor.
 * @idx: Index of the zone relative to the first zone reported by the current
 *	sd_zbc_report_zones() call.
 * @cb: Callback function pointer.
 * @data: Second argument passed to @cb.
 *
 * Return: Value returned by @cb.
 *
 * Convert a SCSI zone descriptor into struct blk_zone format. Additionally,
 * call @cb(blk_zone, @data).
 */
static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
			       unsigned int idx, report_zones_cb cb, void *data)
{
	struct scsi_device *sdp = sdkp->device;
	struct blk_zone zone = { 0 };
	sector_t start_lba, gran;

	if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
		return -EINVAL;

	/* Zone type is in byte 0 (bits 3:0), condition and flags in byte 1 */
	zone.type = buf[0] & 0x0f;
	zone.cond = (buf[1] >> 4) & 0xf;
	if (buf[1] & 0x01)
		zone.reset = 1;
	if (buf[1] & 0x02)
		zone.non_seq = 1;

	/*
	 * The zone start LBA is at bytes 16-23 of the descriptor, the zone
	 * length at bytes 8-15 and the write pointer LBA at bytes 24-31.
	 */
	start_lba = get_unaligned_be64(&buf[16]);
	zone.start = logical_to_sectors(sdp, start_lba);
	zone.capacity = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
	zone.len = zone.capacity;
	if (sdkp->zone_starting_lba_gran) {
		gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
		if (zone.len > gran) {
			sd_printk(KERN_ERR, sdkp,
				  "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n",
				  start_lba,
				  sectors_to_logical(sdp, zone.capacity),
				  sectors_to_logical(sdp, zone.len),
				  sectors_to_logical(sdp, gran));
			return -EINVAL;
		}
		/*
		 * Use the starting LBA granularity instead of the zone length
		 * obtained from the REPORT ZONES command.
		 */
		zone.len = gran;
	}
	if (zone.cond == ZBC_ZONE_COND_FULL)
		zone.wp = zone.start + zone.len;
	else
		zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));

	return cb(&zone, idx, data);
}
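
/*
 * Quick reference for the REPORT ZONES CDB built by sd_zbc_do_report_zones()
 * below (see the ZBC specification for the authoritative definition):
 *
 *   byte  0      : operation code (ZBC_IN)
 *   byte  1      : service action (ZI_REPORT_ZONES)
 *   bytes 2 - 9  : zone start LBA (big endian)
 *   bytes 10 - 13: allocation length, i.e. the reply buffer size (big endian)
 *   byte  14     : reporting options, with the PARTIAL bit set when a partial
 *                  report is requested
 */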

/**
 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
 * @sdkp: The target disk
 * @buf: vmalloc-ed buffer to use for the reply
 * @buflen: the buffer size
 * @lba: Start LBA of the report
 * @partial: Do partial report
 *
 * For internal use during device validation.
 * Using partial=true can significantly speed up execution of a report zones
 * command because the disk does not have to count all of the zones matching
 * the report and will only report the number of zones that fit in the command
 * reply buffer.
 */
static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
				  unsigned int buflen, sector_t lba,
				  bool partial)
{
	struct scsi_device *sdp = sdkp->device;
	const int timeout = sdp->request_queue->rq_timeout;
	struct scsi_sense_hdr sshdr;
	const struct scsi_exec_args exec_args = {
		.sshdr = &sshdr,
	};
	unsigned char cmd[16];
	unsigned int rep_len;
	int result;

	memset(cmd, 0, 16);
	cmd[0] = ZBC_IN;
	cmd[1] = ZI_REPORT_ZONES;
	put_unaligned_be64(lba, &cmd[2]);
	put_unaligned_be32(buflen, &cmd[10]);
	if (partial)
		cmd[14] = ZBC_REPORT_ZONE_PARTIAL;

	result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buf, buflen,
				  timeout, SD_MAX_RETRIES, &exec_args);
	if (result) {
		sd_printk(KERN_ERR, sdkp,
			  "REPORT ZONES start lba %llu failed\n", lba);
		sd_print_result(sdkp, "REPORT ZONES", result);
		if (result > 0 && scsi_sense_valid(&sshdr))
			sd_print_sense_hdr(sdkp, &sshdr);
		return -EIO;
	}

	rep_len = get_unaligned_be32(&buf[0]);
	if (rep_len < 64) {
		sd_printk(KERN_ERR, sdkp,
			  "REPORT ZONES report invalid length %u\n",
			  rep_len);
		return -EIO;
	}

	return 0;
}

/**
 * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
 * @sdkp: The target disk
 * @nr_zones: Maximum number of zones to report
 * @buflen: Size of the buffer allocated
 *
 * Try to allocate a reply buffer for the number of requested zones.
 * The size of the buffer allocated may be smaller than requested to
 * satisfy the device constraints (max_hw_sectors, max_segments, etc).
 *
 * Return the address of the allocated buffer and update @buflen with
 * the size of the allocated buffer.
 */
static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
					unsigned int nr_zones, size_t *buflen)
{
	struct request_queue *q = sdkp->disk->queue;
	size_t bufsize;
	void *buf;

	/*
	 * Report zone buffer size should be at most 64B times the number of
	 * zones requested plus the 64B reply header, but should be aligned
	 * to SECTOR_SIZE for ATA devices.
	 * Make sure that this size does not exceed the hardware capabilities.
	 * Furthermore, since the report zone command cannot be split, make
	 * sure that the allocated buffer can always be mapped by limiting the
	 * number of pages allocated to the HBA max segments limit.
	 */
	nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
	bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= SECTOR_SIZE) {
		buf = __vmalloc(bufsize,
				GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize = rounddown(bufsize >> 1, SECTOR_SIZE);
	}

	return NULL;
}

/**
 * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
 * @sdkp: The target disk
 */
static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
{
	return logical_to_sectors(sdkp->device, sdkp->zone_info.zone_blocks);
}
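
/*
 * Note on the iteration done by sd_zbc_report_zones() below: the reply buffer
 * may be too small to describe all requested zones at once, so REPORT ZONES
 * is issued repeatedly, each report starting at the LBA following the last
 * zone of the previous partial report, until nr_zones zones have been
 * reported or the device capacity is reached.
 */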

/**
 * sd_zbc_report_zones - SCSI .report_zones() callback.
 * @disk: Disk to report zones for.
 * @sector: Start sector.
 * @nr_zones: Maximum number of zones to report.
 * @cb: Callback function called to report zone information.
 * @data: Second argument passed to @cb.
 *
 * Called by the block layer to iterate over zone information. See also the
 * disk->fops->report_zones() calls in block/blk-zoned.c.
 */
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct scsi_disk *sdkp = scsi_disk(disk);
	sector_t lba = sectors_to_logical(sdkp->device, sector);
	unsigned int nr, i;
	unsigned char *buf;
	u64 zone_length, start_lba;
	size_t offset, buflen = 0;
	int zone_idx = 0;
	int ret;

	if (sdkp->device->type != TYPE_ZBC)
		/* Not a zoned device */
		return -EOPNOTSUPP;

	if (!sdkp->capacity)
		/* Device gone or invalid */
		return -ENODEV;

	buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
	if (!buf)
		return -ENOMEM;

	while (zone_idx < nr_zones && lba < sdkp->capacity) {
		ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, true);
		if (ret)
			goto out;

		offset = 0;
		nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
		if (!nr)
			break;

		for (i = 0; i < nr && zone_idx < nr_zones; i++) {
			offset += 64;
			start_lba = get_unaligned_be64(&buf[offset + 16]);
			zone_length = get_unaligned_be64(&buf[offset + 8]);
			if ((zone_idx == 0 &&
			     (lba < start_lba ||
			      lba >= start_lba + zone_length)) ||
			    (zone_idx > 0 && start_lba != lba) ||
			    start_lba + zone_length < start_lba) {
				sd_printk(KERN_ERR, sdkp,
					  "Zone %d at LBA %llu is invalid: %llu + %llu\n",
					  zone_idx, lba, start_lba, zone_length);
				ret = -EINVAL;
				goto out;
			}
			lba = start_lba + zone_length;
			if (sd_zbc_is_gap_zone(&buf[offset])) {
				if (sdkp->zone_starting_lba_gran)
					continue;
				sd_printk(KERN_ERR, sdkp,
					  "Gap zone without constant LBA offsets\n");
				ret = -EINVAL;
				goto out;
			}

			ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
						  cb, data);
			if (ret)
				goto out;

			zone_idx++;
		}
	}

	ret = zone_idx;
out:
	kvfree(buf);
	return ret;
}

static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
{
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	sector_t sector = blk_rq_pos(rq);

	if (sdkp->device->type != TYPE_ZBC)
		/* Not a zoned device */
		return BLK_STS_IOERR;

	if (sdkp->device->changed)
		return BLK_STS_IOERR;

	if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
		/* Unaligned request */
		return BLK_STS_IOERR;

	return BLK_STS_OK;
}
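
/*
 * Quick reference for the ZBC_OUT CDB built by sd_zbc_setup_zone_mgmt_cmnd()
 * below (see the ZBC specification for the authoritative definition):
 *
 *   byte  0     : operation code (ZBC_OUT)
 *   byte  1     : service action (the zone operation: reset write pointer,
 *                 open, close or finish)
 *   bytes 2 - 9 : zone ID, i.e. the start LBA of the target zone (big endian)
 *   byte  14    : bit 0 is the ALL bit, applying the operation to all zones
 */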

/**
 * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
 *			can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
 * @cmd: the command to setup
 * @op: Operation to be performed
 * @all: All zones control
 *
 * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
 * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
 */
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
					 unsigned char op, bool all)
{
	struct request *rq = scsi_cmd_to_rq(cmd);
	sector_t sector = blk_rq_pos(rq);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	sector_t block = sectors_to_logical(sdkp->device, sector);
	blk_status_t ret;

	ret = sd_zbc_cmnd_checks(cmd);
	if (ret != BLK_STS_OK)
		return ret;

	cmd->cmd_len = 16;
	memset(cmd->cmnd, 0, cmd->cmd_len);
	cmd->cmnd[0] = ZBC_OUT;
	cmd->cmnd[1] = op;
	if (all)
		cmd->cmnd[14] = 0x1;
	else
		put_unaligned_be64(block, &cmd->cmnd[2]);

	rq->timeout = SD_TIMEOUT;
	cmd->sc_data_direction = DMA_NONE;
	cmd->transfersize = 0;
	cmd->allowed = 0;

	return BLK_STS_OK;
}

/**
 * sd_zbc_complete - ZBC command post processing.
 * @cmd: Completed command
 * @good_bytes: Command reply bytes
 * @sshdr: command sense header
 *
 * Called from sd_done() to handle zone management command errors.
 */
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
			     struct scsi_sense_hdr *sshdr)
{
	int result = cmd->result;
	struct request *rq = scsi_cmd_to_rq(cmd);

	if (op_is_zone_mgmt(req_op(rq)) &&
	    result &&
	    sshdr->sense_key == ILLEGAL_REQUEST &&
	    sshdr->asc == 0x24) {
		/*
		 * INVALID FIELD IN CDB error: a zone management command was
		 * attempted on a conventional zone. Nothing to worry about,
		 * so be quiet about the error.
		 */
		rq->rq_flags |= RQF_QUIET;
	}

	return good_bytes;
}
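
/*
 * Quick reference for the Zoned Block Device Characteristics VPD page (B6h)
 * fields consumed by sd_zbc_check_zoned_characteristics() below (see the ZBC
 * specification for the authoritative definition):
 *
 *   byte  4, bit 0    : URSWRZ (unrestricted reads in sequential write
 *                       required zones), host-managed devices
 *   bytes 8 - 11      : optimal number of open sequential write preferred
 *                       zones, host-aware devices
 *   bytes 12 - 15     : optimal number of non-sequentially written sequential
 *                       write preferred zones, host-aware devices
 *   bytes 16 - 19     : maximum number of open sequential write required
 *                       zones, host-managed devices
 *   byte  23, bits 3:0: zone alignment method
 *   bytes 24 - 31     : zone starting LBA granularity
 */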

/**
 * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
 * @sdkp: Target disk
 * @buf: Buffer where to store the VPD page data
 *
 * Read VPD page B6, get information and check that reads are unconstrained.
 */
static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
					      unsigned char *buf)
{
	u64 zone_starting_lba_gran;

	if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
		sd_printk(KERN_NOTICE, sdkp,
			  "Read zoned characteristics VPD page failed\n");
		return -ENODEV;
	}

	if (sdkp->device->type != TYPE_ZBC) {
		/* Host-aware */
		sdkp->urswrz = 1;
		sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]);
		sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]);
		sdkp->zones_max_open = 0;
		return 0;
	}

	/* Host-managed */
	sdkp->urswrz = buf[4] & 1;
	sdkp->zones_optimal_open = 0;
	sdkp->zones_optimal_nonseq = 0;
	sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
	/* Check zone alignment method */
	switch (buf[23] & 0xf) {
	case 0:
	case ZBC_CONSTANT_ZONE_LENGTH:
		/* Use zone length */
		break;
	case ZBC_CONSTANT_ZONE_START_OFFSET:
		zone_starting_lba_gran = get_unaligned_be64(&buf[24]);
		if (zone_starting_lba_gran == 0 ||
		    !is_power_of_2(zone_starting_lba_gran) ||
		    logical_to_sectors(sdkp->device, zone_starting_lba_gran) >
		    UINT_MAX) {
			sd_printk(KERN_ERR, sdkp,
				  "Invalid zone starting LBA granularity %llu\n",
				  zone_starting_lba_gran);
			return -ENODEV;
		}
		sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
		break;
	default:
		sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n");
		return -ENODEV;
	}

	/*
	 * Check for unconstrained reads: host-managed devices with
	 * constrained reads (drives that fail reads past the write pointer)
	 * are not supported.
	 */
	if (!sdkp->urswrz) {
		if (sdkp->first_scan)
			sd_printk(KERN_NOTICE, sdkp,
				  "devices with constrained reads are not supported\n");
		return -ENODEV;
	}

	return 0;
}
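
/*
 * Quick reference for the REPORT ZONES reply parsed by sd_zbc_check_capacity()
 * below and by sd_zbc_report_zones() above (see the ZBC specification for the
 * authoritative definition):
 *
 *   bytes 0 - 3 : zone list length, i.e. the total size in bytes of the zone
 *                 descriptors available (big endian)
 *   bytes 8 - 15: maximum LBA of the device (big endian)
 *   byte 64 ... : zone descriptors, 64 bytes each
 */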

/**
 * sd_zbc_check_capacity - Check the device capacity
 * @sdkp: Target disk
 * @buf: command buffer
 * @zblocks: zone size in logical blocks
 *
 * Get the device zone size and check that the device capacity as reported
 * by READ CAPACITY matches the max_lba value (plus one) of the report zones
 * command reply for devices with RC_BASIS == 0.
 *
 * Returns 0 upon success or an error code upon failure.
 */
static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
				 u32 *zblocks)
{
	u64 zone_blocks;
	sector_t max_lba;
	unsigned char *rec;
	int ret;

	/* Do a report zone to get max_lba and the size of the first zone */
	ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
	if (ret)
		return ret;

	if (sdkp->rc_basis == 0) {
		/* The max_lba field is the capacity of this device */
		max_lba = get_unaligned_be64(&buf[8]);
		if (sdkp->capacity != max_lba + 1) {
			if (sdkp->first_scan)
				sd_printk(KERN_WARNING, sdkp,
					  "Changing capacity from %llu to max LBA+1 %llu\n",
					  (unsigned long long)sdkp->capacity,
					  (unsigned long long)max_lba + 1);
			sdkp->capacity = max_lba + 1;
		}
	}

	if (sdkp->zone_starting_lba_gran == 0) {
		/* Get the size of the first reported zone */
		rec = buf + 64;
		zone_blocks = get_unaligned_be64(&rec[8]);
		if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
			if (sdkp->first_scan)
				sd_printk(KERN_NOTICE, sdkp,
					  "Zone size too large\n");
			return -EFBIG;
		}
	} else {
		zone_blocks = sdkp->zone_starting_lba_gran;
	}

	if (!is_power_of_2(zone_blocks)) {
		sd_printk(KERN_ERR, sdkp,
			  "Zone size %llu is not a power of two.\n",
			  zone_blocks);
		return -EINVAL;
	}

	*zblocks = zone_blocks;

	return 0;
}

static void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
	if (sdkp->device->type != TYPE_ZBC || !sdkp->capacity)
		return;

	if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
		sd_printk(KERN_NOTICE, sdkp,
			  "%u zones of %u logical blocks + 1 runt zone\n",
			  sdkp->zone_info.nr_zones - 1,
			  sdkp->zone_info.zone_blocks);
	else
		sd_printk(KERN_NOTICE, sdkp,
			  "%u zones of %u logical blocks\n",
			  sdkp->zone_info.nr_zones,
			  sdkp->zone_info.zone_blocks);
}

/*
 * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
 * changed that make it necessary to call that function. Called by
 * sd_revalidate_disk() after the gendisk capacity has been set.
 */
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
{
	struct gendisk *disk = sdkp->disk;
	struct request_queue *q = disk->queue;
	u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
	unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
	unsigned int flags;
	int ret;

	/*
	 * There is nothing to do for regular disks, including host-aware disks
	 * that have partitions.
	 */
	if (!blk_queue_is_zoned(q))
		return 0;

	if (sdkp->zone_info.zone_blocks == zone_blocks &&
	    sdkp->zone_info.nr_zones == nr_zones &&
	    disk->nr_zones == nr_zones)
		return 0;

	sdkp->zone_info.zone_blocks = zone_blocks;
	sdkp->zone_info.nr_zones = nr_zones;

	flags = memalloc_noio_save();
	ret = blk_revalidate_disk_zones(disk);
	memalloc_noio_restore(flags);
	if (ret) {
		sdkp->zone_info = (struct zoned_disk_info){ };
		sdkp->capacity = 0;
		return ret;
	}

	sd_zbc_print_zones(sdkp);

	return 0;
}
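
/*
 * Illustration of the zone count computation done in sd_zbc_read_zones()
 * below (numbers are examples only): with a capacity of 15000000 logical
 * blocks and a zone size of 524288 blocks, round_up(15000000, 524288) is
 * 15204352 and 15204352 >> ilog2(524288) = 29 zones, i.e. 28 full zones plus
 * one smaller "runt" zone at the end of the disk, as reported by
 * sd_zbc_print_zones() above.
 */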

/**
 * sd_zbc_read_zones - Read zone information and update the request queue
 * @sdkp: SCSI disk pointer.
 * @lim: queue limits to read into
 * @buf: 512 byte buffer used for storing SCSI command output.
 *
 * Read zone information and update the request queue zone characteristics and
 * also the zoned device information in *sdkp. Called by sd_revalidate_disk()
 * before the gendisk capacity has been set.
 */
int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
		      u8 buf[SD_BUF_SIZE])
{
	unsigned int nr_zones;
	u32 zone_blocks = 0;
	int ret;

	if (sdkp->device->type != TYPE_ZBC)
		return 0;

	lim->features |= BLK_FEAT_ZONED;

	/*
	 * Per ZBC and ZAC specifications, writes in sequential write required
	 * zones of host-managed devices must be aligned to the device physical
	 * block size.
	 */
	lim->zone_write_granularity = sdkp->physical_block_size;

	/* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
	sdkp->device->use_16_for_rw = 1;
	sdkp->device->use_10_for_rw = 0;
	sdkp->device->use_16_for_sync = 1;

	/* Check zoned block device characteristics (unconstrained reads) */
	ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
	if (ret)
		goto err;

	/* Check the device capacity reported by report zones */
	ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks);
	if (ret != 0)
		goto err;

	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
	sdkp->early_zone_info.nr_zones = nr_zones;
	sdkp->early_zone_info.zone_blocks = zone_blocks;

	/* The drive satisfies the kernel restrictions: set it up */
	if (sdkp->zones_max_open == U32_MAX)
		lim->max_open_zones = 0;
	else
		lim->max_open_zones = sdkp->zones_max_open;
	lim->max_active_zones = 0;
	lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);
	/* Enable block layer zone append emulation */
	lim->max_zone_append_sectors = 0;

	return 0;

err:
	sdkp->capacity = 0;

	return ret;
}