// SPDX-License-Identifier: GPL-2.0-only
/*
 * SCSI Zoned Block commands
 *
 * Copyright (C) 2014-2015 SUSE Linux GmbH
 * Written by: Hannes Reinecke <hare@suse.de>
 * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>

#include <linux/unaligned.h>

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

#include "sd.h"

#define CREATE_TRACE_POINTS
#include "sd_trace.h"

/* Whether or not a SCSI zone descriptor describes a gap zone. */
static bool sd_zbc_is_gap_zone(const u8 buf[64])
{
	return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
}

/**
 * sd_zbc_parse_report - Parse a SCSI zone descriptor
 * @sdkp: SCSI disk pointer.
 * @buf: SCSI zone descriptor.
 * @idx: Index of the zone relative to the first zone reported by the current
 *	sd_zbc_report_zones() call.
 * @cb: Callback function pointer.
 * @data: Second argument passed to @cb.
 *
 * Return: Value returned by @cb.
 *
 * Convert a SCSI zone descriptor into struct blk_zone format. Additionally,
 * call @cb(blk_zone, @data).
 */
static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
			       unsigned int idx, report_zones_cb cb, void *data)
{
	struct scsi_device *sdp = sdkp->device;
	struct blk_zone zone = { 0 };
	sector_t start_lba, gran;
	int ret;

	if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
		return -EINVAL;

	zone.type = buf[0] & 0x0f;
	zone.cond = (buf[1] >> 4) & 0xf;
	if (buf[1] & 0x01)
		zone.reset = 1;
	if (buf[1] & 0x02)
		zone.non_seq = 1;

	start_lba = get_unaligned_be64(&buf[16]);
	zone.start = logical_to_sectors(sdp, start_lba);
	zone.capacity = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
	zone.len = zone.capacity;
	if (sdkp->zone_starting_lba_gran) {
		gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
		if (zone.len > gran) {
			sd_printk(KERN_ERR, sdkp,
				  "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n",
				  start_lba,
				  sectors_to_logical(sdp, zone.capacity),
				  sectors_to_logical(sdp, zone.len),
				  sectors_to_logical(sdp, gran));
			return -EINVAL;
		}
		/*
		 * Use the starting LBA granularity instead of the zone length
		 * obtained from the REPORT ZONES command.
		 */
		zone.len = gran;
	}
	if (zone.cond == ZBC_ZONE_COND_FULL)
		zone.wp = zone.start + zone.len;
	else
		zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));

	ret = cb(&zone, idx, data);
	if (ret)
		return ret;

	return 0;
}

/**
 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
 * @sdkp: The target disk
 * @buf: vmalloc-ed buffer to use for the reply
 * @buflen: the buffer size
 * @lba: Start LBA of the report
 * @partial: Do partial report
 *
 * For internal use during device validation.
 * Using partial=true can significantly speed up execution of a report zones
 * command because the disk does not have to count all possible report matching
 * zones and will only report the count of zones fitting in the command reply
 * buffer.
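 *
 * The CDB built below uses the ZBC_IN opcode with the REPORT ZONES service
 * action: the 64-bit start LBA is placed in bytes 2-9, the 32-bit allocation
 * length in bytes 10-13, and the PARTIAL bit is set in byte 14 when @partial
 * is true.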
 */
static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
				  unsigned int buflen, sector_t lba,
				  bool partial)
{
	struct scsi_device *sdp = sdkp->device;
	const int timeout = sdp->request_queue->rq_timeout;
	struct scsi_sense_hdr sshdr;
	const struct scsi_exec_args exec_args = {
		.sshdr = &sshdr,
	};
	unsigned char cmd[16];
	unsigned int rep_len;
	int result;

	memset(cmd, 0, 16);
	cmd[0] = ZBC_IN;
	cmd[1] = ZI_REPORT_ZONES;
	put_unaligned_be64(lba, &cmd[2]);
	put_unaligned_be32(buflen, &cmd[10]);
	if (partial)
		cmd[14] = ZBC_REPORT_ZONE_PARTIAL;

	result = scsi_execute_cmd(sdp, cmd, REQ_OP_DRV_IN, buf, buflen,
				  timeout, SD_MAX_RETRIES, &exec_args);
	if (result) {
		sd_printk(KERN_ERR, sdkp,
			  "REPORT ZONES start lba %llu failed\n", lba);
		sd_print_result(sdkp, "REPORT ZONES", result);
		if (result > 0 && scsi_sense_valid(&sshdr))
			sd_print_sense_hdr(sdkp, &sshdr);
		return -EIO;
	}

	rep_len = get_unaligned_be32(&buf[0]);
	if (rep_len < 64) {
		sd_printk(KERN_ERR, sdkp,
			  "REPORT ZONES report invalid length %u\n",
			  rep_len);
		return -EIO;
	}

	return 0;
}

/**
 * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
 * @sdkp: The target disk
 * @nr_zones: Maximum number of zones to report
 * @buflen: Size of the buffer allocated
 *
 * Try to allocate a reply buffer for the number of requested zones.
 * The size of the buffer allocated may be smaller than requested to
 * satisfy the device constraints (max_hw_sectors, max_segments, etc).
 *
 * Return the address of the allocated buffer and update @buflen with
 * the size of the allocated buffer.
 */
static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
					unsigned int nr_zones, size_t *buflen)
{
	struct request_queue *q = sdkp->disk->queue;
	size_t bufsize;
	void *buf;

	/*
	 * Report zone buffer size should be at most 64B times the number of
	 * zones requested plus the 64B reply header, but should be aligned
	 * to SECTOR_SIZE for ATA devices.
	 * Make sure that this size does not exceed the hardware capabilities.
	 * Furthermore, since the report zone command cannot be split, make
	 * sure that the allocated buffer can always be mapped by limiting the
	 * number of pages allocated to the HBA max segments limit.
	 */
	nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
	bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= SECTOR_SIZE) {
		buf = kvzalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize = rounddown(bufsize >> 1, SECTOR_SIZE);
	}

	return NULL;
}

/**
 * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
 * @sdkp: The target disk
 */
static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
{
	return logical_to_sectors(sdkp->device, sdkp->zone_info.zone_blocks);
}

/**
 * sd_zbc_report_zones - SCSI .report_zones() callback.
 * @disk: Disk to report zones for.
 * @sector: Start sector.
 * @nr_zones: Maximum number of zones to report.
 * @cb: Callback function called to report zone information.
 * @data: Second argument passed to @cb.
 *
 * Called by the block layer to iterate over zone information. See also the
 * disk->fops->report_zones() calls in block/blk-zoned.c.
 */
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct scsi_disk *sdkp = scsi_disk(disk);
	sector_t lba = sectors_to_logical(sdkp->device, sector);
	unsigned int nr, i;
	unsigned char *buf;
	u64 zone_length, start_lba;
	size_t offset, buflen = 0;
	int zone_idx = 0;
	int ret;

	if (sdkp->device->type != TYPE_ZBC)
		/* Not a zoned device */
		return -EOPNOTSUPP;

	if (!sdkp->capacity)
		/* Device gone or invalid */
		return -ENODEV;

	buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
	if (!buf)
		return -ENOMEM;

	while (zone_idx < nr_zones && lba < sdkp->capacity) {
		ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, true);
		if (ret)
			goto out;

		offset = 0;
		nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
		if (!nr)
			break;

		for (i = 0; i < nr && zone_idx < nr_zones; i++) {
			offset += 64;
			start_lba = get_unaligned_be64(&buf[offset + 16]);
			zone_length = get_unaligned_be64(&buf[offset + 8]);
			if ((zone_idx == 0 &&
			     (lba < start_lba ||
			      lba >= start_lba + zone_length)) ||
			    (zone_idx > 0 && start_lba != lba) ||
			    start_lba + zone_length < start_lba) {
				sd_printk(KERN_ERR, sdkp,
					  "Zone %d at LBA %llu is invalid: %llu + %llu\n",
					  zone_idx, lba, start_lba, zone_length);
				ret = -EINVAL;
				goto out;
			}
			lba = start_lba + zone_length;
			if (sd_zbc_is_gap_zone(&buf[offset])) {
				if (sdkp->zone_starting_lba_gran)
					continue;
				sd_printk(KERN_ERR, sdkp,
					  "Gap zone without constant LBA offsets\n");
				ret = -EINVAL;
				goto out;
			}

			ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
						  cb, data);
			if (ret)
				goto out;

			zone_idx++;
		}
	}

	ret = zone_idx;
out:
	kvfree(buf);
	return ret;
}

static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
{
	struct request *rq = scsi_cmd_to_rq(cmd);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	sector_t sector = blk_rq_pos(rq);

	if (sdkp->device->type != TYPE_ZBC)
		/* Not a zoned device */
		return BLK_STS_IOERR;

	if (sdkp->device->changed)
		return BLK_STS_IOERR;

	if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
		/* Unaligned request */
		return BLK_STS_IOERR;

	return BLK_STS_OK;
}

/**
 * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
 *	can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
 * @cmd: the command to setup
 * @op: Operation to be performed
 * @all: All zones control
 *
 * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
 * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
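 *
 * The resulting ZBC_OUT CDB carries @op as the service action and either the
 * zone start LBA in bytes 2-9 (single zone) or the ALL bit in byte 14 when
 * @all is true. The command transfers no data.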
 */
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
					 unsigned char op, bool all)
{
	struct request *rq = scsi_cmd_to_rq(cmd);
	sector_t sector = blk_rq_pos(rq);
	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
	sector_t block = sectors_to_logical(sdkp->device, sector);
	blk_status_t ret;

	ret = sd_zbc_cmnd_checks(cmd);
	if (ret != BLK_STS_OK)
		return ret;

	cmd->cmd_len = 16;
	memset(cmd->cmnd, 0, cmd->cmd_len);
	cmd->cmnd[0] = ZBC_OUT;
	cmd->cmnd[1] = op;
	if (all)
		cmd->cmnd[14] = 0x1;
	else
		put_unaligned_be64(block, &cmd->cmnd[2]);

	rq->timeout = SD_TIMEOUT;
	cmd->sc_data_direction = DMA_NONE;
	cmd->transfersize = 0;
	cmd->allowed = 0;

	return BLK_STS_OK;
}

/**
 * sd_zbc_complete - ZBC command post processing.
 * @cmd: Completed command
 * @good_bytes: Command reply bytes
 * @sshdr: command sense header
 *
 * Called from sd_done() to handle zone command errors and updates to the
 * device queue zone write pointer offset cache.
 */
unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
			     struct scsi_sense_hdr *sshdr)
{
	int result = cmd->result;
	struct request *rq = scsi_cmd_to_rq(cmd);

	if (op_is_zone_mgmt(req_op(rq)) &&
	    result &&
	    sshdr->sense_key == ILLEGAL_REQUEST &&
	    sshdr->asc == 0x24) {
		/*
		 * INVALID FIELD IN CDB error: a zone management command was
		 * attempted on a conventional zone. Nothing to worry about,
		 * so be quiet about the error.
		 */
		rq->rq_flags |= RQF_QUIET;
	}

	return good_bytes;
}

/**
 * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
 * @sdkp: Target disk
 * @buf: Buffer where to store the VPD page data
 *
 * Read VPD page B6, get information and check that reads are unconstrained.
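 *
 * For host-managed devices this also records the URSWRZ bit, the maximum
 * number of open zones and the zone alignment method (constant zone length
 * or constant zone starting LBA granularity) reported in the page.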
 */
static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
					      unsigned char *buf)
{
	u64 zone_starting_lba_gran;

	if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
		sd_printk(KERN_NOTICE, sdkp,
			  "Read zoned characteristics VPD page failed\n");
		return -ENODEV;
	}

	if (sdkp->device->type != TYPE_ZBC) {
		/* Host-aware */
		sdkp->urswrz = 1;
		sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]);
		sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]);
		sdkp->zones_max_open = 0;
		return 0;
	}

	/* Host-managed */
	sdkp->urswrz = buf[4] & 1;
	sdkp->zones_optimal_open = 0;
	sdkp->zones_optimal_nonseq = 0;
	sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
	/* Check zone alignment method */
	switch (buf[23] & 0xf) {
	case 0:
	case ZBC_CONSTANT_ZONE_LENGTH:
		/* Use zone length */
		break;
	case ZBC_CONSTANT_ZONE_START_OFFSET:
		zone_starting_lba_gran = get_unaligned_be64(&buf[24]);
		if (zone_starting_lba_gran == 0 ||
		    !is_power_of_2(zone_starting_lba_gran) ||
		    logical_to_sectors(sdkp->device, zone_starting_lba_gran) >
		    UINT_MAX) {
			sd_printk(KERN_ERR, sdkp,
				  "Invalid zone starting LBA granularity %llu\n",
				  zone_starting_lba_gran);
			return -ENODEV;
		}
		sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
		break;
	default:
		sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n");
		return -ENODEV;
	}

	/*
	 * Check for unconstrained reads: host-managed devices with
	 * constrained reads (drives failing read after write pointer)
	 * are not supported.
	 */
	if (!sdkp->urswrz) {
		if (sdkp->first_scan)
			sd_printk(KERN_NOTICE, sdkp,
				  "devices with constrained reads are not supported\n");
		return -ENODEV;
	}

	return 0;
}

/**
 * sd_zbc_check_capacity - Check the device capacity
 * @sdkp: Target disk
 * @buf: command buffer
 * @zblocks: zone size in logical blocks
 *
 * Get the device zone size and check that the device capacity as reported
 * by READ CAPACITY matches the max_lba value (plus one) of the report zones
 * command reply for devices with RC_BASIS == 0.
 *
 * Returns 0 upon success or an error code upon failure.
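 *
 * The zone size is taken from the first zone descriptor of the report, or
 * from the zone starting LBA granularity when the device reports one, and
 * must be a power of two.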
 */
static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
				 u32 *zblocks)
{
	u64 zone_blocks;
	sector_t max_lba;
	unsigned char *rec;
	int ret;

	/* Do a report zone to get max_lba and the size of the first zone */
	ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
	if (ret)
		return ret;

	if (sdkp->rc_basis == 0) {
		/* The max_lba field is the capacity of this device */
		max_lba = get_unaligned_be64(&buf[8]);
		if (sdkp->capacity != max_lba + 1) {
			if (sdkp->first_scan)
				sd_printk(KERN_WARNING, sdkp,
					  "Changing capacity from %llu to max LBA+1 %llu\n",
					  (unsigned long long)sdkp->capacity,
					  (unsigned long long)max_lba + 1);
			sdkp->capacity = max_lba + 1;
		}
	}

	if (sdkp->zone_starting_lba_gran == 0) {
		/* Get the size of the first reported zone */
		rec = buf + 64;
		zone_blocks = get_unaligned_be64(&rec[8]);
		if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
			if (sdkp->first_scan)
				sd_printk(KERN_NOTICE, sdkp,
					  "Zone size too large\n");
			return -EFBIG;
		}
	} else {
		zone_blocks = sdkp->zone_starting_lba_gran;
	}

	if (!is_power_of_2(zone_blocks)) {
		sd_printk(KERN_ERR, sdkp,
			  "Zone size %llu is not a power of two.\n",
			  zone_blocks);
		return -EINVAL;
	}

	*zblocks = zone_blocks;

	return 0;
}

static void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
	if (sdkp->device->type != TYPE_ZBC || !sdkp->capacity)
		return;

	if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
		sd_printk(KERN_NOTICE, sdkp,
			  "%u zones of %u logical blocks + 1 runt zone\n",
			  sdkp->zone_info.nr_zones - 1,
			  sdkp->zone_info.zone_blocks);
	else
		sd_printk(KERN_NOTICE, sdkp,
			  "%u zones of %u logical blocks\n",
			  sdkp->zone_info.nr_zones,
			  sdkp->zone_info.zone_blocks);
}

/*
 * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
 * changed that make it necessary to call that function. Called by
 * sd_revalidate_disk() after the gendisk capacity has been set.
 */
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
{
	struct gendisk *disk = sdkp->disk;
	struct request_queue *q = disk->queue;
	u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
	unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
	unsigned int flags;
	int ret;

	/*
	 * There is nothing to do for regular disks, including host-aware disks
	 * that have partitions.
	 */
	if (!blk_queue_is_zoned(q))
		return 0;

	if (sdkp->zone_info.zone_blocks == zone_blocks &&
	    sdkp->zone_info.nr_zones == nr_zones &&
	    disk->nr_zones == nr_zones)
		return 0;

	sdkp->zone_info.zone_blocks = zone_blocks;
	sdkp->zone_info.nr_zones = nr_zones;

	flags = memalloc_noio_save();
	ret = blk_revalidate_disk_zones(disk);
	memalloc_noio_restore(flags);
	if (ret) {
		sdkp->zone_info = (struct zoned_disk_info){ };
		sdkp->capacity = 0;
		return ret;
	}

	sd_zbc_print_zones(sdkp);

	return 0;
}

/**
 * sd_zbc_read_zones - Read zone information and update the request queue
 * @sdkp: SCSI disk pointer.
 * @lim: queue limits to read into
 * @buf: 512 byte buffer used for storing SCSI command output.
 *
 * Read zone information and update the request queue zone characteristics and
 * also the zoned device information in *sdkp.
 * Called by sd_revalidate_disk() before the gendisk capacity has been set.
 */
int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
		      u8 buf[SD_BUF_SIZE])
{
	unsigned int nr_zones;
	u32 zone_blocks = 0;
	int ret;

	if (sdkp->device->type != TYPE_ZBC)
		return 0;

	lim->features |= BLK_FEAT_ZONED;

	/*
	 * Per ZBC and ZAC specifications, writes in sequential write required
	 * zones of host-managed devices must be aligned to the device physical
	 * block size.
	 */
	lim->zone_write_granularity = sdkp->physical_block_size;

	/* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
	sdkp->device->use_16_for_rw = 1;
	sdkp->device->use_10_for_rw = 0;
	sdkp->device->use_16_for_sync = 1;

	/* Check zoned block device characteristics (unconstrained reads) */
	ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
	if (ret)
		goto err;

	/* Check the device capacity reported by report zones */
	ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks);
	if (ret != 0)
		goto err;

	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
	sdkp->early_zone_info.nr_zones = nr_zones;
	sdkp->early_zone_info.zone_blocks = zone_blocks;

	/* The drive satisfies the kernel restrictions: set it up */
	if (sdkp->zones_max_open == U32_MAX)
		lim->max_open_zones = 0;
	else
		lim->max_open_zones = sdkp->zones_max_open;
	lim->max_active_zones = 0;
	lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks);
	/* Enable block layer zone append emulation */
	lim->max_zone_append_sectors = 0;

	return 0;

err:
	sdkp->capacity = 0;

	return ret;
}