// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
        const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
        /* Number of logical blocks per physical block. */
        const u32 lpp = ql->physical_block_size / ql->logical_block_size;
        /* Logical blocks per physical block, 0's based. */
        const __le16 lpp0b = to0based(lpp);

        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
         * NAWUPF, and NACWU are defined for this namespace and should be
         * used by the host for this namespace instead of the AWUN, AWUPF,
         * and ACWU fields in the Identify Controller data structure. If
         * any of these fields are zero that means that the corresponding
         * field from the identify controller data structure should be used.
         */
        id->nsfeat |= 1 << 1;
        id->nawun = lpp0b;
        id->nawupf = lpp0b;
        id->nacwu = lpp0b;

        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
         * NOWS are defined for this namespace and should be used by
         * the host for I/O optimization.
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
        id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
        id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
        /* NPDA = Namespace Preferred Deallocate Alignment */
        id->npda = id->npdg;
        /* NOWS = Namespace Optimal Write Size */
        id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
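/*
 * Worked example (hypothetical device, for illustration only): a bdev
 * reporting a 512-byte logical and 4096-byte physical block size with a
 * 128 KiB discard granularity and 128 KiB optimal I/O size would yield:
 *
 *      lpp  = 4096 / 512 = 8, so nawun/nawupf/nacwu/npwg/npwa = 7 (0's based)
 *      npdg = npda = to0based(131072 / 512) = 255
 *      nows = to0based(131072 / 512) = 255
 *
 * to0based() (from nvmet.h) converts a count to the spec's 0's based
 * convention, i.e. value - 1, clamped to the field's 16-bit range.
 */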
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
        if (ns->bdev) {
                blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
                ns->bdev = NULL;
        }
}

static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
        struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

        if (bi) {
                ns->metadata_size = bi->tuple_size;
                if (bi->profile == &t10_pi_type1_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE1;
                else if (bi->profile == &t10_pi_type3_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE3;
                else
                        /* Unsupported metadata type */
                        ns->metadata_size = 0;
        }
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
        int ret;

        ns->bdev = blkdev_get_by_path(ns->device_path,
                        FMODE_READ | FMODE_WRITE, NULL);
        if (IS_ERR(ns->bdev)) {
                ret = PTR_ERR(ns->bdev);
                if (ret != -ENOTBLK) {
                        pr_err("failed to open block device %s: (%ld)\n",
                                        ns->device_path, PTR_ERR(ns->bdev));
                }
                ns->bdev = NULL;
                return ret;
        }
        ns->size = bdev_nr_bytes(ns->bdev);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ns->pi_type = 0;
        ns->metadata_size = 0;
        if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
                nvmet_bdev_ns_enable_integrity(ns);

        if (bdev_is_zoned(ns->bdev)) {
                if (!nvmet_bdev_zns_enable(ns)) {
                        nvmet_bdev_ns_disable(ns);
                        return -EINVAL;
                }
                ns->csi = NVME_CSI_ZNS;
        }

        return 0;
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
        ns->size = bdev_nr_bytes(ns->bdev);
}

u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
        u16 status = NVME_SC_SUCCESS;

        if (likely(blk_sts == BLK_STS_OK))
                return status;
        /*
         * Right now there exists an M : 1 mapping from block layer errors
         * to NVMe status codes (see nvme_error_status()). For consistency,
         * when we reverse map we use the most appropriate NVMe status code
         * from the group of NVMe status codes used in nvme_error_status().
         */
        switch (blk_sts) {
        case BLK_STS_NOSPC:
                status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, length);
                break;
        case BLK_STS_TARGET:
                status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, slba);
                break;
        case BLK_STS_NOTSUPP:
                req->error_loc = offsetof(struct nvme_common_command, opcode);
                switch (req->cmd->common.opcode) {
                case nvme_cmd_dsm:
                case nvme_cmd_write_zeroes:
                        status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
                        break;
                default:
                        status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
                break;
        case BLK_STS_MEDIUM:
                status = NVME_SC_ACCESS_DENIED;
                req->error_loc = offsetof(struct nvme_rw_command, nsid);
                break;
        case BLK_STS_IOERR:
        default:
                status = NVME_SC_INTERNAL | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_common_command, opcode);
        }

        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->error_slba = le64_to_cpu(req->cmd->rw.slba);
                break;
        case nvme_cmd_write_zeroes:
                req->error_slba =
                        le64_to_cpu(req->cmd->write_zeroes.slba);
                break;
        default:
                req->error_slba = 0;
        }
        return status;
}

static void nvmet_bio_done(struct bio *bio)
{
        struct nvmet_req *req = bio->bi_private;

        nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
        nvmet_req_bio_put(req, bio);
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip;
        int rc;
        size_t resid, len;

        bi = bdev_get_integrity(req->ns->bdev);
        if (unlikely(!bi)) {
                pr_err("Unable to locate bio_integrity\n");
                return -ENODEV;
        }

        bip = bio_integrity_alloc(bio, GFP_NOIO,
                                  bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
        }

        bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
        /* virtual start sector must be in integrity interval units */
        bip_set_seed(bip, bio->bi_iter.bi_sector >>
                     (bi->interval_exp - SECTOR_SHIFT));

        resid = bip->bip_iter.bi_size;
        while (resid > 0 && sg_miter_next(miter)) {
                len = min_t(size_t, miter->length, resid);
                rc = bio_integrity_add_page(bio, miter->page, len,
                                            offset_in_page(miter->addr));
                if (unlikely(rc != len)) {
                        pr_err("bio_integrity_add_page() failed; %d\n", rc);
                        sg_miter_stop(miter);
                        return -ENOMEM;
                }

                resid -= len;
                if (len < miter->length)
                        miter->consumed -= miter->length - len;
        }
        sg_miter_stop(miter);

        return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
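/*
 * Build and submit the bio(s) for a host read or write. Data pages come
 * from req->sg; when a bio fills up before the whole scatterlist has been
 * mapped, a new bio is allocated, chained to the previous one with
 * bio_chain(), and the previous bio is submitted. If the command carries
 * protection information, a bio_integrity_payload is attached to each bio
 * from req->metadata_sg before submission. All submissions happen under a
 * single blk_plug so the block layer can batch and merge them.
 */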
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
        unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
        sector_t sector;
        int op, i, rc;
        struct sg_mapping_iter prot_miter;
        unsigned int iter_flags;
        unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

        if (!nvmet_check_transfer_len(req, total_len))
                return;

        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        op |= REQ_FUA;
                iter_flags = SG_MITER_TO_SG;
        } else {
                op = REQ_OP_READ;
                iter_flags = SG_MITER_FROM_SG;
        }

        if (is_pci_p2pdma_page(sg_page(req->sg)))
                op |= REQ_NOMERGE;

        sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

        if (nvmet_use_inline_bvec(req)) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->ns->bdev, req->inline_bvec,
                         ARRAY_SIZE(req->inline_bvec), op);
        } else {
                bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op,
                                GFP_KERNEL);
        }
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        blk_start_plug(&plug);
        if (req->metadata_len)
                sg_miter_start(&prot_miter, req->metadata_sg,
                               req->metadata_sg_cnt, iter_flags);

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
                        struct bio *prev = bio;

                        if (req->metadata_len) {
                                rc = nvmet_bdev_alloc_bip(req, bio,
                                                          &prot_miter);
                                if (unlikely(rc)) {
                                        bio_io_error(bio);
                                        return;
                                }
                        }

                        bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
                                        op, GFP_KERNEL);
                        bio->bi_iter.bi_sector = sector;

                        bio_chain(bio, prev);
                        submit_bio(prev);
                }

                sector += sg->length >> 9;
                sg_cnt--;
        }

        if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
                if (unlikely(rc)) {
                        bio_io_error(bio);
                        return;
                }
        }

        submit_bio(bio);
        blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
        struct bio *bio = &req->b.inline_bio;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        bio_init(bio, req->ns->bdev, req->inline_bvec,
                 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
        if (blkdev_issue_flush(req->ns->bdev))
                return NVME_SC_INTERNAL | NVME_SC_DNR;
        return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                struct nvme_dsm_range *range, struct bio **bio)
{
        struct nvmet_ns *ns = req->ns;
        int ret;

        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, 0, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
        }
        return NVME_SC_SUCCESS;
}
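/*
 * A Dataset Management command carries dsm.nr + 1 ranges (0's based count).
 * Each range is copied out of the request SGL and handed to
 * __blkdev_issue_discard(), which accumulates the work into one bio chain;
 * the resulting bio is submitted (or failed) once all ranges have been
 * processed.
 */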
static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
        struct nvme_dsm_range range;
        struct bio *bio = NULL;
        int i;
        u16 status;

        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                sizeof(range));
                if (status)
                        break;

                status = nvmet_bdev_discard_range(req, &range, &bio);
                if (status)
                        break;
        }

        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status)
                        bio_io_error(bio);
                else
                        submit_bio(bio);
        } else {
                nvmet_req_complete(req, status);
        }
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;

        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        struct bio *bio = NULL;
        sector_t sector;
        sector_t nr_sector;
        int ret;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                        (req->ns->blksize_shift - 9));

        ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
                        GFP_KERNEL, &bio, 0);
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                submit_bio(bio);
        } else {
                nvmet_req_complete(req, errno_to_nvme_status(req, ret));
        }
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
                if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
                        req->metadata_len = nvmet_rw_metadata_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}