// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
	const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
	/* Number of logical blocks per physical block. */
	const u32 lpp = ql->physical_block_size / ql->logical_block_size;
	/* Logical blocks per physical block, 0's based. */
	const __le16 lpp0b = to0based(lpp);

	/*
	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
	 * NAWUPF, and NACWU are defined for this namespace and should be
	 * used by the host for this namespace instead of the AWUN, AWUPF,
	 * and ACWU fields in the Identify Controller data structure. If
	 * any of these fields are zero that means that the corresponding
	 * field from the identify controller data structure should be used.
	 */
	id->nsfeat |= 1 << 1;
	id->nawun = lpp0b;
	id->nawupf = lpp0b;
	id->nacwu = lpp0b;

	/*
	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
	 * NOWS are defined for this namespace and should be used by
	 * the host for I/O optimization.
	 */
	id->nsfeat |= 1 << 4;
	/* NPWG = Namespace Preferred Write Granularity. 0's based */
	id->npwg = lpp0b;
	/* NPWA = Namespace Preferred Write Alignment. 0's based */
	id->npwa = id->npwg;
	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
	id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
	/* NPDA = Namespace Preferred Deallocate Alignment */
	id->npda = id->npdg;
	/* NOWS = Namespace Optimal Write Size */
	id->nows = to0based(ql->io_opt / ql->logical_block_size);
}

int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
	int ret;

	ns->bdev = blkdev_get_by_path(ns->device_path,
			FMODE_READ | FMODE_WRITE, NULL);
	if (IS_ERR(ns->bdev)) {
		ret = PTR_ERR(ns->bdev);
		if (ret != -ENOTBLK) {
			pr_err("failed to open block device %s: (%ld)\n",
					ns->device_path, PTR_ERR(ns->bdev));
		}
		ns->bdev = NULL;
		return ret;
	}
	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
	return 0;
}

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
	if (ns->bdev) {
		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
		ns->bdev = NULL;
	}
}

void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
	ns->size = i_size_read(ns->bdev->bd_inode);
}

static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
	u16 status = NVME_SC_SUCCESS;

	if (likely(blk_sts == BLK_STS_OK))
		return status;
	/*
	 * Right now there exists an M : 1 mapping between block layer error
	 * codes and NVMe status codes (see nvme_error_status()). For
	 * consistency, when we reverse map we use the most appropriate NVMe
	 * status code from the group of NVMe status codes used in
	 * nvme_error_status().
	 */
	switch (blk_sts) {
	case BLK_STS_NOSPC:
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, length);
		break;
	case BLK_STS_TARGET:
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		break;
	case BLK_STS_NOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case BLK_STS_MEDIUM:
		status = NVME_SC_ACCESS_DENIED;
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		break;
	case BLK_STS_IOERR:
		/* fallthru */
	default:
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	}

	switch (req->cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->error_slba = le64_to_cpu(req->cmd->rw.slba);
		break;
	case nvme_cmd_write_zeroes:
		req->error_slba =
			le64_to_cpu(req->cmd->write_zeroes.slba);
		break;
	default:
		req->error_slba = 0;
	}
	return status;
}

static void nvmet_bio_done(struct bio *bio)
{
	struct nvmet_req *req = bio->bi_private;

	nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
	if (bio != &req->b.inline_bio)
		bio_put(bio);
}

static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
	int sg_cnt = req->sg_cnt;
	struct bio *bio;
	struct scatterlist *sg;
	struct blk_plug plug;
	sector_t sector;
	int op, i;

	if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
		return;

	if (!req->sg_cnt) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			op |= REQ_FUA;
	} else {
		op = REQ_OP_READ;
	}

	/* Peer-to-peer DMA requests must not be merged with other I/O. */
	if (is_pci_p2pdma_page(sg_page(req->sg)))
		op |= REQ_NOMERGE;

	/* Convert the command's LBA into a 512-byte sector offset. */
	sector = le64_to_cpu(req->cmd->rw.slba);
	sector <<= (req->ns->blksize_shift - 9);

	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
		bio = &req->b.inline_bio;
		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	} else {
		bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
	}
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_iter.bi_sector = sector;
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = op;

	blk_start_plug(&plug);
	/* Map the scatterlist, chaining additional bios whenever one fills up. */
	for_each_sg(req->sg, sg, req->sg_cnt, i) {
		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
				!= sg->length) {
			struct bio *prev = bio;

			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
			bio_set_dev(bio, req->ns->bdev);
			bio->bi_iter.bi_sector = sector;
			bio->bi_opf = op;

			bio_chain(bio, prev);
			submit_bio(prev);
		}

		sector += sg->length >> 9;
		sg_cnt--;
	}

	submit_bio(bio);
	blk_finish_plug(&plug);
}

static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
	struct bio *bio = &req->b.inline_bio;

	if (!nvmet_check_data_len(req, 0))
		return;

	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
	bio_set_dev(bio, req->ns->bdev);
	bio->bi_private = req;
	bio->bi_end_io = nvmet_bio_done;
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

	submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
	if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
		return NVME_SC_INTERNAL | NVME_SC_DNR;
	return 0;
}

static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
		struct nvme_dsm_range *range, struct bio **bio)
{
	struct nvmet_ns *ns = req->ns;
	int ret;

	ret = __blkdev_issue_discard(ns->bdev,
			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
			GFP_KERNEL, 0, bio);
	if (ret && ret != -EOPNOTSUPP) {
		req->error_slba = le64_to_cpu(range->slba);
		return errno_to_nvme_status(req, ret);
	}
	return NVME_SC_SUCCESS;
}

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
	struct nvme_dsm_range range;
	struct bio *bio = NULL;
	int i;
	u16 status;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
				sizeof(range));
		if (status)
			break;

		status = nvmet_bdev_discard_range(req, &range, &bio);
		if (status)
			break;
	}

	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		if (status)
			bio_io_error(bio);
		else
			submit_bio(bio);
	} else {
		nvmet_req_complete(req, status);
	}
}

static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
		return;

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_bdev_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	struct bio *bio = NULL;
	sector_t sector;
	sector_t nr_sector;
	int ret;

	if (!nvmet_check_data_len(req, 0))
		return;

	sector = le64_to_cpu(write_zeroes->slba) <<
		(req->ns->blksize_shift - 9);
	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
		(req->ns->blksize_shift - 9));

	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
			GFP_KERNEL, &bio, 0);
	if (bio) {
		bio->bi_private = req;
		bio->bi_end_io = nvmet_bio_done;
		submit_bio(bio);
	} else {
		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
	}
}

u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_bdev_execute_rw;
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_bdev_execute_flush;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_bdev_execute_dsm;
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_bdev_execute_write_zeroes;
		return 0;
	default:
		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
		       req->sq->qid);
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}