/*-
 * Copyright (C) 2012-2013 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/module.h>
#include <sys/proc.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static int
nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvme_namespace	*ns;
	struct nvme_controller	*ctrlr;
	struct nvme_pt_command	*pt;

	ns = cdev->si_drv1;
	ctrlr = ns->ctrlr;

	switch (cmd) {
	case NVME_IO_TEST:
	case NVME_BIO_TEST:
		nvme_ns_test(ns, cmd, arg);
		break;
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id,
		    1 /* is_user_buffer */, 0 /* is_admin_cmd */));
	case DIOCGMEDIASIZE:
		*(off_t *)arg = (off_t)nvme_ns_get_size(ns);
		break;
	case DIOCGSECTORSIZE:
		*(u_int *)arg = nvme_ns_get_sector_size(ns);
		break;
	default:
		return (ENOTTY);
	}

	return (0);
}

static int
nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused,
    struct thread *td)
{
	int error = 0;

	/* Opening for write is only allowed while securelevel <= 0. */
	if (flags & FWRITE)
		error = securelevel_gt(td->td_ucred, 0);

	return (error);
}

static int
nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
    struct thread *td)
{

	return (0);
}

static void
nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl)
{
	struct bio *bp = arg;

	/*
	 * TODO: add more extensive translation of NVMe status codes
	 * to different bio error codes (e.g. EIO, EINVAL, etc.)
	 */
	if (nvme_completion_is_error(cpl)) {
		bp->bio_error = EIO;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
	} else
		bp->bio_resid = 0;

	biodone(bp);
}

static void
nvme_ns_strategy(struct bio *bp)
{
	struct nvme_namespace	*ns;
	int			err;

	ns = bp->bio_dev->si_drv1;
	err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done);

	if (err) {
		bp->bio_error = err;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
	}
}

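/*
 * Character-device entry points for each namespace (/dev/nvme%dns%d).
 * D_DISK marks this as a disk-like device; read(2) and write(2) are
 * converted into strategy calls by physread()/physwrite() via physio().
 */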
static struct cdevsw nvme_ns_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_DISK,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_open =	nvme_ns_open,
	.d_close =	nvme_ns_close,
	.d_strategy =	nvme_ns_strategy,
	.d_ioctl =	nvme_ns_ioctl
};

uint32_t
nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns)
{
	return (ns->ctrlr->max_xfer_size);
}

uint32_t
nvme_ns_get_sector_size(struct nvme_namespace *ns)
{
	/* lbads is the log2 of the LBA data size. */
	return (1 << ns->data.lbaf[ns->data.flbas.format].lbads);
}

uint64_t
nvme_ns_get_num_sectors(struct nvme_namespace *ns)
{
	return (ns->data.nsze);
}

uint64_t
nvme_ns_get_size(struct nvme_namespace *ns)
{
	return (nvme_ns_get_num_sectors(ns) * nvme_ns_get_sector_size(ns));
}

uint32_t
nvme_ns_get_flags(struct nvme_namespace *ns)
{
	return (ns->flags);
}

const char *
nvme_ns_get_serial_number(struct nvme_namespace *ns)
{
	return ((const char *)ns->ctrlr->cdata.sn);
}

const char *
nvme_ns_get_model_number(struct nvme_namespace *ns)
{
	return ((const char *)ns->ctrlr->cdata.mn);
}

const struct nvme_namespace_data *
nvme_ns_get_data(struct nvme_namespace *ns)
{

	return (&ns->data);
}

static void
nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
{
	struct bio	*bp = arg;
	nvme_cb_fn_t	bp_cb_fn;

	/* Retrieve the caller's completion routine stashed in bio_driver1. */
	bp_cb_fn = bp->bio_driver1;

	/* Free the DSM range allocated for a BIO_DELETE request, if any. */
	if (bp->bio_driver2)
		free(bp->bio_driver2, M_NVME);

	bp_cb_fn(bp, status);
}

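/*
 * Translate a struct bio into the corresponding NVMe command and submit it.
 * The caller's completion routine is stashed in bio_driver1 so that
 * nvme_ns_bio_done() can invoke it when the command completes; BIO_DELETE
 * additionally stashes its deallocate (DSM) range in bio_driver2 so the same
 * path can free it.  Returns 0 on successful submission, else an errno.
 */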
int
nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
    nvme_cb_fn_t cb_fn)
{
	struct nvme_dsm_range	*dsm_range;
	int			err;

	bp->bio_driver1 = cb_fn;

	switch (bp->bio_cmd) {
	case BIO_READ:
		err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
		break;
	case BIO_WRITE:
		err = nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp);
		break;
	case BIO_FLUSH:
		err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp);
		break;
	case BIO_DELETE:
		dsm_range =
		    malloc(sizeof(struct nvme_dsm_range), M_NVME,
		    M_ZERO | M_WAITOK);
		dsm_range->length =
		    bp->bio_bcount / nvme_ns_get_sector_size(ns);
		dsm_range->starting_lba =
		    bp->bio_offset / nvme_ns_get_sector_size(ns);
		bp->bio_driver2 = dsm_range;
		err = nvme_ns_cmd_deallocate(ns, dsm_range, 1,
		    nvme_ns_bio_done, bp);
		if (err != 0)
			free(dsm_range, M_NVME);
		break;
	default:
		err = EIO;
		break;
	}

	return (err);
}

#ifdef CHATHAM2
static void
nvme_ns_populate_chatham_data(struct nvme_namespace *ns)
{
	struct nvme_controller		*ctrlr;
	struct nvme_namespace_data	*nsdata;

	ctrlr = ns->ctrlr;
	nsdata = &ns->data;

	nsdata->nsze = ctrlr->chatham_lbas;
	nsdata->ncap = ctrlr->chatham_lbas;
	nsdata->nuse = ctrlr->chatham_lbas;

	/* Chatham2 doesn't support thin provisioning. */
	nsdata->nsfeat.thin_prov = 0;

	/* Set LBA size to 512 bytes. */
	nsdata->lbaf[0].lbads = 9;
}
#endif /* CHATHAM2 */

int
nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
    struct nvme_controller *ctrlr)
{
	struct nvme_completion_poll_status	status;

	ns->ctrlr = ctrlr;
	ns->id = id;

	/*
	 * Namespaces are reconstructed after a controller reset, so check
	 * to make sure we only call mtx_init once on each mtx.
	 *
	 * TODO: Move this somewhere where it gets called at controller
	 * construction time, which is not invoked as part of each
	 * controller reset.
	 */
	if (!mtx_initialized(&ns->lock))
		mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF);

#ifdef CHATHAM2
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		nvme_ns_populate_chatham_data(ns);
	else {
#endif
		/* Poll for completion of the IDENTIFY NAMESPACE command. */
		status.done = FALSE;
		nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data,
		    nvme_completion_poll_cb, &status);
		while (status.done == FALSE)
			DELAY(5);
		if (nvme_completion_is_error(&status.cpl)) {
			nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
			return (ENXIO);
		}
#ifdef CHATHAM2
	}
#endif

	/*
	 * Note: format is a 0-based value, so > is appropriate here,
	 * not >=.
	 */
	if (ns->data.flbas.format > ns->data.nlbaf) {
		nvme_printf(ctrlr,
		    "lba format %d exceeds number supported (%d)\n",
		    ns->data.flbas.format, ns->data.nlbaf + 1);
		return (ENXIO);
	}

	if (ctrlr->cdata.oncs.dsm)
		ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;

	if (ctrlr->cdata.vwc.present)
		ns->flags |= NVME_NS_FLUSH_SUPPORTED;

	/*
	 * cdev may have already been created, if we are reconstructing the
	 * namespace after a controller-level reset.
	 */
	if (ns->cdev != NULL)
		return (0);

	/*
	 * MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
	 * be destroyed.  This avoids refcounting on the cdev object, which
	 * should be OK here, as long as we do not support PCIe surprise
	 * removal or namespace deletion.
	 */
#ifdef MAKEDEV_ETERNAL_KLD
	ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, 0,
	    NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
	    device_get_unit(ctrlr->dev), ns->id);
#else
	ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, 0,
	    NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
	    device_get_unit(ctrlr->dev), ns->id);
#endif

	if (ns->cdev != NULL) {
#ifdef NVME_UNMAPPED_BIO_SUPPORT
		ns->cdev->si_flags |= SI_UNMAPPED;
#endif
		ns->cdev->si_drv1 = ns;
	}

	return (0);
}

void
nvme_ns_destruct(struct nvme_namespace *ns)
{

	if (ns->cdev != NULL)
		destroy_dev(ns->cdev);
}