/*-
 * Copyright (C) 2012 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/module.h>
#include <sys/proc.h>

#include <dev/pci/pcivar.h>

#include "nvme_private.h"

static int
nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvme_namespace	*ns;
	struct nvme_controller	*ctrlr;
	struct nvme_pt_command	*pt;

	ns = cdev->si_drv1;
	ctrlr = ns->ctrlr;

	switch (cmd) {
	case NVME_IO_TEST:
	case NVME_BIO_TEST:
		nvme_ns_test(ns, cmd, arg);
		break;
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, ns->id,
		    1 /* is_user_buffer */, 0 /* is_admin_cmd */));
	case DIOCGMEDIASIZE:
		*(off_t *)arg = (off_t)nvme_ns_get_size(ns);
		break;
	case DIOCGSECTORSIZE:
		*(u_int *)arg = nvme_ns_get_sector_size(ns);
		break;
	default:
		return (ENOTTY);
	}

	return (0);
}

static int
nvme_ns_open(struct cdev *dev __unused, int flags, int fmt __unused,
    struct thread *td)
{
	int error = 0;

	if (flags & FWRITE)
		error = securelevel_gt(td->td_ucred, 0);

	return (error);
}

static int
nvme_ns_close(struct cdev *dev __unused, int flags, int fmt __unused,
    struct thread *td)
{

	return (0);
}

static void
nvme_ns_strategy_done(void *arg, const struct nvme_completion *cpl)
{
	struct bio *bp = arg;

	/*
	 * TODO: add more extensive translation of NVMe status codes
	 * to different bio error codes (e.g. EIO, EINVAL, etc.)
	 */
	if (nvme_completion_is_error(cpl)) {
		bp->bio_error = EIO;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
	} else
		bp->bio_resid = 0;

	biodone(bp);
}

static void
nvme_ns_strategy(struct bio *bp)
{
	struct nvme_namespace *ns;
	int err;

	ns = bp->bio_dev->si_drv1;
	err = nvme_ns_bio_process(ns, bp, nvme_ns_strategy_done);

	if (err) {
		bp->bio_error = err;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
	}
}

static struct cdevsw nvme_ns_cdevsw = {
	.d_version =	D_VERSION,
#ifdef NVME_UNMAPPED_BIO_SUPPORT
	.d_flags =	D_DISK | D_UNMAPPED_IO,
#else
	.d_flags =	D_DISK,
#endif
	.d_read =	physread,
	.d_write =	physwrite,
	.d_open =	nvme_ns_open,
	.d_close =	nvme_ns_close,
	.d_strategy =	nvme_ns_strategy,
	.d_ioctl =	nvme_ns_ioctl
};

uint32_t
nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns)
{
	return (ns->ctrlr->max_xfer_size);
}

uint32_t
nvme_ns_get_sector_size(struct nvme_namespace *ns)
{
	return (1 << ns->data.lbaf[0].lbads);
}

uint64_t
nvme_ns_get_num_sectors(struct nvme_namespace *ns)
{
	return (ns->data.nsze);
}

uint64_t
nvme_ns_get_size(struct nvme_namespace *ns)
{
	return (nvme_ns_get_num_sectors(ns) * nvme_ns_get_sector_size(ns));
}

uint32_t
nvme_ns_get_flags(struct nvme_namespace *ns)
{
	return (ns->flags);
}

const char *
nvme_ns_get_serial_number(struct nvme_namespace *ns)
{
	return ((const char *)ns->ctrlr->cdata.sn);
}

const char *
nvme_ns_get_model_number(struct nvme_namespace *ns)
{
	return ((const char *)ns->ctrlr->cdata.mn);
}

const struct nvme_namespace_data *
nvme_ns_get_data(struct nvme_namespace *ns)
{

	return (&ns->data);
}

static void
nvme_ns_bio_done(void *arg, const struct nvme_completion *status)
{
	struct bio	*bp = arg;
	nvme_cb_fn_t	bp_cb_fn;

	bp_cb_fn = bp->bio_driver1;

	if (bp->bio_driver2)
		free(bp->bio_driver2, M_NVME);

	bp_cb_fn(bp, status);
}

int
nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
    nvme_cb_fn_t cb_fn)
{
	struct nvme_dsm_range	*dsm_range;
	int			err;

	bp->bio_driver1 = cb_fn;

	switch (bp->bio_cmd) {
	case BIO_READ:
		err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp);
		break;
	case BIO_WRITE:
		err = nvme_ns_cmd_write_bio(ns, bp, nvme_ns_bio_done, bp);
		break;
	case BIO_FLUSH:
		err = nvme_ns_cmd_flush(ns, nvme_ns_bio_done, bp);
		break;
	case BIO_DELETE:
		dsm_range =
		    malloc(sizeof(struct nvme_dsm_range), M_NVME,
		    M_ZERO | M_WAITOK);
		dsm_range->length =
		    bp->bio_bcount / nvme_ns_get_sector_size(ns);
		dsm_range->starting_lba =
		    bp->bio_offset / nvme_ns_get_sector_size(ns);
		bp->bio_driver2 = dsm_range;
		err = nvme_ns_cmd_deallocate(ns, dsm_range, 1,
		    nvme_ns_bio_done, bp);
		if (err != 0)
			free(dsm_range, M_NVME);
		break;
	default:
		err = EIO;
		break;
	}

	return (err);
}

#ifdef CHATHAM2
static void
nvme_ns_populate_chatham_data(struct nvme_namespace *ns)
{
	struct nvme_controller		*ctrlr;
	struct nvme_namespace_data	*nsdata;

	ctrlr = ns->ctrlr;
	nsdata = &ns->data;

	nsdata->nsze = ctrlr->chatham_lbas;
	nsdata->ncap = ctrlr->chatham_lbas;
	nsdata->nuse = ctrlr->chatham_lbas;

	/* Chatham2 doesn't support thin provisioning. */
	nsdata->nsfeat.thin_prov = 0;

	/* Set LBA size to 512 bytes. */
	nsdata->lbaf[0].lbads = 9;
}
#endif /* CHATHAM2 */

int
nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
    struct nvme_controller *ctrlr)
{
	struct nvme_completion_poll_status	status;

	ns->ctrlr = ctrlr;
	ns->id = id;

	/*
	 * Namespaces are reconstructed after a controller reset, so check
	 * to make sure we only call mtx_init once on each mtx.
	 *
	 * TODO: Move this somewhere where it gets called at controller
	 * construction time, which is not invoked as part of each
	 * controller reset.
	 */
	if (!mtx_initialized(&ns->lock))
		mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF);

#ifdef CHATHAM2
	if (pci_get_devid(ctrlr->dev) == CHATHAM_PCI_ID)
		nvme_ns_populate_chatham_data(ns);
	else {
#endif
		status.done = FALSE;
		nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data,
		    nvme_completion_poll_cb, &status);
		while (status.done == FALSE)
			DELAY(5);
		if (nvme_completion_is_error(&status.cpl)) {
			nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
			return (ENXIO);
		}
#ifdef CHATHAM2
	}
#endif

	if (ctrlr->cdata.oncs.dsm)
		ns->flags |= NVME_NS_DEALLOCATE_SUPPORTED;

	if (ctrlr->cdata.vwc.present)
		ns->flags |= NVME_NS_FLUSH_SUPPORTED;

	/*
	 * cdev may have already been created, if we are reconstructing the
	 * namespace after a controller-level reset.
	 */
	if (ns->cdev != NULL)
		return (0);

	/*
	 * MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
	 * be destroyed.  This avoids refcounting on the cdev object.
	 * That should be OK here, as long as we're not supporting PCIe
	 * surprise removal nor namespace deletion.
	 */
#ifdef MAKEDEV_ETERNAL_KLD
	ns->cdev = make_dev_credf(MAKEDEV_ETERNAL_KLD, &nvme_ns_cdevsw, 0,
	    NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
	    device_get_unit(ctrlr->dev), ns->id);
#else
	ns->cdev = make_dev_credf(0, &nvme_ns_cdevsw, 0,
	    NULL, UID_ROOT, GID_WHEEL, 0600, "nvme%dns%d",
	    device_get_unit(ctrlr->dev), ns->id);
#endif

	if (ns->cdev != NULL)
		ns->cdev->si_drv1 = ns;

	return (0);
}

void
nvme_ns_destruct(struct nvme_namespace *ns)
{

	if (ns->cdev != NULL)
		destroy_dev(ns->cdev);
}
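
/*
 * Usage sketch (illustrative only, not part of the driver): the disk
 * ioctls handled in nvme_ns_ioctl() above can be exercised from userland
 * through the nvme%dns%d device node created in nvme_ns_construct().
 * The device path below is an assumption (controller 0, namespace 1);
 * a read-only open always passes nvme_ns_open().
 *
 *	#include <sys/types.h>
 *	#include <sys/disk.h>
 *	#include <sys/ioctl.h>
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		off_t size;
 *		u_int secsize;
 *		int fd = open("/dev/nvme0ns1", O_RDONLY);
 *
 *		if (fd < 0)
 *			return (1);
 *		if (ioctl(fd, DIOCGMEDIASIZE, &size) == 0 &&
 *		    ioctl(fd, DIOCGSECTORSIZE, &secsize) == 0)
 *			printf("%jd bytes, %u-byte sectors\n",
 *			    (intmax_t)size, secsize);
 *		close(fd);
 *		return (0);
 *	}
 */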