/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2015 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Derived from ata_da.c:
 * Copyright (c) 2009 Alexander Motin <mav@FreeBSD.org>
 */

#include <sys/param.h>

#ifdef _KERNEL
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/conf.h>
#include <sys/devicestat.h>
#include <sys/eventhandler.h>
#include <sys/malloc.h>
#include <sys/cons.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sbuf.h>
#include <geom/geom.h>
#include <geom/geom_disk.h>
#endif /* _KERNEL */

#ifndef _KERNEL
#include <stdio.h>
#include <string.h>
#endif /* _KERNEL */

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_periph.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_sim.h>
#include <cam/cam_iosched.h>

#include <cam/nvme/nvme_all.h>

typedef enum {
	NDA_STATE_NORMAL
} nda_state;

typedef enum {
	NDA_FLAG_OPEN		= 0x0001,
	NDA_FLAG_DIRTY		= 0x0002,
	NDA_FLAG_SCTX_INIT	= 0x0004,
} nda_flags;
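/*
 * Bit-string description of nda_flags for the kernel's "%b" formatter
 * (used by the per-device "flags" sysctl below): the leading "\020"
 * selects base-16 output, and each "\0NN<name>" entry labels bit NN,
 * counted from 1.
 */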
#define NDA_FLAG_STRING		\
	"\020"			\
	"\001OPEN"		\
	"\002DIRTY"		\
	"\003SCTX_INIT"

typedef enum {
	NDA_Q_4K   = 0x01,
	NDA_Q_NONE = 0x00,
} nda_quirks;

#define NDA_Q_BIT_STRING	\
	"\020"			\
	"\001Bit 0"

typedef enum {
	NDA_CCB_BUFFER_IO	= 0x01,
	NDA_CCB_DUMP		= 0x02,
	NDA_CCB_TRIM		= 0x03,
	NDA_CCB_PASS		= 0x04,
	NDA_CCB_TYPE_MASK	= 0x0F,
} nda_ccb_state;

/* Offsets into our private area for storing information */
#define ccb_state	ccb_h.ppriv_field0
#define ccb_bp		ccb_h.ppriv_ptr1	/* For NDA_CCB_BUFFER_IO */
#define ccb_trim	ccb_h.ppriv_ptr1	/* For NDA_CCB_TRIM */

struct nda_softc {
	struct cam_iosched_softc *cam_iosched;
	int			outstanding_cmds;	/* Number of active commands */
	int			refcount;		/* Active xpt_action() calls */
	nda_state		state;
	nda_flags		flags;
	nda_quirks		quirks;
	int			unmappedio;
	quad_t			deletes;
	uint32_t		nsid;			/* Namespace ID for this nda device */
	struct disk		*disk;
	struct task		sysctl_task;
	struct sysctl_ctx_list	sysctl_ctx;
	struct sysctl_oid	*sysctl_tree;
	uint64_t		trim_count;
	uint64_t		trim_ranges;
	uint64_t		trim_lbas;
#ifdef CAM_TEST_FAILURE
	int			force_read_error;
	int			force_write_error;
	int			periodic_read_error;
	int			periodic_read_count;
#endif
#ifdef CAM_IO_STATS
	struct sysctl_ctx_list	sysctl_stats_ctx;
	struct sysctl_oid	*sysctl_stats_tree;
	u_int			timeouts;
	u_int			errors;
	u_int			invalidations;
#endif
};

struct nda_trim_request {
	struct nvme_dsm_range	dsm[NVME_MAX_DSM_TRIM / sizeof(struct nvme_dsm_range)];
	TAILQ_HEAD(, bio) bps;
};
_Static_assert(NVME_MAX_DSM_TRIM % sizeof(struct nvme_dsm_range) == 0,
    "NVME_MAX_DSM_TRIM must be an integral number of ranges");

/* Need quirk table */

static	disk_ioctl_t	ndaioctl;
static	disk_strategy_t	ndastrategy;
static	dumper_t	ndadump;
static	periph_init_t	ndainit;
static	void		ndaasync(void *callback_arg, uint32_t code,
				struct cam_path *path, void *arg);
static	void		ndasysctlinit(void *context, int pending);
static	int		ndaflagssysctl(SYSCTL_HANDLER_ARGS);
static	periph_ctor_t	ndaregister;
static	periph_dtor_t	ndacleanup;
static	periph_start_t	ndastart;
static	periph_oninv_t	ndaoninvalidate;
static	void		ndadone(struct cam_periph *periph,
				union ccb *done_ccb);
static	int		ndaerror(union ccb *ccb, uint32_t cam_flags,
				uint32_t sense_flags);
static void		ndashutdown(void *arg, int howto);
static void		ndasuspend(void *arg);

#ifndef NDA_DEFAULT_SEND_ORDERED
#define	NDA_DEFAULT_SEND_ORDERED	1
#endif
#ifndef NDA_DEFAULT_TIMEOUT
#define	NDA_DEFAULT_TIMEOUT	30	/* Timeout in seconds */
#endif
#ifndef	NDA_DEFAULT_RETRY
#define	NDA_DEFAULT_RETRY	4
#endif
#ifndef NDA_MAX_TRIM_ENTRIES
#define	NDA_MAX_TRIM_ENTRIES	(NVME_MAX_DSM_TRIM / sizeof(struct nvme_dsm_range))	/* Number of DSM trims to use, max 256 */
#endif
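/*
 * Each struct nvme_dsm_range is 16 bytes, so with NVME_MAX_DSM_TRIM
 * (4096 bytes in stock kernels) this works out to 256 entries, the most
 * a single Dataset Management command can carry.
 */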

static SYSCTL_NODE(_kern_cam, OID_AUTO, nda, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "CAM Direct Access Disk driver");

//static int nda_retry_count = NDA_DEFAULT_RETRY;
static int nda_send_ordered = NDA_DEFAULT_SEND_ORDERED;
static int nda_default_timeout = NDA_DEFAULT_TIMEOUT;
static int nda_max_trim_entries = NDA_MAX_TRIM_ENTRIES;
static int nda_enable_biospeedup = 1;
static int nda_nvd_compat = 1;
SYSCTL_INT(_kern_cam_nda, OID_AUTO, max_trim, CTLFLAG_RDTUN,
    &nda_max_trim_entries, NDA_MAX_TRIM_ENTRIES,
    "Maximum number of BIO_DELETE to send down as a DSM TRIM.");
SYSCTL_INT(_kern_cam_nda, OID_AUTO, enable_biospeedup, CTLFLAG_RDTUN,
    &nda_enable_biospeedup, 0, "Enable BIO_SPEEDUP processing.");
SYSCTL_INT(_kern_cam_nda, OID_AUTO, nvd_compat, CTLFLAG_RDTUN,
    &nda_nvd_compat, 1, "Enable creation of nvd aliases.");
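/*
 * The three knobs above are boot-time tunables as well (CTLFLAG_RDTUN),
 * so they can be set from loader.conf(5), e.g. kern.cam.nda.max_trim="64".
 */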

/*
 * All NVMe media is non-rotational, so all nvme device instances
 * share this to implement the sysctl.
 */
static int nda_rotating_media = 0;

static struct periph_driver ndadriver =
{
	ndainit, "nda",
	TAILQ_HEAD_INITIALIZER(ndadriver.units), /* generation */ 0
};

PERIPHDRIVER_DECLARE(nda, ndadriver);

static MALLOC_DEFINE(M_NVMEDA, "nvme_da", "nvme_da buffers");

/*
 * Nice wrappers. Maybe these belong in nvme_all.c instead of
 * here, but this is the only place that uses them. Should
 * we ever grow another NVMe periph, we should move them
 * all there wholesale.
 */

static void
nda_nvme_flush(struct nda_softc *softc, struct ccb_nvmeio *nvmeio)
{
	cam_fill_nvmeio(nvmeio,
	    0,				/* retries */
	    ndadone,			/* cbfcnp */
	    CAM_DIR_NONE,		/* flags */
	    NULL,			/* data_ptr */
	    0,				/* dxfer_len */
	    nda_default_timeout * 1000); /* timeout 30s */
	nvme_ns_flush_cmd(&nvmeio->cmd, softc->nsid);
}

static void
nda_nvme_trim(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
    void *payload, uint32_t num_ranges)
{
	cam_fill_nvmeio(nvmeio,
	    0,				/* retries */
	    ndadone,			/* cbfcnp */
	    CAM_DIR_OUT,		/* flags */
	    payload,			/* data_ptr */
	    num_ranges * sizeof(struct nvme_dsm_range), /* dxfer_len */
	    nda_default_timeout * 1000); /* timeout 30s */
	nvme_ns_trim_cmd(&nvmeio->cmd, softc->nsid, num_ranges);
}

static void
nda_nvme_write(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
    void *payload, uint64_t lba, uint32_t len, uint32_t count)
{
	cam_fill_nvmeio(nvmeio,
	    0,				/* retries */
	    ndadone,			/* cbfcnp */
	    CAM_DIR_OUT,		/* flags */
	    payload,			/* data_ptr */
	    len,			/* dxfer_len */
	    nda_default_timeout * 1000); /* timeout 30s */
	nvme_ns_write_cmd(&nvmeio->cmd, softc->nsid, lba, count);
}

static void
nda_nvme_rw_bio(struct nda_softc *softc, struct ccb_nvmeio *nvmeio,
    struct bio *bp, uint32_t rwcmd)
{
	int flags = rwcmd == NVME_OPC_READ ? CAM_DIR_IN : CAM_DIR_OUT;
	void *payload;
	uint64_t lba;
	uint32_t count;

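	/*
	 * For unmapped bios, hand the bio itself to the SIM and let it do
	 * the mapping (CAM_DATA_BIO); otherwise pass the mapped kernel
	 * virtual address directly.
	 */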
	if (bp->bio_flags & BIO_UNMAPPED) {
		flags |= CAM_DATA_BIO;
		payload = bp;
	} else {
		payload = bp->bio_data;
	}

	lba = bp->bio_pblkno;
	count = bp->bio_bcount / softc->disk->d_sectorsize;

	cam_fill_nvmeio(nvmeio,
	    0,				/* retries */
	    ndadone,			/* cbfcnp */
	    flags,			/* flags */
	    payload,			/* data_ptr */
	    bp->bio_bcount,		/* dxfer_len */
	    nda_default_timeout * 1000); /* timeout 30s */
	nvme_ns_rw_cmd(&nvmeio->cmd, rwcmd, softc->nsid, lba, count);
}

static int
ndaopen(struct disk *dp)
{
	struct cam_periph *periph;
	struct nda_softc *softc;
	int error;

	periph = (struct cam_periph *)dp->d_drv1;
	if (cam_periph_acquire(periph) != 0) {
		return(ENXIO);
	}

	cam_periph_lock(periph);
	if ((error = cam_periph_hold(periph, PRIBIO|PCATCH)) != 0) {
		cam_periph_unlock(periph);
		cam_periph_release(periph);
		return (error);
	}

	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
	    ("ndaopen\n"));

	softc = (struct nda_softc *)periph->softc;
	softc->flags |= NDA_FLAG_OPEN;

	cam_periph_unhold(periph);
	cam_periph_unlock(periph);
	return (0);
}

static int
ndaclose(struct disk *dp)
{
	struct cam_periph *periph;
	struct nda_softc *softc;
	union ccb *ccb;
	int error;

	periph = (struct cam_periph *)dp->d_drv1;
	softc = (struct nda_softc *)periph->softc;
	cam_periph_lock(periph);

	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE | CAM_DEBUG_PERIPH,
	    ("ndaclose\n"));

	if ((softc->flags & NDA_FLAG_DIRTY) != 0 &&
	    (periph->flags & CAM_PERIPH_INVALID) == 0 &&
	    cam_periph_hold(periph, PRIBIO) == 0) {
		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
		nda_nvme_flush(softc, &ccb->nvmeio);
		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
		    /*sense_flags*/0, softc->disk->d_devstat);

		if (error != 0)
			xpt_print(periph->path, "Synchronize cache failed\n");
		else
			softc->flags &= ~NDA_FLAG_DIRTY;
		xpt_release_ccb(ccb);
		cam_periph_unhold(periph);
	}

	softc->flags &= ~NDA_FLAG_OPEN;

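	/*
	 * Wait for any ndastart() submissions still in flight (tracked by
	 * softc->refcount) to drain before we let the periph go.
	 */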
	while (softc->refcount != 0)
		cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
	KASSERT(softc->outstanding_cmds == 0,
	    ("nda %d outstanding commands", softc->outstanding_cmds));
	cam_periph_unlock(periph);
	cam_periph_release(periph);
	return (0);
}

static void
ndaschedule(struct cam_periph *periph)
{
	struct nda_softc *softc = (struct nda_softc *)periph->softc;

	if (softc->state != NDA_STATE_NORMAL)
		return;

	cam_iosched_schedule(softc->cam_iosched, periph);
}

static int
ndaioctl(struct disk *dp, u_long cmd, void *data, int fflag,
    struct thread *td)
{
	struct cam_periph *periph;

	periph = (struct cam_periph *)dp->d_drv1;

	switch (cmd) {
	case NVME_IO_TEST:
	case NVME_BIO_TEST:
		/*
		 * These don't map well to the underlying CCBs, so
		 * they are unsupported via CAM.
		 */
		return (ENOTTY);
	case NVME_GET_NSID:
	{
		struct nvme_get_nsid *gnsid = (struct nvme_get_nsid *)data;
		struct ccb_pathinq cpi;

		xpt_path_inq(&cpi, periph->path);
		strncpy(gnsid->cdev, cpi.xport_specific.nvme.dev_name,
		    sizeof(gnsid->cdev));
		gnsid->nsid = cpi.xport_specific.nvme.nsid;
		return (0);
	}
	case NVME_PASSTHROUGH_CMD:
	{
		struct nvme_pt_command *pt;
		union ccb *ccb;
		struct cam_periph_map_info mapinfo;
		u_int maxmap = dp->d_maxsize;
		int error;

		/*
		 * Create a NVME_IO CCB to do the passthrough command.
		 */
		pt = (struct nvme_pt_command *)data;
		ccb = xpt_alloc_ccb();
		xpt_setup_ccb(&ccb->ccb_h, periph->path, CAM_PRIORITY_NORMAL);
		ccb->ccb_state = NDA_CCB_PASS;
		cam_fill_nvmeio(&ccb->nvmeio,
		    0,			/* Retries */
		    ndadone,
		    (pt->is_read ? CAM_DIR_IN : CAM_DIR_OUT) | CAM_DATA_VADDR,
		    pt->buf,
		    pt->len,
		    nda_default_timeout * 1000);
		memcpy(&ccb->nvmeio.cmd, &pt->cmd, sizeof(pt->cmd));

		/*
		 * Wire the user memory in this request for the I/O
		 */
		memset(&mapinfo, 0, sizeof(mapinfo));
		error = cam_periph_mapmem(ccb, &mapinfo, maxmap);
		if (error)
			goto out;

		/*
		 * Lock the periph and run the command.
		 */
		cam_periph_lock(periph);
		cam_periph_runccb(ccb, NULL, CAM_RETRY_SELTO,
		    SF_RETRY_UA | SF_NO_PRINT, NULL);

		/*
		 * Tear down mapping and return status.
		 */
		cam_periph_unlock(periph);
		error = cam_periph_unmapmem(ccb, &mapinfo);
		if (!cam_ccb_success(ccb))
			error = EIO;
out:
		cam_periph_lock(periph);
		xpt_release_ccb(ccb);
		cam_periph_unlock(periph);
		return (error);
	}
	default:
		break;
	}
	return (ENOTTY);
}

/*
 * Actually translate the requested transfer into one the physical driver
 * can understand. The transfer is described by a buf and will include
 * only one physical transfer.
 */
static void
ndastrategy(struct bio *bp)
{
	struct cam_periph *periph;
	struct nda_softc *softc;

	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
	softc = (struct nda_softc *)periph->softc;

	cam_periph_lock(periph);

	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastrategy(%p)\n", bp));

	/*
	 * If the device has been made invalid, error out
	 */
	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
		cam_periph_unlock(periph);
		biofinish(bp, NULL, ENXIO);
		return;
	}

	if (bp->bio_cmd == BIO_DELETE)
		softc->deletes++;

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	cam_iosched_queue_work(softc->cam_iosched, bp);

	/*
	 * Schedule ourselves for performing the work.
	 */
	ndaschedule(periph);
	cam_periph_unlock(periph);

	return;
}

static int
ndadump(void *arg, void *virtual, off_t offset, size_t length)
{
	struct cam_periph *periph;
	struct nda_softc *softc;
	u_int secsize;
	struct ccb_nvmeio nvmeio;
	struct disk *dp;
	uint64_t lba;
	uint32_t count;
	int error = 0;

	dp = arg;
	periph = dp->d_drv1;
	softc = (struct nda_softc *)periph->softc;
	secsize = softc->disk->d_sectorsize;
	lba = offset / secsize;
	count = length / secsize;

	if ((periph->flags & CAM_PERIPH_INVALID) != 0)
		return (ENXIO);

	/* xpt_get_ccb returns a zero'd allocation for the ccb, mimic that here */
	memset(&nvmeio, 0, sizeof(nvmeio));
	if (length > 0) {
		xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
		nvmeio.ccb_state = NDA_CCB_DUMP;
		nda_nvme_write(softc, &nvmeio, virtual, lba, length, count);
		error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
		    0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
		if (error != 0)
			printf("Aborting dump due to I/O error %d.\n", error);

		return (error);
	}

	/* A zero-length request marks the end of the dump: flush the cache. */
	xpt_setup_ccb(&nvmeio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);

	nvmeio.ccb_state = NDA_CCB_DUMP;
	nda_nvme_flush(softc, &nvmeio);
	error = cam_periph_runccb((union ccb *)&nvmeio, cam_periph_error,
	    0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
	if (error != 0)
		xpt_print(periph->path, "flush cmd failed\n");
	return (error);
}

static void
ndainit(void)
{
	cam_status status;

	/*
	 * Install a global async callback. This callback will
	 * receive async callbacks like "new device found".
	 */
	status = xpt_register_async(AC_FOUND_DEVICE, ndaasync, NULL, NULL);

	if (status != CAM_REQ_CMP) {
		printf("nda: Failed to attach master async callback "
		    "due to status 0x%x!\n", status);
	} else if (nda_send_ordered) {
		/* Register our event handlers */
		if ((EVENTHANDLER_REGISTER(power_suspend, ndasuspend,
		    NULL, EVENTHANDLER_PRI_LAST)) == NULL)
			printf("ndainit: power event registration failed!\n");
		if ((EVENTHANDLER_REGISTER(shutdown_post_sync, ndashutdown,
		    NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
			printf("ndainit: shutdown event registration failed!\n");
	}
}

/*
 * Callback from GEOM, called when it has finished cleaning up its
 * resources.
 */
static void
ndadiskgonecb(struct disk *dp)
{
	struct cam_periph *periph;

	periph = (struct cam_periph *)dp->d_drv1;

	cam_periph_release(periph);
}

static void
ndaoninvalidate(struct cam_periph *periph)
{
	struct nda_softc *softc;

	softc = (struct nda_softc *)periph->softc;

	/*
	 * De-register any async callbacks.
	 */
	xpt_register_async(0, ndaasync, periph, periph->path);
#ifdef CAM_IO_STATS
	softc->invalidations++;
#endif

	/*
	 * Return all queued I/O with ENXIO. Transactions may be queued up here
	 * for retry (since we are called while there are other transactions
	 * pending). Any requests in the hardware will drain before ndacleanup
	 * is called.
	 */
	cam_iosched_flush(softc->cam_iosched, NULL, ENXIO);

	/*
	 * Tell GEOM that we've gone away, we'll get a callback when it is
	 * done cleaning up its resources.
	 */
	disk_gone(softc->disk);
}

static void
ndacleanup(struct cam_periph *periph)
{
	struct nda_softc *softc;

	softc = (struct nda_softc *)periph->softc;

	cam_periph_unlock(periph);

	cam_iosched_fini(softc->cam_iosched);

	/*
	 * If we can't free the sysctl tree, oh well...
	 */
	if ((softc->flags & NDA_FLAG_SCTX_INIT) != 0) {
#ifdef CAM_IO_STATS
		if (sysctl_ctx_free(&softc->sysctl_stats_ctx) != 0)
			xpt_print(periph->path,
			    "can't remove sysctl stats context\n");
#endif
		if (sysctl_ctx_free(&softc->sysctl_ctx) != 0)
			xpt_print(periph->path,
			    "can't remove sysctl context\n");
	}

	disk_destroy(softc->disk);
	free(softc, M_DEVBUF);
	cam_periph_lock(periph);
}

static void
ndaasync(void *callback_arg, uint32_t code,
    struct cam_path *path, void *arg)
{
	struct cam_periph *periph;

	periph = (struct cam_periph *)callback_arg;
	switch (code) {
	case AC_FOUND_DEVICE:
	{
		struct ccb_getdev *cgd;
		cam_status status;

		cgd = (struct ccb_getdev *)arg;
		if (cgd == NULL)
			break;

		if (cgd->protocol != PROTO_NVME)
			break;

		/*
		 * Allocate a peripheral instance for
		 * this device and start the probe
		 * process.
		 */
		status = cam_periph_alloc(ndaregister, ndaoninvalidate,
		    ndacleanup, ndastart,
		    "nda", CAM_PERIPH_BIO,
		    path, ndaasync,
		    AC_FOUND_DEVICE, cgd);

		if (status != CAM_REQ_CMP
		    && status != CAM_REQ_INPROG)
			printf("ndaasync: Unable to attach to new device "
			    "due to status 0x%x\n", status);
		break;
	}
	case AC_ADVINFO_CHANGED:
	{
		uintptr_t buftype;

		buftype = (uintptr_t)arg;
		if (buftype == CDAI_TYPE_PHYS_PATH) {
			struct nda_softc *softc;

			softc = periph->softc;
			disk_attr_changed(softc->disk, "GEOM::physpath",
			    M_NOWAIT);
		}
		break;
	}
	case AC_LOST_DEVICE:
	default:
		break;
	}
	cam_periph_async(periph, code, path, arg);
}

static void
ndasysctlinit(void *context, int pending)
{
	struct cam_periph *periph;
	struct nda_softc *softc;
	char tmpstr[32], tmpstr2[16];

	periph = (struct cam_periph *)context;

	/* periph was held for us when this task was enqueued */
	if ((periph->flags & CAM_PERIPH_INVALID) != 0) {
		cam_periph_release(periph);
		return;
	}

	softc = (struct nda_softc *)periph->softc;
	snprintf(tmpstr, sizeof(tmpstr), "CAM NDA unit %d", periph->unit_number);
	snprintf(tmpstr2, sizeof(tmpstr2), "%d", periph->unit_number);

	sysctl_ctx_init(&softc->sysctl_ctx);
	softc->flags |= NDA_FLAG_SCTX_INIT;
	softc->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&softc->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_kern_cam_nda), OID_AUTO, tmpstr2,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr, "device_index");
	if (softc->sysctl_tree == NULL) {
		printf("ndasysctlinit: unable to allocate sysctl tree\n");
		cam_periph_release(periph);
		return;
	}

	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
	    OID_AUTO, "unmapped_io", CTLFLAG_RD,
	    &softc->unmappedio, 0, "Unmapped I/O leaf");

	SYSCTL_ADD_QUAD(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
	    OID_AUTO, "deletes", CTLFLAG_RD,
	    &softc->deletes, "Number of BIO_DELETE requests");

	SYSCTL_ADD_UQUAD(&softc->sysctl_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
	    "trim_count", CTLFLAG_RD, &softc->trim_count,
	    "Total number of unmap/dsm commands sent");
	SYSCTL_ADD_UQUAD(&softc->sysctl_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
	    "trim_ranges", CTLFLAG_RD, &softc->trim_ranges,
	    "Total number of ranges in unmap/dsm commands");
	SYSCTL_ADD_UQUAD(&softc->sysctl_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO,
	    "trim_lbas", CTLFLAG_RD, &softc->trim_lbas,
	    "Total lbas in the unmap/dsm commands sent");

	SYSCTL_ADD_INT(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
	    OID_AUTO, "rotating", CTLFLAG_RD, &nda_rotating_media, 1,
	    "Rotating media");

	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
	    OID_AUTO, "flags", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
	    softc, 0, ndaflagssysctl, "A",
	    "Flags for drive");

#ifdef CAM_IO_STATS
	softc->sysctl_stats_tree = SYSCTL_ADD_NODE(&softc->sysctl_stats_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "stats",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Statistics");
	if (softc->sysctl_stats_tree == NULL) {
		printf("ndasysctlinit: unable to allocate sysctl tree for stats\n");
		cam_periph_release(periph);
		return;
	}
	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_stats_tree),
	    OID_AUTO, "timeouts", CTLFLAG_RD,
	    &softc->timeouts, 0,
	    "Device timeouts reported by the SIM");
	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_stats_tree),
	    OID_AUTO, "errors", CTLFLAG_RD,
	    &softc->errors, 0,
	    "Transport errors reported by the SIM.");
	SYSCTL_ADD_INT(&softc->sysctl_stats_ctx,
	    SYSCTL_CHILDREN(softc->sysctl_stats_tree),
	    OID_AUTO, "pack_invalidations", CTLFLAG_RD,
	    &softc->invalidations, 0,
	    "Device pack invalidations.");
#endif

#ifdef CAM_TEST_FAILURE
	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
	    OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
	    periph, 0, cam_periph_invalidate_sysctl, "I",
	    "Write 1 to invalidate the drive immediately");
#endif

	cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx,
	    softc->sysctl_tree);

	cam_periph_release(periph);
}

static int
ndaflagssysctl(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct nda_softc *softc = arg1;
	int error;

	sbuf_new_for_sysctl(&sbuf, NULL, 0, req);
	if (softc->flags != 0)
		sbuf_printf(&sbuf, "0x%b", (unsigned)softc->flags, NDA_FLAG_STRING);
	else
		sbuf_putc(&sbuf, '0');
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);

	return (error);
}

static int
ndagetattr(struct bio *bp)
{
	int ret;
	struct cam_periph *periph;

	if (g_handleattr_int(bp, "GEOM::canspeedup", nda_enable_biospeedup))
		return (EJUSTRETURN);

	periph = (struct cam_periph *)bp->bio_disk->d_drv1;
	cam_periph_lock(periph);
	ret = xpt_getattr(bp->bio_data, bp->bio_length, bp->bio_attribute,
	    periph->path);
	cam_periph_unlock(periph);
	if (ret == 0)
		bp->bio_completed = bp->bio_length;
	return ret;
}

static cam_status
ndaregister(struct cam_periph *periph, void *arg)
{
	struct nda_softc *softc;
	struct disk *disk;
	struct ccb_pathinq cpi;
	const struct nvme_namespace_data *nsd;
	const struct nvme_controller_data *cd;
	char announce_buf[80];
	uint8_t flbas_fmt, lbads, vwc_present;
	u_int maxio;
	int quirks;

	nsd = nvme_get_identify_ns(periph);
	cd = nvme_get_identify_cntrl(periph);

	softc = (struct nda_softc *)malloc(sizeof(*softc), M_DEVBUF,
	    M_NOWAIT | M_ZERO);

	if (softc == NULL) {
		printf("ndaregister: Unable to probe new device. "
		    "Unable to allocate softc\n");
		return(CAM_REQ_CMP_ERR);
	}

	/* ident_data parsing */

	periph->softc = softc;
	softc->quirks = NDA_Q_NONE;
	xpt_path_inq(&cpi, periph->path);
	TASK_INIT(&softc->sysctl_task, 0, ndasysctlinit, periph);

	/*
	 * The namespace ID is the LUN; save it for later I/O.
	 */
	softc->nsid = (uint32_t)xpt_path_lun_id(periph->path);

	/*
	 * Register this media as a disk
	 */
	(void)cam_periph_acquire(periph);
	cam_periph_unlock(periph);
	snprintf(announce_buf, sizeof(announce_buf),
	    "kern.cam.nda.%d.quirks", periph->unit_number);
	quirks = softc->quirks;
	TUNABLE_INT_FETCH(announce_buf, &quirks);
	softc->quirks = quirks;
	softc->disk = disk = disk_alloc();
	disk->d_rotation_rate = DISK_RR_NON_ROTATING;
	disk->d_open = ndaopen;
	disk->d_close = ndaclose;
	disk->d_strategy = ndastrategy;
	disk->d_ioctl = ndaioctl;
	disk->d_getattr = ndagetattr;
	if (cam_sim_pollable(periph->sim))
		disk->d_dump = ndadump;
	disk->d_gone = ndadiskgonecb;
	disk->d_name = "nda";
	disk->d_drv1 = periph;
	disk->d_unit = periph->unit_number;
	maxio = cpi.maxio;		/* Honor max I/O size of SIM */
	if (maxio == 0)
		maxio = DFLTPHYS;	/* traditional default */
	else if (maxio > maxphys)
		maxio = maxphys;	/* for safety */
	disk->d_maxsize = maxio;
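	/*
	 * LBADS in the chosen LBA format is log2 of the LBA data size, so
	 * the sector size is always a power of two.
	 */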
	flbas_fmt = NVMEV(NVME_NS_DATA_FLBAS_FORMAT, nsd->flbas);
	lbads = NVMEV(NVME_NS_DATA_LBAF_LBADS, nsd->lbaf[flbas_fmt]);
	disk->d_sectorsize = 1 << lbads;
	disk->d_mediasize = (off_t)(disk->d_sectorsize * nsd->nsze);
	disk->d_delmaxsize = disk->d_mediasize;
	disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
	if (nvme_ctrlr_has_dataset_mgmt(cd))
		disk->d_flags |= DISKFLAG_CANDELETE;
	vwc_present = NVMEV(NVME_CTRLR_DATA_VWC_PRESENT, cd->vwc);
	if (vwc_present)
		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
	if ((cpi.hba_misc & PIM_UNMAPPED) != 0) {
		disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
		softc->unmappedio = 1;
	}
	/*
	 * d_ident and d_descr are both far bigger than the length of either
	 * the serial or model number strings.
	 */
	cam_strvis_flag(disk->d_descr, cd->mn, NVME_MODEL_NUMBER_LENGTH,
	    sizeof(disk->d_descr), CAM_STRVIS_FLAG_NONASCII_SPC);

	cam_strvis_flag(disk->d_ident, cd->sn, NVME_SERIAL_NUMBER_LENGTH,
	    sizeof(disk->d_ident), CAM_STRVIS_FLAG_NONASCII_SPC);

	disk->d_hba_vendor = cpi.hba_vendor;
	disk->d_hba_device = cpi.hba_device;
	disk->d_hba_subvendor = cpi.hba_subvendor;
	disk->d_hba_subdevice = cpi.hba_subdevice;
	snprintf(disk->d_attachment, sizeof(disk->d_attachment),
	    "%s%d", cpi.dev_name, cpi.unit_number);
	if (NVMEV(NVME_NS_DATA_NSFEAT_NPVALID, nsd->nsfeat) != 0 &&
	    nsd->npwg != 0)
		disk->d_stripesize = ((nsd->npwg + 1) * disk->d_sectorsize);
	else
		disk->d_stripesize = nsd->noiob * disk->d_sectorsize;
	disk->d_stripeoffset = 0;
	disk->d_devstat = devstat_new_entry(periph->periph_name,
	    periph->unit_number, disk->d_sectorsize,
	    DEVSTAT_ALL_SUPPORTED,
	    DEVSTAT_TYPE_DIRECT | XPORT_DEVSTAT_TYPE(cpi.transport),
	    DEVSTAT_PRIORITY_DISK);

	if (cam_iosched_init(&softc->cam_iosched, periph, disk,
	    ndaschedule) != 0) {
		printf("ndaregister: Unable to probe new device. "
		    "Unable to allocate iosched memory\n");
		free(softc, M_DEVBUF);
		return(CAM_REQ_CMP_ERR);
	}
	cam_iosched_set_sort_queue(softc->cam_iosched, 0);

	/*
	 * Add alias for older nvd drives to ease transition.
	 */
	if (nda_nvd_compat)
		disk_add_alias(disk, "nvd");

	cam_periph_lock(periph);

	snprintf(announce_buf, sizeof(announce_buf),
	    "%juMB (%ju %u byte sectors)",
	    (uintmax_t)((uintmax_t)disk->d_mediasize / (1024*1024)),
	    (uintmax_t)disk->d_mediasize / disk->d_sectorsize,
	    disk->d_sectorsize);
	xpt_announce_periph(periph, announce_buf);
	xpt_announce_quirks(periph, softc->quirks, NDA_Q_BIT_STRING);

	/*
	 * Create our sysctl variables, now that we know
	 * we have successfully attached.
	 */
	if (cam_periph_acquire(periph) == 0)
		taskqueue_enqueue(taskqueue_thread, &softc->sysctl_task);

	/*
	 * Register for device going away and info about the drive
	 * changing (though with NVMe, it can't)
	 */
	xpt_register_async(AC_LOST_DEVICE | AC_ADVINFO_CHANGED,
	    ndaasync, periph, periph->path);

	softc->state = NDA_STATE_NORMAL;

	/*
	 * We'll release this reference once GEOM calls us back via
	 * ndadiskgonecb(), telling us that our provider has been freed.
	 */
	if (cam_periph_acquire(periph) == 0)
		disk_create(softc->disk, DISK_VERSION);

	cam_periph_release_locked(periph);
	return(CAM_REQ_CMP);
}

static void
ndastart(struct cam_periph *periph, union ccb *start_ccb)
{
	struct nda_softc *softc = (struct nda_softc *)periph->softc;
	struct ccb_nvmeio *nvmeio = &start_ccb->nvmeio;

	CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart\n"));

	switch (softc->state) {
	case NDA_STATE_NORMAL:
	{
		struct bio *bp;

		bp = cam_iosched_next_bio(softc->cam_iosched);
		CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("ndastart: bio %p\n", bp));
		if (bp == NULL) {
			xpt_release_ccb(start_ccb);
			break;
		}

		switch (bp->bio_cmd) {
		case BIO_WRITE:
			softc->flags |= NDA_FLAG_DIRTY;
			/* FALLTHROUGH */
		case BIO_READ:
		{
#ifdef CAM_TEST_FAILURE
			int fail = 0;

			/*
			 * Support the failure ioctls. If the command is a
			 * read, and there are pending forced read errors, or
			 * if a write and pending write errors, then fail this
			 * operation with EIO. This is useful for testing
			 * purposes. Also, support having every Nth read fail.
			 *
			 * This is a rather blunt tool.
			 */
			if (bp->bio_cmd == BIO_READ) {
				if (softc->force_read_error) {
					softc->force_read_error--;
					fail = 1;
				}
				if (softc->periodic_read_error > 0) {
					if (++softc->periodic_read_count >=
					    softc->periodic_read_error) {
						softc->periodic_read_count = 0;
						fail = 1;
					}
				}
			} else {
				if (softc->force_write_error) {
					softc->force_write_error--;
					fail = 1;
				}
			}
			if (fail) {
				biofinish(bp, NULL, EIO);
				xpt_release_ccb(start_ccb);
				ndaschedule(periph);
				return;
			}
#endif
			KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 ||
			    round_page(bp->bio_bcount + bp->bio_ma_offset) /
			    PAGE_SIZE == bp->bio_ma_n,
			    ("Short bio %p", bp));
			nda_nvme_rw_bio(softc, &start_ccb->nvmeio, bp, bp->bio_cmd == BIO_READ ?
			    NVME_OPC_READ : NVME_OPC_WRITE);
			break;
		}
		case BIO_DELETE:
		{
			struct nvme_dsm_range *dsm_range, *dsm_end;
			struct nda_trim_request *trim;
			struct bio *bp1;
			int ents;
			uint32_t totalcount = 0, ranges = 0;

			trim = malloc(sizeof(*trim), M_NVMEDA, M_ZERO | M_NOWAIT);
			if (trim == NULL) {
				/*
				 * We have to drop the periph lock when
				 * returning ENOMEM. g_io_deliver treats these
				 * requests differently and will recursively
				 * call the start routine, which causes us to
				 * get into ndastrategy with the periph lock
				 * held, leading to a panic when it's acquired
				 * again.
				 */
				cam_periph_unlock(periph);
				biofinish(bp, NULL, ENOMEM);
				cam_periph_lock(periph);
				xpt_release_ccb(start_ccb);
				ndaschedule(periph);
				return;
			}
			TAILQ_INIT(&trim->bps);
			bp1 = bp;
			ents = min(nitems(trim->dsm), nda_max_trim_entries);
			ents = max(ents, 1);
			dsm_range = trim->dsm;
			dsm_end = dsm_range + ents;
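			/*
			 * Gather as many queued BIO_DELETEs as will fit in
			 * one DSM payload so a single trim command covers
			 * them all.
			 */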
			do {
				TAILQ_INSERT_TAIL(&trim->bps, bp1, bio_queue);
				dsm_range->length =
				    htole32(bp1->bio_bcount / softc->disk->d_sectorsize);
				dsm_range->starting_lba =
				    htole64(bp1->bio_offset / softc->disk->d_sectorsize);
				ranges++;
				totalcount += dsm_range->length;
				dsm_range++;
				if (dsm_range >= dsm_end)
					break;
				bp1 = cam_iosched_next_trim(softc->cam_iosched);
				/* XXX -- Could collapse adjacent ranges, but we don't for now */
				/* XXX -- Could limit based on total payload size */
			} while (bp1 != NULL);
			start_ccb->ccb_trim = trim;
			nda_nvme_trim(softc, &start_ccb->nvmeio, trim->dsm,
			    dsm_range - trim->dsm);
			start_ccb->ccb_state = NDA_CCB_TRIM;
			softc->trim_count++;
			softc->trim_ranges += ranges;
			softc->trim_lbas += totalcount;
			/*
			 * Note: We can have multiple TRIMs in flight, so we don't call
			 * cam_iosched_submit_trim(softc->cam_iosched);
			 * since that forces the I/O scheduler to only schedule one at a time.
			 * On NVMe drives, this is a performance disaster.
			 */
			goto out;
		}
		case BIO_FLUSH:
			nda_nvme_flush(softc, nvmeio);
			break;
		default:
			biofinish(bp, NULL, EOPNOTSUPP);
			xpt_release_ccb(start_ccb);
			ndaschedule(periph);
			return;
		}
		start_ccb->ccb_state = NDA_CCB_BUFFER_IO;
		start_ccb->ccb_bp = bp;
out:
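		/*
		 * CAM_UNLOCKED: the completion callback (ndadone) runs
		 * without the periph lock held and takes it itself.
		 */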
		start_ccb->ccb_h.flags |= CAM_UNLOCKED;
		softc->outstanding_cmds++;
		softc->refcount++;		/* For submission only */
		cam_periph_unlock(periph);
		xpt_action(start_ccb);
		cam_periph_lock(periph);
		softc->refcount--;		/* Submission done */

		/* May have more work to do, so ensure we stay scheduled */
		ndaschedule(periph);
		break;
	}
	}
}

static void
ndadone(struct cam_periph *periph, union ccb *done_ccb)
{
	struct nda_softc *softc;
	struct ccb_nvmeio *nvmeio = &done_ccb->nvmeio;
	struct cam_path *path;
	int state;

	softc = (struct nda_softc *)periph->softc;
	path = done_ccb->ccb_h.path;

	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("ndadone\n"));

	state = nvmeio->ccb_state & NDA_CCB_TYPE_MASK;
	switch (state) {
	case NDA_CCB_BUFFER_IO:
	case NDA_CCB_TRIM:
	{
		int error;

		cam_periph_lock(periph);
		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
			error = ndaerror(done_ccb, 0, 0);
			if (error == ERESTART) {
				/* A retry was scheduled, so just return. */
				cam_periph_unlock(periph);
				return;
			}
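			/*
			 * If the device queue was frozen as part of the
			 * error, release it now that error handling is done.
			 */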
			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
				cam_release_devq(path,
				    /*relsim_flags*/0,
				    /*reduction*/0,
				    /*timeout*/0,
				    /*getcount_only*/0);
		} else {
			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
				panic("REQ_CMP with QFRZN");
			error = 0;
		}
		if (state == NDA_CCB_BUFFER_IO) {
			struct bio *bp;

			bp = (struct bio *)done_ccb->ccb_bp;
			bp->bio_error = error;
			if (error != 0) {
				bp->bio_resid = bp->bio_bcount;
				bp->bio_flags |= BIO_ERROR;
			} else {
				bp->bio_resid = 0;
			}
			softc->outstanding_cmds--;

			/*
			 * We need to call cam_iosched before we call biodone so that we
			 * don't measure any activity that happens in the completion
			 * routine, which in the case of sendfile can be quite
			 * extensive.
			 */
			cam_iosched_bio_complete(softc->cam_iosched, bp, done_ccb);
			xpt_release_ccb(done_ccb);
			ndaschedule(periph);
			cam_periph_unlock(periph);
			biodone(bp);
		} else { /* state == NDA_CCB_TRIM */
			struct nda_trim_request *trim;
			struct bio *bp1, *bp2;
			TAILQ_HEAD(, bio) queue;

			trim = nvmeio->ccb_trim;
			TAILQ_INIT(&queue);
			TAILQ_CONCAT(&queue, &trim->bps, bio_queue);
			free(trim, M_NVMEDA);

			/*
			 * Since we can have multiple trims in flight, we don't
			 * need to call this here.
			 * cam_iosched_trim_done(softc->cam_iosched);
			 */
			/*
			 * Tell the I/O scheduler that we're finishing this I/O
			 * so it can keep its books. The first bio we pass in
			 * with the CCB, which has the timing information. The
			 * rest we pass in with NULL so we keep proper counts.
			 */
			bp1 = TAILQ_FIRST(&queue);
			cam_iosched_bio_complete(softc->cam_iosched, bp1, done_ccb);
			xpt_release_ccb(done_ccb);
			softc->outstanding_cmds--;
			ndaschedule(periph);
			cam_periph_unlock(periph);
			while ((bp2 = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, bp2, bio_queue);
				bp2->bio_error = error;
				if (error != 0) {
					bp2->bio_flags |= BIO_ERROR;
					bp2->bio_resid = bp2->bio_bcount;
				} else
					bp2->bio_resid = 0;
				if (bp1 != bp2)
					cam_iosched_bio_complete(softc->cam_iosched, bp2, NULL);
				biodone(bp2);
			}
		}
		return;
	}
	case NDA_CCB_DUMP:
		/* No-op. We're polling */
		return;
	case NDA_CCB_PASS:
		/* NVME_PASSTHROUGH_CMD runs this CCB and releases it */
		return;
	default:
		break;
	}
	xpt_release_ccb(done_ccb);
}

static int
ndaerror(union ccb *ccb, uint32_t cam_flags, uint32_t sense_flags)
{
#ifdef CAM_IO_STATS
	struct nda_softc *softc;
	struct cam_periph *periph;

	periph = xpt_path_periph(ccb->ccb_h.path);
	softc = (struct nda_softc *)periph->softc;
#endif

	switch (ccb->ccb_h.status & CAM_STATUS_MASK) {
	case CAM_CMD_TIMEOUT:
#ifdef CAM_IO_STATS
		softc->timeouts++;
#endif
		break;
	case CAM_REQ_CMP_ERR:
	case CAM_NVME_STATUS_ERROR:
#ifdef CAM_IO_STATS
		softc->errors++;
#endif
		break;
	default:
		break;
	}

	return(cam_periph_error(ccb, cam_flags, sense_flags));
}

/*
 * Step through all NDA peripheral drivers, and if the device is still open,
 * sync the disk cache to physical media.
 */
static void
ndaflush(void)
{
	struct cam_periph *periph;
	struct nda_softc *softc;
	union ccb *ccb;
	int error;

	CAM_PERIPH_FOREACH(periph, &ndadriver) {
		softc = (struct nda_softc *)periph->softc;

		if (SCHEDULER_STOPPED()) {
			/*
			 * If we panicked with the lock held or the periph is
			 * not open, do not recurse. Otherwise, call ndadump
			 * since that avoids the sleep that cam_periph_getccb
			 * does if no CCBs are available.
			 */
			if (!cam_periph_owned(periph) &&
			    (softc->flags & NDA_FLAG_OPEN)) {
				ndadump(softc->disk, NULL, 0, 0);
			}
			continue;
		}

		/*
		 * We only sync the cache if the drive is still open
		 */
		cam_periph_lock(periph);
		if ((softc->flags & NDA_FLAG_OPEN) == 0) {
			cam_periph_unlock(periph);
			continue;
		}

		ccb = cam_periph_getccb(periph, CAM_PRIORITY_NORMAL);
		nda_nvme_flush(softc, &ccb->nvmeio);
		error = cam_periph_runccb(ccb, ndaerror, /*cam_flags*/0,
		    /*sense_flags*/ SF_NO_RECOVERY | SF_NO_RETRY,
		    softc->disk->d_devstat);
		if (error != 0)
			xpt_print(periph->path, "Synchronize cache failed\n");
		xpt_release_ccb(ccb);
		cam_periph_unlock(periph);
	}
}

static void
ndashutdown(void *arg, int howto)
{

	if ((howto & RB_NOSYNC) != 0)
		return;

	ndaflush();
}

static void
ndasuspend(void *arg)
{

	ndaflush();
}