xref: /linux/drivers/scsi/lpfc/lpfc_nvme.c (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 /*******************************************************************
2  * This file is part of the Emulex Linux Device Driver for         *
3  * Fibre Channel Host Bus Adapters.                                *
4  * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
5  * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries.  *
6  * Copyright (C) 2004-2016 Emulex.  All rights reserved.           *
7  * EMULEX and SLI are trademarks of Emulex.                        *
8  * www.broadcom.com                                                *
9  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
10  *                                                                 *
11  * This program is free software; you can redistribute it and/or   *
12  * modify it under the terms of version 2 of the GNU General       *
13  * Public License as published by the Free Software Foundation.    *
14  * This program is distributed in the hope that it will be useful. *
15  * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
16  * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
17  * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
18  * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
19  * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
20  * more details, a copy of which can be found in the file COPYING  *
21  * included with this package.                                     *
22  ********************************************************************/
23 #include <linux/pci.h>
24 #include <linux/slab.h>
25 #include <linux/interrupt.h>
26 #include <linux/delay.h>
27 #include <asm/unaligned.h>
28 #include <linux/crc-t10dif.h>
29 #include <net/checksum.h>
30 
31 #include <scsi/scsi.h>
32 #include <scsi/scsi_device.h>
33 #include <scsi/scsi_eh.h>
34 #include <scsi/scsi_host.h>
35 #include <scsi/scsi_tcq.h>
36 #include <scsi/scsi_transport_fc.h>
37 #include <scsi/fc/fc_fs.h>
38 
39 #include <linux/nvme.h>
40 #include <linux/nvme-fc-driver.h>
41 #include <linux/nvme-fc.h>
42 #include "lpfc_version.h"
43 #include "lpfc_hw4.h"
44 #include "lpfc_hw.h"
45 #include "lpfc_sli.h"
46 #include "lpfc_sli4.h"
47 #include "lpfc_nl.h"
48 #include "lpfc_disc.h"
49 #include "lpfc.h"
50 #include "lpfc_nvme.h"
51 #include "lpfc_scsi.h"
52 #include "lpfc_logmsg.h"
53 #include "lpfc_crtn.h"
54 #include "lpfc_vport.h"
55 #include "lpfc_debugfs.h"
56 
57 /* NVME initiator-based functions */
58 
59 static struct lpfc_io_buf *
60 lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
61 		  int idx, int expedite);
62 
63 static void
64 lpfc_release_nvme_buf(struct lpfc_hba *, struct lpfc_io_buf *);
65 
66 static struct nvme_fc_port_template lpfc_nvme_template;
67 
68 static union lpfc_wqe128 lpfc_iread_cmd_template;
69 static union lpfc_wqe128 lpfc_iwrite_cmd_template;
70 static union lpfc_wqe128 lpfc_icmnd_cmd_template;
71 
72 /* Setup WQE templates for NVME IOs */
73 void
74 lpfc_nvme_cmd_template(void)
75 {
76 	union lpfc_wqe128 *wqe;
77 
78 	/* IREAD template */
79 	wqe = &lpfc_iread_cmd_template;
80 	memset(wqe, 0, sizeof(union lpfc_wqe128));
81 
82 	/* Word 0, 1, 2 - BDE is variable */
83 
84 	/* Word 3 - cmd_buff_len, payload_offset_len is zero */
85 
86 	/* Word 4 - total_xfer_len is variable */
87 
88 	/* Word 5 - is zero */
89 
90 	/* Word 6 - ctxt_tag, xri_tag is variable */
91 
92 	/* Word 7 */
93 	bf_set(wqe_cmnd, &wqe->fcp_iread.wqe_com, CMD_FCP_IREAD64_WQE);
94 	bf_set(wqe_pu, &wqe->fcp_iread.wqe_com, PARM_READ_CHECK);
95 	bf_set(wqe_class, &wqe->fcp_iread.wqe_com, CLASS3);
96 	bf_set(wqe_ct, &wqe->fcp_iread.wqe_com, SLI4_CT_RPI);
97 
98 	/* Word 8 - abort_tag is variable */
99 
100 	/* Word 9  - reqtag is variable */
101 
102 	/* Word 10 - dbde, wqes is variable */
103 	bf_set(wqe_qosd, &wqe->fcp_iread.wqe_com, 0);
104 	bf_set(wqe_nvme, &wqe->fcp_iread.wqe_com, 1);
105 	bf_set(wqe_iod, &wqe->fcp_iread.wqe_com, LPFC_WQE_IOD_READ);
106 	bf_set(wqe_lenloc, &wqe->fcp_iread.wqe_com, LPFC_WQE_LENLOC_WORD4);
107 	bf_set(wqe_dbde, &wqe->fcp_iread.wqe_com, 0);
108 	bf_set(wqe_wqes, &wqe->fcp_iread.wqe_com, 1);
109 
110 	/* Word 11 - pbde is variable */
111 	bf_set(wqe_cmd_type, &wqe->fcp_iread.wqe_com, NVME_READ_CMD);
112 	bf_set(wqe_cqid, &wqe->fcp_iread.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
113 	bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 1);
114 
115 	/* Word 12 - is zero */
116 
117 	/* Word 13, 14, 15 - PBDE is variable */
118 
119 	/* IWRITE template */
120 	wqe = &lpfc_iwrite_cmd_template;
121 	memset(wqe, 0, sizeof(union lpfc_wqe128));
122 
123 	/* Word 0, 1, 2 - BDE is variable */
124 
125 	/* Word 3 - cmd_buff_len, payload_offset_len is zero */
126 
127 	/* Word 4 - total_xfer_len is variable */
128 
129 	/* Word 5 - initial_xfer_len is variable */
130 
131 	/* Word 6 - ctxt_tag, xri_tag is variable */
132 
133 	/* Word 7 */
134 	bf_set(wqe_cmnd, &wqe->fcp_iwrite.wqe_com, CMD_FCP_IWRITE64_WQE);
135 	bf_set(wqe_pu, &wqe->fcp_iwrite.wqe_com, PARM_READ_CHECK);
136 	bf_set(wqe_class, &wqe->fcp_iwrite.wqe_com, CLASS3);
137 	bf_set(wqe_ct, &wqe->fcp_iwrite.wqe_com, SLI4_CT_RPI);
138 
139 	/* Word 8 - abort_tag is variable */
140 
141 	/* Word 9  - reqtag is variable */
142 
143 	/* Word 10 - dbde, wqes is variable */
144 	bf_set(wqe_qosd, &wqe->fcp_iwrite.wqe_com, 0);
145 	bf_set(wqe_nvme, &wqe->fcp_iwrite.wqe_com, 1);
146 	bf_set(wqe_iod, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_IOD_WRITE);
147 	bf_set(wqe_lenloc, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_LENLOC_WORD4);
148 	bf_set(wqe_dbde, &wqe->fcp_iwrite.wqe_com, 0);
149 	bf_set(wqe_wqes, &wqe->fcp_iwrite.wqe_com, 1);
150 
151 	/* Word 11 - pbde is variable */
152 	bf_set(wqe_cmd_type, &wqe->fcp_iwrite.wqe_com, NVME_WRITE_CMD);
153 	bf_set(wqe_cqid, &wqe->fcp_iwrite.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
154 	bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 1);
155 
156 	/* Word 12 - is zero */
157 
158 	/* Word 13, 14, 15 - PBDE is variable */
159 
160 	/* ICMND template */
161 	wqe = &lpfc_icmnd_cmd_template;
162 	memset(wqe, 0, sizeof(union lpfc_wqe128));
163 
164 	/* Word 0, 1, 2 - BDE is variable */
165 
166 	/* Word 3 - payload_offset_len is variable */
167 
168 	/* Word 4, 5 - is zero */
169 
170 	/* Word 6 - ctxt_tag, xri_tag is variable */
171 
172 	/* Word 7 */
173 	bf_set(wqe_cmnd, &wqe->fcp_icmd.wqe_com, CMD_FCP_ICMND64_WQE);
174 	bf_set(wqe_pu, &wqe->fcp_icmd.wqe_com, 0);
175 	bf_set(wqe_class, &wqe->fcp_icmd.wqe_com, CLASS3);
176 	bf_set(wqe_ct, &wqe->fcp_icmd.wqe_com, SLI4_CT_RPI);
177 
178 	/* Word 8 - abort_tag is variable */
179 
180 	/* Word 9  - reqtag is variable */
181 
182 	/* Word 10 - dbde, wqes is variable */
183 	bf_set(wqe_qosd, &wqe->fcp_icmd.wqe_com, 1);
184 	bf_set(wqe_nvme, &wqe->fcp_icmd.wqe_com, 1);
185 	bf_set(wqe_iod, &wqe->fcp_icmd.wqe_com, LPFC_WQE_IOD_NONE);
186 	bf_set(wqe_lenloc, &wqe->fcp_icmd.wqe_com, LPFC_WQE_LENLOC_NONE);
187 	bf_set(wqe_dbde, &wqe->fcp_icmd.wqe_com, 0);
188 	bf_set(wqe_wqes, &wqe->fcp_icmd.wqe_com, 1);
189 
190 	/* Word 11 */
191 	bf_set(wqe_cmd_type, &wqe->fcp_icmd.wqe_com, FCP_COMMAND);
192 	bf_set(wqe_cqid, &wqe->fcp_icmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
193 	bf_set(wqe_pbde, &wqe->fcp_icmd.wqe_com, 0);
194 
195 	/* Word 12, 13, 14, 15 - is zero */
196 }
197 
198 /**
199  * lpfc_nvme_prep_abort_wqe - set up 'abort' work queue entry.
200  * @pwqeq: Pointer to command iocb.
201  * @xritag: Tag that  uniqely identifies the local exchange resource.
202  * @opt: Option bits -
203  *		bit 0 = inhibit sending abts on the link
204  *
205  * This function is called with hbalock held.
206  **/
207 void
208 lpfc_nvme_prep_abort_wqe(struct lpfc_iocbq *pwqeq, u16 xritag, u8 opt)
209 {
210 	union lpfc_wqe128 *wqe = &pwqeq->wqe;
211 
212 	/* WQEs are reused.  Clear stale data and set key fields to
213 	 * zero like ia, iaab, iaar, xri_tag, and ctxt_tag.
214 	 */
215 	memset(wqe, 0, sizeof(*wqe));
216 
217 	if (opt & INHIBIT_ABORT)
218 		bf_set(abort_cmd_ia, &wqe->abort_cmd, 1);
219 	/* Abort specified xri tag, with the mask deliberately zeroed */
220 	bf_set(abort_cmd_criteria, &wqe->abort_cmd, T_XRI_TAG);
221 
222 	bf_set(wqe_cmnd, &wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX);
223 
224 	/* Abort the IO associated with this outstanding exchange ID. */
225 	wqe->abort_cmd.wqe_com.abort_tag = xritag;
226 
227 	/* iotag for the wqe completion. */
228 	bf_set(wqe_reqtag, &wqe->abort_cmd.wqe_com, pwqeq->iotag);
229 
230 	bf_set(wqe_qosd, &wqe->abort_cmd.wqe_com, 1);
231 	bf_set(wqe_lenloc, &wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE);
232 
233 	bf_set(wqe_cmd_type, &wqe->abort_cmd.wqe_com, OTHER_COMMAND);
234 	bf_set(wqe_wqec, &wqe->abort_cmd.wqe_com, 1);
235 	bf_set(wqe_cqid, &wqe->abort_cmd.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
236 }
237 
238 /**
239  * lpfc_nvme_create_queue -
240  * @lpfc_pnvme: Pointer to the driver's nvme instance data
241  * @qidx: An cpu index used to affinitize IO queues and MSIX vectors.
242  * @handle: An opaque driver handle used in follow-up calls.
243  *
244  * Driver registers this routine to preallocate and initialize any
245  * internal data structures to bind the @qidx to its internal IO queues.
246  * A hardware queue maps (qidx) to a specific driver MSI-X vector/EQ/CQ/WQ.
247  *
248  * Return value :
249  *   0 - Success
250  *   -EINVAL - Unsupported input value.
251  *   -ENOMEM - Could not alloc necessary memory
252  **/
253 static int
254 lpfc_nvme_create_queue(struct nvme_fc_local_port *pnvme_lport,
255 		       unsigned int qidx, u16 qsize,
256 		       void **handle)
257 {
258 	struct lpfc_nvme_lport *lport;
259 	struct lpfc_vport *vport;
260 	struct lpfc_nvme_qhandle *qhandle;
261 	char *str;
262 
263 	if (!pnvme_lport->private)
264 		return -ENOMEM;
265 
266 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
267 	vport = lport->vport;
268 	qhandle = kzalloc(sizeof(struct lpfc_nvme_qhandle), GFP_KERNEL);
269 	if (qhandle == NULL)
270 		return -ENOMEM;
271 
272 	qhandle->cpu_id = raw_smp_processor_id();
273 	qhandle->qidx = qidx;
274 	/*
275 	 * NVME qidx == 0 is the admin queue, so both admin queue
276 	 * and first IO queue will use MSI-X vector and associated
277 	 * EQ/CQ/WQ at index 0. After that they are sequentially assigned.
278 	 */
279 	if (qidx) {
280 		str = "IO ";  /* IO queue */
281 		qhandle->index = ((qidx - 1) %
282 			lpfc_nvme_template.max_hw_queues);
283 	} else {
284 		str = "ADM";  /* Admin queue */
285 		qhandle->index = qidx;
286 	}
287 
288 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
289 			 "6073 Binding %s HdwQueue %d  (cpu %d) to "
290 			 "hdw_queue %d qhandle x%px\n", str,
291 			 qidx, qhandle->cpu_id, qhandle->index, qhandle);
292 	*handle = (void *)qhandle;
293 	return 0;
294 }
295 
296 /**
297  * lpfc_nvme_delete_queue -
298  * @lpfc_pnvme: Pointer to the driver's nvme instance data
299  * @qidx: An cpu index used to affinitize IO queues and MSIX vectors.
300  * @handle: An opaque driver handle from lpfc_nvme_create_queue
301  *
302  * Driver registers this routine to free
303  * any internal data structures to bind the @qidx to its internal
304  * IO queues.
305  *
306  * Return value :
307  *   0 - Success
308  *   TODO:  What are the failure codes.
309  **/
310 static void
311 lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport,
312 		       unsigned int qidx,
313 		       void *handle)
314 {
315 	struct lpfc_nvme_lport *lport;
316 	struct lpfc_vport *vport;
317 
318 	if (!pnvme_lport->private)
319 		return;
320 
321 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
322 	vport = lport->vport;
323 
324 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
325 			"6001 ENTER.  lpfc_pnvme x%px, qidx x%x qhandle x%px\n",
326 			lport, qidx, handle);
327 	kfree(handle);
328 }
329 
330 static void
331 lpfc_nvme_localport_delete(struct nvme_fc_local_port *localport)
332 {
333 	struct lpfc_nvme_lport *lport = localport->private;
334 
335 	lpfc_printf_vlog(lport->vport, KERN_INFO, LOG_NVME,
336 			 "6173 localport x%px delete complete\n",
337 			 lport);
338 
339 	/* release any threads waiting for the unreg to complete */
340 	if (lport->vport->localport)
341 		complete(lport->lport_unreg_cmp);
342 }
343 
344 /* lpfc_nvme_remoteport_delete
345  *
346  * @remoteport: Pointer to an nvme transport remoteport instance.
347  *
348  * This is a template downcall.  NVME transport calls this function
349  * when it has completed the unregistration of a previously
350  * registered remoteport.
351  *
352  * Return value :
353  * None
354  */
355 static void
356 lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
357 {
358 	struct lpfc_nvme_rport *rport = remoteport->private;
359 	struct lpfc_vport *vport;
360 	struct lpfc_nodelist *ndlp;
361 
362 	ndlp = rport->ndlp;
363 	if (!ndlp)
364 		goto rport_err;
365 
366 	vport = ndlp->vport;
367 	if (!vport)
368 		goto rport_err;
369 
370 	/* Remove this rport from the lport's list - memory is owned by the
371 	 * transport. Remove the ndlp reference for the NVME transport before
372 	 * calling state machine to remove the node.
373 	 */
374 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
375 			"6146 remoteport delete of remoteport x%px\n",
376 			remoteport);
377 	spin_lock_irq(&vport->phba->hbalock);
378 
379 	/* The register rebind might have occurred before the delete
380 	 * downcall.  Guard against this race.
381 	 */
382 	if (ndlp->upcall_flags & NLP_WAIT_FOR_UNREG) {
383 		ndlp->nrport = NULL;
384 		ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG;
385 	}
386 	spin_unlock_irq(&vport->phba->hbalock);
387 
388 	/* Remove original register reference. The host transport
389 	 * won't reference this rport/remoteport any further.
390 	 */
391 	lpfc_nlp_put(ndlp);
392 
393  rport_err:
394 	return;
395 }
396 
397 static void
398 lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
399 		       struct lpfc_wcqe_complete *wcqe)
400 {
401 	struct lpfc_vport *vport = cmdwqe->vport;
402 	struct lpfc_nvme_lport *lport;
403 	uint32_t status;
404 	struct nvmefc_ls_req *pnvme_lsreq;
405 	struct lpfc_dmabuf *buf_ptr;
406 	struct lpfc_nodelist *ndlp;
407 
408 	pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2;
409 	status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
410 
411 	if (vport->localport) {
412 		lport = (struct lpfc_nvme_lport *)vport->localport->private;
413 		if (lport) {
414 			atomic_inc(&lport->fc4NvmeLsCmpls);
415 			if (status) {
416 				if (bf_get(lpfc_wcqe_c_xb, wcqe))
417 					atomic_inc(&lport->cmpl_ls_xb);
418 				atomic_inc(&lport->cmpl_ls_err);
419 			}
420 		}
421 	}
422 
423 	ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
424 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
425 			 "6047 nvme cmpl Enter "
426 			 "Data %px DID %x Xri: %x status %x reason x%x "
427 			 "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n",
428 			 pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
429 			 cmdwqe->sli4_xritag, status,
430 			 (wcqe->parameter & 0xffff),
431 			 cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp);
432 
433 	lpfc_nvmeio_data(phba, "NVME LS  CMPL: xri x%x stat x%x parm x%x\n",
434 			 cmdwqe->sli4_xritag, status, wcqe->parameter);
435 
436 	if (cmdwqe->context3) {
437 		buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3;
438 		lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
439 		kfree(buf_ptr);
440 		cmdwqe->context3 = NULL;
441 	}
442 	if (pnvme_lsreq->done)
443 		pnvme_lsreq->done(pnvme_lsreq, status);
444 	else
445 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
446 				 "6046 nvme cmpl without done call back? "
447 				 "Data %px DID %x Xri: %x status %x\n",
448 				pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
449 				cmdwqe->sli4_xritag, status);
450 	if (ndlp) {
451 		lpfc_nlp_put(ndlp);
452 		cmdwqe->context1 = NULL;
453 	}
454 	lpfc_sli_release_iocbq(phba, cmdwqe);
455 }
456 
457 static int
458 lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
459 		  struct lpfc_dmabuf *inp,
460 		  struct nvmefc_ls_req *pnvme_lsreq,
461 		  void (*cmpl)(struct lpfc_hba *, struct lpfc_iocbq *,
462 			       struct lpfc_wcqe_complete *),
463 		  struct lpfc_nodelist *ndlp, uint32_t num_entry,
464 		  uint32_t tmo, uint8_t retry)
465 {
466 	struct lpfc_hba *phba = vport->phba;
467 	union lpfc_wqe128 *wqe;
468 	struct lpfc_iocbq *genwqe;
469 	struct ulp_bde64 *bpl;
470 	struct ulp_bde64 bde;
471 	int i, rc, xmit_len, first_len;
472 
473 	/* Allocate buffer for  command WQE */
474 	genwqe = lpfc_sli_get_iocbq(phba);
475 	if (genwqe == NULL)
476 		return 1;
477 
478 	wqe = &genwqe->wqe;
479 	/* Initialize only 64 bytes */
480 	memset(wqe, 0, sizeof(union lpfc_wqe));
481 
482 	genwqe->context3 = (uint8_t *)bmp;
483 	genwqe->iocb_flag |= LPFC_IO_NVME_LS;
484 
485 	/* Save for completion so we can release these resources */
486 	genwqe->context1 = lpfc_nlp_get(ndlp);
487 	genwqe->context2 = (uint8_t *)pnvme_lsreq;
488 	/* Fill in payload, bp points to frame payload */
489 
490 	if (!tmo)
491 		/* FC spec states we need 3 * ratov for CT requests */
492 		tmo = (3 * phba->fc_ratov);
493 
494 	/* For this command calculate the xmit length of the request bde. */
495 	xmit_len = 0;
496 	first_len = 0;
497 	bpl = (struct ulp_bde64 *)bmp->virt;
498 	for (i = 0; i < num_entry; i++) {
499 		bde.tus.w = bpl[i].tus.w;
500 		if (bde.tus.f.bdeFlags != BUFF_TYPE_BDE_64)
501 			break;
502 		xmit_len += bde.tus.f.bdeSize;
503 		if (i == 0)
504 			first_len = xmit_len;
505 	}
506 
507 	genwqe->rsvd2 = num_entry;
508 	genwqe->hba_wqidx = 0;
509 
510 	/* Words 0 - 2 */
511 	wqe->generic.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64;
512 	wqe->generic.bde.tus.f.bdeSize = first_len;
513 	wqe->generic.bde.addrLow = bpl[0].addrLow;
514 	wqe->generic.bde.addrHigh = bpl[0].addrHigh;
515 
516 	/* Word 3 */
517 	wqe->gen_req.request_payload_len = first_len;
518 
519 	/* Word 4 */
520 
521 	/* Word 5 */
522 	bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0);
523 	bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1);
524 	bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1);
525 	bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ);
526 	bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME);
527 
528 	/* Word 6 */
529 	bf_set(wqe_ctxt_tag, &wqe->gen_req.wqe_com,
530 	       phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]);
531 	bf_set(wqe_xri_tag, &wqe->gen_req.wqe_com, genwqe->sli4_xritag);
532 
533 	/* Word 7 */
534 	bf_set(wqe_tmo, &wqe->gen_req.wqe_com, (vport->phba->fc_ratov-1));
535 	bf_set(wqe_class, &wqe->gen_req.wqe_com, CLASS3);
536 	bf_set(wqe_cmnd, &wqe->gen_req.wqe_com, CMD_GEN_REQUEST64_WQE);
537 	bf_set(wqe_ct, &wqe->gen_req.wqe_com, SLI4_CT_RPI);
538 
539 	/* Word 8 */
540 	wqe->gen_req.wqe_com.abort_tag = genwqe->iotag;
541 
542 	/* Word 9 */
543 	bf_set(wqe_reqtag, &wqe->gen_req.wqe_com, genwqe->iotag);
544 
545 	/* Word 10 */
546 	bf_set(wqe_dbde, &wqe->gen_req.wqe_com, 1);
547 	bf_set(wqe_iod, &wqe->gen_req.wqe_com, LPFC_WQE_IOD_READ);
548 	bf_set(wqe_qosd, &wqe->gen_req.wqe_com, 1);
549 	bf_set(wqe_lenloc, &wqe->gen_req.wqe_com, LPFC_WQE_LENLOC_NONE);
550 	bf_set(wqe_ebde_cnt, &wqe->gen_req.wqe_com, 0);
551 
552 	/* Word 11 */
553 	bf_set(wqe_cqid, &wqe->gen_req.wqe_com, LPFC_WQE_CQ_ID_DEFAULT);
554 	bf_set(wqe_cmd_type, &wqe->gen_req.wqe_com, OTHER_COMMAND);
555 
556 
557 	/* Issue GEN REQ WQE for NPORT <did> */
558 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
559 			 "6050 Issue GEN REQ WQE to NPORT x%x "
560 			 "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px "
561 			 "xmit:%d 1st:%d\n",
562 			 ndlp->nlp_DID, genwqe->iotag,
563 			 vport->port_state,
564 			genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
565 	genwqe->wqe_cmpl = cmpl;
566 	genwqe->iocb_cmpl = NULL;
567 	genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT;
568 	genwqe->vport = vport;
569 	genwqe->retry = retry;
570 
571 	lpfc_nvmeio_data(phba, "NVME LS  XMIT: xri x%x iotag x%x to x%06x\n",
572 			 genwqe->sli4_xritag, genwqe->iotag, ndlp->nlp_DID);
573 
574 	rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe);
575 	if (rc) {
576 		lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
577 				 "6045 Issue GEN REQ WQE to NPORT x%x "
578 				 "Data: x%x x%x\n",
579 				 ndlp->nlp_DID, genwqe->iotag,
580 				 vport->port_state);
581 		lpfc_sli_release_iocbq(phba, genwqe);
582 		return 1;
583 	}
584 	return 0;
585 }
586 
587 /**
588  * lpfc_nvme_ls_req - Issue an Link Service request
589  * @lpfc_pnvme: Pointer to the driver's nvme instance data
590  * @lpfc_nvme_lport: Pointer to the driver's local port data
591  * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
592  *
593  * Driver registers this routine to handle any link service request
594  * from the nvme_fc transport to a remote nvme-aware port.
595  *
596  * Return value :
597  *   0 - Success
598  *   TODO: What are the failure codes.
599  **/
600 static int
601 lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
602 		 struct nvme_fc_remote_port *pnvme_rport,
603 		 struct nvmefc_ls_req *pnvme_lsreq)
604 {
605 	int ret = 0;
606 	struct lpfc_nvme_lport *lport;
607 	struct lpfc_nvme_rport *rport;
608 	struct lpfc_vport *vport;
609 	struct lpfc_nodelist *ndlp;
610 	struct ulp_bde64 *bpl;
611 	struct lpfc_dmabuf *bmp;
612 	uint16_t ntype, nstate;
613 
614 	/* there are two dma buf in the request, actually there is one and
615 	 * the second one is just the start address + cmd size.
616 	 * Before calling lpfc_nvme_gen_req these buffers need to be wrapped
617 	 * in a lpfc_dmabuf struct. When freeing we just free the wrapper
618 	 * because the nvem layer owns the data bufs.
619 	 * We do not have to break these packets open, we don't care what is in
620 	 * them. And we do not have to look at the resonse data, we only care
621 	 * that we got a response. All of the caring is going to happen in the
622 	 * nvme-fc layer.
623 	 */
624 
625 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
626 	rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
627 	if (unlikely(!lport) || unlikely(!rport))
628 		return -EINVAL;
629 
630 	vport = lport->vport;
631 
632 	if (vport->load_flag & FC_UNLOADING)
633 		return -ENODEV;
634 
635 	/* Need the ndlp.  It is stored in the driver's rport. */
636 	ndlp = rport->ndlp;
637 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
638 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
639 				 "6051 Remoteport x%px, rport has invalid ndlp. "
640 				 "Failing LS Req\n", pnvme_rport);
641 		return -ENODEV;
642 	}
643 
644 	/* The remote node has to be a mapped nvme target or an
645 	 * unmapped nvme initiator or it's an error.
646 	 */
647 	ntype = ndlp->nlp_type;
648 	nstate = ndlp->nlp_state;
649 	if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) ||
650 	    (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) {
651 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
652 				 "6088 DID x%06x not ready for "
653 				 "IO. State x%x, Type x%x\n",
654 				 pnvme_rport->port_id,
655 				 ndlp->nlp_state, ndlp->nlp_type);
656 		return -ENODEV;
657 	}
658 	bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
659 	if (!bmp) {
660 
661 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
662 				 "6044 Could not find node for DID %x\n",
663 				 pnvme_rport->port_id);
664 		return 2;
665 	}
666 	INIT_LIST_HEAD(&bmp->list);
667 	bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys));
668 	if (!bmp->virt) {
669 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
670 				 "6042 Could not find node for DID %x\n",
671 				 pnvme_rport->port_id);
672 		kfree(bmp);
673 		return 3;
674 	}
675 	bpl = (struct ulp_bde64 *)bmp->virt;
676 	bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma));
677 	bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma));
678 	bpl->tus.f.bdeFlags = 0;
679 	bpl->tus.f.bdeSize = pnvme_lsreq->rqstlen;
680 	bpl->tus.w = le32_to_cpu(bpl->tus.w);
681 	bpl++;
682 
683 	bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rspdma));
684 	bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rspdma));
685 	bpl->tus.f.bdeFlags = BUFF_TYPE_BDE_64I;
686 	bpl->tus.f.bdeSize = pnvme_lsreq->rsplen;
687 	bpl->tus.w = le32_to_cpu(bpl->tus.w);
688 
689 	/* Expand print to include key fields. */
690 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
691 			 "6149 Issue LS Req to DID 0x%06x lport x%px, "
692 			 "rport x%px lsreq x%px rqstlen:%d rsplen:%d "
693 			 "%pad %pad\n",
694 			 ndlp->nlp_DID, pnvme_lport, pnvme_rport,
695 			 pnvme_lsreq, pnvme_lsreq->rqstlen,
696 			 pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
697 			 &pnvme_lsreq->rspdma);
698 
699 	atomic_inc(&lport->fc4NvmeLsRequests);
700 
701 	/* Hardcode the wait to 30 seconds.  Connections are failing otherwise.
702 	 * This code allows it all to work.
703 	 */
704 	ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr,
705 				pnvme_lsreq, lpfc_nvme_cmpl_gen_req,
706 				ndlp, 2, 30, 0);
707 	if (ret != WQE_SUCCESS) {
708 		atomic_inc(&lport->xmt_ls_err);
709 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
710 				 "6052 EXIT. issue ls wqe failed lport x%px, "
711 				 "rport x%px lsreq x%px Status %x DID %x\n",
712 				 pnvme_lport, pnvme_rport, pnvme_lsreq,
713 				 ret, ndlp->nlp_DID);
714 		lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys);
715 		kfree(bmp);
716 		return ret;
717 	}
718 
719 	/* Stub in routine and return 0 for now. */
720 	return ret;
721 }
722 
723 /**
724  * lpfc_nvme_ls_abort - Issue an Link Service request
725  * @lpfc_pnvme: Pointer to the driver's nvme instance data
726  * @lpfc_nvme_lport: Pointer to the driver's local port data
727  * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
728  *
729  * Driver registers this routine to handle any link service request
730  * from the nvme_fc transport to a remote nvme-aware port.
731  *
732  * Return value :
733  *   0 - Success
734  *   TODO: What are the failure codes.
735  **/
736 static void
737 lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
738 		   struct nvme_fc_remote_port *pnvme_rport,
739 		   struct nvmefc_ls_req *pnvme_lsreq)
740 {
741 	struct lpfc_nvme_lport *lport;
742 	struct lpfc_vport *vport;
743 	struct lpfc_hba *phba;
744 	struct lpfc_nodelist *ndlp;
745 	LIST_HEAD(abort_list);
746 	struct lpfc_sli_ring *pring;
747 	struct lpfc_iocbq *wqe, *next_wqe;
748 
749 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
750 	if (unlikely(!lport))
751 		return;
752 	vport = lport->vport;
753 	phba = vport->phba;
754 
755 	if (vport->load_flag & FC_UNLOADING)
756 		return;
757 
758 	ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
759 	if (!ndlp) {
760 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
761 				 "6049 Could not find node for DID %x\n",
762 				 pnvme_rport->port_id);
763 		return;
764 	}
765 
766 	/* Expand print to include key fields. */
767 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
768 			 "6040 ENTER.  lport x%px, rport x%px lsreq x%px rqstlen:%d "
769 			 "rsplen:%d %pad %pad\n",
770 			 pnvme_lport, pnvme_rport,
771 			 pnvme_lsreq, pnvme_lsreq->rqstlen,
772 			 pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
773 			 &pnvme_lsreq->rspdma);
774 
775 	/*
776 	 * Lock the ELS ring txcmplq and build a local list of all ELS IOs
777 	 * that need an ABTS.  The IOs need to stay on the txcmplq so that
778 	 * the abort operation completes them successfully.
779 	 */
780 	pring = phba->sli4_hba.nvmels_wq->pring;
781 	spin_lock_irq(&phba->hbalock);
782 	spin_lock(&pring->ring_lock);
783 	list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) {
784 		/* Add to abort_list on on NDLP match. */
785 		if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) {
786 			wqe->iocb_flag |= LPFC_DRIVER_ABORTED;
787 			list_add_tail(&wqe->dlist, &abort_list);
788 		}
789 	}
790 	spin_unlock(&pring->ring_lock);
791 	spin_unlock_irq(&phba->hbalock);
792 
793 	/* Abort the targeted IOs and remove them from the abort list. */
794 	list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) {
795 		atomic_inc(&lport->xmt_ls_abort);
796 		spin_lock_irq(&phba->hbalock);
797 		list_del_init(&wqe->dlist);
798 		lpfc_sli_issue_abort_iotag(phba, pring, wqe);
799 		spin_unlock_irq(&phba->hbalock);
800 	}
801 }
802 
803 /* Fix up the existing sgls for NVME IO. */
804 static inline void
805 lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport,
806 		       struct lpfc_io_buf *lpfc_ncmd,
807 		       struct nvmefc_fcp_req *nCmd)
808 {
809 	struct lpfc_hba  *phba = vport->phba;
810 	struct sli4_sge *sgl;
811 	union lpfc_wqe128 *wqe;
812 	uint32_t *wptr, *dptr;
813 
814 	/*
815 	 * Get a local pointer to the built-in wqe and correct
816 	 * the cmd size to match NVME's 96 bytes and fix
817 	 * the dma address.
818 	 */
819 
820 	wqe = &lpfc_ncmd->cur_iocbq.wqe;
821 
822 	/*
823 	 * Adjust the FCP_CMD and FCP_RSP DMA data and sge_len to
824 	 * match NVME.  NVME sends 96 bytes. Also, use the
825 	 * nvme commands command and response dma addresses
826 	 * rather than the virtual memory to ease the restore
827 	 * operation.
828 	 */
829 	sgl = lpfc_ncmd->dma_sgl;
830 	sgl->sge_len = cpu_to_le32(nCmd->cmdlen);
831 	if (phba->cfg_nvme_embed_cmd) {
832 		sgl->addr_hi = 0;
833 		sgl->addr_lo = 0;
834 
835 		/* Word 0-2 - NVME CMND IU (embedded payload) */
836 		wqe->generic.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_IMMED;
837 		wqe->generic.bde.tus.f.bdeSize = 56;
838 		wqe->generic.bde.addrHigh = 0;
839 		wqe->generic.bde.addrLow =  64;  /* Word 16 */
840 
841 		/* Word 10  - dbde is 0, wqes is 1 in template */
842 
843 		/*
844 		 * Embed the payload in the last half of the WQE
845 		 * WQE words 16-30 get the NVME CMD IU payload
846 		 *
847 		 * WQE words 16-19 get payload Words 1-4
848 		 * WQE words 20-21 get payload Words 6-7
849 		 * WQE words 22-29 get payload Words 16-23
850 		 */
851 		wptr = &wqe->words[16];  /* WQE ptr */
852 		dptr = (uint32_t *)nCmd->cmdaddr;  /* payload ptr */
853 		dptr++;			/* Skip Word 0 in payload */
854 
855 		*wptr++ = *dptr++;	/* Word 1 */
856 		*wptr++ = *dptr++;	/* Word 2 */
857 		*wptr++ = *dptr++;	/* Word 3 */
858 		*wptr++ = *dptr++;	/* Word 4 */
859 		dptr++;			/* Skip Word 5 in payload */
860 		*wptr++ = *dptr++;	/* Word 6 */
861 		*wptr++ = *dptr++;	/* Word 7 */
862 		dptr += 8;		/* Skip Words 8-15 in payload */
863 		*wptr++ = *dptr++;	/* Word 16 */
864 		*wptr++ = *dptr++;	/* Word 17 */
865 		*wptr++ = *dptr++;	/* Word 18 */
866 		*wptr++ = *dptr++;	/* Word 19 */
867 		*wptr++ = *dptr++;	/* Word 20 */
868 		*wptr++ = *dptr++;	/* Word 21 */
869 		*wptr++ = *dptr++;	/* Word 22 */
870 		*wptr   = *dptr;	/* Word 23 */
871 	} else {
872 		sgl->addr_hi = cpu_to_le32(putPaddrHigh(nCmd->cmddma));
873 		sgl->addr_lo = cpu_to_le32(putPaddrLow(nCmd->cmddma));
874 
875 		/* Word 0-2 - NVME CMND IU Inline BDE */
876 		wqe->generic.bde.tus.f.bdeFlags =  BUFF_TYPE_BDE_64;
877 		wqe->generic.bde.tus.f.bdeSize = nCmd->cmdlen;
878 		wqe->generic.bde.addrHigh = sgl->addr_hi;
879 		wqe->generic.bde.addrLow =  sgl->addr_lo;
880 
881 		/* Word 10 */
882 		bf_set(wqe_dbde, &wqe->generic.wqe_com, 1);
883 		bf_set(wqe_wqes, &wqe->generic.wqe_com, 0);
884 	}
885 
886 	sgl++;
887 
888 	/* Setup the physical region for the FCP RSP */
889 	sgl->addr_hi = cpu_to_le32(putPaddrHigh(nCmd->rspdma));
890 	sgl->addr_lo = cpu_to_le32(putPaddrLow(nCmd->rspdma));
891 	sgl->word2 = le32_to_cpu(sgl->word2);
892 	if (nCmd->sg_cnt)
893 		bf_set(lpfc_sli4_sge_last, sgl, 0);
894 	else
895 		bf_set(lpfc_sli4_sge_last, sgl, 1);
896 	sgl->word2 = cpu_to_le32(sgl->word2);
897 	sgl->sge_len = cpu_to_le32(nCmd->rsplen);
898 }
899 
900 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
901 static void
902 lpfc_nvme_ktime(struct lpfc_hba *phba,
903 		struct lpfc_io_buf *lpfc_ncmd)
904 {
905 	uint64_t seg1, seg2, seg3, seg4;
906 	uint64_t segsum;
907 
908 	if (!lpfc_ncmd->ts_last_cmd ||
909 	    !lpfc_ncmd->ts_cmd_start ||
910 	    !lpfc_ncmd->ts_cmd_wqput ||
911 	    !lpfc_ncmd->ts_isr_cmpl ||
912 	    !lpfc_ncmd->ts_data_nvme)
913 		return;
914 
915 	if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_cmd_start)
916 		return;
917 	if (lpfc_ncmd->ts_cmd_start < lpfc_ncmd->ts_last_cmd)
918 		return;
919 	if (lpfc_ncmd->ts_cmd_wqput < lpfc_ncmd->ts_cmd_start)
920 		return;
921 	if (lpfc_ncmd->ts_isr_cmpl < lpfc_ncmd->ts_cmd_wqput)
922 		return;
923 	if (lpfc_ncmd->ts_data_nvme < lpfc_ncmd->ts_isr_cmpl)
924 		return;
925 	/*
926 	 * Segment 1 - Time from Last FCP command cmpl is handed
927 	 * off to NVME Layer to start of next command.
928 	 * Segment 2 - Time from Driver receives a IO cmd start
929 	 * from NVME Layer to WQ put is done on IO cmd.
930 	 * Segment 3 - Time from Driver WQ put is done on IO cmd
931 	 * to MSI-X ISR for IO cmpl.
932 	 * Segment 4 - Time from MSI-X ISR for IO cmpl to when
933 	 * cmpl is handled off to the NVME Layer.
934 	 */
935 	seg1 = lpfc_ncmd->ts_cmd_start - lpfc_ncmd->ts_last_cmd;
936 	if (seg1 > 5000000)  /* 5 ms - for sequential IOs only */
937 		seg1 = 0;
938 
939 	/* Calculate times relative to start of IO */
940 	seg2 = (lpfc_ncmd->ts_cmd_wqput - lpfc_ncmd->ts_cmd_start);
941 	segsum = seg2;
942 	seg3 = lpfc_ncmd->ts_isr_cmpl - lpfc_ncmd->ts_cmd_start;
943 	if (segsum > seg3)
944 		return;
945 	seg3 -= segsum;
946 	segsum += seg3;
947 
948 	seg4 = lpfc_ncmd->ts_data_nvme - lpfc_ncmd->ts_cmd_start;
949 	if (segsum > seg4)
950 		return;
951 	seg4 -= segsum;
952 
953 	phba->ktime_data_samples++;
954 	phba->ktime_seg1_total += seg1;
955 	if (seg1 < phba->ktime_seg1_min)
956 		phba->ktime_seg1_min = seg1;
957 	else if (seg1 > phba->ktime_seg1_max)
958 		phba->ktime_seg1_max = seg1;
959 	phba->ktime_seg2_total += seg2;
960 	if (seg2 < phba->ktime_seg2_min)
961 		phba->ktime_seg2_min = seg2;
962 	else if (seg2 > phba->ktime_seg2_max)
963 		phba->ktime_seg2_max = seg2;
964 	phba->ktime_seg3_total += seg3;
965 	if (seg3 < phba->ktime_seg3_min)
966 		phba->ktime_seg3_min = seg3;
967 	else if (seg3 > phba->ktime_seg3_max)
968 		phba->ktime_seg3_max = seg3;
969 	phba->ktime_seg4_total += seg4;
970 	if (seg4 < phba->ktime_seg4_min)
971 		phba->ktime_seg4_min = seg4;
972 	else if (seg4 > phba->ktime_seg4_max)
973 		phba->ktime_seg4_max = seg4;
974 
975 	lpfc_ncmd->ts_last_cmd = 0;
976 	lpfc_ncmd->ts_cmd_start = 0;
977 	lpfc_ncmd->ts_cmd_wqput  = 0;
978 	lpfc_ncmd->ts_isr_cmpl = 0;
979 	lpfc_ncmd->ts_data_nvme = 0;
980 }
981 #endif
982 
/**
 * lpfc_nvme_io_cmd_wqe_cmpl - Complete an NVME-over-FCP IO
 * @phba: Pointer to HBA context object
 * @pwqeIn: Pointer to the completed command WQE; context1 holds the
 *          lpfc_io_buf of the IO and vport the issuing vport
 * @wcqe: Pointer to the work queue completion entry for the IO
 *
 * Translates the completion entry into the status, transferred length and
 * response length of the nvmefc_fcp_req attached to the IO buffer.  When
 * the exchange is not busy (XB clear) the request is detached from the
 * buffer and its done() callback is invoked.  In all cases that reach the
 * end of the routine the buffer is handed to lpfc_release_nvme_buf().
 *
 * The buffer's buf_lock is held while the completion is processed and is
 * dropped before done() and before the buffer is released.
 *
 * Return value:
 *   None
 **/
static void
lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
			  struct lpfc_wcqe_complete *wcqe)
{
	struct lpfc_io_buf *lpfc_ncmd =
		(struct lpfc_io_buf *)pwqeIn->context1;
	struct lpfc_vport *vport = pwqeIn->vport;
	struct nvmefc_fcp_req *nCmd;
	struct nvme_fc_ersp_iu *ep;
	struct nvme_fc_cmd_iu *cp;
	struct lpfc_nodelist *ndlp;
	struct lpfc_nvme_fcpreq_priv *freqpriv;
	struct lpfc_nvme_lport *lport;
	uint32_t code, status, idx;
	uint16_t cid, sqhd, data;
	uint32_t *ptr;

	/* Sanity check on return of outstanding command */
	if (!lpfc_ncmd) {
		lpfc_printf_vlog(vport, KERN_ERR,
				 LOG_NODE | LOG_NVME_IOERR,
				 "6071 Null lpfc_ncmd pointer. No "
				 "release, skip completion\n");
		return;
	}

	/* Guard against abort handler being called at same time */
	spin_lock(&lpfc_ncmd->buf_lock);

	/* No request attached: nothing to complete, just recycle the buffer */
	if (!lpfc_ncmd->nvmeCmd) {
		spin_unlock(&lpfc_ncmd->buf_lock);
		lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
				 "6066 Missing cmpl ptrs: lpfc_ncmd x%px, "
				 "nvmeCmd x%px\n",
				 lpfc_ncmd, lpfc_ncmd->nvmeCmd);

		/* Release the lpfc_ncmd regardless of the missing elements. */
		lpfc_release_nvme_buf(phba, lpfc_ncmd);
		return;
	}
	nCmd = lpfc_ncmd->nvmeCmd;
	status = bf_get(lpfc_wcqe_c_status, wcqe);

	/* Count the completion against the hardware queue the IO used */
	idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
	phba->sli4_hba.hdwq[idx].nvme_cstat.io_cmpls++;

	/* Non-zero status: bump the local port error counters, if any */
	if (unlikely(status && vport->localport)) {
		lport = (struct lpfc_nvme_lport *)vport->localport->private;
		if (lport) {
			if (bf_get(lpfc_wcqe_c_xb, wcqe))
				atomic_inc(&lport->cmpl_fcp_xb);
			atomic_inc(&lport->cmpl_fcp_err);
		}
	}

	lpfc_nvmeio_data(phba, "NVME FCP CMPL: xri x%x stat x%x parm x%x\n",
			 lpfc_ncmd->cur_iocbq.sli4_xritag,
			 status, wcqe->parameter);
	/*
	 * Catch race where our node has transitioned, but the
	 * transport is still transitioning.
	 *
	 * The jump lands on the out_err label inside the switch below.
	 * lpfc_ncmd->status and ->result have not been updated from this
	 * completion at that point, so message 6072 reports whatever values
	 * the buffer already held.
	 */
	ndlp = lpfc_ncmd->ndlp;
	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
				 "6062 Ignoring NVME cmpl.  No ndlp\n");
		goto out_err;
	}

	code = bf_get(lpfc_wcqe_c_code, wcqe);
	if (code == CQE_CODE_NVME_ERSP) {
		/* For this type of CQE, we need to rebuild the rsp */
		ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr;

		/*
		 * Get Command Id from cmd to plug into response. This
		 * code is not needed in the next NVME Transport drop.
		 */
		cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr;
		cid = cp->sqe.common.command_id;

		/*
		 * RSN is in CQE word 2
		 * SQHD is in CQE Word 3 bits 15:0
		 * Cmd Specific info is in CQE Word 1
		 * and in CQE Word 0 bits 15:0
		 */
		sqhd = bf_get(lpfc_wcqe_c_sqhead, wcqe);

		/* Now lets build the NVME ERSP IU */
		ep->iu_len = cpu_to_be16(8);
		ep->rsn = wcqe->parameter;
		ep->xfrd_len = cpu_to_be32(nCmd->payload_length);
		ep->rsvd12 = 0;
		/* The 64-bit result is filled in as two 32-bit words */
		ptr = (uint32_t *)&ep->cqe.result.u64;
		*ptr++ = wcqe->total_data_placed;
		data = bf_get(lpfc_wcqe_c_ersp0, wcqe);
		*ptr = (uint32_t)data;
		ep->cqe.sq_head = sqhd;
		ep->cqe.sq_id =  nCmd->sqid;
		ep->cqe.command_id = cid;
		ep->cqe.status = 0;

		/* A rebuilt ERSP is reported as a full, successful transfer */
		lpfc_ncmd->status = IOSTAT_SUCCESS;
		lpfc_ncmd->result = 0;
		nCmd->rcv_rsplen = LPFC_NVME_ERSP_LEN;
		nCmd->transferred_length = nCmd->payload_length;
	} else {
		lpfc_ncmd->status = (status & LPFC_IOCB_STATUS_MASK);
		lpfc_ncmd->result = (wcqe->parameter & IOERR_PARAM_MASK);

		/* For NVME, the only failure path that results in an
		 * IO error is when the adapter rejects it.  All other
		 * conditions are a success case and resolved by the
		 * transport.
		 * IOSTAT_FCP_RSP_ERROR means:
		 * 1. Length of data received doesn't match total
		 *    transfer length in WQE
		 * 2. If the RSP payload does NOT match these cases:
		 *    a. RSP length 12/24 bytes and all zeros
		 *    b. NVME ERSP
		 */
		switch (lpfc_ncmd->status) {
		case IOSTAT_SUCCESS:
			nCmd->transferred_length = wcqe->total_data_placed;
			nCmd->rcv_rsplen = 0;
			nCmd->status = 0;
			break;
		case IOSTAT_FCP_RSP_ERROR:
			nCmd->transferred_length = wcqe->total_data_placed;
			nCmd->rcv_rsplen = wcqe->parameter;
			nCmd->status = 0;
			/* Sanity check */
			if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN)
				break;
			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
					 "6081 NVME Completion Protocol Error: "
					 "xri %x status x%x result x%x "
					 "placed x%x\n",
					 lpfc_ncmd->cur_iocbq.sli4_xritag,
					 lpfc_ncmd->status, lpfc_ncmd->result,
					 wcqe->total_data_placed);
			break;
		case IOSTAT_LOCAL_REJECT:
			/* Let fall through to set command final state. */
			if (lpfc_ncmd->result == IOERR_ABORT_REQUESTED)
				lpfc_printf_vlog(vport, KERN_INFO,
					 LOG_NVME_IOERR,
					 "6032 Delay Aborted cmd x%px "
					 "nvme cmd x%px, xri x%x, "
					 "xb %d\n",
					 lpfc_ncmd, nCmd,
					 lpfc_ncmd->cur_iocbq.sli4_xritag,
					 bf_get(lpfc_wcqe_c_xb, wcqe));
			/* fall through */
		default:
			/* Also the target of the "no ndlp" goto above */
out_err:
			lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
					 "6072 NVME Completion Error: xri %x "
					 "status x%x result x%x [x%x] "
					 "placed x%x\n",
					 lpfc_ncmd->cur_iocbq.sli4_xritag,
					 lpfc_ncmd->status, lpfc_ncmd->result,
					 wcqe->parameter,
					 wcqe->total_data_placed);
			nCmd->transferred_length = 0;
			nCmd->rcv_rsplen = 0;
			nCmd->status = NVME_SC_INTERNAL;
		}
	}

	/* pick up SLI4 exchange busy condition */
	if (bf_get(lpfc_wcqe_c_xb, wcqe))
		lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
	else
		lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;

	/* Update stats and complete the IO.  There is
	 * no need for dma unprep because the nvme_transport
	 * owns the dma address.
	 */
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
	/* Latency sampling is active only when a start time was recorded */
	if (lpfc_ncmd->ts_cmd_start) {
		lpfc_ncmd->ts_isr_cmpl = pwqeIn->isr_timestamp;
		lpfc_ncmd->ts_data_nvme = ktime_get_ns();
		phba->ktime_last_cmd = lpfc_ncmd->ts_data_nvme;
		lpfc_nvme_ktime(phba, lpfc_ncmd);
	}
	/* Optional check that the completion runs on the submitting CPU */
	if (unlikely(phba->cpucheck_on & LPFC_CHECK_NVME_IO)) {
		uint32_t cpu;
		idx = lpfc_ncmd->cur_iocbq.hba_wqidx;
		cpu = raw_smp_processor_id();
		if (cpu < LPFC_CHECK_CPU_CNT) {
			if (lpfc_ncmd->cpu != cpu)
				lpfc_printf_vlog(vport,
						 KERN_INFO, LOG_NVME_IOERR,
						 "6701 CPU Check cmpl: "
						 "cpu %d expect %d\n",
						 cpu, lpfc_ncmd->cpu);
			phba->sli4_hba.hdwq[idx].cpucheck_cmpl_io[cpu]++;
		}
	}
#endif

	/* NVME targets need completion held off until the abort exchange
	 * completes unless the NVME Rport is getting unregistered.
	 */

	if (!(lpfc_ncmd->flags & LPFC_SBUF_XBUSY)) {
		/*
		 * Break the request <-> buffer linkage while still holding
		 * buf_lock, then drop the lock before calling done().
		 */
		freqpriv = nCmd->private;
		freqpriv->nvme_buf = NULL;
		lpfc_ncmd->nvmeCmd = NULL;
		spin_unlock(&lpfc_ncmd->buf_lock);
		nCmd->done(nCmd);
	} else
		spin_unlock(&lpfc_ncmd->buf_lock);

	/* Call release with XB=1 to queue the IO into the abort list. */
	lpfc_release_nvme_buf(phba, lpfc_ncmd);
}
1217 
1218 
1219 /**
1220  * lpfc_nvme_prep_io_cmd - Issue an NVME-over-FCP IO
1221  * @lpfc_pnvme: Pointer to the driver's nvme instance data
1222  * @lpfc_nvme_lport: Pointer to the driver's local port data
1223  * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
1224  * @lpfc_nvme_fcreq: IO request from nvme fc to driver.
1225  * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue
1226  *
1227  * Driver registers this routine as it io request handler.  This
1228  * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq
1229  * data structure to the rport indicated in @lpfc_nvme_rport.
1230  *
1231  * Return value :
1232  *   0 - Success
1233  *   TODO: What are the failure codes.
1234  **/
1235 static int
1236 lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport,
1237 		      struct lpfc_io_buf *lpfc_ncmd,
1238 		      struct lpfc_nodelist *pnode,
1239 		      struct lpfc_fc4_ctrl_stat *cstat)
1240 {
1241 	struct lpfc_hba *phba = vport->phba;
1242 	struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
1243 	struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq);
1244 	union lpfc_wqe128 *wqe = &pwqeq->wqe;
1245 	uint32_t req_len;
1246 
1247 	if (!NLP_CHK_NODE_ACT(pnode))
1248 		return -EINVAL;
1249 
1250 	/*
1251 	 * There are three possibilities here - use scatter-gather segment, use
1252 	 * the single mapping, or neither.
1253 	 */
1254 	if (nCmd->sg_cnt) {
1255 		if (nCmd->io_dir == NVMEFC_FCP_WRITE) {
1256 			/* From the iwrite template, initialize words 7 - 11 */
1257 			memcpy(&wqe->words[7],
1258 			       &lpfc_iwrite_cmd_template.words[7],
1259 			       sizeof(uint32_t) * 5);
1260 
1261 			/* Word 4 */
1262 			wqe->fcp_iwrite.total_xfer_len = nCmd->payload_length;
1263 
1264 			/* Word 5 */
1265 			if ((phba->cfg_nvme_enable_fb) &&
1266 			    (pnode->nlp_flag & NLP_FIRSTBURST)) {
1267 				req_len = lpfc_ncmd->nvmeCmd->payload_length;
1268 				if (req_len < pnode->nvme_fb_size)
1269 					wqe->fcp_iwrite.initial_xfer_len =
1270 						req_len;
1271 				else
1272 					wqe->fcp_iwrite.initial_xfer_len =
1273 						pnode->nvme_fb_size;
1274 			} else {
1275 				wqe->fcp_iwrite.initial_xfer_len = 0;
1276 			}
1277 			cstat->output_requests++;
1278 		} else {
1279 			/* From the iread template, initialize words 7 - 11 */
1280 			memcpy(&wqe->words[7],
1281 			       &lpfc_iread_cmd_template.words[7],
1282 			       sizeof(uint32_t) * 5);
1283 
1284 			/* Word 4 */
1285 			wqe->fcp_iread.total_xfer_len = nCmd->payload_length;
1286 
1287 			/* Word 5 */
1288 			wqe->fcp_iread.rsrvd5 = 0;
1289 
1290 			cstat->input_requests++;
1291 		}
1292 	} else {
1293 		/* From the icmnd template, initialize words 4 - 11 */
1294 		memcpy(&wqe->words[4], &lpfc_icmnd_cmd_template.words[4],
1295 		       sizeof(uint32_t) * 8);
1296 		cstat->control_requests++;
1297 	}
1298 
1299 	if (pnode->nlp_nvme_info & NLP_NVME_NSLER)
1300 		bf_set(wqe_erp, &wqe->generic.wqe_com, 1);
1301 	/*
1302 	 * Finish initializing those WQE fields that are independent
1303 	 * of the nvme_cmnd request_buffer
1304 	 */
1305 
1306 	/* Word 3 */
1307 	bf_set(payload_offset_len, &wqe->fcp_icmd,
1308 	       (nCmd->rsplen + nCmd->cmdlen));
1309 
1310 	/* Word 6 */
1311 	bf_set(wqe_ctxt_tag, &wqe->generic.wqe_com,
1312 	       phba->sli4_hba.rpi_ids[pnode->nlp_rpi]);
1313 	bf_set(wqe_xri_tag, &wqe->generic.wqe_com, pwqeq->sli4_xritag);
1314 
1315 	/* Word 8 */
1316 	wqe->generic.wqe_com.abort_tag = pwqeq->iotag;
1317 
1318 	/* Word 9 */
1319 	bf_set(wqe_reqtag, &wqe->generic.wqe_com, pwqeq->iotag);
1320 
1321 	/* Words 13 14 15 are for PBDE support */
1322 
1323 	pwqeq->vport = vport;
1324 	return 0;
1325 }
1326 
1327 
/**
 * lpfc_nvme_prep_io_dma - Build the scatter-gather list for an NVME IO
 * @vport: Pointer to the vport the IO is issued on
 * @lpfc_ncmd: Driver IO buffer; its nvmeCmd already references the request
 *
 * Adjusts the command and response SGEs through lpfc_nvme_adj_fcp_sgls()
 * and then, when the request carries data, writes one data SGE per
 * scatterlist element of the request.  When the SGE position reaches a
 * border_sge_num boundary an LSP SGE is written instead and the list
 * continues in an extra SGL obtained from lpfc_get_sgl_per_hdwq().  Words
 * 13-15 of the WQE are set up as a PBDE for the first data SGE when
 * cfg_enable_pbde is set, and cleared otherwise.
 *
 * Return value :
 *   0 - Success
 *   1 - Too many segments, the scatterlist ended early, no extra SGL was
 *       available, or sg_cnt is zero while payload_length is not
 **/
static int
lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
		      struct lpfc_io_buf *lpfc_ncmd)
{
	struct lpfc_hba *phba = vport->phba;
	struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
	union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe;
	struct sli4_sge *sgl = lpfc_ncmd->dma_sgl;
	struct sli4_hybrid_sgl *sgl_xtra = NULL;
	struct scatterlist *data_sg;
	struct sli4_sge *first_data_sgl;
	struct ulp_bde64 *bde;
	dma_addr_t physaddr = 0;
	uint32_t num_bde = 0;
	uint32_t dma_len = 0;
	uint32_t dma_offset = 0;
	int nseg, i, j;
	bool lsp_just_set = false;

	/* Fix up the command and response DMA stuff. */
	lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd);

	/*
	 * There are three possibilities here - use scatter-gather segment, use
	 * the single mapping, or neither.
	 */
	if (nCmd->sg_cnt) {
		/*
		 * Jump over the cmd and rsp SGEs.  The fix routine
		 * has already adjusted for this.
		 */
		sgl += 2;

		first_data_sgl = sgl;
		lpfc_ncmd->seg_cnt = nCmd->sg_cnt;
		/*
		 * NOTE(review): the limit tested is the template's
		 * max_sgl_segments while the message reports
		 * cfg_nvme_seg_cnt + 1 - confirm the two always agree.
		 */
		if (lpfc_ncmd->seg_cnt > lpfc_nvme_template.max_sgl_segments) {
			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
					"6058 Too many sg segments from "
					"NVME Transport.  Max %d, "
					"nvmeIO sg_cnt %d\n",
					phba->cfg_nvme_seg_cnt + 1,
					lpfc_ncmd->seg_cnt);
			lpfc_ncmd->seg_cnt = 0;
			return 1;
		}

		/*
		 * The driver established a maximum scatter-gather segment count
		 * during probe that limits the number of sg elements in any
		 * single nvme command.  Just run through the seg_cnt and format
		 * the sge's.
		 */
		nseg = nCmd->sg_cnt;
		data_sg = nCmd->first_sgl;

		/*
		 * for tracking the segment boundaries: j counts every SGE
		 * written (data or LSP) and starts at 2 for the cmd and rsp
		 * SGEs, while i counts consumed scatterlist elements only.
		 */
		j = 2;
		for (i = 0; i < nseg; i++) {
			if (data_sg == NULL) {
				lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
						"6059 dptr err %d, nseg %d\n",
						i, nseg);
				lpfc_ncmd->seg_cnt = 0;
				return 1;
			}

			sgl->word2 = 0;
			/*
			 * num_bde is never modified in this routine, so this
			 * test only holds when nseg is 1; for longer lists the
			 * last bit is set by the (nseg - 1) == i test below.
			 */
			if ((num_bde + 1) == nseg) {
				bf_set(lpfc_sli4_sge_last, sgl, 1);
				bf_set(lpfc_sli4_sge_type, sgl,
				       LPFC_SGE_TYPE_DATA);
			} else {
				bf_set(lpfc_sli4_sge_last, sgl, 0);

				/*
				 * expand the segment: at a border_sge_num
				 * boundary, and unless this is the final
				 * element or an LSP was just written, chain to
				 * an extra SGL instead of writing data here.
				 */
				if (!lsp_just_set &&
				    !((j + 1) % phba->border_sge_num) &&
				    ((nseg - 1) != i)) {
					/* set LSP type */
					bf_set(lpfc_sli4_sge_type, sgl,
					       LPFC_SGE_TYPE_LSP);

					sgl_xtra = lpfc_get_sgl_per_hdwq(
							phba, lpfc_ncmd);

					if (unlikely(!sgl_xtra)) {
						lpfc_ncmd->seg_cnt = 0;
						return 1;
					}
					sgl->addr_lo = cpu_to_le32(putPaddrLow(
						       sgl_xtra->dma_phys_sgl));
					sgl->addr_hi = cpu_to_le32(putPaddrHigh(
						       sgl_xtra->dma_phys_sgl));

				} else {
					bf_set(lpfc_sli4_sge_type, sgl,
					       LPFC_SGE_TYPE_DATA);
				}
			}

			if (!(bf_get(lpfc_sli4_sge_type, sgl) &
				     LPFC_SGE_TYPE_LSP)) {
				/* Data SGE: describe the current element */
				if ((nseg - 1) == i)
					bf_set(lpfc_sli4_sge_last, sgl, 1);

				physaddr = data_sg->dma_address;
				dma_len = data_sg->length;
				sgl->addr_lo = cpu_to_le32(
							 putPaddrLow(physaddr));
				sgl->addr_hi = cpu_to_le32(
							putPaddrHigh(physaddr));

				bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
				sgl->word2 = cpu_to_le32(sgl->word2);
				sgl->sge_len = cpu_to_le32(dma_len);

				dma_offset += dma_len;
				data_sg = sg_next(data_sg);

				sgl++;

				lsp_just_set = false;
			} else {
				/*
				 * LSP SGE: finish it, continue in the extra
				 * SGL and rewind i so the same scatterlist
				 * element is written there on the next pass.
				 */
				sgl->word2 = cpu_to_le32(sgl->word2);

				sgl->sge_len = cpu_to_le32(
						     phba->cfg_sg_dma_buf_size);

				sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
				i = i - 1;

				lsp_just_set = true;
			}

			j++;
		}
		if (phba->cfg_enable_pbde) {
			/* Use PBDE support for first SGL only, offset == 0 */
			/* Words 13-15 */
			bde = (struct ulp_bde64 *)
				&wqe->words[13];
			bde->addrLow = first_data_sgl->addr_lo;
			bde->addrHigh = first_data_sgl->addr_hi;
			bde->tus.f.bdeSize =
				le32_to_cpu(first_data_sgl->sge_len);
			bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
			bde->tus.w = cpu_to_le32(bde->tus.w);
			/* wqe_pbde is 1 in template */
		} else {
			/* No PBDE: clear words 13-15 and the pbde bit */
			memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3));
			bf_set(wqe_pbde, &wqe->generic.wqe_com, 0);
		}

	} else {
		lpfc_ncmd->seg_cnt = 0;

		/* For this clause to be valid, the payload_length
		 * and sg_cnt must both be zero.
		 */
		if (nCmd->payload_length != 0) {
			lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
					"6063 NVME DMA Prep Err: sg_cnt %d "
					"payload_length x%x\n",
					nCmd->sg_cnt, nCmd->payload_length);
			return 1;
		}
	}
	return 0;
}
1513 
1514 /**
1515  * lpfc_nvme_fcp_io_submit - Issue an NVME-over-FCP IO
1516  * @lpfc_pnvme: Pointer to the driver's nvme instance data
1517  * @lpfc_nvme_lport: Pointer to the driver's local port data
1518  * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
1519  * @lpfc_nvme_fcreq: IO request from nvme fc to driver.
1520  * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue
1521  *
1522  * Driver registers this routine as it io request handler.  This
1523  * routine issues an fcp WQE with data from the @lpfc_nvme_fcpreq
1524  * data structure to the rport
1525  indicated in @lpfc_nvme_rport.
1526  *
1527  * Return value :
1528  *   0 - Success
1529  *   TODO: What are the failure codes.
1530  **/
1531 static int
1532 lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport,
1533 			struct nvme_fc_remote_port *pnvme_rport,
1534 			void *hw_queue_handle,
1535 			struct nvmefc_fcp_req *pnvme_fcreq)
1536 {
1537 	int ret = 0;
1538 	int expedite = 0;
1539 	int idx, cpu;
1540 	struct lpfc_nvme_lport *lport;
1541 	struct lpfc_fc4_ctrl_stat *cstat;
1542 	struct lpfc_vport *vport;
1543 	struct lpfc_hba *phba;
1544 	struct lpfc_nodelist *ndlp;
1545 	struct lpfc_io_buf *lpfc_ncmd;
1546 	struct lpfc_nvme_rport *rport;
1547 	struct lpfc_nvme_qhandle *lpfc_queue_info;
1548 	struct lpfc_nvme_fcpreq_priv *freqpriv;
1549 	struct nvme_common_command *sqe;
1550 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
1551 	uint64_t start = 0;
1552 #endif
1553 
1554 	/* Validate pointers. LLDD fault handling with transport does
1555 	 * have timing races.
1556 	 */
1557 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
1558 	if (unlikely(!lport)) {
1559 		ret = -EINVAL;
1560 		goto out_fail;
1561 	}
1562 
1563 	vport = lport->vport;
1564 
1565 	if (unlikely(!hw_queue_handle)) {
1566 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1567 				 "6117 Fail IO, NULL hw_queue_handle\n");
1568 		atomic_inc(&lport->xmt_fcp_err);
1569 		ret = -EBUSY;
1570 		goto out_fail;
1571 	}
1572 
1573 	phba = vport->phba;
1574 
1575 	if (vport->load_flag & FC_UNLOADING) {
1576 		ret = -ENODEV;
1577 		goto out_fail;
1578 	}
1579 
1580 	if (unlikely(vport->load_flag & FC_UNLOADING)) {
1581 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1582 				 "6124 Fail IO, Driver unload\n");
1583 		atomic_inc(&lport->xmt_fcp_err);
1584 		ret = -ENODEV;
1585 		goto out_fail;
1586 	}
1587 
1588 	freqpriv = pnvme_fcreq->private;
1589 	if (unlikely(!freqpriv)) {
1590 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1591 				 "6158 Fail IO, NULL request data\n");
1592 		atomic_inc(&lport->xmt_fcp_err);
1593 		ret = -EINVAL;
1594 		goto out_fail;
1595 	}
1596 
1597 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
1598 	if (phba->ktime_on)
1599 		start = ktime_get_ns();
1600 #endif
1601 	rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
1602 	lpfc_queue_info = (struct lpfc_nvme_qhandle *)hw_queue_handle;
1603 
1604 	/*
1605 	 * Catch race where our node has transitioned, but the
1606 	 * transport is still transitioning.
1607 	 */
1608 	ndlp = rport->ndlp;
1609 	if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
1610 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_NVME_IOERR,
1611 				 "6053 Busy IO, ndlp not ready: rport x%px "
1612 				  "ndlp x%px, DID x%06x\n",
1613 				 rport, ndlp, pnvme_rport->port_id);
1614 		atomic_inc(&lport->xmt_fcp_err);
1615 		ret = -EBUSY;
1616 		goto out_fail;
1617 	}
1618 
1619 	/* The remote node has to be a mapped target or it's an error. */
1620 	if ((ndlp->nlp_type & NLP_NVME_TARGET) &&
1621 	    (ndlp->nlp_state != NLP_STE_MAPPED_NODE)) {
1622 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE | LOG_NVME_IOERR,
1623 				 "6036 Fail IO, DID x%06x not ready for "
1624 				 "IO. State x%x, Type x%x Flg x%x\n",
1625 				 pnvme_rport->port_id,
1626 				 ndlp->nlp_state, ndlp->nlp_type,
1627 				 ndlp->upcall_flags);
1628 		atomic_inc(&lport->xmt_fcp_bad_ndlp);
1629 		ret = -EBUSY;
1630 		goto out_fail;
1631 
1632 	}
1633 
1634 	/* Currently only NVME Keep alive commands should be expedited
1635 	 * if the driver runs out of a resource. These should only be
1636 	 * issued on the admin queue, qidx 0
1637 	 */
1638 	if (!lpfc_queue_info->qidx && !pnvme_fcreq->sg_cnt) {
1639 		sqe = &((struct nvme_fc_cmd_iu *)
1640 			pnvme_fcreq->cmdaddr)->sqe.common;
1641 		if (sqe->opcode == nvme_admin_keep_alive)
1642 			expedite = 1;
1643 	}
1644 
1645 	/* The node is shared with FCP IO, make sure the IO pending count does
1646 	 * not exceed the programmed depth.
1647 	 */
1648 	if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
1649 		if ((atomic_read(&ndlp->cmd_pending) >= ndlp->cmd_qdepth) &&
1650 		    !expedite) {
1651 			lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1652 					 "6174 Fail IO, ndlp qdepth exceeded: "
1653 					 "idx %d DID %x pend %d qdepth %d\n",
1654 					 lpfc_queue_info->index, ndlp->nlp_DID,
1655 					 atomic_read(&ndlp->cmd_pending),
1656 					 ndlp->cmd_qdepth);
1657 			atomic_inc(&lport->xmt_fcp_qdepth);
1658 			ret = -EBUSY;
1659 			goto out_fail;
1660 		}
1661 	}
1662 
1663 	/* Lookup Hardware Queue index based on fcp_io_sched module parameter */
1664 	if (phba->cfg_fcp_io_sched == LPFC_FCP_SCHED_BY_HDWQ) {
1665 		idx = lpfc_queue_info->index;
1666 	} else {
1667 		cpu = raw_smp_processor_id();
1668 		idx = phba->sli4_hba.cpu_map[cpu].hdwq;
1669 	}
1670 
1671 	lpfc_ncmd = lpfc_get_nvme_buf(phba, ndlp, idx, expedite);
1672 	if (lpfc_ncmd == NULL) {
1673 		atomic_inc(&lport->xmt_fcp_noxri);
1674 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1675 				 "6065 Fail IO, driver buffer pool is empty: "
1676 				 "idx %d DID %x\n",
1677 				 lpfc_queue_info->index, ndlp->nlp_DID);
1678 		ret = -EBUSY;
1679 		goto out_fail;
1680 	}
1681 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
1682 	if (start) {
1683 		lpfc_ncmd->ts_cmd_start = start;
1684 		lpfc_ncmd->ts_last_cmd = phba->ktime_last_cmd;
1685 	} else {
1686 		lpfc_ncmd->ts_cmd_start = 0;
1687 	}
1688 #endif
1689 
1690 	/*
1691 	 * Store the data needed by the driver to issue, abort, and complete
1692 	 * an IO.
1693 	 * Do not let the IO hang out forever.  There is no midlayer issuing
1694 	 * an abort so inform the FW of the maximum IO pending time.
1695 	 */
1696 	freqpriv->nvme_buf = lpfc_ncmd;
1697 	lpfc_ncmd->nvmeCmd = pnvme_fcreq;
1698 	lpfc_ncmd->ndlp = ndlp;
1699 	lpfc_ncmd->qidx = lpfc_queue_info->qidx;
1700 
1701 	/*
1702 	 * Issue the IO on the WQ indicated by index in the hw_queue_handle.
1703 	 * This identfier was create in our hardware queue create callback
1704 	 * routine. The driver now is dependent on the IO queue steering from
1705 	 * the transport.  We are trusting the upper NVME layers know which
1706 	 * index to use and that they have affinitized a CPU to this hardware
1707 	 * queue. A hardware queue maps to a driver MSI-X vector/EQ/CQ/WQ.
1708 	 */
1709 	lpfc_ncmd->cur_iocbq.hba_wqidx = idx;
1710 	cstat = &phba->sli4_hba.hdwq[idx].nvme_cstat;
1711 
1712 	lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp, cstat);
1713 	ret = lpfc_nvme_prep_io_dma(vport, lpfc_ncmd);
1714 	if (ret) {
1715 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1716 				 "6175 Fail IO, Prep DMA: "
1717 				 "idx %d DID %x\n",
1718 				 lpfc_queue_info->index, ndlp->nlp_DID);
1719 		atomic_inc(&lport->xmt_fcp_err);
1720 		ret = -ENOMEM;
1721 		goto out_free_nvme_buf;
1722 	}
1723 
1724 	lpfc_nvmeio_data(phba, "NVME FCP XMIT: xri x%x idx %d to %06x\n",
1725 			 lpfc_ncmd->cur_iocbq.sli4_xritag,
1726 			 lpfc_queue_info->index, ndlp->nlp_DID);
1727 
1728 	ret = lpfc_sli4_issue_wqe(phba, lpfc_ncmd->hdwq, &lpfc_ncmd->cur_iocbq);
1729 	if (ret) {
1730 		atomic_inc(&lport->xmt_fcp_wqerr);
1731 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
1732 				 "6113 Fail IO, Could not issue WQE err %x "
1733 				 "sid: x%x did: x%x oxid: x%x\n",
1734 				 ret, vport->fc_myDID, ndlp->nlp_DID,
1735 				 lpfc_ncmd->cur_iocbq.sli4_xritag);
1736 		goto out_free_nvme_buf;
1737 	}
1738 
1739 	if (phba->cfg_xri_rebalancing)
1740 		lpfc_keep_pvt_pool_above_lowwm(phba, lpfc_ncmd->hdwq_no);
1741 
1742 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
1743 	if (lpfc_ncmd->ts_cmd_start)
1744 		lpfc_ncmd->ts_cmd_wqput = ktime_get_ns();
1745 
1746 	if (phba->cpucheck_on & LPFC_CHECK_NVME_IO) {
1747 		cpu = raw_smp_processor_id();
1748 		if (cpu < LPFC_CHECK_CPU_CNT) {
1749 			lpfc_ncmd->cpu = cpu;
1750 			if (idx != cpu)
1751 				lpfc_printf_vlog(vport,
1752 						 KERN_INFO, LOG_NVME_IOERR,
1753 						"6702 CPU Check cmd: "
1754 						"cpu %d wq %d\n",
1755 						lpfc_ncmd->cpu,
1756 						lpfc_queue_info->index);
1757 			phba->sli4_hba.hdwq[idx].cpucheck_xmt_io[cpu]++;
1758 		}
1759 	}
1760 #endif
1761 	return 0;
1762 
1763  out_free_nvme_buf:
1764 	if (lpfc_ncmd->nvmeCmd->sg_cnt) {
1765 		if (lpfc_ncmd->nvmeCmd->io_dir == NVMEFC_FCP_WRITE)
1766 			cstat->output_requests--;
1767 		else
1768 			cstat->input_requests--;
1769 	} else
1770 		cstat->control_requests--;
1771 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
1772  out_fail:
1773 	return ret;
1774 }
1775 
1776 /**
1777  * lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request.
1778  * @phba: Pointer to HBA context object
1779  * @cmdiocb: Pointer to command iocb object.
1780  * @rspiocb: Pointer to response iocb object.
1781  *
1782  * This is the callback function for any NVME FCP IO that was aborted.
1783  *
1784  * Return value:
1785  *   None
1786  **/
1787 void
1788 lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
1789 			   struct lpfc_wcqe_complete *abts_cmpl)
1790 {
1791 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
1792 			"6145 ABORT_XRI_CN completing on rpi x%x "
1793 			"original iotag x%x, abort cmd iotag x%x "
1794 			"req_tag x%x, status x%x, hwstatus x%x\n",
1795 			cmdiocb->iocb.un.acxri.abortContextTag,
1796 			cmdiocb->iocb.un.acxri.abortIoTag,
1797 			cmdiocb->iotag,
1798 			bf_get(lpfc_wcqe_c_request_tag, abts_cmpl),
1799 			bf_get(lpfc_wcqe_c_status, abts_cmpl),
1800 			bf_get(lpfc_wcqe_c_hw_status, abts_cmpl));
1801 	lpfc_sli_release_iocbq(phba, cmdiocb);
1802 }
1803 
1804 /**
1805  * lpfc_nvme_fcp_abort - Issue an NVME-over-FCP ABTS
1806  * @lpfc_pnvme: Pointer to the driver's nvme instance data
1807  * @lpfc_nvme_lport: Pointer to the driver's local port data
1808  * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
1809  * @lpfc_nvme_fcreq: IO request from nvme fc to driver.
1810  * @hw_queue_handle: Driver-returned handle in lpfc_nvme_create_queue
1811  *
1812  * Driver registers this routine as its nvme request io abort handler.  This
1813  * routine issues an fcp Abort WQE with data from the @lpfc_nvme_fcpreq
1814  * data structure to the rport indicated in @lpfc_nvme_rport.  This routine
 * is executed asynchronously - once the target is validated as "MAPPED" and
1816  * ready for IO, the driver issues the abort request and returns.
1817  *
1818  * Return value:
1819  *   None
1820  **/
1821 static void
1822 lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
1823 		    struct nvme_fc_remote_port *pnvme_rport,
1824 		    void *hw_queue_handle,
1825 		    struct nvmefc_fcp_req *pnvme_fcreq)
1826 {
1827 	struct lpfc_nvme_lport *lport;
1828 	struct lpfc_vport *vport;
1829 	struct lpfc_hba *phba;
1830 	struct lpfc_io_buf *lpfc_nbuf;
1831 	struct lpfc_iocbq *abts_buf;
1832 	struct lpfc_iocbq *nvmereq_wqe;
1833 	struct lpfc_nvme_fcpreq_priv *freqpriv;
1834 	unsigned long flags;
1835 	int ret_val;
1836 
1837 	/* Validate pointers. LLDD fault handling with transport does
1838 	 * have timing races.
1839 	 */
1840 	lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
1841 	if (unlikely(!lport))
1842 		return;
1843 
1844 	vport = lport->vport;
1845 
1846 	if (unlikely(!hw_queue_handle)) {
1847 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
1848 				 "6129 Fail Abort, HW Queue Handle NULL.\n");
1849 		return;
1850 	}
1851 
1852 	phba = vport->phba;
1853 	freqpriv = pnvme_fcreq->private;
1854 
1855 	if (unlikely(!freqpriv))
1856 		return;
1857 	if (vport->load_flag & FC_UNLOADING)
1858 		return;
1859 
1860 	/* Announce entry to new IO submit field. */
1861 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
1862 			 "6002 Abort Request to rport DID x%06x "
1863 			 "for nvme_fc_req x%px\n",
1864 			 pnvme_rport->port_id,
1865 			 pnvme_fcreq);
1866 
1867 	/* If the hba is getting reset, this flag is set.  It is
1868 	 * cleared when the reset is complete and rings reestablished.
1869 	 */
1870 	spin_lock_irqsave(&phba->hbalock, flags);
1871 	/* driver queued commands are in process of being flushed */
1872 	if (phba->hba_flag & HBA_IOQ_FLUSH) {
1873 		spin_unlock_irqrestore(&phba->hbalock, flags);
1874 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1875 				 "6139 Driver in reset cleanup - flushing "
1876 				 "NVME Req now.  hba_flag x%x\n",
1877 				 phba->hba_flag);
1878 		return;
1879 	}
1880 
1881 	lpfc_nbuf = freqpriv->nvme_buf;
1882 	if (!lpfc_nbuf) {
1883 		spin_unlock_irqrestore(&phba->hbalock, flags);
1884 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1885 				 "6140 NVME IO req has no matching lpfc nvme "
1886 				 "io buffer.  Skipping abort req.\n");
1887 		return;
1888 	} else if (!lpfc_nbuf->nvmeCmd) {
1889 		spin_unlock_irqrestore(&phba->hbalock, flags);
1890 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1891 				 "6141 lpfc NVME IO req has no nvme_fcreq "
1892 				 "io buffer.  Skipping abort req.\n");
1893 		return;
1894 	}
1895 	nvmereq_wqe = &lpfc_nbuf->cur_iocbq;
1896 
1897 	/* Guard against IO completion being called at same time */
1898 	spin_lock(&lpfc_nbuf->buf_lock);
1899 
1900 	/*
1901 	 * The lpfc_nbuf and the mapped nvme_fcreq in the driver's
1902 	 * state must match the nvme_fcreq passed by the nvme
1903 	 * transport.  If they don't match, it is likely the driver
1904 	 * has already completed the NVME IO and the nvme transport
1905 	 * has not seen it yet.
1906 	 */
1907 	if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) {
1908 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1909 				 "6143 NVME req mismatch: "
1910 				 "lpfc_nbuf x%px nvmeCmd x%px, "
1911 				 "pnvme_fcreq x%px.  Skipping Abort xri x%x\n",
1912 				 lpfc_nbuf, lpfc_nbuf->nvmeCmd,
1913 				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
1914 		goto out_unlock;
1915 	}
1916 
1917 	/* Don't abort IOs no longer on the pending queue. */
1918 	if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
1919 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1920 				 "6142 NVME IO req x%px not queued - skipping "
1921 				 "abort req xri x%x\n",
1922 				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
1923 		goto out_unlock;
1924 	}
1925 
1926 	atomic_inc(&lport->xmt_fcp_abort);
1927 	lpfc_nvmeio_data(phba, "NVME FCP ABORT: xri x%x idx %d to %06x\n",
1928 			 nvmereq_wqe->sli4_xritag,
1929 			 nvmereq_wqe->hba_wqidx, pnvme_rport->port_id);
1930 
1931 	/* Outstanding abort is in progress */
1932 	if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) {
1933 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1934 				 "6144 Outstanding NVME I/O Abort Request "
1935 				 "still pending on nvme_fcreq x%px, "
1936 				 "lpfc_ncmd %px xri x%x\n",
1937 				 pnvme_fcreq, lpfc_nbuf,
1938 				 nvmereq_wqe->sli4_xritag);
1939 		goto out_unlock;
1940 	}
1941 
1942 	abts_buf = __lpfc_sli_get_iocbq(phba);
1943 	if (!abts_buf) {
1944 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1945 				 "6136 No available abort wqes. Skipping "
1946 				 "Abts req for nvme_fcreq x%px xri x%x\n",
1947 				 pnvme_fcreq, nvmereq_wqe->sli4_xritag);
1948 		goto out_unlock;
1949 	}
1950 
1951 	/* Ready - mark outstanding as aborted by driver. */
1952 	nvmereq_wqe->iocb_flag |= LPFC_DRIVER_ABORTED;
1953 
1954 	lpfc_nvme_prep_abort_wqe(abts_buf, nvmereq_wqe->sli4_xritag, 0);
1955 
1956 	/* ABTS WQE must go to the same WQ as the WQE to be aborted */
1957 	abts_buf->iocb_flag |= LPFC_IO_NVME;
1958 	abts_buf->hba_wqidx = nvmereq_wqe->hba_wqidx;
1959 	abts_buf->vport = vport;
1960 	abts_buf->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl;
1961 	ret_val = lpfc_sli4_issue_wqe(phba, lpfc_nbuf->hdwq, abts_buf);
1962 	spin_unlock(&lpfc_nbuf->buf_lock);
1963 	spin_unlock_irqrestore(&phba->hbalock, flags);
1964 	if (ret_val) {
1965 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
1966 				 "6137 Failed abts issue_wqe with status x%x "
1967 				 "for nvme_fcreq x%px.\n",
1968 				 ret_val, pnvme_fcreq);
1969 		lpfc_sli_release_iocbq(phba, abts_buf);
1970 		return;
1971 	}
1972 
1973 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
1974 			 "6138 Transport Abort NVME Request Issued for "
1975 			 "ox_id x%x on reqtag x%x\n",
1976 			 nvmereq_wqe->sli4_xritag,
1977 			 abts_buf->iotag);
1978 	return;
1979 
1980 out_unlock:
1981 	spin_unlock(&lpfc_nbuf->buf_lock);
1982 	spin_unlock_irqrestore(&phba->hbalock, flags);
1983 	return;
1984 }
1985 
1986 /* Declare and initialization an instance of the FC NVME template. */
1987 static struct nvme_fc_port_template lpfc_nvme_template = {
1988 	.module	= THIS_MODULE,
1989 
1990 	/* initiator-based functions */
1991 	.localport_delete  = lpfc_nvme_localport_delete,
1992 	.remoteport_delete = lpfc_nvme_remoteport_delete,
1993 	.create_queue = lpfc_nvme_create_queue,
1994 	.delete_queue = lpfc_nvme_delete_queue,
1995 	.ls_req       = lpfc_nvme_ls_req,
1996 	.fcp_io       = lpfc_nvme_fcp_io_submit,
1997 	.ls_abort     = lpfc_nvme_ls_abort,
1998 	.fcp_abort    = lpfc_nvme_fcp_abort,
1999 
2000 	.max_hw_queues = 1,
2001 	.max_sgl_segments = LPFC_NVME_DEFAULT_SEGS,
2002 	.max_dif_sgl_segments = LPFC_NVME_DEFAULT_SEGS,
2003 	.dma_boundary = 0xFFFFFFFF,
2004 
2005 	/* Sizes of additional private data for data structures.
2006 	 * No use for the last two sizes at this time.
2007 	 */
2008 	.local_priv_sz = sizeof(struct lpfc_nvme_lport),
2009 	.remote_priv_sz = sizeof(struct lpfc_nvme_rport),
2010 	.lsrqst_priv_sz = 0,
2011 	.fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv),
2012 };
2013 
2014 /**
2015  * lpfc_get_nvme_buf - Get a nvme buffer from io_buf_list of the HBA
2016  * @phba: The HBA for which this call is being executed.
2017  *
2018  * This routine removes a nvme buffer from head of @hdwq io_buf_list
2019  * and returns to caller.
2020  *
2021  * Return codes:
2022  *   NULL - Error
2023  *   Pointer to lpfc_nvme_buf - Success
2024  **/
2025 static struct lpfc_io_buf *
2026 lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
2027 		  int idx, int expedite)
2028 {
2029 	struct lpfc_io_buf *lpfc_ncmd;
2030 	struct lpfc_sli4_hdw_queue *qp;
2031 	struct sli4_sge *sgl;
2032 	struct lpfc_iocbq *pwqeq;
2033 	union lpfc_wqe128 *wqe;
2034 
2035 	lpfc_ncmd = lpfc_get_io_buf(phba, NULL, idx, expedite);
2036 
2037 	if (lpfc_ncmd) {
2038 		pwqeq = &(lpfc_ncmd->cur_iocbq);
2039 		wqe = &pwqeq->wqe;
2040 
2041 		/* Setup key fields in buffer that may have been changed
2042 		 * if other protocols used this buffer.
2043 		 */
2044 		pwqeq->iocb_flag = LPFC_IO_NVME;
2045 		pwqeq->wqe_cmpl = lpfc_nvme_io_cmd_wqe_cmpl;
2046 		lpfc_ncmd->start_time = jiffies;
2047 		lpfc_ncmd->flags = 0;
2048 
2049 		/* Rsp SGE will be filled in when we rcv an IO
2050 		 * from the NVME Layer to be sent.
2051 		 * The cmd is going to be embedded so we need a SKIP SGE.
2052 		 */
2053 		sgl = lpfc_ncmd->dma_sgl;
2054 		bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_SKIP);
2055 		bf_set(lpfc_sli4_sge_last, sgl, 0);
2056 		sgl->word2 = cpu_to_le32(sgl->word2);
2057 		/* Fill in word 3 / sgl_len during cmd submission */
2058 
2059 		/* Initialize 64 bytes only */
2060 		memset(wqe, 0, sizeof(union lpfc_wqe));
2061 
2062 		if (lpfc_ndlp_check_qdepth(phba, ndlp)) {
2063 			atomic_inc(&ndlp->cmd_pending);
2064 			lpfc_ncmd->flags |= LPFC_SBUF_BUMP_QDEPTH;
2065 		}
2066 
2067 	} else {
2068 		qp = &phba->sli4_hba.hdwq[idx];
2069 		qp->empty_io_bufs++;
2070 	}
2071 
2072 	return  lpfc_ncmd;
2073 }
2074 
2075 /**
2076  * lpfc_release_nvme_buf: Return a nvme buffer back to hba nvme buf list.
2077  * @phba: The Hba for which this call is being executed.
2078  * @lpfc_ncmd: The nvme buffer which is being released.
2079  *
2080  * This routine releases @lpfc_ncmd nvme buffer by adding it to tail of @phba
2081  * lpfc_io_buf_list list. For SLI4 XRI's are tied to the nvme buffer
2082  * and cannot be reused for at least RA_TOV amount of time if it was
2083  * aborted.
2084  **/
2085 static void
2086 lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd)
2087 {
2088 	struct lpfc_sli4_hdw_queue *qp;
2089 	unsigned long iflag = 0;
2090 
2091 	if ((lpfc_ncmd->flags & LPFC_SBUF_BUMP_QDEPTH) && lpfc_ncmd->ndlp)
2092 		atomic_dec(&lpfc_ncmd->ndlp->cmd_pending);
2093 
2094 	lpfc_ncmd->ndlp = NULL;
2095 	lpfc_ncmd->flags &= ~LPFC_SBUF_BUMP_QDEPTH;
2096 
2097 	qp = lpfc_ncmd->hdwq;
2098 	if (unlikely(lpfc_ncmd->flags & LPFC_SBUF_XBUSY)) {
2099 		lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
2100 				"6310 XB release deferred for "
2101 				"ox_id x%x on reqtag x%x\n",
2102 				lpfc_ncmd->cur_iocbq.sli4_xritag,
2103 				lpfc_ncmd->cur_iocbq.iotag);
2104 
2105 		spin_lock_irqsave(&qp->abts_io_buf_list_lock, iflag);
2106 		list_add_tail(&lpfc_ncmd->list,
2107 			&qp->lpfc_abts_io_buf_list);
2108 		qp->abts_nvme_io_bufs++;
2109 		spin_unlock_irqrestore(&qp->abts_io_buf_list_lock, iflag);
2110 	} else
2111 		lpfc_release_io_buf(phba, (struct lpfc_io_buf *)lpfc_ncmd, qp);
2112 }
2113 
2114 /**
2115  * lpfc_nvme_create_localport - Create/Bind an nvme localport instance.
2116  * @pvport - the lpfc_vport instance requesting a localport.
2117  *
2118  * This routine is invoked to create an nvme localport instance to bind
2119  * to the nvme_fc_transport.  It is called once during driver load
2120  * like lpfc_create_shost after all other services are initialized.
2121  * It requires a vport, vpi, and wwns at call time.  Other localport
2122  * parameters are modified as the driver's FCID and the Fabric WWN
2123  * are established.
2124  *
2125  * Return codes
2126  *      0 - successful
2127  *      -ENOMEM - no heap memory available
2128  *      other values - from nvme registration upcall
2129  **/
2130 int
2131 lpfc_nvme_create_localport(struct lpfc_vport *vport)
2132 {
2133 	int ret = 0;
2134 	struct lpfc_hba  *phba = vport->phba;
2135 	struct nvme_fc_port_info nfcp_info;
2136 	struct nvme_fc_local_port *localport;
2137 	struct lpfc_nvme_lport *lport;
2138 
2139 	/* Initialize this localport instance.  The vport wwn usage ensures
2140 	 * that NPIV is accounted for.
2141 	 */
2142 	memset(&nfcp_info, 0, sizeof(struct nvme_fc_port_info));
2143 	nfcp_info.port_role = FC_PORT_ROLE_NVME_INITIATOR;
2144 	nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
2145 	nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn);
2146 
2147 	/* We need to tell the transport layer + 1 because it takes page
2148 	 * alignment into account. When space for the SGL is allocated we
2149 	 * allocate + 3, one for cmd, one for rsp and one for this alignment
2150 	 */
2151 	lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1;
2152 
2153 	/* Advertise how many hw queues we support based on cfg_hdw_queue,
2154 	 * which will not exceed cpu count.
2155 	 */
2156 	lpfc_nvme_template.max_hw_queues = phba->cfg_hdw_queue;
2157 
2158 	if (!IS_ENABLED(CONFIG_NVME_FC))
2159 		return ret;
2160 
2161 	/* localport is allocated from the stack, but the registration
2162 	 * call allocates heap memory as well as the private area.
2163 	 */
2164 
2165 	ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template,
2166 					 &vport->phba->pcidev->dev, &localport);
2167 	if (!ret) {
2168 		lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC,
2169 				 "6005 Successfully registered local "
2170 				 "NVME port num %d, localP x%px, private "
2171 				 "x%px, sg_seg %d\n",
2172 				 localport->port_num, localport,
2173 				 localport->private,
2174 				 lpfc_nvme_template.max_sgl_segments);
2175 
2176 		/* Private is our lport size declared in the template. */
2177 		lport = (struct lpfc_nvme_lport *)localport->private;
2178 		vport->localport = localport;
2179 		lport->vport = vport;
2180 		vport->nvmei_support = 1;
2181 
2182 		atomic_set(&lport->xmt_fcp_noxri, 0);
2183 		atomic_set(&lport->xmt_fcp_bad_ndlp, 0);
2184 		atomic_set(&lport->xmt_fcp_qdepth, 0);
2185 		atomic_set(&lport->xmt_fcp_err, 0);
2186 		atomic_set(&lport->xmt_fcp_wqerr, 0);
2187 		atomic_set(&lport->xmt_fcp_abort, 0);
2188 		atomic_set(&lport->xmt_ls_abort, 0);
2189 		atomic_set(&lport->xmt_ls_err, 0);
2190 		atomic_set(&lport->cmpl_fcp_xb, 0);
2191 		atomic_set(&lport->cmpl_fcp_err, 0);
2192 		atomic_set(&lport->cmpl_ls_xb, 0);
2193 		atomic_set(&lport->cmpl_ls_err, 0);
2194 		atomic_set(&lport->fc4NvmeLsRequests, 0);
2195 		atomic_set(&lport->fc4NvmeLsCmpls, 0);
2196 	}
2197 
2198 	return ret;
2199 }
2200 
2201 #if (IS_ENABLED(CONFIG_NVME_FC))
2202 /* lpfc_nvme_lport_unreg_wait - Wait for the host to complete an lport unreg.
2203  *
2204  * The driver has to wait for the host nvme transport to callback
2205  * indicating the localport has successfully unregistered all
2206  * resources.  Since this is an uninterruptible wait, loop every ten
2207  * seconds and print a message indicating no progress.
2208  *
2209  * An uninterruptible wait is used because of the risk of transport-to-
2210  * driver state mismatch.
2211  */
2212 static void
2213 lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
2214 			   struct lpfc_nvme_lport *lport,
2215 			   struct completion *lport_unreg_cmp)
2216 {
2217 	u32 wait_tmo;
2218 	int ret, i, pending = 0;
2219 	struct lpfc_sli_ring  *pring;
2220 	struct lpfc_hba  *phba = vport->phba;
2221 
2222 	/* Host transport has to clean up and confirm requiring an indefinite
2223 	 * wait. Print a message if a 10 second wait expires and renew the
2224 	 * wait. This is unexpected.
2225 	 */
2226 	wait_tmo = msecs_to_jiffies(LPFC_NVME_WAIT_TMO * 1000);
2227 	while (true) {
2228 		ret = wait_for_completion_timeout(lport_unreg_cmp, wait_tmo);
2229 		if (unlikely(!ret)) {
2230 			pending = 0;
2231 			for (i = 0; i < phba->cfg_hdw_queue; i++) {
2232 				pring = phba->sli4_hba.hdwq[i].io_wq->pring;
2233 				if (!pring)
2234 					continue;
2235 				if (pring->txcmplq_cnt)
2236 					pending += pring->txcmplq_cnt;
2237 			}
2238 			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR,
2239 					 "6176 Lport x%px Localport x%px wait "
2240 					 "timed out. Pending %d. Renewing.\n",
2241 					 lport, vport->localport, pending);
2242 			continue;
2243 		}
2244 		break;
2245 	}
2246 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_IOERR,
2247 			 "6177 Lport x%px Localport x%px Complete Success\n",
2248 			 lport, vport->localport);
2249 }
2250 #endif
2251 
2252 /**
2253  * lpfc_nvme_destroy_localport - Destroy lpfc_nvme bound to nvme transport.
2254  * @pnvme: pointer to lpfc nvme data structure.
2255  *
2256  * This routine is invoked to destroy all lports bound to the phba.
2257  * The lport memory was allocated by the nvme fc transport and is
2258  * released there.  This routine ensures all rports bound to the
2259  * lport have been disconnected.
2260  *
2261  **/
2262 void
2263 lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
2264 {
2265 #if (IS_ENABLED(CONFIG_NVME_FC))
2266 	struct nvme_fc_local_port *localport;
2267 	struct lpfc_nvme_lport *lport;
2268 	int ret;
2269 	DECLARE_COMPLETION_ONSTACK(lport_unreg_cmp);
2270 
2271 	if (vport->nvmei_support == 0)
2272 		return;
2273 
2274 	localport = vport->localport;
2275 	lport = (struct lpfc_nvme_lport *)localport->private;
2276 
2277 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
2278 			 "6011 Destroying NVME localport x%px\n",
2279 			 localport);
2280 
2281 	/* lport's rport list is clear.  Unregister
2282 	 * lport and release resources.
2283 	 */
2284 	lport->lport_unreg_cmp = &lport_unreg_cmp;
2285 	ret = nvme_fc_unregister_localport(localport);
2286 
2287 	/* Wait for completion.  This either blocks
2288 	 * indefinitely or succeeds
2289 	 */
2290 	lpfc_nvme_lport_unreg_wait(vport, lport, &lport_unreg_cmp);
2291 	vport->localport = NULL;
2292 
2293 	/* Regardless of the unregister upcall response, clear
2294 	 * nvmei_support.  All rports are unregistered and the
2295 	 * driver will clean up.
2296 	 */
2297 	vport->nvmei_support = 0;
2298 	if (ret == 0) {
2299 		lpfc_printf_vlog(vport,
2300 				 KERN_INFO, LOG_NVME_DISC,
2301 				 "6009 Unregistered lport Success\n");
2302 	} else {
2303 		lpfc_printf_vlog(vport,
2304 				 KERN_INFO, LOG_NVME_DISC,
2305 				 "6010 Unregistered lport "
2306 				 "Failed, status x%x\n",
2307 				 ret);
2308 	}
2309 #endif
2310 }
2311 
2312 void
2313 lpfc_nvme_update_localport(struct lpfc_vport *vport)
2314 {
2315 #if (IS_ENABLED(CONFIG_NVME_FC))
2316 	struct nvme_fc_local_port *localport;
2317 	struct lpfc_nvme_lport *lport;
2318 
2319 	localport = vport->localport;
2320 	if (!localport) {
2321 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
2322 				 "6710 Update NVME fail. No localport\n");
2323 		return;
2324 	}
2325 	lport = (struct lpfc_nvme_lport *)localport->private;
2326 	if (!lport) {
2327 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME,
2328 				 "6171 Update NVME fail. localP x%px, No lport\n",
2329 				 localport);
2330 		return;
2331 	}
2332 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
2333 			 "6012 Update NVME lport x%px did x%x\n",
2334 			 localport, vport->fc_myDID);
2335 
2336 	localport->port_id = vport->fc_myDID;
2337 	if (localport->port_id == 0)
2338 		localport->port_role = FC_PORT_ROLE_NVME_DISCOVERY;
2339 	else
2340 		localport->port_role = FC_PORT_ROLE_NVME_INITIATOR;
2341 
2342 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
2343 			 "6030 bound lport x%px to DID x%06x\n",
2344 			 lport, localport->port_id);
2345 #endif
2346 }
2347 
2348 int
2349 lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
2350 {
2351 #if (IS_ENABLED(CONFIG_NVME_FC))
2352 	int ret = 0;
2353 	struct nvme_fc_local_port *localport;
2354 	struct lpfc_nvme_lport *lport;
2355 	struct lpfc_nvme_rport *rport;
2356 	struct lpfc_nvme_rport *oldrport;
2357 	struct nvme_fc_remote_port *remote_port;
2358 	struct nvme_fc_port_info rpinfo;
2359 	struct lpfc_nodelist *prev_ndlp = NULL;
2360 
2361 	lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC,
2362 			 "6006 Register NVME PORT. DID x%06x nlptype x%x\n",
2363 			 ndlp->nlp_DID, ndlp->nlp_type);
2364 
2365 	localport = vport->localport;
2366 	if (!localport)
2367 		return 0;
2368 
2369 	lport = (struct lpfc_nvme_lport *)localport->private;
2370 
2371 	/* NVME rports are not preserved across devloss.
2372 	 * Just register this instance.  Note, rpinfo->dev_loss_tmo
2373 	 * is left 0 to indicate accept transport defaults.  The
2374 	 * driver communicates port role capabilities consistent
2375 	 * with the PRLI response data.
2376 	 */
2377 	memset(&rpinfo, 0, sizeof(struct nvme_fc_port_info));
2378 	rpinfo.port_id = ndlp->nlp_DID;
2379 	if (ndlp->nlp_type & NLP_NVME_TARGET)
2380 		rpinfo.port_role |= FC_PORT_ROLE_NVME_TARGET;
2381 	if (ndlp->nlp_type & NLP_NVME_INITIATOR)
2382 		rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR;
2383 
2384 	if (ndlp->nlp_type & NLP_NVME_DISCOVERY)
2385 		rpinfo.port_role |= FC_PORT_ROLE_NVME_DISCOVERY;
2386 
2387 	rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn);
2388 	rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
2389 
2390 	spin_lock_irq(&vport->phba->hbalock);
2391 	oldrport = lpfc_ndlp_get_nrport(ndlp);
2392 	if (oldrport) {
2393 		prev_ndlp = oldrport->ndlp;
2394 		spin_unlock_irq(&vport->phba->hbalock);
2395 	} else {
2396 		spin_unlock_irq(&vport->phba->hbalock);
2397 		lpfc_nlp_get(ndlp);
2398 	}
2399 
2400 	ret = nvme_fc_register_remoteport(localport, &rpinfo, &remote_port);
2401 	if (!ret) {
2402 		/* If the ndlp already has an nrport, this is just
2403 		 * a resume of the existing rport.  Else this is a
2404 		 * new rport.
2405 		 */
2406 		/* Guard against an unregister/reregister
2407 		 * race that leaves the WAIT flag set.
2408 		 */
2409 		spin_lock_irq(&vport->phba->hbalock);
2410 		ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG;
2411 		spin_unlock_irq(&vport->phba->hbalock);
2412 		rport = remote_port->private;
2413 		if (oldrport) {
2414 			/* New remoteport record does not guarantee valid
2415 			 * host private memory area.
2416 			 */
2417 			if (oldrport == remote_port->private) {
2418 				/* Same remoteport - ndlp should match.
2419 				 * Just reuse.
2420 				 */
2421 				lpfc_printf_vlog(ndlp->vport, KERN_INFO,
2422 						 LOG_NVME_DISC,
2423 						 "6014 Rebind lport to current "
2424 						 "remoteport x%px wwpn 0x%llx, "
2425 						 "Data: x%x x%x x%px x%px x%x "
2426 						 " x%06x\n",
2427 						 remote_port,
2428 						 remote_port->port_name,
2429 						 remote_port->port_id,
2430 						 remote_port->port_role,
2431 						 oldrport->ndlp,
2432 						 ndlp,
2433 						 ndlp->nlp_type,
2434 						 ndlp->nlp_DID);
2435 
2436 				/* It's a complete rebind only if the driver
2437 				 * is registering with the same ndlp. Otherwise
2438 				 * the driver likely executed a node swap
2439 				 * prior to this registration and the ndlp to
2440 				 * remoteport binding needs to be redone.
2441 				 */
2442 				if (prev_ndlp == ndlp)
2443 					return 0;
2444 
2445 			}
2446 
2447 			/* Sever the ndlp<->rport association
2448 			 * before dropping the ndlp ref from
2449 			 * register.
2450 			 */
2451 			spin_lock_irq(&vport->phba->hbalock);
2452 			ndlp->nrport = NULL;
2453 			ndlp->upcall_flags &= ~NLP_WAIT_FOR_UNREG;
2454 			spin_unlock_irq(&vport->phba->hbalock);
2455 			rport->ndlp = NULL;
2456 			rport->remoteport = NULL;
2457 
2458 			/* Reference only removed if previous NDLP is no longer
2459 			 * active. It might be just a swap and removing the
2460 			 * reference would cause a premature cleanup.
2461 			 */
2462 			if (prev_ndlp && prev_ndlp != ndlp) {
2463 				if ((!NLP_CHK_NODE_ACT(prev_ndlp)) ||
2464 				    (!prev_ndlp->nrport))
2465 					lpfc_nlp_put(prev_ndlp);
2466 			}
2467 		}
2468 
2469 		/* Clean bind the rport to the ndlp. */
2470 		rport->remoteport = remote_port;
2471 		rport->lport = lport;
2472 		rport->ndlp = ndlp;
2473 		spin_lock_irq(&vport->phba->hbalock);
2474 		ndlp->nrport = rport;
2475 		spin_unlock_irq(&vport->phba->hbalock);
2476 		lpfc_printf_vlog(vport, KERN_INFO,
2477 				 LOG_NVME_DISC | LOG_NODE,
2478 				 "6022 Bind lport x%px to remoteport x%px "
2479 				 "rport x%px WWNN 0x%llx, "
2480 				 "Rport WWPN 0x%llx DID "
2481 				 "x%06x Role x%x, ndlp %p prev_ndlp x%px\n",
2482 				 lport, remote_port, rport,
2483 				 rpinfo.node_name, rpinfo.port_name,
2484 				 rpinfo.port_id, rpinfo.port_role,
2485 				 ndlp, prev_ndlp);
2486 	} else {
2487 		lpfc_printf_vlog(vport, KERN_ERR,
2488 				 LOG_NVME_DISC | LOG_NODE,
2489 				 "6031 RemotePort Registration failed "
2490 				 "err: %d, DID x%06x\n",
2491 				 ret, ndlp->nlp_DID);
2492 	}
2493 
2494 	return ret;
2495 #else
2496 	return 0;
2497 #endif
2498 }
2499 
2500 /**
2501  * lpfc_nvme_rescan_port - Check to see if we should rescan this remoteport
2502  *
2503  * If the ndlp represents an NVME Target, that we are logged into,
2504  * ping the NVME FC Transport layer to initiate a device rescan
2505  * on this remote NPort.
2506  */
2507 void
2508 lpfc_nvme_rescan_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
2509 {
2510 #if (IS_ENABLED(CONFIG_NVME_FC))
2511 	struct lpfc_nvme_rport *nrport;
2512 	struct nvme_fc_remote_port *remoteport = NULL;
2513 
2514 	spin_lock_irq(&vport->phba->hbalock);
2515 	nrport = lpfc_ndlp_get_nrport(ndlp);
2516 	if (nrport)
2517 		remoteport = nrport->remoteport;
2518 	spin_unlock_irq(&vport->phba->hbalock);
2519 
2520 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
2521 			 "6170 Rescan NPort DID x%06x type x%x "
2522 			 "state x%x nrport x%px remoteport x%px\n",
2523 			 ndlp->nlp_DID, ndlp->nlp_type, ndlp->nlp_state,
2524 			 nrport, remoteport);
2525 
2526 	if (!nrport || !remoteport)
2527 		goto rescan_exit;
2528 
2529 	/* Only rescan if we are an NVME target in the MAPPED state */
2530 	if (remoteport->port_role & FC_PORT_ROLE_NVME_DISCOVERY &&
2531 	    ndlp->nlp_state == NLP_STE_MAPPED_NODE) {
2532 		nvme_fc_rescan_remoteport(remoteport);
2533 
2534 		lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
2535 				 "6172 NVME rescanned DID x%06x "
2536 				 "port_state x%x\n",
2537 				 ndlp->nlp_DID, remoteport->port_state);
2538 	}
2539 	return;
2540  rescan_exit:
2541 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
2542 			 "6169 Skip NVME Rport Rescan, NVME remoteport "
2543 			 "unregistered\n");
2544 #endif
2545 }
2546 
2547 /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport.
2548  *
2549  * There is no notion of Devloss or rport recovery from the current
2550  * nvme_transport perspective.  Loss of an rport just means IO cannot
2551  * be sent and recovery is completely up to the initator.
2552  * For now, the driver just unbinds the DID and port_role so that
2553  * no further IO can be issued.  Changes are planned for later.
2554  *
2555  * Notes - the ndlp reference count is not decremented here since
2556  * since there is no nvme_transport api for devloss.  Node ref count
2557  * is only adjusted in driver unload.
2558  */
2559 void
2560 lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
2561 {
2562 #if (IS_ENABLED(CONFIG_NVME_FC))
2563 	int ret;
2564 	struct nvme_fc_local_port *localport;
2565 	struct lpfc_nvme_lport *lport;
2566 	struct lpfc_nvme_rport *rport;
2567 	struct nvme_fc_remote_port *remoteport = NULL;
2568 
2569 	localport = vport->localport;
2570 
2571 	/* This is fundamental error.  The localport is always
2572 	 * available until driver unload.  Just exit.
2573 	 */
2574 	if (!localport)
2575 		return;
2576 
2577 	lport = (struct lpfc_nvme_lport *)localport->private;
2578 	if (!lport)
2579 		goto input_err;
2580 
2581 	spin_lock_irq(&vport->phba->hbalock);
2582 	rport = lpfc_ndlp_get_nrport(ndlp);
2583 	if (rport)
2584 		remoteport = rport->remoteport;
2585 	spin_unlock_irq(&vport->phba->hbalock);
2586 	if (!remoteport)
2587 		goto input_err;
2588 
2589 	lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
2590 			 "6033 Unreg nvme remoteport x%px, portname x%llx, "
2591 			 "port_id x%06x, portstate x%x port type x%x\n",
2592 			 remoteport, remoteport->port_name,
2593 			 remoteport->port_id, remoteport->port_state,
2594 			 ndlp->nlp_type);
2595 
2596 	/* Sanity check ndlp type.  Only call for NVME ports. Don't
2597 	 * clear any rport state until the transport calls back.
2598 	 */
2599 
2600 	if (ndlp->nlp_type & NLP_NVME_TARGET) {
2601 		/* No concern about the role change on the nvme remoteport.
2602 		 * The transport will update it.
2603 		 */
2604 		ndlp->upcall_flags |= NLP_WAIT_FOR_UNREG;
2605 
2606 		/* Don't let the host nvme transport keep sending keep-alives
2607 		 * on this remoteport. Vport is unloading, no recovery. The
2608 		 * return values is ignored.  The upcall is a courtesy to the
2609 		 * transport.
2610 		 */
2611 		if (vport->load_flag & FC_UNLOADING)
2612 			(void)nvme_fc_set_remoteport_devloss(remoteport, 0);
2613 
2614 		ret = nvme_fc_unregister_remoteport(remoteport);
2615 		if (ret != 0) {
2616 			lpfc_nlp_put(ndlp);
2617 			lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
2618 					 "6167 NVME unregister failed %d "
2619 					 "port_state x%x\n",
2620 					 ret, remoteport->port_state);
2621 		}
2622 	}
2623 	return;
2624 
2625  input_err:
2626 #endif
2627 	lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
2628 			 "6168 State error: lport x%px, rport x%px FCID x%06x\n",
2629 			 vport->localport, ndlp->rport, ndlp->nlp_DID);
2630 }
2631 
2632 /**
2633  * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
2634  * @phba: pointer to lpfc hba data structure.
2635  * @axri: pointer to the fcp xri abort wcqe structure.
2636  * @lpfc_ncmd: The nvme job structure for the request being aborted.
2637  *
2638  * This routine is invoked by the worker thread to process a SLI4 fast-path
2639  * NVME aborted xri.  Aborted NVME IO commands are completed to the transport
2640  * here.
2641  **/
2642 void
2643 lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
2644 			   struct sli4_wcqe_xri_aborted *axri,
2645 			   struct lpfc_io_buf *lpfc_ncmd)
2646 {
2647 	uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
2648 	struct nvmefc_fcp_req *nvme_cmd = NULL;
2649 	struct lpfc_nodelist *ndlp = lpfc_ncmd->ndlp;
2650 
2651 
2652 	if (ndlp)
2653 		lpfc_sli4_abts_err_handler(phba, ndlp, axri);
2654 
2655 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
2656 			"6311 nvme_cmd %p xri x%x tag x%x abort complete and "
2657 			"xri released\n",
2658 			lpfc_ncmd->nvmeCmd, xri,
2659 			lpfc_ncmd->cur_iocbq.iotag);
2660 
2661 	/* Aborted NVME commands are required to not complete
2662 	 * before the abort exchange command fully completes.
2663 	 * Once completed, it is available via the put list.
2664 	 */
2665 	if (lpfc_ncmd->nvmeCmd) {
2666 		nvme_cmd = lpfc_ncmd->nvmeCmd;
2667 		nvme_cmd->done(nvme_cmd);
2668 		lpfc_ncmd->nvmeCmd = NULL;
2669 	}
2670 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
2671 }
2672 
2673 /**
2674  * lpfc_nvme_wait_for_io_drain - Wait for all NVME wqes to complete
2675  * @phba: Pointer to HBA context object.
2676  *
2677  * This function flushes all wqes in the nvme rings and frees all resources
2678  * in the txcmplq. This function does not issue abort wqes for the IO
2679  * commands in txcmplq, they will just be returned with
2680  * IOERR_SLI_DOWN. This function is invoked with EEH when device's PCI
2681  * slot has been permanently disabled.
2682  **/
2683 void
2684 lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba)
2685 {
2686 	struct lpfc_sli_ring  *pring;
2687 	u32 i, wait_cnt = 0;
2688 
2689 	if (phba->sli_rev < LPFC_SLI_REV4 || !phba->sli4_hba.hdwq)
2690 		return;
2691 
2692 	/* Cycle through all IO rings and make sure all outstanding
2693 	 * WQEs have been removed from the txcmplqs.
2694 	 */
2695 	for (i = 0; i < phba->cfg_hdw_queue; i++) {
2696 		if (!phba->sli4_hba.hdwq[i].io_wq)
2697 			continue;
2698 		pring = phba->sli4_hba.hdwq[i].io_wq->pring;
2699 
2700 		if (!pring)
2701 			continue;
2702 
2703 		/* Retrieve everything on the txcmplq */
2704 		while (!list_empty(&pring->txcmplq)) {
2705 			msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1);
2706 			wait_cnt++;
2707 
2708 			/* The sleep is 10mS.  Every ten seconds,
2709 			 * dump a message.  Something is wrong.
2710 			 */
2711 			if ((wait_cnt % 1000) == 0) {
2712 				lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
2713 						"6178 NVME IO not empty, "
2714 						"cnt %d\n", wait_cnt);
2715 			}
2716 		}
2717 	}
2718 }
2719 
2720 void
2721 lpfc_nvme_cancel_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn)
2722 {
2723 #if (IS_ENABLED(CONFIG_NVME_FC))
2724 	struct lpfc_io_buf *lpfc_ncmd;
2725 	struct nvmefc_fcp_req *nCmd;
2726 	struct lpfc_nvme_fcpreq_priv *freqpriv;
2727 
2728 	if (!pwqeIn->context1) {
2729 		lpfc_sli_release_iocbq(phba, pwqeIn);
2730 		return;
2731 	}
2732 	/* For abort iocb just return, IO iocb will do a done call */
2733 	if (bf_get(wqe_cmnd, &pwqeIn->wqe.gen_req.wqe_com) ==
2734 	    CMD_ABORT_XRI_CX) {
2735 		lpfc_sli_release_iocbq(phba, pwqeIn);
2736 		return;
2737 	}
2738 	lpfc_ncmd = (struct lpfc_io_buf *)pwqeIn->context1;
2739 
2740 	spin_lock(&lpfc_ncmd->buf_lock);
2741 	if (!lpfc_ncmd->nvmeCmd) {
2742 		spin_unlock(&lpfc_ncmd->buf_lock);
2743 		lpfc_release_nvme_buf(phba, lpfc_ncmd);
2744 		return;
2745 	}
2746 
2747 	nCmd = lpfc_ncmd->nvmeCmd;
2748 	lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
2749 			"6194 NVME Cancel xri %x\n",
2750 			lpfc_ncmd->cur_iocbq.sli4_xritag);
2751 
2752 	nCmd->transferred_length = 0;
2753 	nCmd->rcv_rsplen = 0;
2754 	nCmd->status = NVME_SC_INTERNAL;
2755 	freqpriv = nCmd->private;
2756 	freqpriv->nvme_buf = NULL;
2757 	lpfc_ncmd->nvmeCmd = NULL;
2758 
2759 	spin_unlock(&lpfc_ncmd->buf_lock);
2760 	nCmd->done(nCmd);
2761 
2762 	/* Call release with XB=1 to queue the IO into the abort list. */
2763 	lpfc_release_nvme_buf(phba, lpfc_ncmd);
2764 #endif
2765 }
2766