xref: /freebsd/sys/dev/liquidio/base/lio_request_manager.c (revision 6fa42b91ca3f481912af98c4d49c44507eb1b8e1)
1 /*
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2017 Cavium, Inc.. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Cavium, Inc. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "lio_bsd.h"
35 #include "lio_common.h"
36 #include "lio_droq.h"
37 #include "lio_iq.h"
38 #include "lio_response_manager.h"
39 #include "lio_device.h"
40 #include "lio_main.h"
41 #include "lio_network.h"
42 #include "cn23xx_pf_device.h"
43 #include "lio_rxtx.h"
44 
/*
 * Result of posting one command to an instruction queue, as produced by
 * __lio_post_command2().
 */
struct lio_iq_post_status {
	int	status;		/* one of the LIO_IQ_SEND_* codes */
	int	index;		/* ring slot that was written, -1 on failure */
};
49 
50 static void	lio_check_db_timeout(void *arg, int pending);
51 static void	__lio_check_db_timeout(struct octeon_device *oct,
52 				       uint64_t iq_no);
53 
54 /* Return 0 on success, 1 on failure */
55 int
56 lio_init_instr_queue(struct octeon_device *oct, union octeon_txpciq txpciq,
57 		     uint32_t num_descs)
58 {
59 	struct lio_instr_queue	*iq;
60 	struct lio_iq_config	*conf = NULL;
61 	struct lio_tq		*db_tq;
62 	struct lio_request_list	*request_buf;
63 	bus_size_t		max_size;
64 	uint32_t		iq_no = (uint32_t)txpciq.s.q_no;
65 	uint32_t		q_size;
66 	int			error, i;
67 
68 	if (LIO_CN23XX_PF(oct))
69 		conf = &(LIO_GET_IQ_CFG(LIO_CHIP_CONF(oct, cn23xx_pf)));
70 	if (conf == NULL) {
71 		lio_dev_err(oct, "Unsupported Chip %x\n", oct->chip_id);
72 		return (1);
73 	}
74 
75 	q_size = (uint32_t)conf->instr_type * num_descs;
76 	iq = oct->instr_queue[iq_no];
77 	iq->oct_dev = oct;
78 
79 	max_size = LIO_CN23XX_PKI_MAX_FRAME_SIZE * num_descs;
80 
81 	error = bus_dma_tag_create(bus_get_dma_tag(oct->device),	/* parent */
82 				   1, 0,				/* alignment, bounds */
83 				   BUS_SPACE_MAXADDR,			/* lowaddr */
84 				   BUS_SPACE_MAXADDR,			/* highaddr */
85 				   NULL, NULL,				/* filter, filterarg */
86 				   max_size,				/* maxsize */
87 				   LIO_MAX_SG,				/* nsegments */
88 				   PAGE_SIZE,				/* maxsegsize */
89 				   0,					/* flags */
90 				   NULL,				/* lockfunc */
91 				   NULL,				/* lockfuncarg */
92 				   &iq->txtag);
93 	if (error) {
94 		lio_dev_err(oct, "Cannot allocate memory for instr queue %d\n",
95 			    iq_no);
96 		return (1);
97 	}
98 
99 	iq->base_addr = lio_dma_alloc(q_size, (vm_paddr_t *)&iq->base_addr_dma);
100 	if (!iq->base_addr) {
101 		lio_dev_err(oct, "Cannot allocate memory for instr queue %d\n",
102 			    iq_no);
103 		return (1);
104 	}
105 
106 	iq->max_count = num_descs;
107 
108 	/*
109 	 * Initialize a list to holds requests that have been posted to
110 	 * Octeon but has yet to be fetched by octeon
111 	 */
112 	iq->request_list = malloc(sizeof(*iq->request_list) * num_descs,
113 				  M_DEVBUF, M_NOWAIT | M_ZERO);
114 	if (iq->request_list == NULL) {
115 		lio_dev_err(oct, "Alloc failed for IQ[%d] nr free list\n",
116 			    iq_no);
117 		return (1);
118 	}
119 
120 	lio_dev_dbg(oct, "IQ[%d]: base: %p basedma: %llx count: %d\n",
121 		    iq_no, iq->base_addr, LIO_CAST64(iq->base_addr_dma),
122 		    iq->max_count);
123 
124 	/* Create the descriptor buffer dma maps */
125 	request_buf = iq->request_list;
126 	for (i = 0; i < num_descs; i++, request_buf++) {
127 		error = bus_dmamap_create(iq->txtag, 0, &request_buf->map);
128 		if (error) {
129 			lio_dev_err(oct, "Unable to create TX DMA map\n");
130 			return (1);
131 		}
132 	}
133 
134 	iq->txpciq.txpciq64 = txpciq.txpciq64;
135 	iq->fill_cnt = 0;
136 	iq->host_write_index = 0;
137 	iq->octeon_read_index = 0;
138 	iq->flush_index = 0;
139 	iq->last_db_time = 0;
140 	iq->db_timeout = (uint32_t)conf->db_timeout;
141 	atomic_store_rel_int(&iq->instr_pending, 0);
142 
143 	/* Initialize the lock for this instruction queue */
144 	mtx_init(&iq->lock, "Tx_lock", NULL, MTX_DEF);
145 	mtx_init(&iq->post_lock, "iq_post_lock", NULL, MTX_DEF);
146 	mtx_init(&iq->enq_lock, "enq_lock", NULL, MTX_DEF);
147 
148 	mtx_init(&iq->iq_flush_running_lock, "iq_flush_running_lock", NULL,
149 		 MTX_DEF);
150 
151 	oct->io_qmask.iq |= BIT_ULL(iq_no);
152 
153 	/* Set the 32B/64B mode for each input queue */
154 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
155 	iq->iqcmd_64B = (conf->instr_type == 64);
156 
157 	oct->fn_list.setup_iq_regs(oct, iq_no);
158 
159 	db_tq = &oct->check_db_tq[iq_no];
160 	db_tq->tq = taskqueue_create("lio_check_db_timeout", M_WAITOK,
161 				     taskqueue_thread_enqueue, &db_tq->tq);
162 
163 	TIMEOUT_TASK_INIT(db_tq->tq, &db_tq->work, 0, lio_check_db_timeout,
164 			  (void *)db_tq);
165 	db_tq->ctxul = iq_no;
166 	db_tq->ctxptr = oct;
167 
168 	taskqueue_start_threads(&db_tq->tq, 1, PI_NET,
169 				"lio%d_check_db_timeout:%d",
170 				oct->octeon_id, iq_no);
171 	taskqueue_enqueue_timeout(db_tq->tq, &db_tq->work, 1);
172 
173 	/* Allocate a buf ring */
174 	oct->instr_queue[iq_no]->br =
175 		buf_ring_alloc(LIO_BR_SIZE, M_DEVBUF, M_WAITOK,
176 			       &oct->instr_queue[iq_no]->enq_lock);
177 
178 	return (0);
179 }
180 
181 int
182 lio_delete_instr_queue(struct octeon_device *oct, uint32_t iq_no)
183 {
184 	struct lio_instr_queue		*iq = oct->instr_queue[iq_no];
185 	struct lio_request_list		*request_buf;
186 	struct lio_mbuf_free_info	*finfo;
187 	uint64_t			desc_size = 0, q_size;
188 	int				i;
189 
190 	lio_dev_dbg(oct, "%s[%d]\n", __func__, iq_no);
191 
192 	if (oct->check_db_tq[iq_no].tq != NULL) {
193 		while (taskqueue_cancel_timeout(oct->check_db_tq[iq_no].tq,
194 						&oct->check_db_tq[iq_no].work,
195 						NULL))
196 			taskqueue_drain_timeout(oct->check_db_tq[iq_no].tq,
197 						&oct->check_db_tq[iq_no].work);
198 		taskqueue_free(oct->check_db_tq[iq_no].tq);
199 		oct->check_db_tq[iq_no].tq = NULL;
200 	}
201 
202 	if (LIO_CN23XX_PF(oct))
203 		desc_size =
204 		    LIO_GET_IQ_INSTR_TYPE_CFG(LIO_CHIP_CONF(oct, cn23xx_pf));
205 
206 	request_buf = iq->request_list;
207 	for (i = 0; i < iq->max_count; i++, request_buf++) {
208 		if ((request_buf->reqtype == LIO_REQTYPE_NORESP_NET) ||
209 		    (request_buf->reqtype == LIO_REQTYPE_NORESP_NET_SG)) {
210 			if (request_buf->buf != NULL) {
211 				finfo = request_buf->buf;
212 				bus_dmamap_sync(iq->txtag, request_buf->map,
213 						BUS_DMASYNC_POSTWRITE);
214 				bus_dmamap_unload(iq->txtag,
215 						  request_buf->map);
216 				m_freem(finfo->mb);
217 				request_buf->buf = NULL;
218 				if (request_buf->map != NULL) {
219 					bus_dmamap_destroy(iq->txtag,
220 							   request_buf->map);
221 					request_buf->map = NULL;
222 				}
223 			} else if (request_buf->map != NULL) {
224 				bus_dmamap_unload(iq->txtag, request_buf->map);
225 				bus_dmamap_destroy(iq->txtag, request_buf->map);
226 				request_buf->map = NULL;
227 			}
228 		}
229 	}
230 
231 	if (iq->br != NULL) {
232 		buf_ring_free(iq->br, M_DEVBUF);
233 		iq->br = NULL;
234 	}
235 
236 	if (iq->request_list != NULL) {
237 		free(iq->request_list, M_DEVBUF);
238 		iq->request_list = NULL;
239 	}
240 
241 	if (iq->txtag != NULL) {
242 		bus_dma_tag_destroy(iq->txtag);
243 		iq->txtag = NULL;
244 	}
245 
246 	if (iq->base_addr) {
247 		q_size = iq->max_count * desc_size;
248 		lio_dma_free((uint32_t)q_size, iq->base_addr);
249 
250 		oct->io_qmask.iq &= ~(1ULL << iq_no);
251 		bzero(oct->instr_queue[iq_no], sizeof(struct lio_instr_queue));
252 		oct->num_iqs--;
253 
254 		return (0);
255 	}
256 
257 	return (1);
258 }
259 
260 /* Return 0 on success, 1 on failure */
261 int
262 lio_setup_iq(struct octeon_device *oct, int ifidx, int q_index,
263 	     union octeon_txpciq txpciq, uint32_t num_descs)
264 {
265 	uint32_t	iq_no = (uint32_t)txpciq.s.q_no;
266 
267 	if (oct->instr_queue[iq_no]->oct_dev != NULL) {
268 		lio_dev_dbg(oct, "IQ is in use. Cannot create the IQ: %d again\n",
269 			    iq_no);
270 		oct->instr_queue[iq_no]->txpciq.txpciq64 = txpciq.txpciq64;
271 		return (0);
272 	}
273 
274 	oct->instr_queue[iq_no]->q_index = q_index;
275 	oct->instr_queue[iq_no]->ifidx = ifidx;
276 
277 	if (lio_init_instr_queue(oct, txpciq, num_descs)) {
278 		lio_delete_instr_queue(oct, iq_no);
279 		return (1);
280 	}
281 
282 	oct->num_iqs++;
283 	if (oct->fn_list.enable_io_queues(oct))
284 		return (1);
285 
286 	return (0);
287 }
288 
289 int
290 lio_wait_for_instr_fetch(struct octeon_device *oct)
291 {
292 	int	i, retry = 1000, pending, instr_cnt = 0;
293 
294 	do {
295 		instr_cnt = 0;
296 
297 		for (i = 0; i < LIO_MAX_INSTR_QUEUES(oct); i++) {
298 			if (!(oct->io_qmask.iq & BIT_ULL(i)))
299 				continue;
300 			pending = atomic_load_acq_int(
301 					&oct->instr_queue[i]->instr_pending);
302 			if (pending)
303 				__lio_check_db_timeout(oct, i);
304 			instr_cnt += pending;
305 		}
306 
307 		if (instr_cnt == 0)
308 			break;
309 
310 		lio_sleep_timeout(1);
311 
312 	} while (retry-- && instr_cnt);
313 
314 	return (instr_cnt);
315 }
316 
317 static inline void
318 lio_ring_doorbell(struct octeon_device *oct, struct lio_instr_queue *iq)
319 {
320 
321 	if (atomic_load_acq_int(&oct->status) == LIO_DEV_RUNNING) {
322 		lio_write_csr32(oct, iq->doorbell_reg, iq->fill_cnt);
323 		/* make sure doorbell write goes through */
324 		__compiler_membar();
325 		iq->fill_cnt = 0;
326 		iq->last_db_time = ticks;
327 		return;
328 	}
329 }
330 
331 static inline void
332 __lio_copy_cmd_into_iq(struct lio_instr_queue *iq, uint8_t *cmd)
333 {
334 	uint8_t	*iqptr, cmdsize;
335 
336 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
337 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
338 
339 	memcpy(iqptr, cmd, cmdsize);
340 }
341 
342 static inline struct lio_iq_post_status
343 __lio_post_command2(struct lio_instr_queue *iq, uint8_t *cmd)
344 {
345 	struct lio_iq_post_status	st;
346 
347 	st.status = LIO_IQ_SEND_OK;
348 
349 	/*
350 	 * This ensures that the read index does not wrap around to the same
351 	 * position if queue gets full before Octeon could fetch any instr.
352 	 */
353 	if (atomic_load_acq_int(&iq->instr_pending) >=
354 	    (int32_t)(iq->max_count - 1)) {
355 		st.status = LIO_IQ_SEND_FAILED;
356 		st.index = -1;
357 		return (st);
358 	}
359 
360 	if (atomic_load_acq_int(&iq->instr_pending) >=
361 	    (int32_t)(iq->max_count - 2))
362 		st.status = LIO_IQ_SEND_STOP;
363 
364 	__lio_copy_cmd_into_iq(iq, cmd);
365 
366 	/* "index" is returned, host_write_index is modified. */
367 	st.index = iq->host_write_index;
368 	iq->host_write_index = lio_incr_index(iq->host_write_index, 1,
369 					      iq->max_count);
370 	iq->fill_cnt++;
371 
372 	/*
373 	 * Flush the command into memory. We need to be sure the data is in
374 	 * memory before indicating that the instruction is pending.
375 	 */
376 	wmb();
377 
378 	atomic_add_int(&iq->instr_pending, 1);
379 
380 	return (st);
381 }
382 
383 static inline void
384 __lio_add_to_request_list(struct lio_instr_queue *iq, int idx, void *buf,
385 			  int reqtype)
386 {
387 
388 	iq->request_list[idx].buf = buf;
389 	iq->request_list[idx].reqtype = reqtype;
390 }
391 
392 /* Can only run in process context */
393 int
394 lio_process_iq_request_list(struct octeon_device *oct,
395 			    struct lio_instr_queue *iq, uint32_t budget)
396 {
397 	struct lio_soft_command		*sc;
398 	struct octeon_instr_irh		*irh = NULL;
399 	void				*buf;
400 	uint32_t			inst_count = 0;
401 	uint32_t			old = iq->flush_index;
402 	int				reqtype;
403 
404 	while (old != iq->octeon_read_index) {
405 		reqtype = iq->request_list[old].reqtype;
406 		buf = iq->request_list[old].buf;
407 
408 		if (reqtype == LIO_REQTYPE_NONE)
409 			goto skip_this;
410 
411 		switch (reqtype) {
412 		case LIO_REQTYPE_NORESP_NET:
413 			lio_free_mbuf(iq, buf);
414 			break;
415 		case LIO_REQTYPE_NORESP_NET_SG:
416 			lio_free_sgmbuf(iq, buf);
417 			break;
418 		case LIO_REQTYPE_RESP_NET:
419 		case LIO_REQTYPE_SOFT_COMMAND:
420 			sc = buf;
421 			if (LIO_CN23XX_PF(oct))
422 				irh = (struct octeon_instr_irh *)
423 					&sc->cmd.cmd3.irh;
424 			if (irh->rflag) {
425 				/*
426 				 * We're expecting a response from Octeon.
427 				 * It's up to lio_process_ordered_list() to
428 				 * process  sc. Add sc to the ordered soft
429 				 * command response list because we expect
430 				 * a response from Octeon.
431 				 */
432 				mtx_lock(&oct->response_list
433 					 [LIO_ORDERED_SC_LIST].lock);
434 				atomic_add_int(&oct->response_list
435 					       [LIO_ORDERED_SC_LIST].
436 					       pending_req_count, 1);
437 				STAILQ_INSERT_TAIL(&oct->response_list
438 						   [LIO_ORDERED_SC_LIST].
439 						   head, &sc->node, entries);
440 				mtx_unlock(&oct->response_list
441 					   [LIO_ORDERED_SC_LIST].lock);
442 			} else {
443 				if (sc->callback != NULL) {
444 					/* This callback must not sleep */
445 					sc->callback(oct, LIO_REQUEST_DONE,
446 						     sc->callback_arg);
447 				}
448 			}
449 
450 			break;
451 		default:
452 			lio_dev_err(oct, "%s Unknown reqtype: %d buf: %p at idx %d\n",
453 				    __func__, reqtype, buf, old);
454 		}
455 
456 		iq->request_list[old].buf = NULL;
457 		iq->request_list[old].reqtype = 0;
458 
459 skip_this:
460 		inst_count++;
461 		old = lio_incr_index(old, 1, iq->max_count);
462 
463 		if ((budget) && (inst_count >= budget))
464 			break;
465 	}
466 
467 	iq->flush_index = old;
468 
469 	return (inst_count);
470 }
471 
472 /* Can only be called from process context */
473 int
474 lio_flush_iq(struct octeon_device *oct, struct lio_instr_queue *iq,
475 	     uint32_t budget)
476 {
477 	uint32_t	inst_processed = 0;
478 	uint32_t	tot_inst_processed = 0;
479 	int		tx_done = 1;
480 
481 	if (!mtx_trylock(&iq->iq_flush_running_lock))
482 		return (tx_done);
483 
484 	mtx_lock(&iq->lock);
485 
486 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
487 
488 	do {
489 		/* Process any outstanding IQ packets. */
490 		if (iq->flush_index == iq->octeon_read_index)
491 			break;
492 
493 		if (budget)
494 			inst_processed =
495 				lio_process_iq_request_list(oct, iq,
496 							    budget -
497 							    tot_inst_processed);
498 		else
499 			inst_processed =
500 				lio_process_iq_request_list(oct, iq, 0);
501 
502 		if (inst_processed) {
503 			atomic_subtract_int(&iq->instr_pending, inst_processed);
504 			iq->stats.instr_processed += inst_processed;
505 		}
506 		tot_inst_processed += inst_processed;
507 		inst_processed = 0;
508 
509 	} while (tot_inst_processed < budget);
510 
511 	if (budget && (tot_inst_processed >= budget))
512 		tx_done = 0;
513 
514 	iq->last_db_time = ticks;
515 
516 	mtx_unlock(&iq->lock);
517 
518 	mtx_unlock(&iq->iq_flush_running_lock);
519 
520 	return (tx_done);
521 }
522 
523 /*
524  * Process instruction queue after timeout.
525  * This routine gets called from a taskqueue or when removing the module.
526  */
527 static void
528 __lio_check_db_timeout(struct octeon_device *oct, uint64_t iq_no)
529 {
530 	struct lio_instr_queue	*iq;
531 	uint64_t		next_time;
532 
533 	if (oct == NULL)
534 		return;
535 
536 	iq = oct->instr_queue[iq_no];
537 	if (iq == NULL)
538 		return;
539 
540 	if (atomic_load_acq_int(&iq->instr_pending)) {
541 		/* If ticks - last_db_time < db_timeout do nothing  */
542 		next_time = iq->last_db_time + lio_ms_to_ticks(iq->db_timeout);
543 		if (!lio_check_timeout(ticks, next_time))
544 			return;
545 
546 		iq->last_db_time = ticks;
547 
548 		/* Flush the instruction queue */
549 		lio_flush_iq(oct, iq, 0);
550 
551 		lio_enable_irq(NULL, iq);
552 	}
553 
554 	if (oct->props.ifp != NULL && iq->br != NULL) {
555 		if (mtx_trylock(&iq->enq_lock)) {
556 			if (!drbr_empty(oct->props.ifp, iq->br))
557 				lio_mq_start_locked(oct->props.ifp, iq);
558 
559 			mtx_unlock(&iq->enq_lock);
560 		}
561 	}
562 }
563 
564 /*
565  * Called by the Poll thread at regular intervals to check the instruction
566  * queue for commands to be posted and for commands that were fetched by Octeon.
567  */
568 static void
569 lio_check_db_timeout(void *arg, int pending)
570 {
571 	struct lio_tq		*db_tq = (struct lio_tq *)arg;
572 	struct octeon_device	*oct = db_tq->ctxptr;
573 	uint64_t		iq_no = db_tq->ctxul;
574 	uint32_t		delay = 10;
575 
576 	__lio_check_db_timeout(oct, iq_no);
577 	taskqueue_enqueue_timeout(db_tq->tq, &db_tq->work,
578 				  lio_ms_to_ticks(delay));
579 }
580 
581 int
582 lio_send_command(struct octeon_device *oct, uint32_t iq_no,
583 		 uint32_t force_db, void *cmd, void *buf,
584 		 uint32_t datasize, uint32_t reqtype)
585 {
586 	struct lio_iq_post_status	st;
587 	struct lio_instr_queue		*iq = oct->instr_queue[iq_no];
588 
589 	/*
590 	 * Get the lock and prevent other tasks and tx interrupt handler
591 	 * from running.
592 	 */
593 	mtx_lock(&iq->post_lock);
594 
595 	st = __lio_post_command2(iq, cmd);
596 
597 	if (st.status != LIO_IQ_SEND_FAILED) {
598 		__lio_add_to_request_list(iq, st.index, buf, reqtype);
599 		LIO_INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
600 		LIO_INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
601 
602 		if (force_db || (st.status == LIO_IQ_SEND_STOP))
603 			lio_ring_doorbell(oct, iq);
604 	} else {
605 		LIO_INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
606 	}
607 
608 	mtx_unlock(&iq->post_lock);
609 
610 	/*
611 	 * This is only done here to expedite packets being flushed for
612 	 * cases where there are no IQ completion interrupts.
613 	 */
614 
615 	return (st.status);
616 }
617 
618 void
619 lio_prepare_soft_command(struct octeon_device *oct, struct lio_soft_command *sc,
620 			 uint8_t opcode, uint8_t subcode, uint32_t irh_ossp,
621 			 uint64_t ossp0, uint64_t ossp1)
622 {
623 	struct octeon_instr_ih3		*ih3;
624 	struct octeon_instr_pki_ih3	*pki_ih3;
625 	struct octeon_instr_irh		*irh;
626 	struct octeon_instr_rdp		*rdp;
627 
628 	KASSERT(opcode <= 15, ("%s, %d, opcode > 15", __func__, __LINE__));
629 	KASSERT(subcode <= 127, ("%s, %d, opcode > 127", __func__, __LINE__));
630 
631 	if (LIO_CN23XX_PF(oct)) {
632 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
633 
634 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
635 
636 		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
637 
638 		pki_ih3->w = 1;
639 		pki_ih3->raw = 1;
640 		pki_ih3->utag = 1;
641 		pki_ih3->uqpg = oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
642 		pki_ih3->utt = 1;
643 		pki_ih3->tag = LIO_CONTROL;
644 		pki_ih3->tagtype = LIO_ATOMIC_TAG;
645 		pki_ih3->qpg = oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
646 		pki_ih3->pm = 0x7;
647 		pki_ih3->sl = 8;
648 
649 		if (sc->datasize)
650 			ih3->dlengsz = sc->datasize;
651 
652 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
653 		irh->opcode = opcode;
654 		irh->subcode = subcode;
655 
656 		/* opcode/subcode specific parameters (ossp) */
657 		irh->ossp = irh_ossp;
658 		sc->cmd.cmd3.ossp[0] = ossp0;
659 		sc->cmd.cmd3.ossp[1] = ossp1;
660 
661 		if (sc->rdatasize) {
662 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
663 			rdp->pcie_port = oct->pcie_port;
664 			rdp->rlen = sc->rdatasize;
665 
666 			irh->rflag = 1;
667 			/* PKI IH3 */
668 			/* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
669 			ih3->fsz = LIO_SOFTCMDRESP_IH3;
670 		} else {
671 			irh->rflag = 0;
672 			/* PKI IH3 */
673 			/* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */
674 			ih3->fsz = LIO_PCICMD_O3;
675 		}
676 	}
677 }
678 
679 int
680 lio_send_soft_command(struct octeon_device *oct, struct lio_soft_command *sc)
681 {
682 	struct octeon_instr_ih3	*ih3;
683 	struct octeon_instr_irh	*irh;
684 	uint32_t		len = 0;
685 
686 	if (LIO_CN23XX_PF(oct)) {
687 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
688 		if (ih3->dlengsz) {
689 			KASSERT(sc->dmadptr, ("%s, %d, sc->dmadptr is NULL",
690 					      __func__, __LINE__));
691 			sc->cmd.cmd3.dptr = sc->dmadptr;
692 		}
693 
694 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
695 		if (irh->rflag) {
696 			KASSERT(sc->dmarptr, ("%s, %d, sc->dmarptr is NULL",
697 					      __func__, __LINE__));
698 			KASSERT(sc->status_word, ("%s, %d, sc->status_word is NULL",
699 						  __func__, __LINE__));
700 			*sc->status_word = COMPLETION_WORD_INIT;
701 			sc->cmd.cmd3.rptr = sc->dmarptr;
702 		}
703 		len = (uint32_t)ih3->dlengsz;
704 	}
705 	if (sc->wait_time)
706 		sc->timeout = ticks + lio_ms_to_ticks(sc->wait_time);
707 
708 	return (lio_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
709 				 len, LIO_REQTYPE_SOFT_COMMAND));
710 }
711 
712 int
713 lio_setup_sc_buffer_pool(struct octeon_device *oct)
714 {
715 	struct lio_soft_command	*sc;
716 	uint64_t		dma_addr;
717 	int			i;
718 
719 	STAILQ_INIT(&oct->sc_buf_pool.head);
720 	mtx_init(&oct->sc_buf_pool.lock, "sc_pool_lock", NULL, MTX_DEF);
721 	atomic_store_rel_int(&oct->sc_buf_pool.alloc_buf_count, 0);
722 
723 	for (i = 0; i < LIO_MAX_SOFT_COMMAND_BUFFERS; i++) {
724 		sc = (struct lio_soft_command *)
725 			lio_dma_alloc(LIO_SOFT_COMMAND_BUFFER_SIZE, (vm_paddr_t *)&dma_addr);
726 		if (sc == NULL) {
727 			lio_free_sc_buffer_pool(oct);
728 			return (1);
729 		}
730 
731 		sc->dma_addr = dma_addr;
732 		sc->size = LIO_SOFT_COMMAND_BUFFER_SIZE;
733 
734 		STAILQ_INSERT_TAIL(&oct->sc_buf_pool.head, &sc->node, entries);
735 	}
736 
737 	return (0);
738 }
739 
740 int
741 lio_free_sc_buffer_pool(struct octeon_device *oct)
742 {
743 	struct lio_stailq_node	*tmp, *tmp2;
744 	struct lio_soft_command	*sc;
745 
746 	mtx_lock(&oct->sc_buf_pool.lock);
747 
748 	STAILQ_FOREACH_SAFE(tmp, &oct->sc_buf_pool.head, entries, tmp2) {
749 		sc = LIO_STAILQ_FIRST_ENTRY(&oct->sc_buf_pool.head,
750 					    struct lio_soft_command, node);
751 
752 		STAILQ_REMOVE_HEAD(&oct->sc_buf_pool.head, entries);
753 
754 		lio_dma_free(sc->size, sc);
755 	}
756 
757 	STAILQ_INIT(&oct->sc_buf_pool.head);
758 
759 	mtx_unlock(&oct->sc_buf_pool.lock);
760 
761 	return (0);
762 }
763 
764 struct lio_soft_command *
765 lio_alloc_soft_command(struct octeon_device *oct, uint32_t datasize,
766 		       uint32_t rdatasize, uint32_t ctxsize)
767 {
768 	struct lio_soft_command	*sc = NULL;
769 	struct lio_stailq_node	*tmp;
770 	uint64_t		dma_addr;
771 	uint32_t		size;
772 	uint32_t		offset = sizeof(struct lio_soft_command);
773 
774 	KASSERT((offset + datasize + rdatasize + ctxsize) <=
775 		LIO_SOFT_COMMAND_BUFFER_SIZE,
776 		("%s, %d, offset + datasize + rdatasize + ctxsize > LIO_SOFT_COMMAND_BUFFER_SIZE",
777 		 __func__, __LINE__));
778 
779 	mtx_lock(&oct->sc_buf_pool.lock);
780 
781 	if (STAILQ_EMPTY(&oct->sc_buf_pool.head)) {
782 		mtx_unlock(&oct->sc_buf_pool.lock);
783 		return (NULL);
784 	}
785 	tmp = STAILQ_LAST(&oct->sc_buf_pool.head, lio_stailq_node, entries);
786 
787 	STAILQ_REMOVE(&oct->sc_buf_pool.head, tmp, lio_stailq_node, entries);
788 
789 	atomic_add_int(&oct->sc_buf_pool.alloc_buf_count, 1);
790 
791 	mtx_unlock(&oct->sc_buf_pool.lock);
792 
793 	sc = (struct lio_soft_command *)tmp;
794 
795 	dma_addr = sc->dma_addr;
796 	size = sc->size;
797 
798 	bzero(sc, sc->size);
799 
800 	sc->dma_addr = dma_addr;
801 	sc->size = size;
802 
803 	if (ctxsize) {
804 		sc->ctxptr = (uint8_t *)sc + offset;
805 		sc->ctxsize = ctxsize;
806 	}
807 
808 	/* Start data at 128 byte boundary */
809 	offset = (offset + ctxsize + 127) & 0xffffff80;
810 
811 	if (datasize) {
812 		sc->virtdptr = (uint8_t *)sc + offset;
813 		sc->dmadptr = dma_addr + offset;
814 		sc->datasize = datasize;
815 	}
816 	/* Start rdata at 128 byte boundary */
817 	offset = (offset + datasize + 127) & 0xffffff80;
818 
819 	if (rdatasize) {
820 		KASSERT(rdatasize >= 16, ("%s, %d, rdatasize < 16", __func__,
821 					  __LINE__));
822 		sc->virtrptr = (uint8_t *)sc + offset;
823 		sc->dmarptr = dma_addr + offset;
824 		sc->rdatasize = rdatasize;
825 		sc->status_word = (uint64_t *)((uint8_t *)(sc->virtrptr) +
826 					       rdatasize - 8);
827 	}
828 	return (sc);
829 }
830 
831 void
832 lio_free_soft_command(struct octeon_device *oct,
833 		      struct lio_soft_command *sc)
834 {
835 
836 	mtx_lock(&oct->sc_buf_pool.lock);
837 
838 	STAILQ_INSERT_TAIL(&oct->sc_buf_pool.head, &sc->node, entries);
839 
840 	atomic_subtract_int(&oct->sc_buf_pool.alloc_buf_count, 1);
841 
842 	mtx_unlock(&oct->sc_buf_pool.lock);
843 }
844