xref: /linux/drivers/net/ethernet/cavium/liquidio/request_manager.c (revision 24168c5e6dfbdd5b414f048f47f75d64533296ca)
1 /**********************************************************************
2  * Author: Cavium, Inc.
3  *
4  * Contact: support@cavium.com
5  *          Please include "LiquidIO" in the subject.
6  *
7  * Copyright (c) 2003-2016 Cavium, Inc.
8  *
9  * This file is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License, Version 2, as
11  * published by the Free Software Foundation.
12  *
13  * This file is distributed in the hope that it will be useful, but
14  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
15  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
16  * NONINFRINGEMENT.  See the GNU General Public License for more
17  * details.
18  **********************************************************************/
19 #include <linux/pci.h>
20 #include <linux/netdevice.h>
21 #include <linux/vmalloc.h>
22 #include "liquidio_common.h"
23 #include "octeon_droq.h"
24 #include "octeon_iq.h"
25 #include "response_manager.h"
26 #include "octeon_device.h"
27 #include "octeon_main.h"
28 #include "octeon_network.h"
29 #include "cn66xx_device.h"
30 #include "cn23xx_pf_device.h"
31 #include "cn23xx_vf_device.h"
32 
33 struct iq_post_status {
34 	int status;
35 	int index;
36 };
37 
38 static void check_db_timeout(struct work_struct *work);
39 static void __check_db_timeout(struct octeon_device *oct, u64 iq_no);
40 
41 static void (*reqtype_free_fn[MAX_OCTEON_DEVICES][REQTYPE_LAST + 1]) (void *);
42 
43 /* Define this to return the request status compatible with old code */
44 /*#define OCTEON_USE_OLD_REQ_STATUS*/
45 
46 /* Return 0 on success, 1 on failure */
47 int octeon_init_instr_queue(struct octeon_device *oct,
48 			    union oct_txpciq txpciq,
49 			    u32 num_descs)
50 {
51 	struct octeon_instr_queue *iq;
52 	struct octeon_iq_config *conf = NULL;
53 	u32 iq_no = (u32)txpciq.s.q_no;
54 	u32 q_size;
55 	struct cavium_wq *db_wq;
56 	int numa_node = dev_to_node(&oct->pci_dev->dev);
57 
58 	if (OCTEON_CN6XXX(oct))
59 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
60 	else if (OCTEON_CN23XX_PF(oct))
61 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
62 	else if (OCTEON_CN23XX_VF(oct))
63 		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));
64 
65 	if (!conf) {
66 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
67 			oct->chip_id);
68 		return 1;
69 	}
70 
71 	q_size = (u32)conf->instr_type * num_descs;
72 
73 	iq = oct->instr_queue[iq_no];
74 
75 	iq->oct_dev = oct;
76 
77 	iq->base_addr = lio_dma_alloc(oct, q_size, &iq->base_addr_dma);
78 	if (!iq->base_addr) {
79 		dev_err(&oct->pci_dev->dev, "Cannot allocate memory for instr queue %d\n",
80 			iq_no);
81 		return 1;
82 	}
83 
84 	iq->max_count = num_descs;
85 
86 	/* Initialize a list to hold requests that have been posted to Octeon
87 	 * but have yet to be fetched by Octeon.
88 	 */
89 	iq->request_list = vzalloc_node(array_size(num_descs, sizeof(*iq->request_list)),
90 					numa_node);
91 	if (!iq->request_list)
92 		iq->request_list = vzalloc(array_size(num_descs, sizeof(*iq->request_list)));
93 	if (!iq->request_list) {
94 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
95 		dev_err(&oct->pci_dev->dev, "Alloc failed for IQ[%d] nr free list\n",
96 			iq_no);
97 		return 1;
98 	}
99 
100 	dev_dbg(&oct->pci_dev->dev, "IQ[%d]: base: %p basedma: %pad count: %d\n",
101 		iq_no, iq->base_addr, &iq->base_addr_dma, iq->max_count);
102 
103 	iq->txpciq.u64 = txpciq.u64;
104 	iq->fill_threshold = (u32)conf->db_min;
105 	iq->fill_cnt = 0;
106 	iq->host_write_index = 0;
107 	iq->octeon_read_index = 0;
108 	iq->flush_index = 0;
109 	iq->last_db_time = 0;
110 	iq->do_auto_flush = 1;
111 	iq->db_timeout = (u32)conf->db_timeout;
112 	atomic_set(&iq->instr_pending, 0);
113 	iq->pkts_processed = 0;
114 
115 	/* Initialize the spinlock for this instruction queue */
116 	spin_lock_init(&iq->lock);
117 	if (iq_no == 0) {
118 		iq->allow_soft_cmds = true;
119 		spin_lock_init(&iq->post_lock);
120 	} else {
121 		iq->allow_soft_cmds = false;
122 	}
123 
124 	spin_lock_init(&iq->iq_flush_running_lock);
125 
126 	oct->io_qmask.iq |= BIT_ULL(iq_no);
127 
128 	/* Set the 32B/64B mode for each input queue */
129 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
130 	iq->iqcmd_64B = (conf->instr_type == 64);
131 
132 	oct->fn_list.setup_iq_regs(oct, iq_no);
133 
134 	oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db",
135 						     WQ_MEM_RECLAIM,
136 						     0);
137 	if (!oct->check_db_wq[iq_no].wq) {
138 		vfree(iq->request_list);
139 		iq->request_list = NULL;
140 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
141 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
142 			iq_no);
143 		return 1;
144 	}
145 
146 	db_wq = &oct->check_db_wq[iq_no];
147 
148 	INIT_DELAYED_WORK(&db_wq->wk.work, check_db_timeout);
149 	db_wq->wk.ctxptr = oct;
150 	db_wq->wk.ctxul = iq_no;
151 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1));
152 
153 	return 0;
154 }
155 
156 int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
157 {
158 	u64 desc_size = 0, q_size;
159 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
160 
161 	cancel_delayed_work_sync(&oct->check_db_wq[iq_no].wk.work);
162 	destroy_workqueue(oct->check_db_wq[iq_no].wq);
163 
164 	if (OCTEON_CN6XXX(oct))
165 		desc_size =
166 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
167 	else if (OCTEON_CN23XX_PF(oct))
168 		desc_size =
169 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
170 	else if (OCTEON_CN23XX_VF(oct))
171 		desc_size =
172 		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));
173 
174 	vfree(iq->request_list);
175 
176 	if (iq->base_addr) {
177 		q_size = iq->max_count * desc_size;
178 		lio_dma_free(oct, (u32)q_size, iq->base_addr,
179 			     iq->base_addr_dma);
180 		oct->io_qmask.iq &= ~(1ULL << iq_no);
181 		vfree(oct->instr_queue[iq_no]);
182 		oct->instr_queue[iq_no] = NULL;
183 		oct->num_iqs--;
184 		return 0;
185 	}
186 	return 1;
187 }
188 EXPORT_SYMBOL_GPL(octeon_delete_instr_queue);
189 
190 /* Return 0 on success, 1 on failure */
191 int octeon_setup_iq(struct octeon_device *oct,
192 		    int ifidx,
193 		    int q_index,
194 		    union oct_txpciq txpciq,
195 		    u32 num_descs,
196 		    void *app_ctx)
197 {
198 	u32 iq_no = (u32)txpciq.s.q_no;
199 	int numa_node = dev_to_node(&oct->pci_dev->dev);
200 
201 	if (oct->instr_queue[iq_no]) {
202 		dev_dbg(&oct->pci_dev->dev, "IQ is in use. Cannot create the IQ: %d again\n",
203 			iq_no);
204 		oct->instr_queue[iq_no]->txpciq.u64 = txpciq.u64;
205 		oct->instr_queue[iq_no]->app_ctx = app_ctx;
206 		return 0;
207 	}
208 	oct->instr_queue[iq_no] =
209 	    vzalloc_node(sizeof(struct octeon_instr_queue), numa_node);
210 	if (!oct->instr_queue[iq_no])
211 		oct->instr_queue[iq_no] =
212 		    vzalloc(sizeof(struct octeon_instr_queue));
213 	if (!oct->instr_queue[iq_no])
214 		return 1;
215 
216 
217 	oct->instr_queue[iq_no]->q_index = q_index;
218 	oct->instr_queue[iq_no]->app_ctx = app_ctx;
219 	oct->instr_queue[iq_no]->ifidx = ifidx;
220 
221 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
222 		vfree(oct->instr_queue[iq_no]);
223 		oct->instr_queue[iq_no] = NULL;
224 		return 1;
225 	}
226 
227 	oct->num_iqs++;
228 	if (oct->fn_list.enable_io_queues(oct)) {
229 		octeon_delete_instr_queue(oct, iq_no);
230 		return 1;
231 	}
232 
233 	return 0;
234 }
235 
236 int lio_wait_for_instr_fetch(struct octeon_device *oct)
237 {
238 	int i, retry = 1000, pending, instr_cnt = 0;
239 
240 	do {
241 		instr_cnt = 0;
242 
243 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
244 			if (!(oct->io_qmask.iq & BIT_ULL(i)))
245 				continue;
246 			pending =
247 			    atomic_read(&oct->instr_queue[i]->instr_pending);
248 			if (pending)
249 				__check_db_timeout(oct, i);
250 			instr_cnt += pending;
251 		}
252 
253 		if (instr_cnt == 0)
254 			break;
255 
256 		schedule_timeout_uninterruptible(1);
257 
258 	} while (retry-- && instr_cnt);
259 
260 	return instr_cnt;
261 }
262 EXPORT_SYMBOL_GPL(lio_wait_for_instr_fetch);
263 
264 static inline void
265 ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq)
266 {
267 	if (atomic_read(&oct->status) == OCT_DEV_RUNNING) {
268 		writel(iq->fill_cnt, iq->doorbell_reg);
269 		/* make sure doorbell write goes through */
270 		iq->fill_cnt = 0;
271 		iq->last_db_time = jiffies;
272 		return;
273 	}
274 }
275 
276 void
277 octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no)
278 {
279 	struct octeon_instr_queue *iq;
280 
281 	iq = oct->instr_queue[iq_no];
282 	spin_lock(&iq->post_lock);
283 	if (iq->fill_cnt)
284 		ring_doorbell(oct, iq);
285 	spin_unlock(&iq->post_lock);
286 }
287 EXPORT_SYMBOL_GPL(octeon_ring_doorbell_locked);
288 
289 static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq,
290 				      u8 *cmd)
291 {
292 	u8 *iqptr, cmdsize;
293 
294 	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
295 	iqptr = iq->base_addr + (cmdsize * iq->host_write_index);
296 
297 	memcpy(iqptr, cmd, cmdsize);
298 }
299 
300 static inline struct iq_post_status
301 __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
302 {
303 	struct iq_post_status st;
304 
305 	st.status = IQ_SEND_OK;
306 
307 	/* This ensures that the read index does not wrap around to the same
308 	 * position if the queue gets full before Octeon can fetch any instruction.
309 	 */
310 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) {
311 		st.status = IQ_SEND_FAILED;
312 		st.index = -1;
313 		return st;
314 	}
315 
316 	if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 2))
317 		st.status = IQ_SEND_STOP;
318 
319 	__copy_cmd_into_iq(iq, cmd);
320 
321 	/* "index" is returned, host_write_index is modified. */
322 	st.index = iq->host_write_index;
323 	iq->host_write_index = incr_index(iq->host_write_index, 1,
324 					  iq->max_count);
325 	iq->fill_cnt++;
326 
327 	/* Flush the command into memory. We need to be sure the data is in
328 	 * memory before indicating that the instruction is pending.
329 	 */
330 	wmb();
331 
332 	atomic_inc(&iq->instr_pending);
333 
334 	return st;
335 }
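/* Note: __post_command2() treats the descriptor ring as a circular buffer.
 * host_write_index advances modulo max_count via incr_index(), while
 * instr_pending counts commands Octeon has not yet fetched; one descriptor is
 * always left unused so a completely full queue cannot be mistaken for an
 * empty one, and IQ_SEND_STOP warns the caller when the ring is nearly full.
 */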
336 
337 int
338 octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype,
339 				void (*fn)(void *))
340 {
341 	if (reqtype > REQTYPE_LAST) {
342 		dev_err(&oct->pci_dev->dev, "%s: Invalid reqtype: %d\n",
343 			__func__, reqtype);
344 		return -EINVAL;
345 	}
346 
347 	reqtype_free_fn[oct->octeon_id][reqtype] = fn;
348 
349 	return 0;
350 }
351 EXPORT_SYMBOL_GPL(octeon_register_reqtype_free_fn);
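/* Usage sketch (illustrative): the NIC modules register one free callback per
 * request type at init time so lio_process_iq_request_list() can release the
 * buffers later. The callback names below are assumptions for the example.
 *
 *	static void free_netbuf(void *buf)
 *	{
 *		// unmap DMA and free the skb carried by buf
 *	}
 *
 *	octeon_register_reqtype_free_fn(oct, REQTYPE_NORESP_NET, free_netbuf);
 *	octeon_register_reqtype_free_fn(oct, REQTYPE_NORESP_NET_SG, free_netsgbuf);
 *	octeon_register_reqtype_free_fn(oct, REQTYPE_RESP_NET_SG,
 *					free_netsgbuf_with_resp);
 */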
352 
353 static inline void
354 __add_to_request_list(struct octeon_instr_queue *iq,
355 		      int idx, void *buf, int reqtype)
356 {
357 	iq->request_list[idx].buf = buf;
358 	iq->request_list[idx].reqtype = reqtype;
359 }
360 
361 /* Can only run in process context */
362 int
363 lio_process_iq_request_list(struct octeon_device *oct,
364 			    struct octeon_instr_queue *iq, u32 napi_budget)
365 {
366 	struct cavium_wq *cwq = &oct->dma_comp_wq;
367 	int reqtype;
368 	void *buf;
369 	u32 old = iq->flush_index;
370 	u32 inst_count = 0;
371 	unsigned int pkts_compl = 0, bytes_compl = 0;
372 	struct octeon_soft_command *sc;
373 	unsigned long flags;
374 
375 	while (old != iq->octeon_read_index) {
376 		reqtype = iq->request_list[old].reqtype;
377 		buf     = iq->request_list[old].buf;
378 
379 		if (reqtype == REQTYPE_NONE)
380 			goto skip_this;
381 
382 		octeon_update_tx_completion_counters(buf, reqtype, &pkts_compl,
383 						     &bytes_compl);
384 
385 		switch (reqtype) {
386 		case REQTYPE_NORESP_NET:
387 		case REQTYPE_NORESP_NET_SG:
388 		case REQTYPE_RESP_NET_SG:
389 			reqtype_free_fn[oct->octeon_id][reqtype](buf);
390 			break;
391 		case REQTYPE_RESP_NET:
392 		case REQTYPE_SOFT_COMMAND:
393 			sc = buf;
394 			/* We're expecting a response from Octeon.
395 			 * Add sc to the ordered soft command
396 			 * response list; lio_process_ordered_list()
397 			 * will process sc when the response
398 			 * arrives.
399 			 */
400 			spin_lock_irqsave(&oct->response_list
401 					  [OCTEON_ORDERED_SC_LIST].lock, flags);
402 			atomic_inc(&oct->response_list
403 				   [OCTEON_ORDERED_SC_LIST].pending_req_count);
404 			list_add_tail(&sc->node, &oct->response_list
405 				[OCTEON_ORDERED_SC_LIST].head);
406 			spin_unlock_irqrestore(&oct->response_list
407 					       [OCTEON_ORDERED_SC_LIST].lock,
408 					       flags);
409 			break;
410 		default:
411 			dev_err(&oct->pci_dev->dev,
412 				"%s Unknown reqtype: %d buf: %p at idx %d\n",
413 				__func__, reqtype, buf, old);
414 		}
415 
416 		iq->request_list[old].buf = NULL;
417 		iq->request_list[old].reqtype = 0;
418 
419  skip_this:
420 		inst_count++;
421 		old = incr_index(old, 1, iq->max_count);
422 
423 		if ((napi_budget) && (inst_count >= napi_budget))
424 			break;
425 	}
426 	if (bytes_compl)
427 		octeon_report_tx_completion_to_bql(iq->app_ctx, pkts_compl,
428 						   bytes_compl);
429 	iq->flush_index = old;
430 
431 	if (atomic_read(&oct->response_list
432 			[OCTEON_ORDERED_SC_LIST].pending_req_count))
433 		queue_work(cwq->wq, &cwq->wk.work.work);
434 
435 	return inst_count;
436 }
437 EXPORT_SYMBOL_GPL(lio_process_iq_request_list);
438 
439 /* Can only be called from process context */
440 int
441 octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq,
442 		u32 napi_budget)
443 {
444 	u32 inst_processed = 0;
445 	u32 tot_inst_processed = 0;
446 	int tx_done = 1;
447 
448 	if (!spin_trylock(&iq->iq_flush_running_lock))
449 		return tx_done;
450 
451 	spin_lock_bh(&iq->lock);
452 
453 	iq->octeon_read_index = oct->fn_list.update_iq_read_idx(iq);
454 
455 	do {
456 		/* Process any outstanding IQ packets. */
457 		if (iq->flush_index == iq->octeon_read_index)
458 			break;
459 
460 		if (napi_budget)
461 			inst_processed =
462 				lio_process_iq_request_list(oct, iq,
463 							    napi_budget -
464 							    tot_inst_processed);
465 		else
466 			inst_processed =
467 				lio_process_iq_request_list(oct, iq, 0);
468 
469 		if (inst_processed) {
470 			iq->pkts_processed += inst_processed;
471 			atomic_sub(inst_processed, &iq->instr_pending);
472 			iq->stats.instr_processed += inst_processed;
473 		}
474 
475 		tot_inst_processed += inst_processed;
476 	} while (tot_inst_processed < napi_budget);
477 
478 	if (napi_budget && (tot_inst_processed >= napi_budget))
479 		tx_done = 0;
480 
481 	iq->last_db_time = jiffies;
482 
483 	spin_unlock_bh(&iq->lock);
484 
485 	spin_unlock(&iq->iq_flush_running_lock);
486 
487 	return tx_done;
488 }
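/* Usage sketch (illustrative): a NAPI poll handler typically drives this path
 * with its budget and uses the return value as a "tx done" indication. The
 * surrounding names are assumptions for the example.
 *
 *	// inside the driver's napi poll callback
 *	tx_done = octeon_flush_iq(oct, iq, budget);
 *	if (rx_done < budget && tx_done) {
 *		napi_complete_done(napi, rx_done);
 *		// re-enable the queue interrupts here
 *	}
 */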
489 
490 /* Process instruction queue after timeout.
491  * This routine gets called from a workqueue or when removing the module.
492  */
493 static void __check_db_timeout(struct octeon_device *oct, u64 iq_no)
494 {
495 	struct octeon_instr_queue *iq;
496 	u64 next_time;
497 
498 	if (!oct)
499 		return;
500 
501 	iq = oct->instr_queue[iq_no];
502 	if (!iq)
503 		return;
504 
505 	/* Return immediately if no work is pending */
506 	if (!atomic_read(&iq->instr_pending))
507 		return;
508 	/* If jiffies - last_db_time < db_timeout, do nothing */
509 	next_time = iq->last_db_time + iq->db_timeout;
510 	if (!time_after(jiffies, (unsigned long)next_time))
511 		return;
512 	iq->last_db_time = jiffies;
513 
514 	/* Flush the instruction queue */
515 	octeon_flush_iq(oct, iq, 0);
516 
517 	lio_enable_irq(NULL, iq);
518 }
519 
520 /* Called at regular intervals from the check_db workqueue to check the instruction
521  * queue for commands to be posted and for commands that were fetched by Octeon.
522  */
523 static void check_db_timeout(struct work_struct *work)
524 {
525 	struct cavium_wk *wk = (struct cavium_wk *)work;
526 	struct octeon_device *oct = (struct octeon_device *)wk->ctxptr;
527 	u64 iq_no = wk->ctxul;
528 	struct cavium_wq *db_wq = &oct->check_db_wq[iq_no];
529 	u32 delay = 10;
530 
531 	__check_db_timeout(oct, iq_no);
532 	queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay));
533 }
534 
535 int
536 octeon_send_command(struct octeon_device *oct, u32 iq_no,
537 		    u32 force_db, void *cmd, void *buf,
538 		    u32 datasize, u32 reqtype)
539 {
540 	int xmit_stopped;
541 	struct iq_post_status st;
542 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
543 
544 	/* Get the lock and prevent other tasks and the tx interrupt handler
545 	 * from running.
546 	 */
547 	if (iq->allow_soft_cmds)
548 		spin_lock_bh(&iq->post_lock);
549 
550 	st = __post_command2(iq, cmd);
551 
552 	if (st.status != IQ_SEND_FAILED) {
553 		xmit_stopped = octeon_report_sent_bytes_to_bql(buf, reqtype);
554 		__add_to_request_list(iq, st.index, buf, reqtype);
555 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize);
556 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1);
557 
558 		if (iq->fill_cnt >= MAX_OCTEON_FILL_COUNT || force_db ||
559 		    xmit_stopped || st.status == IQ_SEND_STOP)
560 			ring_doorbell(oct, iq);
561 	} else {
562 		INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1);
563 	}
564 
565 	if (iq->allow_soft_cmds)
566 		spin_unlock_bh(&iq->post_lock);
567 
568 	/* This is only done here to expedite packets being flushed
569 	 * for cases where there are no IQ completion interrupts.
570 	 */
571 
572 	return st.status;
573 }
574 EXPORT_SYMBOL_GPL(octeon_send_command);
575 
576 void
577 octeon_prepare_soft_command(struct octeon_device *oct,
578 			    struct octeon_soft_command *sc,
579 			    u8 opcode,
580 			    u8 subcode,
581 			    u32 irh_ossp,
582 			    u64 ossp0,
583 			    u64 ossp1)
584 {
585 	struct octeon_config *oct_cfg;
586 	struct octeon_instr_ih2 *ih2;
587 	struct octeon_instr_ih3 *ih3;
588 	struct octeon_instr_pki_ih3 *pki_ih3;
589 	struct octeon_instr_irh *irh;
590 	struct octeon_instr_rdp *rdp;
591 
592 	WARN_ON(opcode > 15);
593 	WARN_ON(subcode > 127);
594 
595 	oct_cfg = octeon_get_conf(oct);
596 
597 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
598 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
599 
600 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
601 
602 		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
603 
604 		pki_ih3->w           = 1;
605 		pki_ih3->raw         = 1;
606 		pki_ih3->utag        = 1;
607 		pki_ih3->uqpg        =
608 			oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
609 		pki_ih3->utt         = 1;
610 		pki_ih3->tag     = LIO_CONTROL;
611 		pki_ih3->tagtype = ATOMIC_TAG;
612 		pki_ih3->qpg         =
613 			oct->instr_queue[sc->iq_no]->txpciq.s.ctrl_qpg;
614 
615 		pki_ih3->pm          = 0x7;
616 		pki_ih3->sl          = 8;
617 
618 		if (sc->datasize)
619 			ih3->dlengsz = sc->datasize;
620 
621 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
622 		irh->opcode    = opcode;
623 		irh->subcode   = subcode;
624 
625 		/* opcode/subcode specific parameters (ossp) */
626 		irh->ossp       = irh_ossp;
627 		sc->cmd.cmd3.ossp[0] = ossp0;
628 		sc->cmd.cmd3.ossp[1] = ossp1;
629 
630 		if (sc->rdatasize) {
631 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
632 			rdp->pcie_port = oct->pcie_port;
633 			rdp->rlen      = sc->rdatasize;
634 
635 			irh->rflag =  1;
636 			/* PKI IH3 */
637 			/* pki_ih3+irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
638 			ih3->fsz    = LIO_SOFTCMDRESP_IH3;
639 		} else {
640 			irh->rflag =  0;
641 			/* PKI IH3 */
642 			/* pki_ih3 + irh + ossp[0] + ossp[1] = 32 bytes */
643 			ih3->fsz    = LIO_PCICMD_O3;
644 		}
645 
646 	} else {
647 		ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
648 		ih2->tagtype = ATOMIC_TAG;
649 		ih2->tag     = LIO_CONTROL;
650 		ih2->raw     = 1;
651 		ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
652 
653 		if (sc->datasize) {
654 			ih2->dlengsz = sc->datasize;
655 			ih2->rs = 1;
656 		}
657 
658 		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
659 		irh->opcode    = opcode;
660 		irh->subcode   = subcode;
661 
662 		/* opcode/subcode specific parameters (ossp) */
663 		irh->ossp       = irh_ossp;
664 		sc->cmd.cmd2.ossp[0] = ossp0;
665 		sc->cmd.cmd2.ossp[1] = ossp1;
666 
667 		if (sc->rdatasize) {
668 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
669 			rdp->pcie_port = oct->pcie_port;
670 			rdp->rlen      = sc->rdatasize;
671 
672 			irh->rflag =  1;
673 			/* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
674 			ih2->fsz   = LIO_SOFTCMDRESP_IH2;
675 		} else {
676 			irh->rflag =  0;
677 			/* irh + ossp[0] + ossp[1] = 24 bytes */
678 			ih2->fsz   = LIO_PCICMD_O2;
679 		}
680 	}
681 }
682 EXPORT_SYMBOL_GPL(octeon_prepare_soft_command);
683 
684 int octeon_send_soft_command(struct octeon_device *oct,
685 			     struct octeon_soft_command *sc)
686 {
687 	struct octeon_instr_queue *iq;
688 	struct octeon_instr_ih2 *ih2;
689 	struct octeon_instr_ih3 *ih3;
690 	struct octeon_instr_irh *irh;
691 	u32 len;
692 
693 	iq = oct->instr_queue[sc->iq_no];
694 	if (!iq->allow_soft_cmds) {
695 		dev_err(&oct->pci_dev->dev, "Soft commands are not allowed on Queue %d\n",
696 			sc->iq_no);
697 		INCR_INSTRQUEUE_PKT_COUNT(oct, sc->iq_no, instr_dropped, 1);
698 		return IQ_SEND_FAILED;
699 	}
700 
701 	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
702 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
703 		if (ih3->dlengsz) {
704 			WARN_ON(!sc->dmadptr);
705 			sc->cmd.cmd3.dptr = sc->dmadptr;
706 		}
707 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
708 		if (irh->rflag) {
709 			WARN_ON(!sc->dmarptr);
710 			WARN_ON(!sc->status_word);
711 			*sc->status_word = COMPLETION_WORD_INIT;
712 			sc->cmd.cmd3.rptr = sc->dmarptr;
713 		}
714 		len = (u32)ih3->dlengsz;
715 	} else {
716 		ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
717 		if (ih2->dlengsz) {
718 			WARN_ON(!sc->dmadptr);
719 			sc->cmd.cmd2.dptr = sc->dmadptr;
720 		}
721 		irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
722 		if (irh->rflag) {
723 			WARN_ON(!sc->dmarptr);
724 			WARN_ON(!sc->status_word);
725 			*sc->status_word = COMPLETION_WORD_INIT;
726 			sc->cmd.cmd2.rptr = sc->dmarptr;
727 		}
728 		len = (u32)ih2->dlengsz;
729 	}
730 
731 	sc->expiry_time = jiffies + msecs_to_jiffies(LIO_SC_MAX_TMO_MS);
732 
733 	return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc,
734 				    len, REQTYPE_SOFT_COMMAND));
735 }
736 EXPORT_SYMBOL_GPL(octeon_send_soft_command);
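/* Usage sketch (illustrative): the usual soft-command life cycle built from
 * the helpers in this file. The opcode/subcode and error handling below are
 * placeholders, not the driver's actual values.
 *
 *	sc = octeon_alloc_soft_command(oct, datasize, rdatasize, 0);
 *	if (!sc)
 *		return -ENOMEM;
 *
 *	sc->iq_no = 0;	// only queue 0 allows soft commands (allow_soft_cmds)
 *	octeon_prepare_soft_command(oct, sc, opcode, subcode, irh_ossp, 0, 0);
 *
 *	if (octeon_send_soft_command(oct, sc) == IQ_SEND_FAILED) {
 *		octeon_free_soft_command(oct, sc);
 *		return -EIO;
 *	}
 *	// completion is reported asynchronously via *sc->status_word and the
 *	// ordered/done response lists
 */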
737 
738 int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
739 {
740 	int i;
741 	u64 dma_addr;
742 	struct octeon_soft_command *sc;
743 
744 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
745 	spin_lock_init(&oct->sc_buf_pool.lock);
746 	atomic_set(&oct->sc_buf_pool.alloc_buf_count, 0);
747 
748 	for (i = 0; i < MAX_SOFT_COMMAND_BUFFERS; i++) {
749 		sc = (struct octeon_soft_command *)
750 			lio_dma_alloc(oct,
751 				      SOFT_COMMAND_BUFFER_SIZE,
752 					  (dma_addr_t *)&dma_addr);
753 		if (!sc) {
754 			octeon_free_sc_buffer_pool(oct);
755 			return 1;
756 		}
757 
758 		sc->dma_addr = dma_addr;
759 		sc->size = SOFT_COMMAND_BUFFER_SIZE;
760 
761 		list_add_tail(&sc->node, &oct->sc_buf_pool.head);
762 	}
763 
764 	return 0;
765 }
766 EXPORT_SYMBOL_GPL(octeon_setup_sc_buffer_pool);
767 
768 int octeon_free_sc_done_list(struct octeon_device *oct)
769 {
770 	struct octeon_response_list *done_sc_list, *zombie_sc_list;
771 	struct octeon_soft_command *sc;
772 	struct list_head *tmp, *tmp2;
773 	spinlock_t *sc_lists_lock; /* lock for response_list */
774 
775 	done_sc_list = &oct->response_list[OCTEON_DONE_SC_LIST];
776 	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
777 
778 	if (!atomic_read(&done_sc_list->pending_req_count))
779 		return 0;
780 
781 	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
782 
783 	spin_lock_bh(sc_lists_lock);
784 
785 	list_for_each_safe(tmp, tmp2, &done_sc_list->head) {
786 		sc = list_entry(tmp, struct octeon_soft_command, node);
787 
788 		if (READ_ONCE(sc->caller_is_done)) {
789 			list_del(&sc->node);
790 			atomic_dec(&done_sc_list->pending_req_count);
791 
792 			if (*sc->status_word == COMPLETION_WORD_INIT) {
793 				/* timeout; move sc to zombie list */
794 				list_add_tail(&sc->node, &zombie_sc_list->head);
795 				atomic_inc(&zombie_sc_list->pending_req_count);
796 			} else {
797 				octeon_free_soft_command(oct, sc);
798 			}
799 		}
800 	}
801 
802 	spin_unlock_bh(sc_lists_lock);
803 
804 	return 0;
805 }
806 EXPORT_SYMBOL_GPL(octeon_free_sc_done_list);
807 
808 int octeon_free_sc_zombie_list(struct octeon_device *oct)
809 {
810 	struct octeon_response_list *zombie_sc_list;
811 	struct octeon_soft_command *sc;
812 	struct list_head *tmp, *tmp2;
813 	spinlock_t *sc_lists_lock; /* lock for response_list */
814 
815 	zombie_sc_list = &oct->response_list[OCTEON_ZOMBIE_SC_LIST];
816 	sc_lists_lock = &oct->response_list[OCTEON_ORDERED_SC_LIST].lock;
817 
818 	spin_lock_bh(sc_lists_lock);
819 
820 	list_for_each_safe(tmp, tmp2, &zombie_sc_list->head) {
821 		list_del(tmp);
822 		atomic_dec(&zombie_sc_list->pending_req_count);
823 		sc = list_entry(tmp, struct octeon_soft_command, node);
824 		octeon_free_soft_command(oct, sc);
825 	}
826 
827 	spin_unlock_bh(sc_lists_lock);
828 
829 	return 0;
830 }
831 EXPORT_SYMBOL_GPL(octeon_free_sc_zombie_list);
832 
833 int octeon_free_sc_buffer_pool(struct octeon_device *oct)
834 {
835 	struct list_head *tmp, *tmp2;
836 	struct octeon_soft_command *sc;
837 
838 	octeon_free_sc_zombie_list(oct);
839 
840 	spin_lock_bh(&oct->sc_buf_pool.lock);
841 
842 	list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) {
843 		list_del(tmp);
844 
845 		sc = (struct octeon_soft_command *)tmp;
846 
847 		lio_dma_free(oct, sc->size, sc, sc->dma_addr);
848 	}
849 
850 	INIT_LIST_HEAD(&oct->sc_buf_pool.head);
851 
852 	spin_unlock_bh(&oct->sc_buf_pool.lock);
853 
854 	return 0;
855 }
856 EXPORT_SYMBOL_GPL(octeon_free_sc_buffer_pool);
857 
858 struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct,
859 						      u32 datasize,
860 						      u32 rdatasize,
861 						      u32 ctxsize)
862 {
863 	u64 dma_addr;
864 	u32 size;
865 	u32 offset = sizeof(struct octeon_soft_command);
866 	struct octeon_soft_command *sc = NULL;
867 	struct list_head *tmp;
868 
869 	if (!rdatasize)
870 		rdatasize = 16;
871 
872 	WARN_ON((offset + datasize + rdatasize + ctxsize) >
873 	       SOFT_COMMAND_BUFFER_SIZE);
874 
875 	spin_lock_bh(&oct->sc_buf_pool.lock);
876 
877 	if (list_empty(&oct->sc_buf_pool.head)) {
878 		spin_unlock_bh(&oct->sc_buf_pool.lock);
879 		return NULL;
880 	}
881 
882 	list_for_each(tmp, &oct->sc_buf_pool.head)
883 		break;
884 
885 	list_del(tmp);
886 
887 	atomic_inc(&oct->sc_buf_pool.alloc_buf_count);
888 
889 	spin_unlock_bh(&oct->sc_buf_pool.lock);
890 
891 	sc = (struct octeon_soft_command *)tmp;
892 
893 	dma_addr = sc->dma_addr;
894 	size = sc->size;
895 
896 	memset(sc, 0, sc->size);
897 
898 	sc->dma_addr = dma_addr;
899 	sc->size = size;
900 
901 	if (ctxsize) {
902 		sc->ctxptr = (u8 *)sc + offset;
903 		sc->ctxsize = ctxsize;
904 	}
905 
906 	/* Start data at 128 byte boundary */
907 	offset = (offset + ctxsize + 127) & 0xffffff80;
908 
909 	if (datasize) {
910 		sc->virtdptr = (u8 *)sc + offset;
911 		sc->dmadptr = dma_addr + offset;
912 		sc->datasize = datasize;
913 	}
914 
915 	/* Start rdata at 128 byte boundary */
916 	offset = (offset + datasize + 127) & 0xffffff80;
917 
918 	if (rdatasize) {
919 		WARN_ON(rdatasize < 16);
920 		sc->virtrptr = (u8 *)sc + offset;
921 		sc->dmarptr = dma_addr + offset;
922 		sc->rdatasize = rdatasize;
923 		sc->status_word = (u64 *)((u8 *)(sc->virtrptr) + rdatasize - 8);
924 	}
925 
926 	return sc;
927 }
928 EXPORT_SYMBOL_GPL(octeon_alloc_soft_command);
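/* Note on buffer layout: octeon_alloc_soft_command() carves each pool buffer
 * of SOFT_COMMAND_BUFFER_SIZE bytes into the following regions (each data
 * region starts on the next 128-byte boundary):
 *
 *	offset 0           : struct octeon_soft_command + ctx area (ctxsize)
 *	next 128B boundary : data buffer   -> sc->virtdptr / sc->dmadptr
 *	next 128B boundary : rdata buffer  -> sc->virtrptr / sc->dmarptr,
 *	                     whose last 8 bytes hold *sc->status_word
 */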
929 
930 void octeon_free_soft_command(struct octeon_device *oct,
931 			      struct octeon_soft_command *sc)
932 {
933 	spin_lock_bh(&oct->sc_buf_pool.lock);
934 
935 	list_add_tail(&sc->node, &oct->sc_buf_pool.head);
936 
937 	atomic_dec(&oct->sc_buf_pool.alloc_buf_count);
938 
939 	spin_unlock_bh(&oct->sc_buf_pool.lock);
940 }
941 EXPORT_SYMBOL_GPL(octeon_free_soft_command);
942