xref: /linux/drivers/net/wireless/ath/ath10k/ce.c (revision b9b77222d4ff6b5bb8f5d87fca20de0910618bb9)
1 /*
2  * Copyright (c) 2005-2011 Atheros Communications Inc.
3  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
4  * Copyright (c) 2018 The Linux Foundation. All rights reserved.
5  *
6  * Permission to use, copy, modify, and/or distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "hif.h"
20 #include "ce.h"
21 #include "debug.h"
22 
23 /*
 * Support for Copy Engine hardware, which is mainly used for
 * communication between Host and Target over a PCIe/AHB/SNOC interconnect.
26  */
27 
28 /*
29  * A single CopyEngine (CE) comprises two "rings":
30  *   a source ring
31  *   a destination ring
32  *
33  * Each ring consists of a number of descriptors which specify
34  * an address, length, and meta-data.
35  *
36  * Typically, one side of the PCIe/AHB/SNOC interconnect (Host or Target)
37  * controls one ring and the other side controls the other ring.
38  * The source side chooses when to initiate a transfer and it
39  * chooses what to send (buffer address, length). The destination
40  * side keeps a supply of "anonymous receive buffers" available and
41  * it handles incoming data as it arrives (when the destination
42  * receives an interrupt).
43  *
44  * The sender may send a simple buffer (address/length) or it may
45  * send a small list of buffers.  When a small list is sent, hardware
46  * "gathers" these and they end up in a single destination buffer
47  * with a single interrupt.
48  *
 * There are several "contexts" managed by this layer -- more, it
 * may seem, than should be needed. These are provided mainly for
51  * maximum flexibility and especially to facilitate a simpler HIF
52  * implementation. There are per-CopyEngine recv, send, and watermark
53  * contexts. These are supplied by the caller when a recv, send,
54  * or watermark handler is established and they are echoed back to
55  * the caller when the respective callbacks are invoked. There is
56  * also a per-transfer context supplied by the caller when a buffer
57  * (or sendlist) is sent and when a buffer is enqueued for recv.
58  * These per-transfer contexts are echoed back to the caller when
59  * the buffer is sent/received.
60  */
61 
62 static inline u32 shadow_sr_wr_ind_addr(struct ath10k *ar,
63 					struct ath10k_ce_pipe *ce_state)
64 {
65 	u32 ce_id = ce_state->id;
66 	u32 addr = 0;
67 
68 	switch (ce_id) {
69 	case 0:
70 		addr = 0x00032000;
71 		break;
72 	case 3:
73 		addr = 0x0003200C;
74 		break;
75 	case 4:
76 		addr = 0x00032010;
77 		break;
78 	case 5:
79 		addr = 0x00032014;
80 		break;
81 	case 7:
82 		addr = 0x0003201C;
83 		break;
84 	default:
85 		ath10k_warn(ar, "invalid CE id: %d", ce_id);
86 		break;
87 	}
88 	return addr;
89 }
90 
91 static inline u32 shadow_dst_wr_ind_addr(struct ath10k *ar,
92 					 struct ath10k_ce_pipe *ce_state)
93 {
94 	u32 ce_id = ce_state->id;
95 	u32 addr = 0;
96 
97 	switch (ce_id) {
98 	case 1:
99 		addr = 0x00032034;
100 		break;
101 	case 2:
102 		addr = 0x00032038;
103 		break;
104 	case 5:
105 		addr = 0x00032044;
106 		break;
107 	case 7:
108 		addr = 0x0003204C;
109 		break;
110 	case 8:
111 		addr = 0x00032050;
112 		break;
113 	case 9:
114 		addr = 0x00032054;
115 		break;
116 	case 10:
117 		addr = 0x00032058;
118 		break;
119 	case 11:
120 		addr = 0x0003205C;
121 		break;
122 	default:
123 		ath10k_warn(ar, "invalid CE id: %d", ce_id);
124 		break;
125 	}
126 
127 	return addr;
128 }
129 
130 static inline unsigned int
131 ath10k_set_ring_byte(unsigned int offset,
132 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
133 {
134 	return ((offset << addr_map->lsb) & addr_map->mask);
135 }
136 
137 static inline unsigned int
138 ath10k_get_ring_byte(unsigned int offset,
139 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
140 {
141 	return ((offset & addr_map->mask) >> (addr_map->lsb));
142 }
143 
144 static inline u32 ath10k_ce_read32(struct ath10k *ar, u32 offset)
145 {
146 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
147 
148 	return ce->bus_ops->read32(ar, offset);
149 }
150 
151 static inline void ath10k_ce_write32(struct ath10k *ar, u32 offset, u32 value)
152 {
153 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
154 
155 	ce->bus_ops->write32(ar, offset, value);
156 }
157 
158 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
159 						       u32 ce_ctrl_addr,
160 						       unsigned int n)
161 {
162 	ath10k_ce_write32(ar, ce_ctrl_addr +
163 			  ar->hw_ce_regs->dst_wr_index_addr, n);
164 }
165 
166 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
167 						      u32 ce_ctrl_addr)
168 {
169 	return ath10k_ce_read32(ar, ce_ctrl_addr +
170 				ar->hw_ce_regs->dst_wr_index_addr);
171 }
172 
173 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
174 						      u32 ce_ctrl_addr,
175 						      unsigned int n)
176 {
177 	ath10k_ce_write32(ar, ce_ctrl_addr +
178 			  ar->hw_ce_regs->sr_wr_index_addr, n);
179 }
180 
181 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
182 						     u32 ce_ctrl_addr)
183 {
184 	return ath10k_ce_read32(ar, ce_ctrl_addr +
185 				ar->hw_ce_regs->sr_wr_index_addr);
186 }
187 
188 static inline u32 ath10k_ce_src_ring_read_index_from_ddr(struct ath10k *ar,
189 							 u32 ce_id)
190 {
191 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
192 
193 	return ce->vaddr_rri[ce_id] & CE_DDR_RRI_MASK;
194 }
195 
196 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
197 						    u32 ce_ctrl_addr)
198 {
199 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
200 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
201 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
202 	u32 index;
203 
204 	if (ar->hw_params.rri_on_ddr &&
205 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
206 		index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_id);
207 	else
208 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
209 					 ar->hw_ce_regs->current_srri_addr);
210 
211 	return index;
212 }
213 
214 static inline void
215 ath10k_ce_shadow_src_ring_write_index_set(struct ath10k *ar,
216 					  struct ath10k_ce_pipe *ce_state,
217 					  unsigned int value)
218 {
219 	ath10k_ce_write32(ar, shadow_sr_wr_ind_addr(ar, ce_state), value);
220 }
221 
222 static inline void
223 ath10k_ce_shadow_dest_ring_write_index_set(struct ath10k *ar,
224 					   struct ath10k_ce_pipe *ce_state,
225 					   unsigned int value)
226 {
227 	ath10k_ce_write32(ar, shadow_dst_wr_ind_addr(ar, ce_state), value);
228 }
229 
230 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
231 						    u32 ce_ctrl_addr,
232 						    unsigned int addr)
233 {
234 	ath10k_ce_write32(ar, ce_ctrl_addr +
235 			  ar->hw_ce_regs->sr_base_addr, addr);
236 }
237 
238 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
239 					       u32 ce_ctrl_addr,
240 					       unsigned int n)
241 {
242 	ath10k_ce_write32(ar, ce_ctrl_addr +
243 			  ar->hw_ce_regs->sr_size_addr, n);
244 }
245 
246 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
247 					       u32 ce_ctrl_addr,
248 					       unsigned int n)
249 {
250 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
251 
252 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
253 					  ctrl_regs->addr);
254 
255 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
256 			  (ctrl1_addr &  ~(ctrl_regs->dmax->mask)) |
257 			  ath10k_set_ring_byte(n, ctrl_regs->dmax));
258 }
259 
260 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
261 						    u32 ce_ctrl_addr,
262 						    unsigned int n)
263 {
264 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
265 
266 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
267 					  ctrl_regs->addr);
268 
269 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
270 			  (ctrl1_addr & ~(ctrl_regs->src_ring->mask)) |
271 			  ath10k_set_ring_byte(n, ctrl_regs->src_ring));
272 }
273 
274 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
275 						     u32 ce_ctrl_addr,
276 						     unsigned int n)
277 {
278 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
279 
280 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
281 					  ctrl_regs->addr);
282 
283 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
284 			  (ctrl1_addr & ~(ctrl_regs->dst_ring->mask)) |
285 			  ath10k_set_ring_byte(n, ctrl_regs->dst_ring));
286 }
287 
288 static inline
289 	u32 ath10k_ce_dest_ring_read_index_from_ddr(struct ath10k *ar, u32 ce_id)
290 {
291 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
292 
293 	return (ce->vaddr_rri[ce_id] >> CE_DDR_DRRI_SHIFT) &
294 		CE_DDR_RRI_MASK;
295 }
296 
297 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
298 						     u32 ce_ctrl_addr)
299 {
300 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
301 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
302 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
303 	u32 index;
304 
305 	if (ar->hw_params.rri_on_ddr &&
306 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
307 		index = ath10k_ce_dest_ring_read_index_from_ddr(ar, ce_id);
308 	else
309 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
310 					 ar->hw_ce_regs->current_drri_addr);
311 
312 	return index;
313 }
314 
315 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
316 						     u32 ce_ctrl_addr,
317 						     u32 addr)
318 {
319 	ath10k_ce_write32(ar, ce_ctrl_addr +
320 			  ar->hw_ce_regs->dr_base_addr, addr);
321 }
322 
323 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
324 						u32 ce_ctrl_addr,
325 						unsigned int n)
326 {
327 	ath10k_ce_write32(ar, ce_ctrl_addr +
328 			  ar->hw_ce_regs->dr_size_addr, n);
329 }
330 
331 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
332 						   u32 ce_ctrl_addr,
333 						   unsigned int n)
334 {
335 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
336 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
337 
338 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
339 			  (addr & ~(srcr_wm->wm_high->mask)) |
340 			  (ath10k_set_ring_byte(n, srcr_wm->wm_high)));
341 }
342 
343 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
344 						  u32 ce_ctrl_addr,
345 						  unsigned int n)
346 {
347 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
348 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
349 
350 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
351 			  (addr & ~(srcr_wm->wm_low->mask)) |
352 			  (ath10k_set_ring_byte(n, srcr_wm->wm_low)));
353 }
354 
355 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
356 						    u32 ce_ctrl_addr,
357 						    unsigned int n)
358 {
359 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
360 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
361 
362 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
363 			  (addr & ~(dstr_wm->wm_high->mask)) |
364 			  (ath10k_set_ring_byte(n, dstr_wm->wm_high)));
365 }
366 
367 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
368 						   u32 ce_ctrl_addr,
369 						   unsigned int n)
370 {
371 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
372 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
373 
374 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
375 			  (addr & ~(dstr_wm->wm_low->mask)) |
376 			  (ath10k_set_ring_byte(n, dstr_wm->wm_low)));
377 }
378 
379 static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
380 							u32 ce_ctrl_addr)
381 {
382 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
383 
384 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
385 					    ar->hw_ce_regs->host_ie_addr);
386 
387 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
388 			  host_ie_addr | host_ie->copy_complete->mask);
389 }
390 
391 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
392 							u32 ce_ctrl_addr)
393 {
394 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
395 
396 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
397 					    ar->hw_ce_regs->host_ie_addr);
398 
399 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
400 			  host_ie_addr & ~(host_ie->copy_complete->mask));
401 }
402 
403 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
404 						    u32 ce_ctrl_addr)
405 {
406 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
407 
408 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
409 					    ar->hw_ce_regs->host_ie_addr);
410 
411 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
412 			  host_ie_addr & ~(wm_regs->wm_mask));
413 }
414 
415 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
416 					       u32 ce_ctrl_addr)
417 {
418 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
419 
420 	u32 misc_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
421 					    ar->hw_ce_regs->misc_ie_addr);
422 
423 	ath10k_ce_write32(ar,
424 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
425 			  misc_ie_addr | misc_regs->err_mask);
426 }
427 
428 static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
429 						u32 ce_ctrl_addr)
430 {
431 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
432 
433 	u32 misc_ie_addr = ath10k_ce_read32(ar,
434 			ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr);
435 
436 	ath10k_ce_write32(ar,
437 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
438 			  misc_ie_addr & ~(misc_regs->err_mask));
439 }
440 
441 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
442 						     u32 ce_ctrl_addr,
443 						     unsigned int mask)
444 {
445 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
446 
447 	ath10k_ce_write32(ar, ce_ctrl_addr + wm_regs->addr, mask);
448 }
449 
450 /*
451  * Guts of ath10k_ce_send.
452  * The caller takes responsibility for any needed locking.
453  */
454 static int _ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
455 				  void *per_transfer_context,
456 				  dma_addr_t buffer,
457 				  unsigned int nbytes,
458 				  unsigned int transfer_id,
459 				  unsigned int flags)
460 {
461 	struct ath10k *ar = ce_state->ar;
462 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
463 	struct ce_desc *desc, sdesc;
464 	unsigned int nentries_mask = src_ring->nentries_mask;
465 	unsigned int sw_index = src_ring->sw_index;
466 	unsigned int write_index = src_ring->write_index;
467 	u32 ctrl_addr = ce_state->ctrl_addr;
468 	u32 desc_flags = 0;
469 	int ret = 0;
470 
471 	if (nbytes > ce_state->src_sz_max)
472 		ath10k_warn(ar, "%s: send more we can (nbytes: %d, max: %d)\n",
473 			    __func__, nbytes, ce_state->src_sz_max);
474 
475 	if (unlikely(CE_RING_DELTA(nentries_mask,
476 				   write_index, sw_index - 1) <= 0)) {
477 		ret = -ENOSR;
478 		goto exit;
479 	}
480 
481 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
482 				   write_index);
483 
484 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
485 
486 	if (flags & CE_SEND_FLAG_GATHER)
487 		desc_flags |= CE_DESC_FLAGS_GATHER;
488 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
489 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
490 
491 	sdesc.addr   = __cpu_to_le32(buffer);
492 	sdesc.nbytes = __cpu_to_le16(nbytes);
493 	sdesc.flags  = __cpu_to_le16(desc_flags);
494 
495 	*desc = sdesc;
496 
497 	src_ring->per_transfer_context[write_index] = per_transfer_context;
498 
499 	/* Update Source Ring Write Index */
500 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
501 
502 	/* WORKAROUND */
503 	if (!(flags & CE_SEND_FLAG_GATHER)) {
504 		if (ar->hw_params.shadow_reg_support)
505 			ath10k_ce_shadow_src_ring_write_index_set(ar, ce_state,
506 								  write_index);
507 		else
508 			ath10k_ce_src_ring_write_index_set(ar, ctrl_addr,
509 							   write_index);
510 	}
511 
512 	src_ring->write_index = write_index;
513 exit:
514 	return ret;
515 }
516 
517 static int _ath10k_ce_send_nolock_64(struct ath10k_ce_pipe *ce_state,
518 				     void *per_transfer_context,
519 				     dma_addr_t buffer,
520 				     unsigned int nbytes,
521 				     unsigned int transfer_id,
522 				     unsigned int flags)
523 {
524 	struct ath10k *ar = ce_state->ar;
525 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
526 	struct ce_desc_64 *desc, sdesc;
527 	unsigned int nentries_mask = src_ring->nentries_mask;
528 	unsigned int sw_index;
529 	unsigned int write_index = src_ring->write_index;
530 	u32 ctrl_addr = ce_state->ctrl_addr;
531 	__le32 *addr;
532 	u32 desc_flags = 0;
533 	int ret = 0;
534 
535 	if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
536 		return -ESHUTDOWN;
537 
538 	if (nbytes > ce_state->src_sz_max)
539 		ath10k_warn(ar, "%s: send more we can (nbytes: %d, max: %d)\n",
540 			    __func__, nbytes, ce_state->src_sz_max);
541 
542 	if (ar->hw_params.rri_on_ddr)
543 		sw_index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_state->id);
544 	else
545 		sw_index = src_ring->sw_index;
546 
547 	if (unlikely(CE_RING_DELTA(nentries_mask,
548 				   write_index, sw_index - 1) <= 0)) {
549 		ret = -ENOSR;
550 		goto exit;
551 	}
552 
553 	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
554 				      write_index);
555 
556 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
557 
558 	if (flags & CE_SEND_FLAG_GATHER)
559 		desc_flags |= CE_DESC_FLAGS_GATHER;
560 
561 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
562 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
563 
564 	addr = (__le32 *)&sdesc.addr;
565 
566 	flags |= upper_32_bits(buffer) & CE_DESC_FLAGS_GET_MASK;
567 	addr[0] = __cpu_to_le32(buffer);
568 	addr[1] = __cpu_to_le32(flags);
569 	if (flags & CE_SEND_FLAG_GATHER)
570 		addr[1] |= __cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER);
571 	else
572 		addr[1] &= ~(__cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER));
573 
574 	sdesc.nbytes = __cpu_to_le16(nbytes);
575 	sdesc.flags  = __cpu_to_le16(desc_flags);
576 
577 	*desc = sdesc;
578 
579 	src_ring->per_transfer_context[write_index] = per_transfer_context;
580 
581 	/* Update Source Ring Write Index */
582 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
583 
584 	if (!(flags & CE_SEND_FLAG_GATHER))
585 		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
586 
587 	src_ring->write_index = write_index;
588 exit:
589 	return ret;
590 }
591 
592 int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
593 			  void *per_transfer_context,
594 			  dma_addr_t buffer,
595 			  unsigned int nbytes,
596 			  unsigned int transfer_id,
597 			  unsigned int flags)
598 {
599 	return ce_state->ops->ce_send_nolock(ce_state, per_transfer_context,
600 				    buffer, nbytes, transfer_id, flags);
601 }
602 EXPORT_SYMBOL(ath10k_ce_send_nolock);
603 
604 void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
605 {
606 	struct ath10k *ar = pipe->ar;
607 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
608 	struct ath10k_ce_ring *src_ring = pipe->src_ring;
609 	u32 ctrl_addr = pipe->ctrl_addr;
610 
611 	lockdep_assert_held(&ce->ce_lock);
612 
613 	/*
614 	 * This function must be called only if there is an incomplete
615 	 * scatter-gather transfer (before index register is updated)
616 	 * that needs to be cleaned up.
617 	 */
618 	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
619 		return;
620 
621 	if (WARN_ON_ONCE(src_ring->write_index ==
622 			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
623 		return;
624 
625 	src_ring->write_index--;
626 	src_ring->write_index &= src_ring->nentries_mask;
627 
628 	src_ring->per_transfer_context[src_ring->write_index] = NULL;
629 }
630 EXPORT_SYMBOL(__ath10k_ce_send_revert);
631 
632 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
633 		   void *per_transfer_context,
634 		   dma_addr_t buffer,
635 		   unsigned int nbytes,
636 		   unsigned int transfer_id,
637 		   unsigned int flags)
638 {
639 	struct ath10k *ar = ce_state->ar;
640 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
641 	int ret;
642 
643 	spin_lock_bh(&ce->ce_lock);
644 	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
645 				    buffer, nbytes, transfer_id, flags);
646 	spin_unlock_bh(&ce->ce_lock);
647 
648 	return ret;
649 }
650 EXPORT_SYMBOL(ath10k_ce_send);
651 
652 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
653 {
654 	struct ath10k *ar = pipe->ar;
655 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
656 	int delta;
657 
658 	spin_lock_bh(&ce->ce_lock);
659 	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
660 			      pipe->src_ring->write_index,
661 			      pipe->src_ring->sw_index - 1);
662 	spin_unlock_bh(&ce->ce_lock);
663 
664 	return delta;
665 }
666 EXPORT_SYMBOL(ath10k_ce_num_free_src_entries);
667 
668 int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
669 {
670 	struct ath10k *ar = pipe->ar;
671 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
672 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
673 	unsigned int nentries_mask = dest_ring->nentries_mask;
674 	unsigned int write_index = dest_ring->write_index;
675 	unsigned int sw_index = dest_ring->sw_index;
676 
677 	lockdep_assert_held(&ce->ce_lock);
678 
679 	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
680 }
681 EXPORT_SYMBOL(__ath10k_ce_rx_num_free_bufs);
682 
683 static int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
684 				   dma_addr_t paddr)
685 {
686 	struct ath10k *ar = pipe->ar;
687 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
688 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
689 	unsigned int nentries_mask = dest_ring->nentries_mask;
690 	unsigned int write_index = dest_ring->write_index;
691 	unsigned int sw_index = dest_ring->sw_index;
692 	struct ce_desc *base = dest_ring->base_addr_owner_space;
693 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
694 	u32 ctrl_addr = pipe->ctrl_addr;
695 
696 	lockdep_assert_held(&ce->ce_lock);
697 
698 	if ((pipe->id != 5) &&
699 	    CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
700 		return -ENOSPC;
701 
702 	desc->addr = __cpu_to_le32(paddr);
703 	desc->nbytes = 0;
704 
705 	dest_ring->per_transfer_context[write_index] = ctx;
706 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
707 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
708 	dest_ring->write_index = write_index;
709 
710 	return 0;
711 }
712 
713 static int __ath10k_ce_rx_post_buf_64(struct ath10k_ce_pipe *pipe,
714 				      void *ctx,
715 				      dma_addr_t paddr)
716 {
717 	struct ath10k *ar = pipe->ar;
718 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
719 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
720 	unsigned int nentries_mask = dest_ring->nentries_mask;
721 	unsigned int write_index = dest_ring->write_index;
722 	unsigned int sw_index = dest_ring->sw_index;
723 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
724 	struct ce_desc_64 *desc =
725 			CE_DEST_RING_TO_DESC_64(base, write_index);
726 	u32 ctrl_addr = pipe->ctrl_addr;
727 
728 	lockdep_assert_held(&ce->ce_lock);
729 
730 	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
731 		return -ENOSPC;
732 
733 	desc->addr = __cpu_to_le64(paddr);
734 	desc->addr &= __cpu_to_le64(CE_DESC_37BIT_ADDR_MASK);
735 
736 	desc->nbytes = 0;
737 
738 	dest_ring->per_transfer_context[write_index] = ctx;
739 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
740 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
741 	dest_ring->write_index = write_index;
742 
743 	return 0;
744 }
745 
746 void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
747 {
748 	struct ath10k *ar = pipe->ar;
749 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
750 	unsigned int nentries_mask = dest_ring->nentries_mask;
751 	unsigned int write_index = dest_ring->write_index;
752 	u32 ctrl_addr = pipe->ctrl_addr;
753 	u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
754 
755 	/* Prevent CE ring stuck issue that will occur when ring is full.
756 	 * Make sure that write index is 1 less than read index.
757 	 */
758 	if (((cur_write_idx + nentries) & nentries_mask) == dest_ring->sw_index)
759 		nentries -= 1;
760 
761 	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
762 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
763 	dest_ring->write_index = write_index;
764 }
765 EXPORT_SYMBOL(ath10k_ce_rx_update_write_idx);
766 
767 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
768 			  dma_addr_t paddr)
769 {
770 	struct ath10k *ar = pipe->ar;
771 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
772 	int ret;
773 
774 	spin_lock_bh(&ce->ce_lock);
775 	ret = pipe->ops->ce_rx_post_buf(pipe, ctx, paddr);
776 	spin_unlock_bh(&ce->ce_lock);
777 
778 	return ret;
779 }
780 EXPORT_SYMBOL(ath10k_ce_rx_post_buf);
781 
782 /*
783  * Guts of ath10k_ce_completed_recv_next.
784  * The caller takes responsibility for any necessary locking.
785  */
786 static int
787 	 _ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
788 					       void **per_transfer_contextp,
789 					       unsigned int *nbytesp)
790 {
791 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
792 	unsigned int nentries_mask = dest_ring->nentries_mask;
793 	unsigned int sw_index = dest_ring->sw_index;
794 
795 	struct ce_desc *base = dest_ring->base_addr_owner_space;
796 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
797 	struct ce_desc sdesc;
798 	u16 nbytes;
799 
800 	/* Copy in one go for performance reasons */
801 	sdesc = *desc;
802 
803 	nbytes = __le16_to_cpu(sdesc.nbytes);
804 	if (nbytes == 0) {
805 		/*
806 		 * This closes a relatively unusual race where the Host
807 		 * sees the updated DRRI before the update to the
808 		 * corresponding descriptor has completed. We treat this
809 		 * as a descriptor that is not yet done.
810 		 */
811 		return -EIO;
812 	}
813 
814 	desc->nbytes = 0;
815 
816 	/* Return data from completed destination descriptor */
817 	*nbytesp = nbytes;
818 
819 	if (per_transfer_contextp)
820 		*per_transfer_contextp =
821 			dest_ring->per_transfer_context[sw_index];
822 
823 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
824 	 * So update transfer context all CEs except CE5.
825 	 */
826 	if (ce_state->id != 5)
827 		dest_ring->per_transfer_context[sw_index] = NULL;
828 
829 	/* Update sw_index */
830 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
831 	dest_ring->sw_index = sw_index;
832 
833 	return 0;
834 }
835 
836 static int
837 _ath10k_ce_completed_recv_next_nolock_64(struct ath10k_ce_pipe *ce_state,
838 					 void **per_transfer_contextp,
839 					 unsigned int *nbytesp)
840 {
841 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
842 	unsigned int nentries_mask = dest_ring->nentries_mask;
843 	unsigned int sw_index = dest_ring->sw_index;
844 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
845 	struct ce_desc_64 *desc =
846 		CE_DEST_RING_TO_DESC_64(base, sw_index);
847 	struct ce_desc_64 sdesc;
848 	u16 nbytes;
849 
850 	/* Copy in one go for performance reasons */
851 	sdesc = *desc;
852 
853 	nbytes = __le16_to_cpu(sdesc.nbytes);
854 	if (nbytes == 0) {
855 		/* This closes a relatively unusual race where the Host
856 		 * sees the updated DRRI before the update to the
857 		 * corresponding descriptor has completed. We treat this
858 		 * as a descriptor that is not yet done.
859 		 */
860 		return -EIO;
861 	}
862 
863 	desc->nbytes = 0;
864 
865 	/* Return data from completed destination descriptor */
866 	*nbytesp = nbytes;
867 
868 	if (per_transfer_contextp)
869 		*per_transfer_contextp =
870 			dest_ring->per_transfer_context[sw_index];
871 
872 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
873 	 * So update transfer context all CEs except CE5.
874 	 */
875 	if (ce_state->id != 5)
876 		dest_ring->per_transfer_context[sw_index] = NULL;
877 
878 	/* Update sw_index */
879 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
880 	dest_ring->sw_index = sw_index;
881 
882 	return 0;
883 }
884 
885 int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
886 					 void **per_transfer_ctx,
887 					 unsigned int *nbytesp)
888 {
889 	return ce_state->ops->ce_completed_recv_next_nolock(ce_state,
890 							    per_transfer_ctx,
891 							    nbytesp);
892 }
893 EXPORT_SYMBOL(ath10k_ce_completed_recv_next_nolock);
894 
895 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
896 				  void **per_transfer_contextp,
897 				  unsigned int *nbytesp)
898 {
899 	struct ath10k *ar = ce_state->ar;
900 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
901 	int ret;
902 
903 	spin_lock_bh(&ce->ce_lock);
904 	ret = ce_state->ops->ce_completed_recv_next_nolock(ce_state,
905 						   per_transfer_contextp,
906 						   nbytesp);
907 
908 	spin_unlock_bh(&ce->ce_lock);
909 
910 	return ret;
911 }
912 EXPORT_SYMBOL(ath10k_ce_completed_recv_next);
913 
914 static int _ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
915 				       void **per_transfer_contextp,
916 				       dma_addr_t *bufferp)
917 {
918 	struct ath10k_ce_ring *dest_ring;
919 	unsigned int nentries_mask;
920 	unsigned int sw_index;
921 	unsigned int write_index;
922 	int ret;
923 	struct ath10k *ar;
924 	struct ath10k_ce *ce;
925 
926 	dest_ring = ce_state->dest_ring;
927 
928 	if (!dest_ring)
929 		return -EIO;
930 
931 	ar = ce_state->ar;
932 	ce = ath10k_ce_priv(ar);
933 
934 	spin_lock_bh(&ce->ce_lock);
935 
936 	nentries_mask = dest_ring->nentries_mask;
937 	sw_index = dest_ring->sw_index;
938 	write_index = dest_ring->write_index;
939 	if (write_index != sw_index) {
940 		struct ce_desc *base = dest_ring->base_addr_owner_space;
941 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
942 
943 		/* Return data from completed destination descriptor */
944 		*bufferp = __le32_to_cpu(desc->addr);
945 
946 		if (per_transfer_contextp)
947 			*per_transfer_contextp =
948 				dest_ring->per_transfer_context[sw_index];
949 
950 		/* sanity */
951 		dest_ring->per_transfer_context[sw_index] = NULL;
952 		desc->nbytes = 0;
953 
954 		/* Update sw_index */
955 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
956 		dest_ring->sw_index = sw_index;
957 		ret = 0;
958 	} else {
959 		ret = -EIO;
960 	}
961 
962 	spin_unlock_bh(&ce->ce_lock);
963 
964 	return ret;
965 }
966 
967 static int _ath10k_ce_revoke_recv_next_64(struct ath10k_ce_pipe *ce_state,
968 					  void **per_transfer_contextp,
969 					  dma_addr_t *bufferp)
970 {
971 	struct ath10k_ce_ring *dest_ring;
972 	unsigned int nentries_mask;
973 	unsigned int sw_index;
974 	unsigned int write_index;
975 	int ret;
976 	struct ath10k *ar;
977 	struct ath10k_ce *ce;
978 
979 	dest_ring = ce_state->dest_ring;
980 
981 	if (!dest_ring)
982 		return -EIO;
983 
984 	ar = ce_state->ar;
985 	ce = ath10k_ce_priv(ar);
986 
987 	spin_lock_bh(&ce->ce_lock);
988 
989 	nentries_mask = dest_ring->nentries_mask;
990 	sw_index = dest_ring->sw_index;
991 	write_index = dest_ring->write_index;
992 	if (write_index != sw_index) {
993 		struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
994 		struct ce_desc_64 *desc =
995 			CE_DEST_RING_TO_DESC_64(base, sw_index);
996 
997 		/* Return data from completed destination descriptor */
998 		*bufferp = __le64_to_cpu(desc->addr);
999 
1000 		if (per_transfer_contextp)
1001 			*per_transfer_contextp =
1002 				dest_ring->per_transfer_context[sw_index];
1003 
1004 		/* sanity */
1005 		dest_ring->per_transfer_context[sw_index] = NULL;
1006 		desc->nbytes = 0;
1007 
1008 		/* Update sw_index */
1009 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1010 		dest_ring->sw_index = sw_index;
1011 		ret = 0;
1012 	} else {
1013 		ret = -EIO;
1014 	}
1015 
1016 	spin_unlock_bh(&ce->ce_lock);
1017 
1018 	return ret;
1019 }
1020 
1021 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
1022 			       void **per_transfer_contextp,
1023 			       dma_addr_t *bufferp)
1024 {
1025 	return ce_state->ops->ce_revoke_recv_next(ce_state,
1026 						  per_transfer_contextp,
1027 						  bufferp);
1028 }
1029 EXPORT_SYMBOL(ath10k_ce_revoke_recv_next);
1030 
1031 /*
1032  * Guts of ath10k_ce_completed_send_next.
1033  * The caller takes responsibility for any necessary locking.
1034  */
1035 int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1036 					 void **per_transfer_contextp)
1037 {
1038 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1039 	u32 ctrl_addr = ce_state->ctrl_addr;
1040 	struct ath10k *ar = ce_state->ar;
1041 	unsigned int nentries_mask = src_ring->nentries_mask;
1042 	unsigned int sw_index = src_ring->sw_index;
1043 	unsigned int read_index;
1044 	struct ce_desc *desc;
1045 
1046 	if (src_ring->hw_index == sw_index) {
1047 		/*
1048 		 * The SW completion index has caught up with the cached
1049 		 * version of the HW completion index.
1050 		 * Update the cached HW completion index to see whether
1051 		 * the SW has really caught up to the HW, or if the cached
1052 		 * value of the HW index has become stale.
1053 		 */
1054 
1055 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1056 		if (read_index == 0xffffffff)
1057 			return -ENODEV;
1058 
1059 		read_index &= nentries_mask;
1060 		src_ring->hw_index = read_index;
1061 	}
1062 
1063 	if (ar->hw_params.rri_on_ddr)
1064 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1065 	else
1066 		read_index = src_ring->hw_index;
1067 
1068 	if (read_index == sw_index)
1069 		return -EIO;
1070 
1071 	if (per_transfer_contextp)
1072 		*per_transfer_contextp =
1073 			src_ring->per_transfer_context[sw_index];
1074 
1075 	/* sanity */
1076 	src_ring->per_transfer_context[sw_index] = NULL;
1077 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
1078 				   sw_index);
1079 	desc->nbytes = 0;
1080 
1081 	/* Update sw_index */
1082 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1083 	src_ring->sw_index = sw_index;
1084 
1085 	return 0;
1086 }
1087 EXPORT_SYMBOL(ath10k_ce_completed_send_next_nolock);
1088 
1089 static void ath10k_ce_extract_desc_data(struct ath10k *ar,
1090 					struct ath10k_ce_ring *src_ring,
1091 					u32 sw_index,
1092 					dma_addr_t *bufferp,
1093 					u32 *nbytesp,
1094 					u32 *transfer_idp)
1095 {
1096 		struct ce_desc *base = src_ring->base_addr_owner_space;
1097 		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
1098 
1099 		/* Return data from completed source descriptor */
1100 		*bufferp = __le32_to_cpu(desc->addr);
1101 		*nbytesp = __le16_to_cpu(desc->nbytes);
1102 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
1103 				   CE_DESC_FLAGS_META_DATA);
1104 }
1105 
1106 static void ath10k_ce_extract_desc_data_64(struct ath10k *ar,
1107 					   struct ath10k_ce_ring *src_ring,
1108 					   u32 sw_index,
1109 					   dma_addr_t *bufferp,
1110 					   u32 *nbytesp,
1111 					   u32 *transfer_idp)
1112 {
1113 		struct ce_desc_64 *base = src_ring->base_addr_owner_space;
1114 		struct ce_desc_64 *desc =
1115 			CE_SRC_RING_TO_DESC_64(base, sw_index);
1116 
1117 		/* Return data from completed source descriptor */
1118 		*bufferp = __le64_to_cpu(desc->addr);
1119 		*nbytesp = __le16_to_cpu(desc->nbytes);
1120 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
1121 				   CE_DESC_FLAGS_META_DATA);
1122 }
1123 
1124 /* NB: Modeled after ath10k_ce_completed_send_next */
1125 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
1126 			       void **per_transfer_contextp,
1127 			       dma_addr_t *bufferp,
1128 			       unsigned int *nbytesp,
1129 			       unsigned int *transfer_idp)
1130 {
1131 	struct ath10k_ce_ring *src_ring;
1132 	unsigned int nentries_mask;
1133 	unsigned int sw_index;
1134 	unsigned int write_index;
1135 	int ret;
1136 	struct ath10k *ar;
1137 	struct ath10k_ce *ce;
1138 
1139 	src_ring = ce_state->src_ring;
1140 
1141 	if (!src_ring)
1142 		return -EIO;
1143 
1144 	ar = ce_state->ar;
1145 	ce = ath10k_ce_priv(ar);
1146 
1147 	spin_lock_bh(&ce->ce_lock);
1148 
1149 	nentries_mask = src_ring->nentries_mask;
1150 	sw_index = src_ring->sw_index;
1151 	write_index = src_ring->write_index;
1152 
1153 	if (write_index != sw_index) {
1154 		ce_state->ops->ce_extract_desc_data(ar, src_ring, sw_index,
1155 						    bufferp, nbytesp,
1156 						    transfer_idp);
1157 
1158 		if (per_transfer_contextp)
1159 			*per_transfer_contextp =
1160 				src_ring->per_transfer_context[sw_index];
1161 
1162 		/* sanity */
1163 		src_ring->per_transfer_context[sw_index] = NULL;
1164 
1165 		/* Update sw_index */
1166 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1167 		src_ring->sw_index = sw_index;
1168 		ret = 0;
1169 	} else {
1170 		ret = -EIO;
1171 	}
1172 
1173 	spin_unlock_bh(&ce->ce_lock);
1174 
1175 	return ret;
1176 }
1177 EXPORT_SYMBOL(ath10k_ce_cancel_send_next);
1178 
1179 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
1180 				  void **per_transfer_contextp)
1181 {
1182 	struct ath10k *ar = ce_state->ar;
1183 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1184 	int ret;
1185 
1186 	spin_lock_bh(&ce->ce_lock);
1187 	ret = ath10k_ce_completed_send_next_nolock(ce_state,
1188 						   per_transfer_contextp);
1189 	spin_unlock_bh(&ce->ce_lock);
1190 
1191 	return ret;
1192 }
1193 EXPORT_SYMBOL(ath10k_ce_completed_send_next);
1194 
1195 /*
1196  * Guts of interrupt handler for per-engine interrupts on a particular CE.
1197  *
1198  * Invokes registered callbacks for recv_complete,
1199  * send_complete, and watermarks.
1200  */
1201 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
1202 {
1203 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1204 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1205 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
1206 	u32 ctrl_addr = ce_state->ctrl_addr;
1207 
1208 	spin_lock_bh(&ce->ce_lock);
1209 
1210 	/* Clear the copy-complete interrupts that will be handled here. */
1211 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
1212 					  wm_regs->cc_mask);
1213 
1214 	spin_unlock_bh(&ce->ce_lock);
1215 
1216 	if (ce_state->recv_cb)
1217 		ce_state->recv_cb(ce_state);
1218 
1219 	if (ce_state->send_cb)
1220 		ce_state->send_cb(ce_state);
1221 
1222 	spin_lock_bh(&ce->ce_lock);
1223 
1224 	/*
1225 	 * Misc CE interrupts are not being handled, but still need
1226 	 * to be cleared.
1227 	 */
1228 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, wm_regs->wm_mask);
1229 
1230 	spin_unlock_bh(&ce->ce_lock);
1231 }
1232 EXPORT_SYMBOL(ath10k_ce_per_engine_service);
1233 
1234 /*
1235  * Handler for per-engine interrupts on ALL active CEs.
1236  * This is used in cases where the system is sharing a
1237  * single interrput for all CEs
1238  */
1239 
1240 void ath10k_ce_per_engine_service_any(struct ath10k *ar)
1241 {
1242 	int ce_id;
1243 	u32 intr_summary;
1244 
1245 	intr_summary = ath10k_ce_interrupt_summary(ar);
1246 
1247 	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
1248 		if (intr_summary & (1 << ce_id))
1249 			intr_summary &= ~(1 << ce_id);
1250 		else
1251 			/* no intr pending on this CE */
1252 			continue;
1253 
1254 		ath10k_ce_per_engine_service(ar, ce_id);
1255 	}
1256 }
1257 EXPORT_SYMBOL(ath10k_ce_per_engine_service_any);
1258 
1259 /*
1260  * Adjust interrupts for the copy complete handler.
1261  * If it's needed for either send or recv, then unmask
1262  * this interrupt; otherwise, mask it.
1263  *
1264  * Called with ce_lock held.
1265  */
1266 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
1267 {
1268 	u32 ctrl_addr = ce_state->ctrl_addr;
1269 	struct ath10k *ar = ce_state->ar;
1270 	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;
1271 
1272 	if ((!disable_copy_compl_intr) &&
1273 	    (ce_state->send_cb || ce_state->recv_cb))
1274 		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
1275 	else
1276 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1277 
1278 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1279 }
1280 
1281 int ath10k_ce_disable_interrupts(struct ath10k *ar)
1282 {
1283 	int ce_id;
1284 
1285 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
1286 		u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1287 
1288 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1289 		ath10k_ce_error_intr_disable(ar, ctrl_addr);
1290 		ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1291 	}
1292 
1293 	return 0;
1294 }
1295 EXPORT_SYMBOL(ath10k_ce_disable_interrupts);
1296 
1297 void ath10k_ce_enable_interrupts(struct ath10k *ar)
1298 {
1299 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1300 	int ce_id;
1301 	struct ath10k_ce_pipe *ce_state;
1302 
1303 	/* Skip the last copy engine, CE7 the diagnostic window, as that
1304 	 * uses polling and isn't initialized for interrupts.
1305 	 */
1306 	for (ce_id = 0; ce_id < CE_COUNT - 1; ce_id++) {
1307 		ce_state  = &ce->ce_states[ce_id];
1308 		ath10k_ce_per_engine_handler_adjust(ce_state);
1309 	}
1310 }
1311 EXPORT_SYMBOL(ath10k_ce_enable_interrupts);
1312 
1313 static int ath10k_ce_init_src_ring(struct ath10k *ar,
1314 				   unsigned int ce_id,
1315 				   const struct ce_attr *attr)
1316 {
1317 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1318 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1319 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1320 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1321 
1322 	nentries = roundup_pow_of_two(attr->src_nentries);
1323 
1324 	if (ar->hw_params.target_64bit)
1325 		memset(src_ring->base_addr_owner_space, 0,
1326 		       nentries * sizeof(struct ce_desc_64));
1327 	else
1328 		memset(src_ring->base_addr_owner_space, 0,
1329 		       nentries * sizeof(struct ce_desc));
1330 
1331 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1332 	src_ring->sw_index &= src_ring->nentries_mask;
1333 	src_ring->hw_index = src_ring->sw_index;
1334 
1335 	src_ring->write_index =
1336 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
1337 	src_ring->write_index &= src_ring->nentries_mask;
1338 
1339 	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
1340 					 src_ring->base_addr_ce_space);
1341 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
1342 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
1343 	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
1344 	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
1345 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
1346 
1347 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1348 		   "boot init ce src ring id %d entries %d base_addr %pK\n",
1349 		   ce_id, nentries, src_ring->base_addr_owner_space);
1350 
1351 	return 0;
1352 }
1353 
1354 static int ath10k_ce_init_dest_ring(struct ath10k *ar,
1355 				    unsigned int ce_id,
1356 				    const struct ce_attr *attr)
1357 {
1358 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1359 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1360 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
1361 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1362 
1363 	nentries = roundup_pow_of_two(attr->dest_nentries);
1364 
1365 	if (ar->hw_params.target_64bit)
1366 		memset(dest_ring->base_addr_owner_space, 0,
1367 		       nentries * sizeof(struct ce_desc_64));
1368 	else
1369 		memset(dest_ring->base_addr_owner_space, 0,
1370 		       nentries * sizeof(struct ce_desc));
1371 
1372 	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
1373 	dest_ring->sw_index &= dest_ring->nentries_mask;
1374 	dest_ring->write_index =
1375 		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
1376 	dest_ring->write_index &= dest_ring->nentries_mask;
1377 
1378 	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
1379 					  dest_ring->base_addr_ce_space);
1380 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1381 	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1382 	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1383 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1384 
1385 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1386 		   "boot ce dest ring id %d entries %d base_addr %pK\n",
1387 		   ce_id, nentries, dest_ring->base_addr_owner_space);
1388 
1389 	return 0;
1390 }
1391 
1392 static int ath10k_ce_alloc_shadow_base(struct ath10k *ar,
1393 				       struct ath10k_ce_ring *src_ring,
1394 				       u32 nentries)
1395 {
1396 	src_ring->shadow_base_unaligned = kcalloc(nentries,
1397 						  sizeof(struct ce_desc),
1398 						  GFP_KERNEL);
1399 	if (!src_ring->shadow_base_unaligned)
1400 		return -ENOMEM;
1401 
1402 	src_ring->shadow_base = (struct ce_desc *)
1403 			PTR_ALIGN(src_ring->shadow_base_unaligned,
1404 				  CE_DESC_RING_ALIGN);
1405 	return 0;
1406 }
1407 
1408 static struct ath10k_ce_ring *
1409 ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
1410 			 const struct ce_attr *attr)
1411 {
1412 	struct ath10k_ce_ring *src_ring;
1413 	u32 nentries = attr->src_nentries;
1414 	dma_addr_t base_addr;
1415 	int ret;
1416 
1417 	nentries = roundup_pow_of_two(nentries);
1418 
1419 	src_ring = kzalloc(sizeof(*src_ring) +
1420 			   (nentries *
1421 			    sizeof(*src_ring->per_transfer_context)),
1422 			   GFP_KERNEL);
1423 	if (src_ring == NULL)
1424 		return ERR_PTR(-ENOMEM);
1425 
1426 	src_ring->nentries = nentries;
1427 	src_ring->nentries_mask = nentries - 1;
1428 
1429 	/*
1430 	 * Legacy platforms that do not support cache
1431 	 * coherent DMA are unsupported
1432 	 */
1433 	src_ring->base_addr_owner_space_unaligned =
1434 		dma_alloc_coherent(ar->dev,
1435 				   (nentries * sizeof(struct ce_desc) +
1436 				    CE_DESC_RING_ALIGN),
1437 				   &base_addr, GFP_KERNEL);
1438 	if (!src_ring->base_addr_owner_space_unaligned) {
1439 		kfree(src_ring);
1440 		return ERR_PTR(-ENOMEM);
1441 	}
1442 
1443 	src_ring->base_addr_ce_space_unaligned = base_addr;
1444 
1445 	src_ring->base_addr_owner_space =
1446 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1447 				  CE_DESC_RING_ALIGN);
1448 	src_ring->base_addr_ce_space =
1449 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1450 			      CE_DESC_RING_ALIGN);
1451 
1452 	if (ar->hw_params.shadow_reg_support) {
1453 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1454 		if (ret) {
1455 			dma_free_coherent(ar->dev,
1456 					  (nentries * sizeof(struct ce_desc) +
1457 					   CE_DESC_RING_ALIGN),
1458 					  src_ring->base_addr_owner_space_unaligned,
1459 					  base_addr);
1460 			kfree(src_ring);
1461 			return ERR_PTR(ret);
1462 		}
1463 	}
1464 
1465 	return src_ring;
1466 }
1467 
1468 static struct ath10k_ce_ring *
1469 ath10k_ce_alloc_src_ring_64(struct ath10k *ar, unsigned int ce_id,
1470 			    const struct ce_attr *attr)
1471 {
1472 	struct ath10k_ce_ring *src_ring;
1473 	u32 nentries = attr->src_nentries;
1474 	dma_addr_t base_addr;
1475 	int ret;
1476 
1477 	nentries = roundup_pow_of_two(nentries);
1478 
1479 	src_ring = kzalloc(sizeof(*src_ring) +
1480 			   (nentries *
1481 			    sizeof(*src_ring->per_transfer_context)),
1482 			   GFP_KERNEL);
1483 	if (!src_ring)
1484 		return ERR_PTR(-ENOMEM);
1485 
1486 	src_ring->nentries = nentries;
1487 	src_ring->nentries_mask = nentries - 1;
1488 
1489 	/* Legacy platforms that do not support cache
1490 	 * coherent DMA are unsupported
1491 	 */
1492 	src_ring->base_addr_owner_space_unaligned =
1493 		dma_alloc_coherent(ar->dev,
1494 				   (nentries * sizeof(struct ce_desc_64) +
1495 				    CE_DESC_RING_ALIGN),
1496 				   &base_addr, GFP_KERNEL);
1497 	if (!src_ring->base_addr_owner_space_unaligned) {
1498 		kfree(src_ring);
1499 		return ERR_PTR(-ENOMEM);
1500 	}
1501 
1502 	src_ring->base_addr_ce_space_unaligned = base_addr;
1503 
1504 	src_ring->base_addr_owner_space =
1505 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1506 				  CE_DESC_RING_ALIGN);
1507 	src_ring->base_addr_ce_space =
1508 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1509 			      CE_DESC_RING_ALIGN);
1510 
1511 	if (ar->hw_params.shadow_reg_support) {
1512 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1513 		if (ret) {
1514 			dma_free_coherent(ar->dev,
1515 					  (nentries * sizeof(struct ce_desc) +
1516 					   CE_DESC_RING_ALIGN),
1517 					  src_ring->base_addr_owner_space_unaligned,
1518 					  base_addr);
1519 			kfree(src_ring);
1520 			return ERR_PTR(ret);
1521 		}
1522 	}
1523 
1524 	return src_ring;
1525 }
1526 
1527 static struct ath10k_ce_ring *
1528 ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
1529 			  const struct ce_attr *attr)
1530 {
1531 	struct ath10k_ce_ring *dest_ring;
1532 	u32 nentries;
1533 	dma_addr_t base_addr;
1534 
1535 	nentries = roundup_pow_of_two(attr->dest_nentries);
1536 
1537 	dest_ring = kzalloc(sizeof(*dest_ring) +
1538 			    (nentries *
1539 			     sizeof(*dest_ring->per_transfer_context)),
1540 			    GFP_KERNEL);
1541 	if (dest_ring == NULL)
1542 		return ERR_PTR(-ENOMEM);
1543 
1544 	dest_ring->nentries = nentries;
1545 	dest_ring->nentries_mask = nentries - 1;
1546 
1547 	/*
1548 	 * Legacy platforms that do not support cache
1549 	 * coherent DMA are unsupported
1550 	 */
1551 	dest_ring->base_addr_owner_space_unaligned =
1552 		dma_zalloc_coherent(ar->dev,
1553 				    (nentries * sizeof(struct ce_desc) +
1554 				     CE_DESC_RING_ALIGN),
1555 				    &base_addr, GFP_KERNEL);
1556 	if (!dest_ring->base_addr_owner_space_unaligned) {
1557 		kfree(dest_ring);
1558 		return ERR_PTR(-ENOMEM);
1559 	}
1560 
1561 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1562 
1563 	dest_ring->base_addr_owner_space =
1564 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1565 				  CE_DESC_RING_ALIGN);
1566 	dest_ring->base_addr_ce_space =
1567 				ALIGN(dest_ring->base_addr_ce_space_unaligned,
1568 				      CE_DESC_RING_ALIGN);
1569 
1570 	return dest_ring;
1571 }
1572 
1573 static struct ath10k_ce_ring *
1574 ath10k_ce_alloc_dest_ring_64(struct ath10k *ar, unsigned int ce_id,
1575 			     const struct ce_attr *attr)
1576 {
1577 	struct ath10k_ce_ring *dest_ring;
1578 	u32 nentries;
1579 	dma_addr_t base_addr;
1580 
1581 	nentries = roundup_pow_of_two(attr->dest_nentries);
1582 
1583 	dest_ring = kzalloc(sizeof(*dest_ring) +
1584 			    (nentries *
1585 			     sizeof(*dest_ring->per_transfer_context)),
1586 			    GFP_KERNEL);
1587 	if (!dest_ring)
1588 		return ERR_PTR(-ENOMEM);
1589 
1590 	dest_ring->nentries = nentries;
1591 	dest_ring->nentries_mask = nentries - 1;
1592 
1593 	/* Legacy platforms that do not support cache
1594 	 * coherent DMA are unsupported
1595 	 */
1596 	dest_ring->base_addr_owner_space_unaligned =
1597 		dma_alloc_coherent(ar->dev,
1598 				   (nentries * sizeof(struct ce_desc_64) +
1599 				    CE_DESC_RING_ALIGN),
1600 				   &base_addr, GFP_KERNEL);
1601 	if (!dest_ring->base_addr_owner_space_unaligned) {
1602 		kfree(dest_ring);
1603 		return ERR_PTR(-ENOMEM);
1604 	}
1605 
1606 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1607 
1608 	/* Correctly initialize memory to 0 to prevent garbage
1609 	 * data crashing system when download firmware
1610 	 */
1611 	memset(dest_ring->base_addr_owner_space_unaligned, 0,
1612 	       nentries * sizeof(struct ce_desc_64) + CE_DESC_RING_ALIGN);
1613 
1614 	dest_ring->base_addr_owner_space =
1615 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1616 				  CE_DESC_RING_ALIGN);
1617 	dest_ring->base_addr_ce_space =
1618 			ALIGN(dest_ring->base_addr_ce_space_unaligned,
1619 			      CE_DESC_RING_ALIGN);
1620 
1621 	return dest_ring;
1622 }
1623 
1624 /*
1625  * Initialize a Copy Engine based on caller-supplied attributes.
1626  * This may be called once to initialize both source and destination
1627  * rings or it may be called twice for separate source and destination
1628  * initialization. It may be that only one side or the other is
1629  * initialized by software/firmware.
1630  */
1631 int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
1632 			const struct ce_attr *attr)
1633 {
1634 	int ret;
1635 
1636 	if (attr->src_nentries) {
1637 		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
1638 		if (ret) {
1639 			ath10k_err(ar, "Failed to initialize CE src ring for ID: %d (%d)\n",
1640 				   ce_id, ret);
1641 			return ret;
1642 		}
1643 	}
1644 
1645 	if (attr->dest_nentries) {
1646 		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
1647 		if (ret) {
1648 			ath10k_err(ar, "Failed to initialize CE dest ring for ID: %d (%d)\n",
1649 				   ce_id, ret);
1650 			return ret;
1651 		}
1652 	}
1653 
1654 	return 0;
1655 }
1656 EXPORT_SYMBOL(ath10k_ce_init_pipe);
1657 
1658 static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
1659 {
1660 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1661 
1662 	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 0);
1663 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
1664 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
1665 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
1666 }
1667 
1668 static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
1669 {
1670 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1671 
1672 	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 0);
1673 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
1674 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
1675 }
1676 
/*
 * De-initialize both rings of a CE: the source ring registers are cleared
 * first, then the destination ring registers.
 */
void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
{
	/* source side */
	ath10k_ce_deinit_src_ring(ar, ce_id);

	/* destination side */
	ath10k_ce_deinit_dest_ring(ar, ce_id);
}
EXPORT_SYMBOL(ath10k_ce_deinit_pipe);
1683 
1684 static void _ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1685 {
1686 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1687 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1688 
1689 	if (ce_state->src_ring) {
1690 		if (ar->hw_params.shadow_reg_support)
1691 			kfree(ce_state->src_ring->shadow_base_unaligned);
1692 		dma_free_coherent(ar->dev,
1693 				  (ce_state->src_ring->nentries *
1694 				   sizeof(struct ce_desc) +
1695 				   CE_DESC_RING_ALIGN),
1696 				  ce_state->src_ring->base_addr_owner_space,
1697 				  ce_state->src_ring->base_addr_ce_space);
1698 		kfree(ce_state->src_ring);
1699 	}
1700 
1701 	if (ce_state->dest_ring) {
1702 		dma_free_coherent(ar->dev,
1703 				  (ce_state->dest_ring->nentries *
1704 				   sizeof(struct ce_desc) +
1705 				   CE_DESC_RING_ALIGN),
1706 				  ce_state->dest_ring->base_addr_owner_space,
1707 				  ce_state->dest_ring->base_addr_ce_space);
1708 		kfree(ce_state->dest_ring);
1709 	}
1710 
1711 	ce_state->src_ring = NULL;
1712 	ce_state->dest_ring = NULL;
1713 }
1714 
1715 static void _ath10k_ce_free_pipe_64(struct ath10k *ar, int ce_id)
1716 {
1717 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1718 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1719 
1720 	if (ce_state->src_ring) {
1721 		if (ar->hw_params.shadow_reg_support)
1722 			kfree(ce_state->src_ring->shadow_base_unaligned);
1723 		dma_free_coherent(ar->dev,
1724 				  (ce_state->src_ring->nentries *
1725 				   sizeof(struct ce_desc_64) +
1726 				   CE_DESC_RING_ALIGN),
1727 				  ce_state->src_ring->base_addr_owner_space,
1728 				  ce_state->src_ring->base_addr_ce_space);
1729 		kfree(ce_state->src_ring);
1730 	}
1731 
1732 	if (ce_state->dest_ring) {
1733 		dma_free_coherent(ar->dev,
1734 				  (ce_state->dest_ring->nentries *
1735 				   sizeof(struct ce_desc_64) +
1736 				   CE_DESC_RING_ALIGN),
1737 				  ce_state->dest_ring->base_addr_owner_space,
1738 				  ce_state->dest_ring->base_addr_ce_space);
1739 		kfree(ce_state->dest_ring);
1740 	}
1741 
1742 	ce_state->src_ring = NULL;
1743 	ce_state->dest_ring = NULL;
1744 }
1745 
1746 void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1747 {
1748 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1749 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1750 
1751 	ce_state->ops->ce_free_pipe(ar, ce_id);
1752 }
1753 EXPORT_SYMBOL(ath10k_ce_free_pipe);
1754 
1755 void ath10k_ce_dump_registers(struct ath10k *ar,
1756 			      struct ath10k_fw_crash_data *crash_data)
1757 {
1758 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1759 	struct ath10k_ce_crash_data ce_data;
1760 	u32 addr, id;
1761 
1762 	lockdep_assert_held(&ar->data_lock);
1763 
1764 	ath10k_err(ar, "Copy Engine register dump:\n");
1765 
1766 	spin_lock_bh(&ce->ce_lock);
1767 	for (id = 0; id < CE_COUNT; id++) {
1768 		addr = ath10k_ce_base_address(ar, id);
1769 		ce_data.base_addr = cpu_to_le32(addr);
1770 
1771 		ce_data.src_wr_idx =
1772 			cpu_to_le32(ath10k_ce_src_ring_write_index_get(ar, addr));
1773 		ce_data.src_r_idx =
1774 			cpu_to_le32(ath10k_ce_src_ring_read_index_get(ar, addr));
1775 		ce_data.dst_wr_idx =
1776 			cpu_to_le32(ath10k_ce_dest_ring_write_index_get(ar, addr));
1777 		ce_data.dst_r_idx =
1778 			cpu_to_le32(ath10k_ce_dest_ring_read_index_get(ar, addr));
1779 
1780 		if (crash_data)
1781 			crash_data->ce_crash_data[id] = ce_data;
1782 
1783 		ath10k_err(ar, "[%02d]: 0x%08x %3u %3u %3u %3u", id,
1784 			   le32_to_cpu(ce_data.base_addr),
1785 			   le32_to_cpu(ce_data.src_wr_idx),
1786 			   le32_to_cpu(ce_data.src_r_idx),
1787 			   le32_to_cpu(ce_data.dst_wr_idx),
1788 			   le32_to_cpu(ce_data.dst_r_idx));
1789 	}
1790 
1791 	spin_unlock_bh(&ce->ce_lock);
1792 }
1793 EXPORT_SYMBOL(ath10k_ce_dump_registers);
1794 
1795 static const struct ath10k_ce_ops ce_ops = {
1796 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring,
1797 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring,
1798 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf,
1799 	.ce_completed_recv_next_nolock = _ath10k_ce_completed_recv_next_nolock,
1800 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next,
1801 	.ce_extract_desc_data = ath10k_ce_extract_desc_data,
1802 	.ce_free_pipe = _ath10k_ce_free_pipe,
1803 	.ce_send_nolock = _ath10k_ce_send_nolock,
1804 };
1805 
1806 static const struct ath10k_ce_ops ce_64_ops = {
1807 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring_64,
1808 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring_64,
1809 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf_64,
1810 	.ce_completed_recv_next_nolock =
1811 				_ath10k_ce_completed_recv_next_nolock_64,
1812 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next_64,
1813 	.ce_extract_desc_data = ath10k_ce_extract_desc_data_64,
1814 	.ce_free_pipe = _ath10k_ce_free_pipe_64,
1815 	.ce_send_nolock = _ath10k_ce_send_nolock_64,
1816 };
1817 
1818 static void ath10k_ce_set_ops(struct ath10k *ar,
1819 			      struct ath10k_ce_pipe *ce_state)
1820 {
1821 	switch (ar->hw_rev) {
1822 	case ATH10K_HW_WCN3990:
1823 		ce_state->ops = &ce_64_ops;
1824 		break;
1825 	default:
1826 		ce_state->ops = &ce_ops;
1827 		break;
1828 	}
1829 }
1830 
1831 int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
1832 			 const struct ce_attr *attr)
1833 {
1834 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1835 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1836 	int ret;
1837 
1838 	ath10k_ce_set_ops(ar, ce_state);
1839 	/* Make sure there's enough CE ringbuffer entries for HTT TX to avoid
1840 	 * additional TX locking checks.
1841 	 *
1842 	 * For the lack of a better place do the check here.
1843 	 */
1844 	BUILD_BUG_ON(2 * TARGET_NUM_MSDU_DESC >
1845 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1846 	BUILD_BUG_ON(2 * TARGET_10_4_NUM_MSDU_DESC_PFC >
1847 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1848 	BUILD_BUG_ON(2 * TARGET_TLV_NUM_MSDU_DESC >
1849 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1850 
1851 	ce_state->ar = ar;
1852 	ce_state->id = ce_id;
1853 	ce_state->ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1854 	ce_state->attr_flags = attr->flags;
1855 	ce_state->src_sz_max = attr->src_sz_max;
1856 
1857 	if (attr->src_nentries)
1858 		ce_state->send_cb = attr->send_cb;
1859 
1860 	if (attr->dest_nentries)
1861 		ce_state->recv_cb = attr->recv_cb;
1862 
1863 	if (attr->src_nentries) {
1864 		ce_state->src_ring =
1865 			ce_state->ops->ce_alloc_src_ring(ar, ce_id, attr);
1866 		if (IS_ERR(ce_state->src_ring)) {
1867 			ret = PTR_ERR(ce_state->src_ring);
1868 			ath10k_err(ar, "failed to alloc CE src ring %d: %d\n",
1869 				   ce_id, ret);
1870 			ce_state->src_ring = NULL;
1871 			return ret;
1872 		}
1873 	}
1874 
1875 	if (attr->dest_nentries) {
1876 		ce_state->dest_ring = ce_state->ops->ce_alloc_dst_ring(ar,
1877 									ce_id,
1878 									attr);
1879 		if (IS_ERR(ce_state->dest_ring)) {
1880 			ret = PTR_ERR(ce_state->dest_ring);
1881 			ath10k_err(ar, "failed to alloc CE dest ring %d: %d\n",
1882 				   ce_id, ret);
1883 			ce_state->dest_ring = NULL;
1884 			return ret;
1885 		}
1886 	}
1887 
1888 	return 0;
1889 }
1890 EXPORT_SYMBOL(ath10k_ce_alloc_pipe);
1891 
1892 void ath10k_ce_alloc_rri(struct ath10k *ar)
1893 {
1894 	int i;
1895 	u32 value;
1896 	u32 ctrl1_regs;
1897 	u32 ce_base_addr;
1898 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1899 
1900 	ce->vaddr_rri = dma_alloc_coherent(ar->dev,
1901 					   (CE_COUNT * sizeof(u32)),
1902 					   &ce->paddr_rri, GFP_KERNEL);
1903 
1904 	if (!ce->vaddr_rri)
1905 		return;
1906 
1907 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_low,
1908 			  lower_32_bits(ce->paddr_rri));
1909 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_high,
1910 			  (upper_32_bits(ce->paddr_rri) &
1911 			  CE_DESC_FLAGS_GET_MASK));
1912 
1913 	for (i = 0; i < CE_COUNT; i++) {
1914 		ctrl1_regs = ar->hw_ce_regs->ctrl1_regs->addr;
1915 		ce_base_addr = ath10k_ce_base_address(ar, i);
1916 		value = ath10k_ce_read32(ar, ce_base_addr + ctrl1_regs);
1917 		value |= ar->hw_ce_regs->upd->mask;
1918 		ath10k_ce_write32(ar, ce_base_addr + ctrl1_regs, value);
1919 	}
1920 
1921 	memset(ce->vaddr_rri, 0, CE_COUNT * sizeof(u32));
1922 }
1923 EXPORT_SYMBOL(ath10k_ce_alloc_rri);
1924 
1925 void ath10k_ce_free_rri(struct ath10k *ar)
1926 {
1927 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1928 
1929 	dma_free_coherent(ar->dev, (CE_COUNT * sizeof(u32)),
1930 			  ce->vaddr_rri,
1931 			  ce->paddr_rri);
1932 }
1933 EXPORT_SYMBOL(ath10k_ce_free_rri);
1934