xref: /freebsd/sys/contrib/dev/athk/ath10k/ce.c (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1 // SPDX-License-Identifier: ISC
2 /*
3  * Copyright (c) 2005-2011 Atheros Communications Inc.
4  * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
5  * Copyright (c) 2018 The Linux Foundation. All rights reserved.
6  */
7 
8 #include "hif.h"
9 #include "ce.h"
10 #include "debug.h"
11 
12 /*
13  * Support for Copy Engine hardware, which is mainly used for
14  * communication between Host and Target over a PCIe interconnect.
15  */
16 
17 /*
18  * A single CopyEngine (CE) comprises two "rings":
19  *   a source ring
20  *   a destination ring
21  *
22  * Each ring consists of a number of descriptors which specify
23  * an address, length, and meta-data.
24  *
25  * Typically, one side of the PCIe/AHB/SNOC interconnect (Host or Target)
26  * controls one ring and the other side controls the other ring.
27  * The source side chooses when to initiate a transfer and it
28  * chooses what to send (buffer address, length). The destination
29  * side keeps a supply of "anonymous receive buffers" available and
30  * it handles incoming data as it arrives (when the destination
31  * receives an interrupt).
32  *
33  * The sender may send a simple buffer (address/length) or it may
34  * send a small list of buffers.  When a small list is sent, hardware
35  * "gathers" these and they end up in a single destination buffer
36  * with a single interrupt.
37  *
38  * There are several "contexts" managed by this layer -- more, it
39  * may seem -- than should be needed. These are provided mainly for
40  * maximum flexibility and especially to facilitate a simpler HIF
41  * implementation. There are per-CopyEngine recv, send, and watermark
42  * contexts. These are supplied by the caller when a recv, send,
43  * or watermark handler is established and they are echoed back to
44  * the caller when the respective callbacks are invoked. There is
45  * also a per-transfer context supplied by the caller when a buffer
46  * (or sendlist) is sent and when a buffer is enqueued for recv.
47  * These per-transfer contexts are echoed back to the caller when
48  * the buffer is sent/received.
49  */
50 
51 static inline u32 shadow_sr_wr_ind_addr(struct ath10k *ar,
52 					struct ath10k_ce_pipe *ce_state)
53 {
54 	u32 ce_id = ce_state->id;
55 	u32 addr = 0;
56 
57 	switch (ce_id) {
58 	case 0:
59 		addr = 0x00032000;
60 		break;
61 	case 3:
62 		addr = 0x0003200C;
63 		break;
64 	case 4:
65 		addr = 0x00032010;
66 		break;
67 	case 5:
68 		addr = 0x00032014;
69 		break;
70 	case 7:
71 		addr = 0x0003201C;
72 		break;
73 	default:
74 		ath10k_warn(ar, "invalid CE id: %d", ce_id);
75 		break;
76 	}
77 	return addr;
78 }
79 
80 #if defined(__linux__)
81 static inline u32 shadow_dst_wr_ind_addr(struct ath10k *ar,
82 					 struct ath10k_ce_pipe *ce_state)
83 {
84 	u32 ce_id = ce_state->id;
85 	u32 addr = 0;
86 
87 	switch (ce_id) {
88 	case 1:
89 		addr = 0x00032034;
90 		break;
91 	case 2:
92 		addr = 0x00032038;
93 		break;
94 	case 5:
95 		addr = 0x00032044;
96 		break;
97 	case 7:
98 		addr = 0x0003204C;
99 		break;
100 	case 8:
101 		addr = 0x00032050;
102 		break;
103 	case 9:
104 		addr = 0x00032054;
105 		break;
106 	case 10:
107 		addr = 0x00032058;
108 		break;
109 	case 11:
110 		addr = 0x0003205C;
111 		break;
112 	default:
113 		ath10k_warn(ar, "invalid CE id: %d", ce_id);
114 		break;
115 	}
116 
117 	return addr;
118 }
119 #endif
120 
121 static inline unsigned int
122 ath10k_set_ring_byte(unsigned int offset,
123 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
124 {
125 	return ((offset << addr_map->lsb) & addr_map->mask);
126 }
127 
128 #if defined(__linux__)
129 static inline unsigned int
130 ath10k_get_ring_byte(unsigned int offset,
131 		     struct ath10k_hw_ce_regs_addr_map *addr_map)
132 {
133 	return ((offset & addr_map->mask) >> (addr_map->lsb));
134 }
135 #endif
136 
137 static inline u32 ath10k_ce_read32(struct ath10k *ar, u32 offset)
138 {
139 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
140 
141 	return ce->bus_ops->read32(ar, offset);
142 }
143 
144 static inline void ath10k_ce_write32(struct ath10k *ar, u32 offset, u32 value)
145 {
146 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
147 
148 	ce->bus_ops->write32(ar, offset, value);
149 }
150 
151 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
152 						       u32 ce_ctrl_addr,
153 						       unsigned int n)
154 {
155 	ath10k_ce_write32(ar, ce_ctrl_addr +
156 			  ar->hw_ce_regs->dst_wr_index_addr, n);
157 }
158 
159 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
160 						      u32 ce_ctrl_addr)
161 {
162 	return ath10k_ce_read32(ar, ce_ctrl_addr +
163 				ar->hw_ce_regs->dst_wr_index_addr);
164 }
165 
166 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
167 						      u32 ce_ctrl_addr,
168 						      unsigned int n)
169 {
170 	ath10k_ce_write32(ar, ce_ctrl_addr +
171 			  ar->hw_ce_regs->sr_wr_index_addr, n);
172 }
173 
174 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
175 						     u32 ce_ctrl_addr)
176 {
177 	return ath10k_ce_read32(ar, ce_ctrl_addr +
178 				ar->hw_ce_regs->sr_wr_index_addr);
179 }
180 
181 static inline u32 ath10k_ce_src_ring_read_index_from_ddr(struct ath10k *ar,
182 							 u32 ce_id)
183 {
184 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
185 
186 	return ce->vaddr_rri[ce_id] & CE_DDR_RRI_MASK;
187 }
188 
189 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
190 						    u32 ce_ctrl_addr)
191 {
192 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
193 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
194 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
195 	u32 index;
196 
197 	if (ar->hw_params.rri_on_ddr &&
198 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
199 		index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_id);
200 	else
201 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
202 					 ar->hw_ce_regs->current_srri_addr);
203 
204 	return index;
205 }
206 
207 static inline void
208 ath10k_ce_shadow_src_ring_write_index_set(struct ath10k *ar,
209 					  struct ath10k_ce_pipe *ce_state,
210 					  unsigned int value)
211 {
212 	ath10k_ce_write32(ar, shadow_sr_wr_ind_addr(ar, ce_state), value);
213 }
214 
215 #if defined(__linux__)
216 static inline void
217 ath10k_ce_shadow_dest_ring_write_index_set(struct ath10k *ar,
218 					   struct ath10k_ce_pipe *ce_state,
219 					   unsigned int value)
220 {
221 	ath10k_ce_write32(ar, shadow_dst_wr_ind_addr(ar, ce_state), value);
222 }
223 #endif
224 
225 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
226 						    u32 ce_id,
227 						    u64 addr)
228 {
229 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
230 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
231 	u32 ce_ctrl_addr = ath10k_ce_base_address(ar, ce_id);
232 	u32 addr_lo = lower_32_bits(addr);
233 
234 	ath10k_ce_write32(ar, ce_ctrl_addr +
235 			  ar->hw_ce_regs->sr_base_addr_lo, addr_lo);
236 
237 	if (ce_state->ops->ce_set_src_ring_base_addr_hi) {
238 		ce_state->ops->ce_set_src_ring_base_addr_hi(ar, ce_ctrl_addr,
239 							    addr);
240 	}
241 }
242 
243 static void ath10k_ce_set_src_ring_base_addr_hi(struct ath10k *ar,
244 						u32 ce_ctrl_addr,
245 						u64 addr)
246 {
247 	u32 addr_hi = upper_32_bits(addr) & CE_DESC_ADDR_HI_MASK;
248 
249 	ath10k_ce_write32(ar, ce_ctrl_addr +
250 			  ar->hw_ce_regs->sr_base_addr_hi, addr_hi);
251 }
252 
253 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
254 					       u32 ce_ctrl_addr,
255 					       unsigned int n)
256 {
257 	ath10k_ce_write32(ar, ce_ctrl_addr +
258 			  ar->hw_ce_regs->sr_size_addr, n);
259 }
260 
261 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
262 					       u32 ce_ctrl_addr,
263 					       unsigned int n)
264 {
265 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
266 
267 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
268 					  ctrl_regs->addr);
269 
270 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
271 			  (ctrl1_addr &  ~(ctrl_regs->dmax->mask)) |
272 			  ath10k_set_ring_byte(n, ctrl_regs->dmax));
273 }
274 
275 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
276 						    u32 ce_ctrl_addr,
277 						    unsigned int n)
278 {
279 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
280 
281 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
282 					  ctrl_regs->addr);
283 
284 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
285 			  (ctrl1_addr & ~(ctrl_regs->src_ring->mask)) |
286 			  ath10k_set_ring_byte(n, ctrl_regs->src_ring));
287 }
288 
289 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
290 						     u32 ce_ctrl_addr,
291 						     unsigned int n)
292 {
293 	struct ath10k_hw_ce_ctrl1 *ctrl_regs = ar->hw_ce_regs->ctrl1_regs;
294 
295 	u32 ctrl1_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
296 					  ctrl_regs->addr);
297 
298 	ath10k_ce_write32(ar, ce_ctrl_addr + ctrl_regs->addr,
299 			  (ctrl1_addr & ~(ctrl_regs->dst_ring->mask)) |
300 			  ath10k_set_ring_byte(n, ctrl_regs->dst_ring));
301 }
302 
303 static inline
304 	u32 ath10k_ce_dest_ring_read_index_from_ddr(struct ath10k *ar, u32 ce_id)
305 {
306 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
307 
308 	return (ce->vaddr_rri[ce_id] >> CE_DDR_DRRI_SHIFT) &
309 		CE_DDR_RRI_MASK;
310 }
311 
312 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
313 						     u32 ce_ctrl_addr)
314 {
315 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
316 	u32 ce_id = COPY_ENGINE_ID(ce_ctrl_addr);
317 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
318 	u32 index;
319 
320 	if (ar->hw_params.rri_on_ddr &&
321 	    (ce_state->attr_flags & CE_ATTR_DIS_INTR))
322 		index = ath10k_ce_dest_ring_read_index_from_ddr(ar, ce_id);
323 	else
324 		index = ath10k_ce_read32(ar, ce_ctrl_addr +
325 					 ar->hw_ce_regs->current_drri_addr);
326 
327 	return index;
328 }
329 
330 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
331 						     u32 ce_id,
332 						     u64 addr)
333 {
334 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
335 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
336 	u32 ce_ctrl_addr = ath10k_ce_base_address(ar, ce_id);
337 	u32 addr_lo = lower_32_bits(addr);
338 
339 	ath10k_ce_write32(ar, ce_ctrl_addr +
340 			  ar->hw_ce_regs->dr_base_addr_lo, addr_lo);
341 
342 	if (ce_state->ops->ce_set_dest_ring_base_addr_hi) {
343 		ce_state->ops->ce_set_dest_ring_base_addr_hi(ar, ce_ctrl_addr,
344 							     addr);
345 	}
346 }
347 
348 static void ath10k_ce_set_dest_ring_base_addr_hi(struct ath10k *ar,
349 						 u32 ce_ctrl_addr,
350 						 u64 addr)
351 {
352 	u32 addr_hi = upper_32_bits(addr) & CE_DESC_ADDR_HI_MASK;
353 	u32 reg_value;
354 
355 	reg_value = ath10k_ce_read32(ar, ce_ctrl_addr +
356 				     ar->hw_ce_regs->dr_base_addr_hi);
357 	reg_value &= ~CE_DESC_ADDR_HI_MASK;
358 	reg_value |= addr_hi;
359 	ath10k_ce_write32(ar, ce_ctrl_addr +
360 			  ar->hw_ce_regs->dr_base_addr_hi, reg_value);
361 }
362 
363 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
364 						u32 ce_ctrl_addr,
365 						unsigned int n)
366 {
367 	ath10k_ce_write32(ar, ce_ctrl_addr +
368 			  ar->hw_ce_regs->dr_size_addr, n);
369 }
370 
371 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
372 						   u32 ce_ctrl_addr,
373 						   unsigned int n)
374 {
375 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
376 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
377 
378 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
379 			  (addr & ~(srcr_wm->wm_high->mask)) |
380 			  (ath10k_set_ring_byte(n, srcr_wm->wm_high)));
381 }
382 
383 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
384 						  u32 ce_ctrl_addr,
385 						  unsigned int n)
386 {
387 	struct ath10k_hw_ce_dst_src_wm_regs *srcr_wm = ar->hw_ce_regs->wm_srcr;
388 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + srcr_wm->addr);
389 
390 	ath10k_ce_write32(ar, ce_ctrl_addr + srcr_wm->addr,
391 			  (addr & ~(srcr_wm->wm_low->mask)) |
392 			  (ath10k_set_ring_byte(n, srcr_wm->wm_low)));
393 }
394 
395 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
396 						    u32 ce_ctrl_addr,
397 						    unsigned int n)
398 {
399 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
400 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
401 
402 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
403 			  (addr & ~(dstr_wm->wm_high->mask)) |
404 			  (ath10k_set_ring_byte(n, dstr_wm->wm_high)));
405 }
406 
407 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
408 						   u32 ce_ctrl_addr,
409 						   unsigned int n)
410 {
411 	struct ath10k_hw_ce_dst_src_wm_regs *dstr_wm = ar->hw_ce_regs->wm_dstr;
412 	u32 addr = ath10k_ce_read32(ar, ce_ctrl_addr + dstr_wm->addr);
413 
414 	ath10k_ce_write32(ar, ce_ctrl_addr + dstr_wm->addr,
415 			  (addr & ~(dstr_wm->wm_low->mask)) |
416 			  (ath10k_set_ring_byte(n, dstr_wm->wm_low)));
417 }
418 
419 static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
420 							u32 ce_ctrl_addr)
421 {
422 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
423 
424 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
425 					    ar->hw_ce_regs->host_ie_addr);
426 
427 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
428 			  host_ie_addr | host_ie->copy_complete->mask);
429 }
430 
431 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
432 							u32 ce_ctrl_addr)
433 {
434 	struct ath10k_hw_ce_host_ie *host_ie = ar->hw_ce_regs->host_ie;
435 
436 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
437 					    ar->hw_ce_regs->host_ie_addr);
438 
439 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
440 			  host_ie_addr & ~(host_ie->copy_complete->mask));
441 }
442 
443 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
444 						    u32 ce_ctrl_addr)
445 {
446 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
447 
448 	u32 host_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
449 					    ar->hw_ce_regs->host_ie_addr);
450 
451 	ath10k_ce_write32(ar, ce_ctrl_addr + ar->hw_ce_regs->host_ie_addr,
452 			  host_ie_addr & ~(wm_regs->wm_mask));
453 }
454 
455 #if defined(__linux__)
456 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
457 					       u32 ce_ctrl_addr)
458 {
459 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
460 
461 	u32 misc_ie_addr = ath10k_ce_read32(ar, ce_ctrl_addr +
462 					    ar->hw_ce_regs->misc_ie_addr);
463 
464 	ath10k_ce_write32(ar,
465 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
466 			  misc_ie_addr | misc_regs->err_mask);
467 }
468 #endif
469 
470 static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
471 						u32 ce_ctrl_addr)
472 {
473 	struct ath10k_hw_ce_misc_regs *misc_regs = ar->hw_ce_regs->misc_regs;
474 
475 	u32 misc_ie_addr = ath10k_ce_read32(ar,
476 			ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr);
477 
478 	ath10k_ce_write32(ar,
479 			  ce_ctrl_addr + ar->hw_ce_regs->misc_ie_addr,
480 			  misc_ie_addr & ~(misc_regs->err_mask));
481 }
482 
483 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
484 						     u32 ce_ctrl_addr,
485 						     unsigned int mask)
486 {
487 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
488 
489 	ath10k_ce_write32(ar, ce_ctrl_addr + wm_regs->addr, mask);
490 }
491 
492 /*
493  * Guts of ath10k_ce_send.
494  * The caller takes responsibility for any needed locking.
495  */
496 static int _ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
497 				  void *per_transfer_context,
498 				  dma_addr_t buffer,
499 				  unsigned int nbytes,
500 				  unsigned int transfer_id,
501 				  unsigned int flags)
502 {
503 	struct ath10k *ar = ce_state->ar;
504 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
505 	struct ce_desc *desc, sdesc;
506 	unsigned int nentries_mask = src_ring->nentries_mask;
507 	unsigned int sw_index = src_ring->sw_index;
508 	unsigned int write_index = src_ring->write_index;
509 	u32 ctrl_addr = ce_state->ctrl_addr;
510 	u32 desc_flags = 0;
511 	int ret = 0;
512 
513 	if (nbytes > ce_state->src_sz_max)
514 		ath10k_warn(ar, "%s: send more we can (nbytes: %d, max: %d)\n",
515 			    __func__, nbytes, ce_state->src_sz_max);
516 
517 	if (unlikely(CE_RING_DELTA(nentries_mask,
518 				   write_index, sw_index - 1) <= 0)) {
519 		ret = -ENOSR;
520 		goto exit;
521 	}
522 
523 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
524 				   write_index);
525 
526 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
527 
528 	if (flags & CE_SEND_FLAG_GATHER)
529 		desc_flags |= CE_DESC_FLAGS_GATHER;
530 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
531 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
532 
533 	sdesc.addr   = __cpu_to_le32(buffer);
534 	sdesc.nbytes = __cpu_to_le16(nbytes);
535 	sdesc.flags  = __cpu_to_le16(desc_flags);
536 
537 	*desc = sdesc;
538 
539 	src_ring->per_transfer_context[write_index] = per_transfer_context;
540 
541 	/* Update Source Ring Write Index */
542 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
543 
544 	/* WORKAROUND */
545 	if (!(flags & CE_SEND_FLAG_GATHER))
546 		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
547 
548 	src_ring->write_index = write_index;
549 exit:
550 	return ret;
551 }
552 
/*
 * Core of ath10k_ce_send for 64-bit descriptors (struct ce_desc_64).
 * No lock is taken here.
 *
 * Fills the source ring descriptor at the current write index and
 * advances the software write index.
 *
 * Returns 0 on success, -ESHUTDOWN when ATH10K_FLAG_CRASH_FLUSH is set,
 * or -ENOSR when the ring has no free entry.
 */
static int _ath10k_ce_send_nolock_64(struct ath10k_ce_pipe *ce_state,
				     void *per_transfer_context,
				     dma_addr_t buffer,
				     unsigned int nbytes,
				     unsigned int transfer_id,
				     unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	struct ce_desc_64 *desc, sdesc;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index;
	unsigned int write_index = src_ring->write_index;
	u32 ctrl_addr = ce_state->ctrl_addr;
	__le32 *addr;
	u32 desc_flags = 0;
	int ret = 0;

	/* Refuse new transfers while a crash flush is flagged */
	if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags))
		return -ESHUTDOWN;

	/* Oversized requests are only reported, not rejected */
	if (nbytes > ce_state->src_sz_max)
		ath10k_warn(ar, "%s: send more we can (nbytes: %d, max: %d)\n",
			    __func__, nbytes, ce_state->src_sz_max);

	/* Pick the read index from DDR when the hardware keeps it there */
	if (ar->hw_params.rri_on_ddr)
		sw_index = ath10k_ce_src_ring_read_index_from_ddr(ar, ce_state->id);
	else
		sw_index = src_ring->sw_index;

	if (unlikely(CE_RING_DELTA(nentries_mask,
				   write_index, sw_index - 1) <= 0)) {
		ret = -ENOSR;
		goto exit;
	}

	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
				      write_index);

	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);

	if (flags & CE_SEND_FLAG_GATHER)
		desc_flags |= CE_DESC_FLAGS_GATHER;

	if (flags & CE_SEND_FLAG_BYTE_SWAP)
		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;

	/* View the address field of the staged descriptor as two 32-bit words */
	addr = (__le32 *)&sdesc.addr;

	/*
	 * From here on @flags also carries the masked upper address bits;
	 * the combined value becomes the second address word.
	 * NOTE(review): the CE_SEND_FLAG_GATHER tests below assume that
	 * CE_DESC_ADDR_HI_MASK does not overlap that flag - confirm.
	 */
	flags |= upper_32_bits(buffer) & CE_DESC_ADDR_HI_MASK;
	addr[0] = __cpu_to_le32(buffer);
	addr[1] = __cpu_to_le32(flags);
	if (flags & CE_SEND_FLAG_GATHER)
		addr[1] |= __cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER);
	else
		addr[1] &= ~(__cpu_to_le32(CE_WCN3990_DESC_FLAGS_GATHER));

	sdesc.nbytes = __cpu_to_le16(nbytes);
	sdesc.flags  = __cpu_to_le16(desc_flags);

	/* Store the staged descriptor into the ring with one assignment */
	*desc = sdesc;

	src_ring->per_transfer_context[write_index] = per_transfer_context;

	/* Update Source Ring Write Index */
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);

	/* The hardware write index is not written for gather entries */
	if (!(flags & CE_SEND_FLAG_GATHER)) {
		if (ar->hw_params.shadow_reg_support)
			ath10k_ce_shadow_src_ring_write_index_set(ar, ce_state,
								  write_index);
		else
			ath10k_ce_src_ring_write_index_set(ar, ctrl_addr,
							   write_index);
	}

	src_ring->write_index = write_index;
exit:
	return ret;
}
633 
634 int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
635 			  void *per_transfer_context,
636 			  dma_addr_t buffer,
637 			  unsigned int nbytes,
638 			  unsigned int transfer_id,
639 			  unsigned int flags)
640 {
641 	return ce_state->ops->ce_send_nolock(ce_state, per_transfer_context,
642 				    buffer, nbytes, transfer_id, flags);
643 }
644 EXPORT_SYMBOL(ath10k_ce_send_nolock);
645 
646 void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
647 {
648 	struct ath10k *ar = pipe->ar;
649 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
650 	struct ath10k_ce_ring *src_ring = pipe->src_ring;
651 	u32 ctrl_addr = pipe->ctrl_addr;
652 
653 	lockdep_assert_held(&ce->ce_lock);
654 
655 	/*
656 	 * This function must be called only if there is an incomplete
657 	 * scatter-gather transfer (before index register is updated)
658 	 * that needs to be cleaned up.
659 	 */
660 	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
661 		return;
662 
663 	if (WARN_ON_ONCE(src_ring->write_index ==
664 			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
665 		return;
666 
667 	src_ring->write_index--;
668 	src_ring->write_index &= src_ring->nentries_mask;
669 
670 	src_ring->per_transfer_context[src_ring->write_index] = NULL;
671 }
672 EXPORT_SYMBOL(__ath10k_ce_send_revert);
673 
674 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
675 		   void *per_transfer_context,
676 		   dma_addr_t buffer,
677 		   unsigned int nbytes,
678 		   unsigned int transfer_id,
679 		   unsigned int flags)
680 {
681 	struct ath10k *ar = ce_state->ar;
682 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
683 	int ret;
684 
685 	spin_lock_bh(&ce->ce_lock);
686 	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
687 				    buffer, nbytes, transfer_id, flags);
688 	spin_unlock_bh(&ce->ce_lock);
689 
690 	return ret;
691 }
692 EXPORT_SYMBOL(ath10k_ce_send);
693 
694 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
695 {
696 	struct ath10k *ar = pipe->ar;
697 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
698 	int delta;
699 
700 	spin_lock_bh(&ce->ce_lock);
701 	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
702 			      pipe->src_ring->write_index,
703 			      pipe->src_ring->sw_index - 1);
704 	spin_unlock_bh(&ce->ce_lock);
705 
706 	return delta;
707 }
708 EXPORT_SYMBOL(ath10k_ce_num_free_src_entries);
709 
710 int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
711 {
712 	struct ath10k *ar = pipe->ar;
713 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
714 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
715 	unsigned int nentries_mask = dest_ring->nentries_mask;
716 	unsigned int write_index = dest_ring->write_index;
717 	unsigned int sw_index = dest_ring->sw_index;
718 
719 	lockdep_assert_held(&ce->ce_lock);
720 
721 	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
722 }
723 EXPORT_SYMBOL(__ath10k_ce_rx_num_free_bufs);
724 
725 static int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
726 				   dma_addr_t paddr)
727 {
728 	struct ath10k *ar = pipe->ar;
729 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
730 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
731 	unsigned int nentries_mask = dest_ring->nentries_mask;
732 	unsigned int write_index = dest_ring->write_index;
733 	unsigned int sw_index = dest_ring->sw_index;
734 	struct ce_desc *base = dest_ring->base_addr_owner_space;
735 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
736 	u32 ctrl_addr = pipe->ctrl_addr;
737 
738 	lockdep_assert_held(&ce->ce_lock);
739 
740 	if ((pipe->id != 5) &&
741 	    CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
742 		return -ENOSPC;
743 
744 	desc->addr = __cpu_to_le32(paddr);
745 	desc->nbytes = 0;
746 
747 	dest_ring->per_transfer_context[write_index] = ctx;
748 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
749 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
750 	dest_ring->write_index = write_index;
751 
752 	return 0;
753 }
754 
755 static int __ath10k_ce_rx_post_buf_64(struct ath10k_ce_pipe *pipe,
756 				      void *ctx,
757 				      dma_addr_t paddr)
758 {
759 	struct ath10k *ar = pipe->ar;
760 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
761 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
762 	unsigned int nentries_mask = dest_ring->nentries_mask;
763 	unsigned int write_index = dest_ring->write_index;
764 	unsigned int sw_index = dest_ring->sw_index;
765 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
766 	struct ce_desc_64 *desc =
767 			CE_DEST_RING_TO_DESC_64(base, write_index);
768 	u32 ctrl_addr = pipe->ctrl_addr;
769 
770 	lockdep_assert_held(&ce->ce_lock);
771 
772 	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
773 		return -ENOSPC;
774 
775 	desc->addr = __cpu_to_le64(paddr);
776 	desc->addr &= __cpu_to_le64(CE_DESC_ADDR_MASK);
777 
778 	desc->nbytes = 0;
779 
780 	dest_ring->per_transfer_context[write_index] = ctx;
781 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
782 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
783 	dest_ring->write_index = write_index;
784 
785 	return 0;
786 }
787 
788 void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries)
789 {
790 	struct ath10k *ar = pipe->ar;
791 	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
792 	unsigned int nentries_mask = dest_ring->nentries_mask;
793 	unsigned int write_index = dest_ring->write_index;
794 	u32 ctrl_addr = pipe->ctrl_addr;
795 	u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
796 
797 	/* Prevent CE ring stuck issue that will occur when ring is full.
798 	 * Make sure that write index is 1 less than read index.
799 	 */
800 	if (((cur_write_idx + nentries) & nentries_mask) == dest_ring->sw_index)
801 		nentries -= 1;
802 
803 	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
804 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
805 	dest_ring->write_index = write_index;
806 }
807 EXPORT_SYMBOL(ath10k_ce_rx_update_write_idx);
808 
809 int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx,
810 			  dma_addr_t paddr)
811 {
812 	struct ath10k *ar = pipe->ar;
813 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
814 	int ret;
815 
816 	spin_lock_bh(&ce->ce_lock);
817 	ret = pipe->ops->ce_rx_post_buf(pipe, ctx, paddr);
818 	spin_unlock_bh(&ce->ce_lock);
819 
820 	return ret;
821 }
822 EXPORT_SYMBOL(ath10k_ce_rx_post_buf);
823 
824 /*
825  * Guts of ath10k_ce_completed_recv_next.
826  * The caller takes responsibility for any necessary locking.
827  */
828 static int
829 	 _ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
830 					       void **per_transfer_contextp,
831 					       unsigned int *nbytesp)
832 {
833 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
834 	unsigned int nentries_mask = dest_ring->nentries_mask;
835 	unsigned int sw_index = dest_ring->sw_index;
836 
837 	struct ce_desc *base = dest_ring->base_addr_owner_space;
838 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
839 	struct ce_desc sdesc;
840 	u16 nbytes;
841 
842 	/* Copy in one go for performance reasons */
843 	sdesc = *desc;
844 
845 	nbytes = __le16_to_cpu(sdesc.nbytes);
846 	if (nbytes == 0) {
847 		/*
848 		 * This closes a relatively unusual race where the Host
849 		 * sees the updated DRRI before the update to the
850 		 * corresponding descriptor has completed. We treat this
851 		 * as a descriptor that is not yet done.
852 		 */
853 		return -EIO;
854 	}
855 
856 	desc->nbytes = 0;
857 
858 	/* Return data from completed destination descriptor */
859 	*nbytesp = nbytes;
860 
861 	if (per_transfer_contextp)
862 		*per_transfer_contextp =
863 			dest_ring->per_transfer_context[sw_index];
864 
865 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
866 	 * So update transfer context all CEs except CE5.
867 	 */
868 	if (ce_state->id != 5)
869 		dest_ring->per_transfer_context[sw_index] = NULL;
870 
871 	/* Update sw_index */
872 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
873 	dest_ring->sw_index = sw_index;
874 
875 	return 0;
876 }
877 
878 static int
879 _ath10k_ce_completed_recv_next_nolock_64(struct ath10k_ce_pipe *ce_state,
880 					 void **per_transfer_contextp,
881 					 unsigned int *nbytesp)
882 {
883 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
884 	unsigned int nentries_mask = dest_ring->nentries_mask;
885 	unsigned int sw_index = dest_ring->sw_index;
886 	struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
887 	struct ce_desc_64 *desc =
888 		CE_DEST_RING_TO_DESC_64(base, sw_index);
889 	struct ce_desc_64 sdesc;
890 	u16 nbytes;
891 
892 	/* Copy in one go for performance reasons */
893 	sdesc = *desc;
894 
895 	nbytes = __le16_to_cpu(sdesc.nbytes);
896 	if (nbytes == 0) {
897 		/* This closes a relatively unusual race where the Host
898 		 * sees the updated DRRI before the update to the
899 		 * corresponding descriptor has completed. We treat this
900 		 * as a descriptor that is not yet done.
901 		 */
902 		return -EIO;
903 	}
904 
905 	desc->nbytes = 0;
906 
907 	/* Return data from completed destination descriptor */
908 	*nbytesp = nbytes;
909 
910 	if (per_transfer_contextp)
911 		*per_transfer_contextp =
912 			dest_ring->per_transfer_context[sw_index];
913 
914 	/* Copy engine 5 (HTT Rx) will reuse the same transfer context.
915 	 * So update transfer context all CEs except CE5.
916 	 */
917 	if (ce_state->id != 5)
918 		dest_ring->per_transfer_context[sw_index] = NULL;
919 
920 	/* Update sw_index */
921 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
922 	dest_ring->sw_index = sw_index;
923 
924 	return 0;
925 }
926 
927 int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
928 					 void **per_transfer_ctx,
929 					 unsigned int *nbytesp)
930 {
931 	return ce_state->ops->ce_completed_recv_next_nolock(ce_state,
932 							    per_transfer_ctx,
933 							    nbytesp);
934 }
935 EXPORT_SYMBOL(ath10k_ce_completed_recv_next_nolock);
936 
937 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
938 				  void **per_transfer_contextp,
939 				  unsigned int *nbytesp)
940 {
941 	struct ath10k *ar = ce_state->ar;
942 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
943 	int ret;
944 
945 	spin_lock_bh(&ce->ce_lock);
946 	ret = ce_state->ops->ce_completed_recv_next_nolock(ce_state,
947 						   per_transfer_contextp,
948 						   nbytesp);
949 
950 	spin_unlock_bh(&ce->ce_lock);
951 
952 	return ret;
953 }
954 EXPORT_SYMBOL(ath10k_ce_completed_recv_next);
955 
956 static int _ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
957 				       void **per_transfer_contextp,
958 				       dma_addr_t *bufferp)
959 {
960 	struct ath10k_ce_ring *dest_ring;
961 	unsigned int nentries_mask;
962 	unsigned int sw_index;
963 	unsigned int write_index;
964 	int ret;
965 	struct ath10k *ar;
966 	struct ath10k_ce *ce;
967 
968 	dest_ring = ce_state->dest_ring;
969 
970 	if (!dest_ring)
971 		return -EIO;
972 
973 	ar = ce_state->ar;
974 	ce = ath10k_ce_priv(ar);
975 
976 	spin_lock_bh(&ce->ce_lock);
977 
978 	nentries_mask = dest_ring->nentries_mask;
979 	sw_index = dest_ring->sw_index;
980 	write_index = dest_ring->write_index;
981 	if (write_index != sw_index) {
982 		struct ce_desc *base = dest_ring->base_addr_owner_space;
983 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
984 
985 		/* Return data from completed destination descriptor */
986 		*bufferp = __le32_to_cpu(desc->addr);
987 
988 		if (per_transfer_contextp)
989 			*per_transfer_contextp =
990 				dest_ring->per_transfer_context[sw_index];
991 
992 		/* sanity */
993 		dest_ring->per_transfer_context[sw_index] = NULL;
994 		desc->nbytes = 0;
995 
996 		/* Update sw_index */
997 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
998 		dest_ring->sw_index = sw_index;
999 		ret = 0;
1000 	} else {
1001 		ret = -EIO;
1002 	}
1003 
1004 	spin_unlock_bh(&ce->ce_lock);
1005 
1006 	return ret;
1007 }
1008 
1009 static int _ath10k_ce_revoke_recv_next_64(struct ath10k_ce_pipe *ce_state,
1010 					  void **per_transfer_contextp,
1011 					  dma_addr_t *bufferp)
1012 {
1013 	struct ath10k_ce_ring *dest_ring;
1014 	unsigned int nentries_mask;
1015 	unsigned int sw_index;
1016 	unsigned int write_index;
1017 	int ret;
1018 	struct ath10k *ar;
1019 	struct ath10k_ce *ce;
1020 
1021 	dest_ring = ce_state->dest_ring;
1022 
1023 	if (!dest_ring)
1024 		return -EIO;
1025 
1026 	ar = ce_state->ar;
1027 	ce = ath10k_ce_priv(ar);
1028 
1029 	spin_lock_bh(&ce->ce_lock);
1030 
1031 	nentries_mask = dest_ring->nentries_mask;
1032 	sw_index = dest_ring->sw_index;
1033 	write_index = dest_ring->write_index;
1034 	if (write_index != sw_index) {
1035 		struct ce_desc_64 *base = dest_ring->base_addr_owner_space;
1036 		struct ce_desc_64 *desc =
1037 			CE_DEST_RING_TO_DESC_64(base, sw_index);
1038 
1039 		/* Return data from completed destination descriptor */
1040 		*bufferp = __le64_to_cpu(desc->addr);
1041 
1042 		if (per_transfer_contextp)
1043 			*per_transfer_contextp =
1044 				dest_ring->per_transfer_context[sw_index];
1045 
1046 		/* sanity */
1047 		dest_ring->per_transfer_context[sw_index] = NULL;
1048 		desc->nbytes = 0;
1049 
1050 		/* Update sw_index */
1051 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1052 		dest_ring->sw_index = sw_index;
1053 		ret = 0;
1054 	} else {
1055 		ret = -EIO;
1056 	}
1057 
1058 	spin_unlock_bh(&ce->ce_lock);
1059 
1060 	return ret;
1061 }
1062 
1063 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
1064 			       void **per_transfer_contextp,
1065 			       dma_addr_t *bufferp)
1066 {
1067 	return ce_state->ops->ce_revoke_recv_next(ce_state,
1068 						  per_transfer_contextp,
1069 						  bufferp);
1070 }
1071 EXPORT_SYMBOL(ath10k_ce_revoke_recv_next);
1072 
1073 /*
1074  * Guts of ath10k_ce_completed_send_next.
1075  * The caller takes responsibility for any necessary locking.
1076  */
1077 static int _ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1078 						 void **per_transfer_contextp)
1079 {
1080 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1081 	u32 ctrl_addr = ce_state->ctrl_addr;
1082 	struct ath10k *ar = ce_state->ar;
1083 	unsigned int nentries_mask = src_ring->nentries_mask;
1084 	unsigned int sw_index = src_ring->sw_index;
1085 	unsigned int read_index;
1086 	struct ce_desc *desc;
1087 
1088 	if (src_ring->hw_index == sw_index) {
1089 		/*
1090 		 * The SW completion index has caught up with the cached
1091 		 * version of the HW completion index.
1092 		 * Update the cached HW completion index to see whether
1093 		 * the SW has really caught up to the HW, or if the cached
1094 		 * value of the HW index has become stale.
1095 		 */
1096 
1097 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1098 		if (read_index == 0xffffffff)
1099 			return -ENODEV;
1100 
1101 		read_index &= nentries_mask;
1102 		src_ring->hw_index = read_index;
1103 	}
1104 
1105 	if (ar->hw_params.rri_on_ddr)
1106 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1107 	else
1108 		read_index = src_ring->hw_index;
1109 
1110 	if (read_index == sw_index)
1111 		return -EIO;
1112 
1113 	if (per_transfer_contextp)
1114 		*per_transfer_contextp =
1115 			src_ring->per_transfer_context[sw_index];
1116 
1117 	/* sanity */
1118 	src_ring->per_transfer_context[sw_index] = NULL;
1119 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
1120 				   sw_index);
1121 	desc->nbytes = 0;
1122 
1123 	/* Update sw_index */
1124 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1125 	src_ring->sw_index = sw_index;
1126 
1127 	return 0;
1128 }
1129 
1130 static int _ath10k_ce_completed_send_next_nolock_64(struct ath10k_ce_pipe *ce_state,
1131 						    void **per_transfer_contextp)
1132 {
1133 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1134 	u32 ctrl_addr = ce_state->ctrl_addr;
1135 	struct ath10k *ar = ce_state->ar;
1136 	unsigned int nentries_mask = src_ring->nentries_mask;
1137 	unsigned int sw_index = src_ring->sw_index;
1138 	unsigned int read_index;
1139 	struct ce_desc_64 *desc;
1140 
1141 	if (src_ring->hw_index == sw_index) {
1142 		/*
1143 		 * The SW completion index has caught up with the cached
1144 		 * version of the HW completion index.
1145 		 * Update the cached HW completion index to see whether
1146 		 * the SW has really caught up to the HW, or if the cached
1147 		 * value of the HW index has become stale.
1148 		 */
1149 
1150 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1151 		if (read_index == 0xffffffff)
1152 			return -ENODEV;
1153 
1154 		read_index &= nentries_mask;
1155 		src_ring->hw_index = read_index;
1156 	}
1157 
1158 	if (ar->hw_params.rri_on_ddr)
1159 		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1160 	else
1161 		read_index = src_ring->hw_index;
1162 
1163 	if (read_index == sw_index)
1164 		return -EIO;
1165 
1166 	if (per_transfer_contextp)
1167 		*per_transfer_contextp =
1168 			src_ring->per_transfer_context[sw_index];
1169 
1170 	/* sanity */
1171 	src_ring->per_transfer_context[sw_index] = NULL;
1172 	desc = CE_SRC_RING_TO_DESC_64(src_ring->base_addr_owner_space,
1173 				      sw_index);
1174 	desc->nbytes = 0;
1175 
1176 	/* Update sw_index */
1177 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1178 	src_ring->sw_index = sw_index;
1179 
1180 	return 0;
1181 }
1182 
1183 int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
1184 					 void **per_transfer_contextp)
1185 {
1186 	return ce_state->ops->ce_completed_send_next_nolock(ce_state,
1187 							    per_transfer_contextp);
1188 }
1189 EXPORT_SYMBOL(ath10k_ce_completed_send_next_nolock);
1190 
1191 static void ath10k_ce_extract_desc_data(struct ath10k *ar,
1192 					struct ath10k_ce_ring *src_ring,
1193 					u32 sw_index,
1194 					dma_addr_t *bufferp,
1195 					u32 *nbytesp,
1196 					u32 *transfer_idp)
1197 {
1198 		struct ce_desc *base = src_ring->base_addr_owner_space;
1199 		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
1200 
1201 		/* Return data from completed source descriptor */
1202 		*bufferp = __le32_to_cpu(desc->addr);
1203 		*nbytesp = __le16_to_cpu(desc->nbytes);
1204 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
1205 				   CE_DESC_FLAGS_META_DATA);
1206 }
1207 
1208 static void ath10k_ce_extract_desc_data_64(struct ath10k *ar,
1209 					   struct ath10k_ce_ring *src_ring,
1210 					   u32 sw_index,
1211 					   dma_addr_t *bufferp,
1212 					   u32 *nbytesp,
1213 					   u32 *transfer_idp)
1214 {
1215 		struct ce_desc_64 *base = src_ring->base_addr_owner_space;
1216 		struct ce_desc_64 *desc =
1217 			CE_SRC_RING_TO_DESC_64(base, sw_index);
1218 
1219 		/* Return data from completed source descriptor */
1220 		*bufferp = __le64_to_cpu(desc->addr);
1221 		*nbytesp = __le16_to_cpu(desc->nbytes);
1222 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
1223 				   CE_DESC_FLAGS_META_DATA);
1224 }
1225 
1226 /* NB: Modeled after ath10k_ce_completed_send_next */
1227 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
1228 			       void **per_transfer_contextp,
1229 			       dma_addr_t *bufferp,
1230 			       unsigned int *nbytesp,
1231 			       unsigned int *transfer_idp)
1232 {
1233 	struct ath10k_ce_ring *src_ring;
1234 	unsigned int nentries_mask;
1235 	unsigned int sw_index;
1236 	unsigned int write_index;
1237 	int ret;
1238 	struct ath10k *ar;
1239 	struct ath10k_ce *ce;
1240 
1241 	src_ring = ce_state->src_ring;
1242 
1243 	if (!src_ring)
1244 		return -EIO;
1245 
1246 	ar = ce_state->ar;
1247 	ce = ath10k_ce_priv(ar);
1248 
1249 	spin_lock_bh(&ce->ce_lock);
1250 
1251 	nentries_mask = src_ring->nentries_mask;
1252 	sw_index = src_ring->sw_index;
1253 	write_index = src_ring->write_index;
1254 
1255 	if (write_index != sw_index) {
1256 		ce_state->ops->ce_extract_desc_data(ar, src_ring, sw_index,
1257 						    bufferp, nbytesp,
1258 						    transfer_idp);
1259 
1260 		if (per_transfer_contextp)
1261 			*per_transfer_contextp =
1262 				src_ring->per_transfer_context[sw_index];
1263 
1264 		/* sanity */
1265 		src_ring->per_transfer_context[sw_index] = NULL;
1266 
1267 		/* Update sw_index */
1268 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
1269 		src_ring->sw_index = sw_index;
1270 		ret = 0;
1271 	} else {
1272 		ret = -EIO;
1273 	}
1274 
1275 	spin_unlock_bh(&ce->ce_lock);
1276 
1277 	return ret;
1278 }
1279 EXPORT_SYMBOL(ath10k_ce_cancel_send_next);
1280 
1281 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
1282 				  void **per_transfer_contextp)
1283 {
1284 	struct ath10k *ar = ce_state->ar;
1285 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1286 	int ret;
1287 
1288 	spin_lock_bh(&ce->ce_lock);
1289 	ret = ath10k_ce_completed_send_next_nolock(ce_state,
1290 						   per_transfer_contextp);
1291 	spin_unlock_bh(&ce->ce_lock);
1292 
1293 	return ret;
1294 }
1295 EXPORT_SYMBOL(ath10k_ce_completed_send_next);
1296 
1297 /*
1298  * Guts of interrupt handler for per-engine interrupts on a particular CE.
1299  *
1300  * Invokes registered callbacks for recv_complete,
1301  * send_complete, and watermarks.
1302  */
1303 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
1304 {
1305 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1306 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1307 	struct ath10k_hw_ce_host_wm_regs *wm_regs = ar->hw_ce_regs->wm_regs;
1308 	u32 ctrl_addr = ce_state->ctrl_addr;
1309 
1310 	/*
1311 	 * Clear before handling
1312 	 *
1313 	 * Misc CE interrupts are not being handled, but still need
1314 	 * to be cleared.
1315 	 *
1316 	 * NOTE: When the last copy engine interrupt is cleared the
1317 	 * hardware will go to sleep.  Once this happens any access to
1318 	 * the CE registers can cause a hardware fault.
1319 	 */
1320 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
1321 					  wm_regs->cc_mask | wm_regs->wm_mask);
1322 
1323 	if (ce_state->recv_cb)
1324 		ce_state->recv_cb(ce_state);
1325 
1326 	if (ce_state->send_cb)
1327 		ce_state->send_cb(ce_state);
1328 }
1329 EXPORT_SYMBOL(ath10k_ce_per_engine_service);
1330 
1331 /*
1332  * Handler for per-engine interrupts on ALL active CEs.
1333  * This is used in cases where the system is sharing a
1334  * single interrput for all CEs
1335  */
1336 
1337 void ath10k_ce_per_engine_service_any(struct ath10k *ar)
1338 {
1339 	int ce_id;
1340 	u32 intr_summary;
1341 
1342 	intr_summary = ath10k_ce_interrupt_summary(ar);
1343 
1344 	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
1345 		if (intr_summary & (1 << ce_id))
1346 			intr_summary &= ~(1 << ce_id);
1347 		else
1348 			/* no intr pending on this CE */
1349 			continue;
1350 
1351 		ath10k_ce_per_engine_service(ar, ce_id);
1352 	}
1353 }
1354 EXPORT_SYMBOL(ath10k_ce_per_engine_service_any);
1355 
1356 /*
1357  * Adjust interrupts for the copy complete handler.
1358  * If it's needed for either send or recv, then unmask
1359  * this interrupt; otherwise, mask it.
1360  *
1361  * Called with ce_lock held.
1362  */
1363 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
1364 {
1365 	u32 ctrl_addr = ce_state->ctrl_addr;
1366 	struct ath10k *ar = ce_state->ar;
1367 	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;
1368 
1369 	if ((!disable_copy_compl_intr) &&
1370 	    (ce_state->send_cb || ce_state->recv_cb))
1371 		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
1372 	else
1373 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1374 
1375 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1376 }
1377 
1378 void ath10k_ce_disable_interrupt(struct ath10k *ar, int ce_id)
1379 {
1380 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1381 	struct ath10k_ce_pipe *ce_state;
1382 	u32 ctrl_addr;
1383 
1384 	ce_state  = &ce->ce_states[ce_id];
1385 	if (ce_state->attr_flags & CE_ATTR_POLL)
1386 		return;
1387 
1388 	ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1389 
1390 	ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
1391 	ath10k_ce_error_intr_disable(ar, ctrl_addr);
1392 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
1393 }
1394 EXPORT_SYMBOL(ath10k_ce_disable_interrupt);
1395 
1396 void ath10k_ce_disable_interrupts(struct ath10k *ar)
1397 {
1398 	int ce_id;
1399 
1400 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
1401 		ath10k_ce_disable_interrupt(ar, ce_id);
1402 }
1403 EXPORT_SYMBOL(ath10k_ce_disable_interrupts);
1404 
1405 void ath10k_ce_enable_interrupt(struct ath10k *ar, int ce_id)
1406 {
1407 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1408 	struct ath10k_ce_pipe *ce_state;
1409 
1410 	ce_state  = &ce->ce_states[ce_id];
1411 	if (ce_state->attr_flags & CE_ATTR_POLL)
1412 		return;
1413 
1414 	ath10k_ce_per_engine_handler_adjust(ce_state);
1415 }
1416 EXPORT_SYMBOL(ath10k_ce_enable_interrupt);
1417 
1418 void ath10k_ce_enable_interrupts(struct ath10k *ar)
1419 {
1420 	int ce_id;
1421 
1422 	/* Enable interrupts for copy engine that
1423 	 * are not using polling mode.
1424 	 */
1425 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
1426 		ath10k_ce_enable_interrupt(ar, ce_id);
1427 }
1428 EXPORT_SYMBOL(ath10k_ce_enable_interrupts);
1429 
1430 static int ath10k_ce_init_src_ring(struct ath10k *ar,
1431 				   unsigned int ce_id,
1432 				   const struct ce_attr *attr)
1433 {
1434 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1435 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1436 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
1437 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1438 
1439 	nentries = roundup_pow_of_two(attr->src_nentries);
1440 
1441 	if (ar->hw_params.target_64bit)
1442 		memset(src_ring->base_addr_owner_space, 0,
1443 		       nentries * sizeof(struct ce_desc_64));
1444 	else
1445 		memset(src_ring->base_addr_owner_space, 0,
1446 		       nentries * sizeof(struct ce_desc));
1447 
1448 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
1449 	src_ring->sw_index &= src_ring->nentries_mask;
1450 	src_ring->hw_index = src_ring->sw_index;
1451 
1452 	src_ring->write_index =
1453 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
1454 	src_ring->write_index &= src_ring->nentries_mask;
1455 
1456 	ath10k_ce_src_ring_base_addr_set(ar, ce_id,
1457 					 src_ring->base_addr_ce_space);
1458 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
1459 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
1460 	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
1461 	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
1462 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
1463 
1464 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1465 		   "boot init ce src ring id %d entries %d base_addr %pK\n",
1466 		   ce_id, nentries, src_ring->base_addr_owner_space);
1467 
1468 	return 0;
1469 }
1470 
1471 static int ath10k_ce_init_dest_ring(struct ath10k *ar,
1472 				    unsigned int ce_id,
1473 				    const struct ce_attr *attr)
1474 {
1475 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1476 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1477 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
1478 	u32 nentries, ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1479 
1480 	nentries = roundup_pow_of_two(attr->dest_nentries);
1481 
1482 	if (ar->hw_params.target_64bit)
1483 		memset(dest_ring->base_addr_owner_space, 0,
1484 		       nentries * sizeof(struct ce_desc_64));
1485 	else
1486 		memset(dest_ring->base_addr_owner_space, 0,
1487 		       nentries * sizeof(struct ce_desc));
1488 
1489 	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
1490 	dest_ring->sw_index &= dest_ring->nentries_mask;
1491 	dest_ring->write_index =
1492 		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
1493 	dest_ring->write_index &= dest_ring->nentries_mask;
1494 
1495 	ath10k_ce_dest_ring_base_addr_set(ar, ce_id,
1496 					  dest_ring->base_addr_ce_space);
1497 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1498 	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1499 	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1500 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1501 
1502 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
1503 		   "boot ce dest ring id %d entries %d base_addr %pK\n",
1504 		   ce_id, nentries, dest_ring->base_addr_owner_space);
1505 
1506 	return 0;
1507 }
1508 
1509 static int ath10k_ce_alloc_shadow_base(struct ath10k *ar,
1510 				       struct ath10k_ce_ring *src_ring,
1511 				       u32 nentries)
1512 {
1513 	src_ring->shadow_base_unaligned = kcalloc(nentries,
1514 						  sizeof(struct ce_desc_64),
1515 						  GFP_KERNEL);
1516 	if (!src_ring->shadow_base_unaligned)
1517 		return -ENOMEM;
1518 
1519 	src_ring->shadow_base = (struct ce_desc_64 *)
1520 			PTR_ALIGN(src_ring->shadow_base_unaligned,
1521 				  CE_DESC_RING_ALIGN);
1522 	return 0;
1523 }
1524 
1525 static struct ath10k_ce_ring *
1526 ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
1527 			 const struct ce_attr *attr)
1528 {
1529 	struct ath10k_ce_ring *src_ring;
1530 	u32 nentries = attr->src_nentries;
1531 	dma_addr_t base_addr;
1532 	int ret;
1533 
1534 	nentries = roundup_pow_of_two(nentries);
1535 
1536 	src_ring = kzalloc(struct_size(src_ring, per_transfer_context,
1537 				       nentries), GFP_KERNEL);
1538 	if (src_ring == NULL)
1539 		return ERR_PTR(-ENOMEM);
1540 
1541 	src_ring->nentries = nentries;
1542 	src_ring->nentries_mask = nentries - 1;
1543 
1544 	/*
1545 	 * Legacy platforms that do not support cache
1546 	 * coherent DMA are unsupported
1547 	 */
1548 	src_ring->base_addr_owner_space_unaligned =
1549 		dma_alloc_coherent(ar->dev,
1550 				   (nentries * sizeof(struct ce_desc) +
1551 				    CE_DESC_RING_ALIGN),
1552 				   &base_addr, GFP_KERNEL);
1553 	if (!src_ring->base_addr_owner_space_unaligned) {
1554 		kfree(src_ring);
1555 		return ERR_PTR(-ENOMEM);
1556 	}
1557 
1558 	src_ring->base_addr_ce_space_unaligned = base_addr;
1559 
1560 	src_ring->base_addr_owner_space =
1561 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1562 				  CE_DESC_RING_ALIGN);
1563 	src_ring->base_addr_ce_space =
1564 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1565 			      CE_DESC_RING_ALIGN);
1566 
1567 	if (ar->hw_params.shadow_reg_support) {
1568 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1569 		if (ret) {
1570 			dma_free_coherent(ar->dev,
1571 					  (nentries * sizeof(struct ce_desc) +
1572 					   CE_DESC_RING_ALIGN),
1573 					  src_ring->base_addr_owner_space_unaligned,
1574 					  base_addr);
1575 			kfree(src_ring);
1576 			return ERR_PTR(ret);
1577 		}
1578 	}
1579 
1580 	return src_ring;
1581 }
1582 
1583 static struct ath10k_ce_ring *
1584 ath10k_ce_alloc_src_ring_64(struct ath10k *ar, unsigned int ce_id,
1585 			    const struct ce_attr *attr)
1586 {
1587 	struct ath10k_ce_ring *src_ring;
1588 	u32 nentries = attr->src_nentries;
1589 	dma_addr_t base_addr;
1590 	int ret;
1591 
1592 	nentries = roundup_pow_of_two(nentries);
1593 
1594 	src_ring = kzalloc(struct_size(src_ring, per_transfer_context,
1595 				       nentries), GFP_KERNEL);
1596 	if (!src_ring)
1597 		return ERR_PTR(-ENOMEM);
1598 
1599 	src_ring->nentries = nentries;
1600 	src_ring->nentries_mask = nentries - 1;
1601 
1602 	/* Legacy platforms that do not support cache
1603 	 * coherent DMA are unsupported
1604 	 */
1605 	src_ring->base_addr_owner_space_unaligned =
1606 		dma_alloc_coherent(ar->dev,
1607 				   (nentries * sizeof(struct ce_desc_64) +
1608 				    CE_DESC_RING_ALIGN),
1609 				   &base_addr, GFP_KERNEL);
1610 	if (!src_ring->base_addr_owner_space_unaligned) {
1611 		kfree(src_ring);
1612 		return ERR_PTR(-ENOMEM);
1613 	}
1614 
1615 	src_ring->base_addr_ce_space_unaligned = base_addr;
1616 
1617 	src_ring->base_addr_owner_space =
1618 			PTR_ALIGN(src_ring->base_addr_owner_space_unaligned,
1619 				  CE_DESC_RING_ALIGN);
1620 	src_ring->base_addr_ce_space =
1621 			ALIGN(src_ring->base_addr_ce_space_unaligned,
1622 			      CE_DESC_RING_ALIGN);
1623 
1624 	if (ar->hw_params.shadow_reg_support) {
1625 		ret = ath10k_ce_alloc_shadow_base(ar, src_ring, nentries);
1626 		if (ret) {
1627 			dma_free_coherent(ar->dev,
1628 					  (nentries * sizeof(struct ce_desc_64) +
1629 					   CE_DESC_RING_ALIGN),
1630 					  src_ring->base_addr_owner_space_unaligned,
1631 					  base_addr);
1632 			kfree(src_ring);
1633 			return ERR_PTR(ret);
1634 		}
1635 	}
1636 
1637 	return src_ring;
1638 }
1639 
1640 static struct ath10k_ce_ring *
1641 ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
1642 			  const struct ce_attr *attr)
1643 {
1644 	struct ath10k_ce_ring *dest_ring;
1645 	u32 nentries;
1646 	dma_addr_t base_addr;
1647 
1648 	nentries = roundup_pow_of_two(attr->dest_nentries);
1649 
1650 	dest_ring = kzalloc(struct_size(dest_ring, per_transfer_context,
1651 					nentries), GFP_KERNEL);
1652 	if (dest_ring == NULL)
1653 		return ERR_PTR(-ENOMEM);
1654 
1655 	dest_ring->nentries = nentries;
1656 	dest_ring->nentries_mask = nentries - 1;
1657 
1658 	/*
1659 	 * Legacy platforms that do not support cache
1660 	 * coherent DMA are unsupported
1661 	 */
1662 	dest_ring->base_addr_owner_space_unaligned =
1663 		dma_alloc_coherent(ar->dev,
1664 				   (nentries * sizeof(struct ce_desc) +
1665 				    CE_DESC_RING_ALIGN),
1666 				   &base_addr, GFP_KERNEL);
1667 	if (!dest_ring->base_addr_owner_space_unaligned) {
1668 		kfree(dest_ring);
1669 		return ERR_PTR(-ENOMEM);
1670 	}
1671 
1672 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1673 
1674 	dest_ring->base_addr_owner_space =
1675 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1676 				  CE_DESC_RING_ALIGN);
1677 	dest_ring->base_addr_ce_space =
1678 				ALIGN(dest_ring->base_addr_ce_space_unaligned,
1679 				      CE_DESC_RING_ALIGN);
1680 
1681 	return dest_ring;
1682 }
1683 
1684 static struct ath10k_ce_ring *
1685 ath10k_ce_alloc_dest_ring_64(struct ath10k *ar, unsigned int ce_id,
1686 			     const struct ce_attr *attr)
1687 {
1688 	struct ath10k_ce_ring *dest_ring;
1689 	u32 nentries;
1690 	dma_addr_t base_addr;
1691 
1692 	nentries = roundup_pow_of_two(attr->dest_nentries);
1693 
1694 	dest_ring = kzalloc(struct_size(dest_ring, per_transfer_context,
1695 					nentries), GFP_KERNEL);
1696 	if (!dest_ring)
1697 		return ERR_PTR(-ENOMEM);
1698 
1699 	dest_ring->nentries = nentries;
1700 	dest_ring->nentries_mask = nentries - 1;
1701 
1702 	/* Legacy platforms that do not support cache
1703 	 * coherent DMA are unsupported
1704 	 */
1705 	dest_ring->base_addr_owner_space_unaligned =
1706 		dma_alloc_coherent(ar->dev,
1707 				   (nentries * sizeof(struct ce_desc_64) +
1708 				    CE_DESC_RING_ALIGN),
1709 				   &base_addr, GFP_KERNEL);
1710 	if (!dest_ring->base_addr_owner_space_unaligned) {
1711 		kfree(dest_ring);
1712 		return ERR_PTR(-ENOMEM);
1713 	}
1714 
1715 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1716 
1717 	/* Correctly initialize memory to 0 to prevent garbage
1718 	 * data crashing system when download firmware
1719 	 */
1720 	dest_ring->base_addr_owner_space =
1721 			PTR_ALIGN(dest_ring->base_addr_owner_space_unaligned,
1722 				  CE_DESC_RING_ALIGN);
1723 	dest_ring->base_addr_ce_space =
1724 			ALIGN(dest_ring->base_addr_ce_space_unaligned,
1725 			      CE_DESC_RING_ALIGN);
1726 
1727 	return dest_ring;
1728 }
1729 
1730 /*
1731  * Initialize a Copy Engine based on caller-supplied attributes.
1732  * This may be called once to initialize both source and destination
1733  * rings or it may be called twice for separate source and destination
1734  * initialization. It may be that only one side or the other is
1735  * initialized by software/firmware.
1736  */
1737 int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
1738 			const struct ce_attr *attr)
1739 {
1740 	int ret;
1741 
1742 	if (attr->src_nentries) {
1743 		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
1744 		if (ret) {
1745 			ath10k_err(ar, "Failed to initialize CE src ring for ID: %d (%d)\n",
1746 				   ce_id, ret);
1747 			return ret;
1748 		}
1749 	}
1750 
1751 	if (attr->dest_nentries) {
1752 		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
1753 		if (ret) {
1754 			ath10k_err(ar, "Failed to initialize CE dest ring for ID: %d (%d)\n",
1755 				   ce_id, ret);
1756 			return ret;
1757 		}
1758 	}
1759 
1760 	return 0;
1761 }
1762 EXPORT_SYMBOL(ath10k_ce_init_pipe);
1763 
1764 static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
1765 {
1766 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1767 
1768 	ath10k_ce_src_ring_base_addr_set(ar, ce_id, 0);
1769 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
1770 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
1771 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
1772 }
1773 
1774 static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
1775 {
1776 	u32 ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1777 
1778 	ath10k_ce_dest_ring_base_addr_set(ar, ce_id, 0);
1779 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
1780 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
1781 }
1782 
/*
 * De-initialize both rings of engine ce_id: the source ring first, then
 * the destination ring.
 */
void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
{
	/* source side */
	ath10k_ce_deinit_src_ring(ar, ce_id);

	/* destination side */
	ath10k_ce_deinit_dest_ring(ar, ce_id);
}
EXPORT_SYMBOL(ath10k_ce_deinit_pipe);
1789 
1790 static void _ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1791 {
1792 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1793 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1794 
1795 	if (ce_state->src_ring) {
1796 		if (ar->hw_params.shadow_reg_support)
1797 			kfree(ce_state->src_ring->shadow_base_unaligned);
1798 		dma_free_coherent(ar->dev,
1799 				  (ce_state->src_ring->nentries *
1800 				   sizeof(struct ce_desc) +
1801 				   CE_DESC_RING_ALIGN),
1802 				  ce_state->src_ring->base_addr_owner_space,
1803 				  ce_state->src_ring->base_addr_ce_space);
1804 		kfree(ce_state->src_ring);
1805 	}
1806 
1807 	if (ce_state->dest_ring) {
1808 		dma_free_coherent(ar->dev,
1809 				  (ce_state->dest_ring->nentries *
1810 				   sizeof(struct ce_desc) +
1811 				   CE_DESC_RING_ALIGN),
1812 				  ce_state->dest_ring->base_addr_owner_space,
1813 				  ce_state->dest_ring->base_addr_ce_space);
1814 		kfree(ce_state->dest_ring);
1815 	}
1816 
1817 	ce_state->src_ring = NULL;
1818 	ce_state->dest_ring = NULL;
1819 }
1820 
1821 static void _ath10k_ce_free_pipe_64(struct ath10k *ar, int ce_id)
1822 {
1823 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1824 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1825 
1826 	if (ce_state->src_ring) {
1827 		if (ar->hw_params.shadow_reg_support)
1828 			kfree(ce_state->src_ring->shadow_base_unaligned);
1829 		dma_free_coherent(ar->dev,
1830 				  (ce_state->src_ring->nentries *
1831 				   sizeof(struct ce_desc_64) +
1832 				   CE_DESC_RING_ALIGN),
1833 				  ce_state->src_ring->base_addr_owner_space,
1834 				  ce_state->src_ring->base_addr_ce_space);
1835 		kfree(ce_state->src_ring);
1836 	}
1837 
1838 	if (ce_state->dest_ring) {
1839 		dma_free_coherent(ar->dev,
1840 				  (ce_state->dest_ring->nentries *
1841 				   sizeof(struct ce_desc_64) +
1842 				   CE_DESC_RING_ALIGN),
1843 				  ce_state->dest_ring->base_addr_owner_space,
1844 				  ce_state->dest_ring->base_addr_ce_space);
1845 		kfree(ce_state->dest_ring);
1846 	}
1847 
1848 	ce_state->src_ring = NULL;
1849 	ce_state->dest_ring = NULL;
1850 }
1851 
1852 void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
1853 {
1854 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1855 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1856 
1857 	ce_state->ops->ce_free_pipe(ar, ce_id);
1858 }
1859 EXPORT_SYMBOL(ath10k_ce_free_pipe);
1860 
1861 void ath10k_ce_dump_registers(struct ath10k *ar,
1862 			      struct ath10k_fw_crash_data *crash_data)
1863 {
1864 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1865 	struct ath10k_ce_crash_data ce_data;
1866 	u32 addr, id;
1867 
1868 	lockdep_assert_held(&ar->dump_mutex);
1869 
1870 	ath10k_err(ar, "Copy Engine register dump:\n");
1871 
1872 	spin_lock_bh(&ce->ce_lock);
1873 	for (id = 0; id < CE_COUNT; id++) {
1874 		addr = ath10k_ce_base_address(ar, id);
1875 		ce_data.base_addr = cpu_to_le32(addr);
1876 
1877 		ce_data.src_wr_idx =
1878 			cpu_to_le32(ath10k_ce_src_ring_write_index_get(ar, addr));
1879 		ce_data.src_r_idx =
1880 			cpu_to_le32(ath10k_ce_src_ring_read_index_get(ar, addr));
1881 		ce_data.dst_wr_idx =
1882 			cpu_to_le32(ath10k_ce_dest_ring_write_index_get(ar, addr));
1883 		ce_data.dst_r_idx =
1884 			cpu_to_le32(ath10k_ce_dest_ring_read_index_get(ar, addr));
1885 
1886 		if (crash_data)
1887 			crash_data->ce_crash_data[id] = ce_data;
1888 
1889 #if defined(__linux__)
1890 		ath10k_err(ar, "[%02d]: 0x%08x %3u %3u %3u %3u", id,
1891 #elif defined(__FreeBSD__)
1892 		ath10k_err(ar, "[%02d]: 0x%08x %3u %3u %3u %3u\n", id,
1893 #endif
1894 			   le32_to_cpu(ce_data.base_addr),
1895 			   le32_to_cpu(ce_data.src_wr_idx),
1896 			   le32_to_cpu(ce_data.src_r_idx),
1897 			   le32_to_cpu(ce_data.dst_wr_idx),
1898 			   le32_to_cpu(ce_data.dst_r_idx));
1899 	}
1900 
1901 	spin_unlock_bh(&ce->ce_lock);
1902 }
1903 EXPORT_SYMBOL(ath10k_ce_dump_registers);
1904 
1905 static const struct ath10k_ce_ops ce_ops = {
1906 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring,
1907 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring,
1908 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf,
1909 	.ce_completed_recv_next_nolock = _ath10k_ce_completed_recv_next_nolock,
1910 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next,
1911 	.ce_extract_desc_data = ath10k_ce_extract_desc_data,
1912 	.ce_free_pipe = _ath10k_ce_free_pipe,
1913 	.ce_send_nolock = _ath10k_ce_send_nolock,
1914 	.ce_set_src_ring_base_addr_hi = NULL,
1915 	.ce_set_dest_ring_base_addr_hi = NULL,
1916 	.ce_completed_send_next_nolock = _ath10k_ce_completed_send_next_nolock,
1917 };
1918 
1919 static const struct ath10k_ce_ops ce_64_ops = {
1920 	.ce_alloc_src_ring = ath10k_ce_alloc_src_ring_64,
1921 	.ce_alloc_dst_ring = ath10k_ce_alloc_dest_ring_64,
1922 	.ce_rx_post_buf = __ath10k_ce_rx_post_buf_64,
1923 	.ce_completed_recv_next_nolock =
1924 				_ath10k_ce_completed_recv_next_nolock_64,
1925 	.ce_revoke_recv_next = _ath10k_ce_revoke_recv_next_64,
1926 	.ce_extract_desc_data = ath10k_ce_extract_desc_data_64,
1927 	.ce_free_pipe = _ath10k_ce_free_pipe_64,
1928 	.ce_send_nolock = _ath10k_ce_send_nolock_64,
1929 	.ce_set_src_ring_base_addr_hi = ath10k_ce_set_src_ring_base_addr_hi,
1930 	.ce_set_dest_ring_base_addr_hi = ath10k_ce_set_dest_ring_base_addr_hi,
1931 	.ce_completed_send_next_nolock = _ath10k_ce_completed_send_next_nolock_64,
1932 };
1933 
1934 static void ath10k_ce_set_ops(struct ath10k *ar,
1935 			      struct ath10k_ce_pipe *ce_state)
1936 {
1937 	switch (ar->hw_rev) {
1938 	case ATH10K_HW_WCN3990:
1939 		ce_state->ops = &ce_64_ops;
1940 		break;
1941 	default:
1942 		ce_state->ops = &ce_ops;
1943 		break;
1944 	}
1945 }
1946 
1947 int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
1948 			 const struct ce_attr *attr)
1949 {
1950 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
1951 	struct ath10k_ce_pipe *ce_state = &ce->ce_states[ce_id];
1952 	int ret;
1953 
1954 	ath10k_ce_set_ops(ar, ce_state);
1955 	/* Make sure there's enough CE ringbuffer entries for HTT TX to avoid
1956 	 * additional TX locking checks.
1957 	 *
1958 	 * For the lack of a better place do the check here.
1959 	 */
1960 	BUILD_BUG_ON(2 * TARGET_NUM_MSDU_DESC >
1961 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1962 	BUILD_BUG_ON(2 * TARGET_10_4_NUM_MSDU_DESC_PFC >
1963 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1964 	BUILD_BUG_ON(2 * TARGET_TLV_NUM_MSDU_DESC >
1965 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1966 
1967 	ce_state->ar = ar;
1968 	ce_state->id = ce_id;
1969 	ce_state->ctrl_addr = ath10k_ce_base_address(ar, ce_id);
1970 	ce_state->attr_flags = attr->flags;
1971 	ce_state->src_sz_max = attr->src_sz_max;
1972 
1973 	if (attr->src_nentries)
1974 		ce_state->send_cb = attr->send_cb;
1975 
1976 	if (attr->dest_nentries)
1977 		ce_state->recv_cb = attr->recv_cb;
1978 
1979 	if (attr->src_nentries) {
1980 		ce_state->src_ring =
1981 			ce_state->ops->ce_alloc_src_ring(ar, ce_id, attr);
1982 		if (IS_ERR(ce_state->src_ring)) {
1983 			ret = PTR_ERR(ce_state->src_ring);
1984 			ath10k_err(ar, "failed to alloc CE src ring %d: %d\n",
1985 				   ce_id, ret);
1986 			ce_state->src_ring = NULL;
1987 			return ret;
1988 		}
1989 	}
1990 
1991 	if (attr->dest_nentries) {
1992 		ce_state->dest_ring = ce_state->ops->ce_alloc_dst_ring(ar,
1993 									ce_id,
1994 									attr);
1995 		if (IS_ERR(ce_state->dest_ring)) {
1996 			ret = PTR_ERR(ce_state->dest_ring);
1997 			ath10k_err(ar, "failed to alloc CE dest ring %d: %d\n",
1998 				   ce_id, ret);
1999 			ce_state->dest_ring = NULL;
2000 			return ret;
2001 		}
2002 	}
2003 
2004 	return 0;
2005 }
2006 EXPORT_SYMBOL(ath10k_ce_alloc_pipe);
2007 
2008 void ath10k_ce_alloc_rri(struct ath10k *ar)
2009 {
2010 	int i;
2011 	u32 value;
2012 	u32 ctrl1_regs;
2013 	u32 ce_base_addr;
2014 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
2015 
2016 	ce->vaddr_rri = dma_alloc_coherent(ar->dev,
2017 					   (CE_COUNT * sizeof(u32)),
2018 					   &ce->paddr_rri, GFP_KERNEL);
2019 
2020 	if (!ce->vaddr_rri)
2021 		return;
2022 
2023 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_low,
2024 			  lower_32_bits(ce->paddr_rri));
2025 	ath10k_ce_write32(ar, ar->hw_ce_regs->ce_rri_high,
2026 			  (upper_32_bits(ce->paddr_rri) &
2027 			  CE_DESC_ADDR_HI_MASK));
2028 
2029 	for (i = 0; i < CE_COUNT; i++) {
2030 		ctrl1_regs = ar->hw_ce_regs->ctrl1_regs->addr;
2031 		ce_base_addr = ath10k_ce_base_address(ar, i);
2032 		value = ath10k_ce_read32(ar, ce_base_addr + ctrl1_regs);
2033 		value |= ar->hw_ce_regs->upd->mask;
2034 		ath10k_ce_write32(ar, ce_base_addr + ctrl1_regs, value);
2035 	}
2036 }
2037 EXPORT_SYMBOL(ath10k_ce_alloc_rri);
2038 
2039 void ath10k_ce_free_rri(struct ath10k *ar)
2040 {
2041 	struct ath10k_ce *ce = ath10k_ce_priv(ar);
2042 
2043 	dma_free_coherent(ar->dev, (CE_COUNT * sizeof(u32)),
2044 			  ce->vaddr_rri,
2045 			  ce->paddr_rri);
2046 }
2047 EXPORT_SYMBOL(ath10k_ce_free_rri);
2048