xref: /linux/drivers/net/wireless/ath/ath10k/ce.c (revision 00a6d7b6762c27d441e9ac8faff36384bc0fc180)
1 /*
2  * Copyright (c) 2005-2011 Atheros Communications Inc.
3  * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "hif.h"
19 #include "pci.h"
20 #include "ce.h"
21 #include "debug.h"
22 
23 /*
24  * Support for Copy Engine hardware, which is mainly used for
25  * communication between Host and Target over a PCIe interconnect.
26  */
27 
28 /*
29  * A single CopyEngine (CE) comprises two "rings":
30  *   a source ring
31  *   a destination ring
32  *
33  * Each ring consists of a number of descriptors which specify
34  * an address, length, and meta-data.
35  *
36  * Typically, one side of the PCIe interconnect (Host or Target)
37  * controls one ring and the other side controls the other ring.
38  * The source side chooses when to initiate a transfer and it
39  * chooses what to send (buffer address, length). The destination
40  * side keeps a supply of "anonymous receive buffers" available and
41  * it handles incoming data as it arrives (when the destination
42  * receives an interrupt).
43  *
44  * The sender may send a simple buffer (address/length) or it may
45  * send a small list of buffers.  When a small list is sent, hardware
46  * "gathers" these and they end up in a single destination buffer
47  * with a single interrupt.
48  *
49  * There are several "contexts" managed by this layer -- more, it
50  * may seem -- than should be needed. These are provided mainly for
51  * maximum flexibility and especially to facilitate a simpler HIF
52  * implementation. There are per-CopyEngine recv, send, and watermark
53  * contexts. These are supplied by the caller when a recv, send,
54  * or watermark handler is established and they are echoed back to
55  * the caller when the respective callbacks are invoked. There is
56  * also a per-transfer context supplied by the caller when a buffer
57  * (or sendlist) is sent and when a buffer is enqueued for recv.
58  * These per-transfer contexts are echoed back to the caller when
59  * the buffer is sent/received.
60  */
61 
62 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
63 						       u32 ce_ctrl_addr,
64 						       unsigned int n)
65 {
66 	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n);
67 }
68 
69 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
70 						      u32 ce_ctrl_addr)
71 {
72 	return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS);
73 }
74 
75 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
76 						      u32 ce_ctrl_addr,
77 						      unsigned int n)
78 {
79 	ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n);
80 }
81 
82 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
83 						     u32 ce_ctrl_addr)
84 {
85 	return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS);
86 }
87 
88 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
89 						    u32 ce_ctrl_addr)
90 {
91 	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS);
92 }
93 
94 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
95 						    u32 ce_ctrl_addr,
96 						    unsigned int addr)
97 {
98 	ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr);
99 }
100 
101 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
102 					       u32 ce_ctrl_addr,
103 					       unsigned int n)
104 {
105 	ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n);
106 }
107 
108 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
109 					       u32 ce_ctrl_addr,
110 					       unsigned int n)
111 {
112 	u32 ctrl1_addr = ath10k_pci_read32((ar),
113 					   (ce_ctrl_addr) + CE_CTRL1_ADDRESS);
114 
115 	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
116 			   (ctrl1_addr &  ~CE_CTRL1_DMAX_LENGTH_MASK) |
117 			   CE_CTRL1_DMAX_LENGTH_SET(n));
118 }
119 
120 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
121 						    u32 ce_ctrl_addr,
122 						    unsigned int n)
123 {
124 	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);
125 
126 	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
127 			   (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) |
128 			   CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n));
129 }
130 
131 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
132 						     u32 ce_ctrl_addr,
133 						     unsigned int n)
134 {
135 	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);
136 
137 	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
138 			   (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) |
139 			   CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n));
140 }
141 
142 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
143 						     u32 ce_ctrl_addr)
144 {
145 	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS);
146 }
147 
148 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
149 						     u32 ce_ctrl_addr,
150 						     u32 addr)
151 {
152 	ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr);
153 }
154 
155 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
156 						u32 ce_ctrl_addr,
157 						unsigned int n)
158 {
159 	ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n);
160 }
161 
162 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
163 						   u32 ce_ctrl_addr,
164 						   unsigned int n)
165 {
166 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);
167 
168 	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
169 			   (addr & ~SRC_WATERMARK_HIGH_MASK) |
170 			   SRC_WATERMARK_HIGH_SET(n));
171 }
172 
173 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
174 						  u32 ce_ctrl_addr,
175 						  unsigned int n)
176 {
177 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);
178 
179 	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
180 			   (addr & ~SRC_WATERMARK_LOW_MASK) |
181 			   SRC_WATERMARK_LOW_SET(n));
182 }
183 
184 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
185 						    u32 ce_ctrl_addr,
186 						    unsigned int n)
187 {
188 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);
189 
190 	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
191 			   (addr & ~DST_WATERMARK_HIGH_MASK) |
192 			   DST_WATERMARK_HIGH_SET(n));
193 }
194 
195 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
196 						   u32 ce_ctrl_addr,
197 						   unsigned int n)
198 {
199 	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);
200 
201 	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
202 			   (addr & ~DST_WATERMARK_LOW_MASK) |
203 			   DST_WATERMARK_LOW_SET(n));
204 }
205 
206 static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
207 							u32 ce_ctrl_addr)
208 {
209 	u32 host_ie_addr = ath10k_pci_read32(ar,
210 					     ce_ctrl_addr + HOST_IE_ADDRESS);
211 
212 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
213 			   host_ie_addr | HOST_IE_COPY_COMPLETE_MASK);
214 }
215 
216 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
217 							u32 ce_ctrl_addr)
218 {
219 	u32 host_ie_addr = ath10k_pci_read32(ar,
220 					     ce_ctrl_addr + HOST_IE_ADDRESS);
221 
222 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
223 			   host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK);
224 }
225 
226 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
227 						    u32 ce_ctrl_addr)
228 {
229 	u32 host_ie_addr = ath10k_pci_read32(ar,
230 					     ce_ctrl_addr + HOST_IE_ADDRESS);
231 
232 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
233 			   host_ie_addr & ~CE_WATERMARK_MASK);
234 }
235 
236 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
237 					       u32 ce_ctrl_addr)
238 {
239 	u32 misc_ie_addr = ath10k_pci_read32(ar,
240 					     ce_ctrl_addr + MISC_IE_ADDRESS);
241 
242 	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
243 			   misc_ie_addr | CE_ERROR_MASK);
244 }
245 
246 static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
247 						u32 ce_ctrl_addr)
248 {
249 	u32 misc_ie_addr = ath10k_pci_read32(ar,
250 					     ce_ctrl_addr + MISC_IE_ADDRESS);
251 
252 	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
253 			   misc_ie_addr & ~CE_ERROR_MASK);
254 }
255 
256 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
257 						     u32 ce_ctrl_addr,
258 						     unsigned int mask)
259 {
260 	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask);
261 }
262 
263 
264 /*
265  * Guts of ath10k_ce_send, used by both ath10k_ce_send and
266  * ath10k_ce_sendlist_send.
267  * The caller takes responsibility for any needed locking.
268  */
269 int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
270 			  void *per_transfer_context,
271 			  u32 buffer,
272 			  unsigned int nbytes,
273 			  unsigned int transfer_id,
274 			  unsigned int flags)
275 {
276 	struct ath10k *ar = ce_state->ar;
277 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
278 	struct ce_desc *desc, *sdesc;
279 	unsigned int nentries_mask = src_ring->nentries_mask;
280 	unsigned int sw_index = src_ring->sw_index;
281 	unsigned int write_index = src_ring->write_index;
282 	u32 ctrl_addr = ce_state->ctrl_addr;
283 	u32 desc_flags = 0;
284 	int ret = 0;
285 
286 	if (nbytes > ce_state->src_sz_max)
287 		ath10k_warn("%s: send more we can (nbytes: %d, max: %d)\n",
288 			    __func__, nbytes, ce_state->src_sz_max);
289 
290 	ret = ath10k_pci_wake(ar);
291 	if (ret)
292 		return ret;
293 
294 	if (unlikely(CE_RING_DELTA(nentries_mask,
295 				   write_index, sw_index - 1) <= 0)) {
296 		ret = -ENOSR;
297 		goto exit;
298 	}
299 
300 	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
301 				   write_index);
302 	sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index);
303 
304 	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
305 
306 	if (flags & CE_SEND_FLAG_GATHER)
307 		desc_flags |= CE_DESC_FLAGS_GATHER;
308 	if (flags & CE_SEND_FLAG_BYTE_SWAP)
309 		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
310 
311 	sdesc->addr   = __cpu_to_le32(buffer);
312 	sdesc->nbytes = __cpu_to_le16(nbytes);
313 	sdesc->flags  = __cpu_to_le16(desc_flags);
314 
315 	*desc = *sdesc;
316 
317 	src_ring->per_transfer_context[write_index] = per_transfer_context;
318 
319 	/* Update Source Ring Write Index */
320 	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
321 
322 	/* WORKAROUND */
323 	if (!(flags & CE_SEND_FLAG_GATHER))
324 		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
325 
326 	src_ring->write_index = write_index;
327 exit:
328 	ath10k_pci_sleep(ar);
329 	return ret;
330 }
331 
332 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
333 		   void *per_transfer_context,
334 		   u32 buffer,
335 		   unsigned int nbytes,
336 		   unsigned int transfer_id,
337 		   unsigned int flags)
338 {
339 	struct ath10k *ar = ce_state->ar;
340 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
341 	int ret;
342 
343 	spin_lock_bh(&ar_pci->ce_lock);
344 	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
345 				    buffer, nbytes, transfer_id, flags);
346 	spin_unlock_bh(&ar_pci->ce_lock);
347 
348 	return ret;
349 }
350 
351 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
352 {
353 	struct ath10k *ar = pipe->ar;
354 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
355 	int delta;
356 
357 	spin_lock_bh(&ar_pci->ce_lock);
358 	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
359 			      pipe->src_ring->write_index,
360 			      pipe->src_ring->sw_index - 1);
361 	spin_unlock_bh(&ar_pci->ce_lock);
362 
363 	return delta;
364 }
365 
366 int ath10k_ce_recv_buf_enqueue(struct ath10k_ce_pipe *ce_state,
367 			       void *per_recv_context,
368 			       u32 buffer)
369 {
370 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
371 	u32 ctrl_addr = ce_state->ctrl_addr;
372 	struct ath10k *ar = ce_state->ar;
373 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
374 	unsigned int nentries_mask = dest_ring->nentries_mask;
375 	unsigned int write_index;
376 	unsigned int sw_index;
377 	int ret;
378 
379 	spin_lock_bh(&ar_pci->ce_lock);
380 	write_index = dest_ring->write_index;
381 	sw_index = dest_ring->sw_index;
382 
383 	ret = ath10k_pci_wake(ar);
384 	if (ret)
385 		goto out;
386 
387 	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) > 0) {
388 		struct ce_desc *base = dest_ring->base_addr_owner_space;
389 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
390 
391 		/* Update destination descriptor */
392 		desc->addr    = __cpu_to_le32(buffer);
393 		desc->nbytes = 0;
394 
395 		dest_ring->per_transfer_context[write_index] =
396 							per_recv_context;
397 
398 		/* Update Destination Ring Write Index */
399 		write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
400 		ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
401 		dest_ring->write_index = write_index;
402 		ret = 0;
403 	} else {
404 		ret = -EIO;
405 	}
406 	ath10k_pci_sleep(ar);
407 
408 out:
409 	spin_unlock_bh(&ar_pci->ce_lock);
410 
411 	return ret;
412 }
413 
414 /*
415  * Guts of ath10k_ce_completed_recv_next.
416  * The caller takes responsibility for any necessary locking.
417  */
418 static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
419 						void **per_transfer_contextp,
420 						u32 *bufferp,
421 						unsigned int *nbytesp,
422 						unsigned int *transfer_idp,
423 						unsigned int *flagsp)
424 {
425 	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
426 	unsigned int nentries_mask = dest_ring->nentries_mask;
427 	unsigned int sw_index = dest_ring->sw_index;
428 
429 	struct ce_desc *base = dest_ring->base_addr_owner_space;
430 	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
431 	struct ce_desc sdesc;
432 	u16 nbytes;
433 
434 	/* Copy in one go for performance reasons */
435 	sdesc = *desc;
436 
437 	nbytes = __le16_to_cpu(sdesc.nbytes);
438 	if (nbytes == 0) {
439 		/*
440 		 * This closes a relatively unusual race where the Host
441 		 * sees the updated DRRI before the update to the
442 		 * corresponding descriptor has completed. We treat this
443 		 * as a descriptor that is not yet done.
444 		 */
445 		return -EIO;
446 	}
447 
448 	desc->nbytes = 0;
449 
450 	/* Return data from completed destination descriptor */
451 	*bufferp = __le32_to_cpu(sdesc.addr);
452 	*nbytesp = nbytes;
453 	*transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA);
454 
455 	if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP)
456 		*flagsp = CE_RECV_FLAG_SWAPPED;
457 	else
458 		*flagsp = 0;
459 
460 	if (per_transfer_contextp)
461 		*per_transfer_contextp =
462 			dest_ring->per_transfer_context[sw_index];
463 
464 	/* sanity */
465 	dest_ring->per_transfer_context[sw_index] = NULL;
466 
467 	/* Update sw_index */
468 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
469 	dest_ring->sw_index = sw_index;
470 
471 	return 0;
472 }
473 
474 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
475 				  void **per_transfer_contextp,
476 				  u32 *bufferp,
477 				  unsigned int *nbytesp,
478 				  unsigned int *transfer_idp,
479 				  unsigned int *flagsp)
480 {
481 	struct ath10k *ar = ce_state->ar;
482 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
483 	int ret;
484 
485 	spin_lock_bh(&ar_pci->ce_lock);
486 	ret = ath10k_ce_completed_recv_next_nolock(ce_state,
487 						   per_transfer_contextp,
488 						   bufferp, nbytesp,
489 						   transfer_idp, flagsp);
490 	spin_unlock_bh(&ar_pci->ce_lock);
491 
492 	return ret;
493 }
494 
495 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
496 			       void **per_transfer_contextp,
497 			       u32 *bufferp)
498 {
499 	struct ath10k_ce_ring *dest_ring;
500 	unsigned int nentries_mask;
501 	unsigned int sw_index;
502 	unsigned int write_index;
503 	int ret;
504 	struct ath10k *ar;
505 	struct ath10k_pci *ar_pci;
506 
507 	dest_ring = ce_state->dest_ring;
508 
509 	if (!dest_ring)
510 		return -EIO;
511 
512 	ar = ce_state->ar;
513 	ar_pci = ath10k_pci_priv(ar);
514 
515 	spin_lock_bh(&ar_pci->ce_lock);
516 
517 	nentries_mask = dest_ring->nentries_mask;
518 	sw_index = dest_ring->sw_index;
519 	write_index = dest_ring->write_index;
520 	if (write_index != sw_index) {
521 		struct ce_desc *base = dest_ring->base_addr_owner_space;
522 		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
523 
524 		/* Return data from completed destination descriptor */
525 		*bufferp = __le32_to_cpu(desc->addr);
526 
527 		if (per_transfer_contextp)
528 			*per_transfer_contextp =
529 				dest_ring->per_transfer_context[sw_index];
530 
531 		/* sanity */
532 		dest_ring->per_transfer_context[sw_index] = NULL;
533 
534 		/* Update sw_index */
535 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
536 		dest_ring->sw_index = sw_index;
537 		ret = 0;
538 	} else {
539 		ret = -EIO;
540 	}
541 
542 	spin_unlock_bh(&ar_pci->ce_lock);
543 
544 	return ret;
545 }
546 
547 /*
548  * Guts of ath10k_ce_completed_send_next.
549  * The caller takes responsibility for any necessary locking.
550  */
551 static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
552 						void **per_transfer_contextp,
553 						u32 *bufferp,
554 						unsigned int *nbytesp,
555 						unsigned int *transfer_idp)
556 {
557 	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
558 	u32 ctrl_addr = ce_state->ctrl_addr;
559 	struct ath10k *ar = ce_state->ar;
560 	unsigned int nentries_mask = src_ring->nentries_mask;
561 	unsigned int sw_index = src_ring->sw_index;
562 	struct ce_desc *sdesc, *sbase;
563 	unsigned int read_index;
564 	int ret;
565 
566 	if (src_ring->hw_index == sw_index) {
567 		/*
568 		 * The SW completion index has caught up with the cached
569 		 * version of the HW completion index.
570 		 * Update the cached HW completion index to see whether
571 		 * the SW has really caught up to the HW, or if the cached
572 		 * value of the HW index has become stale.
573 		 */
574 
575 		ret = ath10k_pci_wake(ar);
576 		if (ret)
577 			return ret;
578 
579 		src_ring->hw_index =
580 			ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
581 		src_ring->hw_index &= nentries_mask;
582 
583 		ath10k_pci_sleep(ar);
584 	}
585 
586 	read_index = src_ring->hw_index;
587 
588 	if ((read_index == sw_index) || (read_index == 0xffffffff))
589 		return -EIO;
590 
591 	sbase = src_ring->shadow_base;
592 	sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index);
593 
594 	/* Return data from completed source descriptor */
595 	*bufferp = __le32_to_cpu(sdesc->addr);
596 	*nbytesp = __le16_to_cpu(sdesc->nbytes);
597 	*transfer_idp = MS(__le16_to_cpu(sdesc->flags),
598 			   CE_DESC_FLAGS_META_DATA);
599 
600 	if (per_transfer_contextp)
601 		*per_transfer_contextp =
602 			src_ring->per_transfer_context[sw_index];
603 
604 	/* sanity */
605 	src_ring->per_transfer_context[sw_index] = NULL;
606 
607 	/* Update sw_index */
608 	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
609 	src_ring->sw_index = sw_index;
610 
611 	return 0;
612 }
613 
614 /* NB: Modeled after ath10k_ce_completed_send_next */
615 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
616 			       void **per_transfer_contextp,
617 			       u32 *bufferp,
618 			       unsigned int *nbytesp,
619 			       unsigned int *transfer_idp)
620 {
621 	struct ath10k_ce_ring *src_ring;
622 	unsigned int nentries_mask;
623 	unsigned int sw_index;
624 	unsigned int write_index;
625 	int ret;
626 	struct ath10k *ar;
627 	struct ath10k_pci *ar_pci;
628 
629 	src_ring = ce_state->src_ring;
630 
631 	if (!src_ring)
632 		return -EIO;
633 
634 	ar = ce_state->ar;
635 	ar_pci = ath10k_pci_priv(ar);
636 
637 	spin_lock_bh(&ar_pci->ce_lock);
638 
639 	nentries_mask = src_ring->nentries_mask;
640 	sw_index = src_ring->sw_index;
641 	write_index = src_ring->write_index;
642 
643 	if (write_index != sw_index) {
644 		struct ce_desc *base = src_ring->base_addr_owner_space;
645 		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
646 
647 		/* Return data from completed source descriptor */
648 		*bufferp = __le32_to_cpu(desc->addr);
649 		*nbytesp = __le16_to_cpu(desc->nbytes);
650 		*transfer_idp = MS(__le16_to_cpu(desc->flags),
651 						CE_DESC_FLAGS_META_DATA);
652 
653 		if (per_transfer_contextp)
654 			*per_transfer_contextp =
655 				src_ring->per_transfer_context[sw_index];
656 
657 		/* sanity */
658 		src_ring->per_transfer_context[sw_index] = NULL;
659 
660 		/* Update sw_index */
661 		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
662 		src_ring->sw_index = sw_index;
663 		ret = 0;
664 	} else {
665 		ret = -EIO;
666 	}
667 
668 	spin_unlock_bh(&ar_pci->ce_lock);
669 
670 	return ret;
671 }
672 
673 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
674 				  void **per_transfer_contextp,
675 				  u32 *bufferp,
676 				  unsigned int *nbytesp,
677 				  unsigned int *transfer_idp)
678 {
679 	struct ath10k *ar = ce_state->ar;
680 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
681 	int ret;
682 
683 	spin_lock_bh(&ar_pci->ce_lock);
684 	ret = ath10k_ce_completed_send_next_nolock(ce_state,
685 						   per_transfer_contextp,
686 						   bufferp, nbytesp,
687 						   transfer_idp);
688 	spin_unlock_bh(&ar_pci->ce_lock);
689 
690 	return ret;
691 }
692 
693 /*
694  * Guts of interrupt handler for per-engine interrupts on a particular CE.
695  *
696  * Invokes registered callbacks for recv_complete,
697  * send_complete, and watermarks.
698  */
699 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
700 {
701 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
702 	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
703 	u32 ctrl_addr = ce_state->ctrl_addr;
704 	int ret;
705 
706 	ret = ath10k_pci_wake(ar);
707 	if (ret)
708 		return;
709 
710 	spin_lock_bh(&ar_pci->ce_lock);
711 
712 	/* Clear the copy-complete interrupts that will be handled here. */
713 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
714 					  HOST_IS_COPY_COMPLETE_MASK);
715 
716 	spin_unlock_bh(&ar_pci->ce_lock);
717 
718 	if (ce_state->recv_cb)
719 		ce_state->recv_cb(ce_state);
720 
721 	if (ce_state->send_cb)
722 		ce_state->send_cb(ce_state);
723 
724 	spin_lock_bh(&ar_pci->ce_lock);
725 
726 	/*
727 	 * Misc CE interrupts are not being handled, but still need
728 	 * to be cleared.
729 	 */
730 	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK);
731 
732 	spin_unlock_bh(&ar_pci->ce_lock);
733 	ath10k_pci_sleep(ar);
734 }
735 
736 /*
737  * Handler for per-engine interrupts on ALL active CEs.
738  * This is used in cases where the system is sharing a
739  * single interrput for all CEs
740  */
741 
742 void ath10k_ce_per_engine_service_any(struct ath10k *ar)
743 {
744 	int ce_id, ret;
745 	u32 intr_summary;
746 
747 	ret = ath10k_pci_wake(ar);
748 	if (ret)
749 		return;
750 
751 	intr_summary = CE_INTERRUPT_SUMMARY(ar);
752 
753 	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
754 		if (intr_summary & (1 << ce_id))
755 			intr_summary &= ~(1 << ce_id);
756 		else
757 			/* no intr pending on this CE */
758 			continue;
759 
760 		ath10k_ce_per_engine_service(ar, ce_id);
761 	}
762 
763 	ath10k_pci_sleep(ar);
764 }
765 
766 /*
767  * Adjust interrupts for the copy complete handler.
768  * If it's needed for either send or recv, then unmask
769  * this interrupt; otherwise, mask it.
770  *
771  * Called with ce_lock held.
772  */
773 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state,
774 						int disable_copy_compl_intr)
775 {
776 	u32 ctrl_addr = ce_state->ctrl_addr;
777 	struct ath10k *ar = ce_state->ar;
778 	int ret;
779 
780 	ret = ath10k_pci_wake(ar);
781 	if (ret)
782 		return;
783 
784 	if ((!disable_copy_compl_intr) &&
785 	    (ce_state->send_cb || ce_state->recv_cb))
786 		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
787 	else
788 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
789 
790 	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
791 
792 	ath10k_pci_sleep(ar);
793 }
794 
795 int ath10k_ce_disable_interrupts(struct ath10k *ar)
796 {
797 	int ce_id, ret;
798 
799 	ret = ath10k_pci_wake(ar);
800 	if (ret)
801 		return ret;
802 
803 	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
804 		u32 ctrl_addr = ath10k_ce_base_address(ce_id);
805 
806 		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
807 		ath10k_ce_error_intr_disable(ar, ctrl_addr);
808 		ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
809 	}
810 
811 	ath10k_pci_sleep(ar);
812 
813 	return 0;
814 }
815 
816 void ath10k_ce_send_cb_register(struct ath10k_ce_pipe *ce_state,
817 				void (*send_cb)(struct ath10k_ce_pipe *),
818 				int disable_interrupts)
819 {
820 	struct ath10k *ar = ce_state->ar;
821 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
822 
823 	spin_lock_bh(&ar_pci->ce_lock);
824 	ce_state->send_cb = send_cb;
825 	ath10k_ce_per_engine_handler_adjust(ce_state, disable_interrupts);
826 	spin_unlock_bh(&ar_pci->ce_lock);
827 }
828 
829 void ath10k_ce_recv_cb_register(struct ath10k_ce_pipe *ce_state,
830 				void (*recv_cb)(struct ath10k_ce_pipe *))
831 {
832 	struct ath10k *ar = ce_state->ar;
833 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
834 
835 	spin_lock_bh(&ar_pci->ce_lock);
836 	ce_state->recv_cb = recv_cb;
837 	ath10k_ce_per_engine_handler_adjust(ce_state, 0);
838 	spin_unlock_bh(&ar_pci->ce_lock);
839 }
840 
841 static int ath10k_ce_init_src_ring(struct ath10k *ar,
842 				   unsigned int ce_id,
843 				   struct ath10k_ce_pipe *ce_state,
844 				   const struct ce_attr *attr)
845 {
846 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
847 	struct ath10k_ce_ring *src_ring;
848 	unsigned int nentries = attr->src_nentries;
849 	unsigned int ce_nbytes;
850 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
851 	dma_addr_t base_addr;
852 	char *ptr;
853 
854 	nentries = roundup_pow_of_two(nentries);
855 
856 	if (ce_state->src_ring) {
857 		WARN_ON(ce_state->src_ring->nentries != nentries);
858 		return 0;
859 	}
860 
861 	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
862 	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
863 	if (ptr == NULL)
864 		return -ENOMEM;
865 
866 	ce_state->src_ring = (struct ath10k_ce_ring *)ptr;
867 	src_ring = ce_state->src_ring;
868 
869 	ptr += sizeof(struct ath10k_ce_ring);
870 	src_ring->nentries = nentries;
871 	src_ring->nentries_mask = nentries - 1;
872 
873 	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
874 	src_ring->sw_index &= src_ring->nentries_mask;
875 	src_ring->hw_index = src_ring->sw_index;
876 
877 	src_ring->write_index =
878 		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
879 	src_ring->write_index &= src_ring->nentries_mask;
880 
881 	src_ring->per_transfer_context = (void **)ptr;
882 
883 	/*
884 	 * Legacy platforms that do not support cache
885 	 * coherent DMA are unsupported
886 	 */
887 	src_ring->base_addr_owner_space_unaligned =
888 		pci_alloc_consistent(ar_pci->pdev,
889 				     (nentries * sizeof(struct ce_desc) +
890 				      CE_DESC_RING_ALIGN),
891 				     &base_addr);
892 	if (!src_ring->base_addr_owner_space_unaligned) {
893 		kfree(ce_state->src_ring);
894 		ce_state->src_ring = NULL;
895 		return -ENOMEM;
896 	}
897 
898 	src_ring->base_addr_ce_space_unaligned = base_addr;
899 
900 	src_ring->base_addr_owner_space = PTR_ALIGN(
901 			src_ring->base_addr_owner_space_unaligned,
902 			CE_DESC_RING_ALIGN);
903 	src_ring->base_addr_ce_space = ALIGN(
904 			src_ring->base_addr_ce_space_unaligned,
905 			CE_DESC_RING_ALIGN);
906 
907 	/*
908 	 * Also allocate a shadow src ring in regular
909 	 * mem to use for faster access.
910 	 */
911 	src_ring->shadow_base_unaligned =
912 		kmalloc((nentries * sizeof(struct ce_desc) +
913 			 CE_DESC_RING_ALIGN), GFP_KERNEL);
914 	if (!src_ring->shadow_base_unaligned) {
915 		pci_free_consistent(ar_pci->pdev,
916 				    (nentries * sizeof(struct ce_desc) +
917 				     CE_DESC_RING_ALIGN),
918 				    src_ring->base_addr_owner_space,
919 				    src_ring->base_addr_ce_space);
920 		kfree(ce_state->src_ring);
921 		ce_state->src_ring = NULL;
922 		return -ENOMEM;
923 	}
924 
925 	src_ring->shadow_base = PTR_ALIGN(
926 			src_ring->shadow_base_unaligned,
927 			CE_DESC_RING_ALIGN);
928 
929 	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
930 					 src_ring->base_addr_ce_space);
931 	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
932 	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
933 	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
934 	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
935 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
936 
937 	ath10k_dbg(ATH10K_DBG_BOOT,
938 		   "boot ce src ring id %d entries %d base_addr %p\n",
939 		   ce_id, nentries, src_ring->base_addr_owner_space);
940 
941 	return 0;
942 }
943 
944 static int ath10k_ce_init_dest_ring(struct ath10k *ar,
945 				    unsigned int ce_id,
946 				    struct ath10k_ce_pipe *ce_state,
947 				    const struct ce_attr *attr)
948 {
949 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
950 	struct ath10k_ce_ring *dest_ring;
951 	unsigned int nentries = attr->dest_nentries;
952 	unsigned int ce_nbytes;
953 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
954 	dma_addr_t base_addr;
955 	char *ptr;
956 
957 	nentries = roundup_pow_of_two(nentries);
958 
959 	if (ce_state->dest_ring) {
960 		WARN_ON(ce_state->dest_ring->nentries != nentries);
961 		return 0;
962 	}
963 
964 	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
965 	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
966 	if (ptr == NULL)
967 		return -ENOMEM;
968 
969 	ce_state->dest_ring = (struct ath10k_ce_ring *)ptr;
970 	dest_ring = ce_state->dest_ring;
971 
972 	ptr += sizeof(struct ath10k_ce_ring);
973 	dest_ring->nentries = nentries;
974 	dest_ring->nentries_mask = nentries - 1;
975 
976 	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
977 	dest_ring->sw_index &= dest_ring->nentries_mask;
978 	dest_ring->write_index =
979 		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
980 	dest_ring->write_index &= dest_ring->nentries_mask;
981 
982 	dest_ring->per_transfer_context = (void **)ptr;
983 
984 	/*
985 	 * Legacy platforms that do not support cache
986 	 * coherent DMA are unsupported
987 	 */
988 	dest_ring->base_addr_owner_space_unaligned =
989 		pci_alloc_consistent(ar_pci->pdev,
990 				     (nentries * sizeof(struct ce_desc) +
991 				      CE_DESC_RING_ALIGN),
992 				     &base_addr);
993 	if (!dest_ring->base_addr_owner_space_unaligned) {
994 		kfree(ce_state->dest_ring);
995 		ce_state->dest_ring = NULL;
996 		return -ENOMEM;
997 	}
998 
999 	dest_ring->base_addr_ce_space_unaligned = base_addr;
1000 
1001 	/*
1002 	 * Correctly initialize memory to 0 to prevent garbage
1003 	 * data crashing system when download firmware
1004 	 */
1005 	memset(dest_ring->base_addr_owner_space_unaligned, 0,
1006 	       nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN);
1007 
1008 	dest_ring->base_addr_owner_space = PTR_ALIGN(
1009 			dest_ring->base_addr_owner_space_unaligned,
1010 			CE_DESC_RING_ALIGN);
1011 	dest_ring->base_addr_ce_space = ALIGN(
1012 			dest_ring->base_addr_ce_space_unaligned,
1013 			CE_DESC_RING_ALIGN);
1014 
1015 	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
1016 					  dest_ring->base_addr_ce_space);
1017 	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1018 	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1019 	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1020 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1021 
1022 	ath10k_dbg(ATH10K_DBG_BOOT,
1023 		   "boot ce dest ring id %d entries %d base_addr %p\n",
1024 		   ce_id, nentries, dest_ring->base_addr_owner_space);
1025 
1026 	return 0;
1027 }
1028 
1029 static struct ath10k_ce_pipe *ath10k_ce_init_state(struct ath10k *ar,
1030 					     unsigned int ce_id,
1031 					     const struct ce_attr *attr)
1032 {
1033 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
1034 	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
1035 	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
1036 
1037 	spin_lock_bh(&ar_pci->ce_lock);
1038 
1039 	ce_state->ar = ar;
1040 	ce_state->id = ce_id;
1041 	ce_state->ctrl_addr = ctrl_addr;
1042 	ce_state->attr_flags = attr->flags;
1043 	ce_state->src_sz_max = attr->src_sz_max;
1044 
1045 	spin_unlock_bh(&ar_pci->ce_lock);
1046 
1047 	return ce_state;
1048 }
1049 
1050 /*
1051  * Initialize a Copy Engine based on caller-supplied attributes.
1052  * This may be called once to initialize both source and destination
1053  * rings or it may be called twice for separate source and destination
1054  * initialization. It may be that only one side or the other is
1055  * initialized by software/firmware.
1056  */
1057 struct ath10k_ce_pipe *ath10k_ce_init(struct ath10k *ar,
1058 				unsigned int ce_id,
1059 				const struct ce_attr *attr)
1060 {
1061 	struct ath10k_ce_pipe *ce_state;
1062 	int ret;
1063 
1064 	/*
1065 	 * Make sure there's enough CE ringbuffer entries for HTT TX to avoid
1066 	 * additional TX locking checks.
1067 	 *
1068 	 * For the lack of a better place do the check here.
1069 	 */
1070 	BUILD_BUG_ON(2*TARGET_NUM_MSDU_DESC >
1071 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1072 	BUILD_BUG_ON(2*TARGET_10X_NUM_MSDU_DESC >
1073 		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1074 
1075 	ret = ath10k_pci_wake(ar);
1076 	if (ret)
1077 		return NULL;
1078 
1079 	ce_state = ath10k_ce_init_state(ar, ce_id, attr);
1080 	if (!ce_state) {
1081 		ath10k_err("Failed to initialize CE state for ID: %d\n", ce_id);
1082 		goto out;
1083 	}
1084 
1085 	if (attr->src_nentries) {
1086 		ret = ath10k_ce_init_src_ring(ar, ce_id, ce_state, attr);
1087 		if (ret) {
1088 			ath10k_err("Failed to initialize CE src ring for ID: %d (%d)\n",
1089 				   ce_id, ret);
1090 			ath10k_ce_deinit(ce_state);
1091 			ce_state = NULL;
1092 			goto out;
1093 		}
1094 	}
1095 
1096 	if (attr->dest_nentries) {
1097 		ret = ath10k_ce_init_dest_ring(ar, ce_id, ce_state, attr);
1098 		if (ret) {
1099 			ath10k_err("Failed to initialize CE dest ring for ID: %d (%d)\n",
1100 				   ce_id, ret);
1101 			ath10k_ce_deinit(ce_state);
1102 			ce_state = NULL;
1103 			goto out;
1104 		}
1105 	}
1106 
1107 out:
1108 	ath10k_pci_sleep(ar);
1109 	return ce_state;
1110 }
1111 
1112 void ath10k_ce_deinit(struct ath10k_ce_pipe *ce_state)
1113 {
1114 	struct ath10k *ar = ce_state->ar;
1115 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
1116 
1117 	if (ce_state->src_ring) {
1118 		kfree(ce_state->src_ring->shadow_base_unaligned);
1119 		pci_free_consistent(ar_pci->pdev,
1120 				    (ce_state->src_ring->nentries *
1121 				     sizeof(struct ce_desc) +
1122 				     CE_DESC_RING_ALIGN),
1123 				    ce_state->src_ring->base_addr_owner_space,
1124 				    ce_state->src_ring->base_addr_ce_space);
1125 		kfree(ce_state->src_ring);
1126 	}
1127 
1128 	if (ce_state->dest_ring) {
1129 		pci_free_consistent(ar_pci->pdev,
1130 				    (ce_state->dest_ring->nentries *
1131 				     sizeof(struct ce_desc) +
1132 				     CE_DESC_RING_ALIGN),
1133 				    ce_state->dest_ring->base_addr_owner_space,
1134 				    ce_state->dest_ring->base_addr_ce_space);
1135 		kfree(ce_state->dest_ring);
1136 	}
1137 
1138 	ce_state->src_ring = NULL;
1139 	ce_state->dest_ring = NULL;
1140 }
1141