xref: /linux/drivers/dma/mv_xor.c (revision 32786fdc9506aeba98278c1844d4bfb766863832)
1 /*
2  * offload engine driver for the Marvell XOR engine
3  * Copyright (C) 2007, 2008, Marvell International Ltd.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  */
14 
15 #include <linux/init.h>
16 #include <linux/slab.h>
17 #include <linux/delay.h>
18 #include <linux/dma-mapping.h>
19 #include <linux/spinlock.h>
20 #include <linux/interrupt.h>
21 #include <linux/of_device.h>
22 #include <linux/platform_device.h>
23 #include <linux/memory.h>
24 #include <linux/clk.h>
25 #include <linux/of.h>
26 #include <linux/of_irq.h>
27 #include <linux/irqdomain.h>
28 #include <linux/cpumask.h>
29 #include <linux/platform_data/dma-mv_xor.h>
30 
31 #include "dmaengine.h"
32 #include "mv_xor.h"
33 
/*
 * XOR engine variants distinguished by this driver (the names follow
 * Marvell SoC families).
 */
enum mv_xor_type {
	XOR_ORION,
	XOR_ARMADA_38X,
	/* mv_xor_add_io_win() skips all MBus window handling for this type */
	XOR_ARMADA_37XX,
};
39 
/*
 * Where the operation type of a transfer is selected; compared against
 * mv_chan->op_in_desc when a descriptor is prepared.
 */
enum mv_xor_mode {
	/* NOTE(review): presumably the op is set in the channel config register - confirm */
	XOR_MODE_IN_REG,
	/* mv_xor_prep_dma_xor() encodes the op in each descriptor's command word */
	XOR_MODE_IN_DESC,
};
44 
/* Forward declaration: used by mv_chan_start_new_chain() before its definition */
static void mv_xor_issue_pending(struct dma_chan *chan);

/* Map a generic dma_chan back to the mv_xor_chan that embeds it as 'dmachan' */
#define to_mv_xor_chan(chan)		\
	container_of(chan, struct mv_xor_chan, dmachan)

/* Map an async tx descriptor back to the slot that embeds it as 'async_tx' */
#define to_mv_xor_slot(tx)		\
	container_of(tx, struct mv_xor_desc_slot, async_tx)

/* struct device used for dev_dbg()/dev_err() messages about a channel */
#define mv_chan_to_devp(chan)           \
	((chan)->dmadev.dev)
55 
56 static void mv_desc_init(struct mv_xor_desc_slot *desc,
57 			 dma_addr_t addr, u32 byte_count,
58 			 enum dma_ctrl_flags flags)
59 {
60 	struct mv_xor_desc *hw_desc = desc->hw_desc;
61 
62 	hw_desc->status = XOR_DESC_DMA_OWNED;
63 	hw_desc->phy_next_desc = 0;
64 	/* Enable end-of-descriptor interrupts only for DMA_PREP_INTERRUPT */
65 	hw_desc->desc_command = (flags & DMA_PREP_INTERRUPT) ?
66 				XOR_DESC_EOD_INT_EN : 0;
67 	hw_desc->phy_dest_addr = addr;
68 	hw_desc->byte_count = byte_count;
69 }
70 
71 /* Populate the descriptor */
72 static void mv_xor_config_sg_ll_desc(struct mv_xor_desc_slot *desc,
73 				     dma_addr_t dma_src, dma_addr_t dma_dst,
74 				     u32 len, struct mv_xor_desc_slot *prev)
75 {
76 	struct mv_xor_desc *hw_desc = desc->hw_desc;
77 
78 	hw_desc->status = XOR_DESC_DMA_OWNED;
79 	hw_desc->phy_next_desc = 0;
80 	/* Configure for XOR with only one src address -> MEMCPY */
81 	hw_desc->desc_command = XOR_DESC_OPERATION_XOR | (0x1 << 0);
82 	hw_desc->phy_dest_addr = dma_dst;
83 	hw_desc->phy_src_addr[0] = dma_src;
84 	hw_desc->byte_count = len;
85 
86 	if (prev) {
87 		struct mv_xor_desc *hw_prev = prev->hw_desc;
88 
89 		hw_prev->phy_next_desc = desc->async_tx.phys;
90 	}
91 }
92 
93 static void mv_xor_desc_config_eod(struct mv_xor_desc_slot *desc)
94 {
95 	struct mv_xor_desc *hw_desc = desc->hw_desc;
96 
97 	/* Enable end-of-descriptor interrupt */
98 	hw_desc->desc_command |= XOR_DESC_EOD_INT_EN;
99 }
100 
101 static void mv_desc_set_mode(struct mv_xor_desc_slot *desc)
102 {
103 	struct mv_xor_desc *hw_desc = desc->hw_desc;
104 
105 	switch (desc->type) {
106 	case DMA_XOR:
107 	case DMA_INTERRUPT:
108 		hw_desc->desc_command |= XOR_DESC_OPERATION_XOR;
109 		break;
110 	case DMA_MEMCPY:
111 		hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY;
112 		break;
113 	default:
114 		BUG();
115 		return;
116 	}
117 }
118 
119 static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
120 				  u32 next_desc_addr)
121 {
122 	struct mv_xor_desc *hw_desc = desc->hw_desc;
123 	BUG_ON(hw_desc->phy_next_desc);
124 	hw_desc->phy_next_desc = next_desc_addr;
125 }
126 
127 static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
128 				 int index, dma_addr_t addr)
129 {
130 	struct mv_xor_desc *hw_desc = desc->hw_desc;
131 	hw_desc->phy_src_addr[mv_phy_src_idx(index)] = addr;
132 	if (desc->type == DMA_XOR)
133 		hw_desc->desc_command |= (1 << index);
134 }
135 
136 static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
137 {
138 	return readl_relaxed(XOR_CURR_DESC(chan));
139 }
140 
141 static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
142 					u32 next_desc_addr)
143 {
144 	writel_relaxed(next_desc_addr, XOR_NEXT_DESC(chan));
145 }
146 
147 static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
148 {
149 	u32 val = readl_relaxed(XOR_INTR_MASK(chan));
150 	val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
151 	writel_relaxed(val, XOR_INTR_MASK(chan));
152 }
153 
154 static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
155 {
156 	u32 intr_cause = readl_relaxed(XOR_INTR_CAUSE(chan));
157 	intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
158 	return intr_cause;
159 }
160 
161 static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan)
162 {
163 	u32 val;
164 
165 	val = XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | XOR_INT_STOPPED;
166 	val = ~(val << (chan->idx * 16));
167 	dev_dbg(mv_chan_to_devp(chan), "%s, val 0x%08x\n", __func__, val);
168 	writel_relaxed(val, XOR_INTR_CAUSE(chan));
169 }
170 
171 static void mv_chan_clear_err_status(struct mv_xor_chan *chan)
172 {
173 	u32 val = 0xFFFF0000 >> (chan->idx * 16);
174 	writel_relaxed(val, XOR_INTR_CAUSE(chan));
175 }
176 
177 static void mv_chan_set_mode(struct mv_xor_chan *chan,
178 			     u32 op_mode)
179 {
180 	u32 config = readl_relaxed(XOR_CONFIG(chan));
181 
182 	config &= ~0x7;
183 	config |= op_mode;
184 
185 #if defined(__BIG_ENDIAN)
186 	config |= XOR_DESCRIPTOR_SWAP;
187 #else
188 	config &= ~XOR_DESCRIPTOR_SWAP;
189 #endif
190 
191 	writel_relaxed(config, XOR_CONFIG(chan));
192 }
193 
/* Kick the channel by setting bit 0 of its activation register */
static void mv_chan_activate(struct mv_xor_chan *chan)
{
	dev_dbg(mv_chan_to_devp(chan), " activate chan.\n");

	/*
	 * Use the non-relaxed writel() so that all descriptor updates are
	 * flushed before the engine is started.
	 */
	writel(BIT(0), XOR_ACTIVATION(chan));
}
201 
202 static char mv_chan_is_busy(struct mv_xor_chan *chan)
203 {
204 	u32 state = readl_relaxed(XOR_ACTIVATION(chan));
205 
206 	state = (state >> 4) & 0x3;
207 
208 	return (state == 1) ? 1 : 0;
209 }
210 
211 /*
212  * mv_chan_start_new_chain - program the engine to operate on new
213  * chain headed by sw_desc
214  * Caller must hold &mv_chan->lock while calling this function
215  */
216 static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan,
217 				    struct mv_xor_desc_slot *sw_desc)
218 {
219 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n",
220 		__func__, __LINE__, sw_desc);
221 
222 	/* set the hardware chain */
223 	mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
224 
225 	mv_chan->pending++;
226 	mv_xor_issue_pending(&mv_chan->dmachan);
227 }
228 
229 static dma_cookie_t
230 mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
231 				struct mv_xor_chan *mv_chan,
232 				dma_cookie_t cookie)
233 {
234 	BUG_ON(desc->async_tx.cookie < 0);
235 
236 	if (desc->async_tx.cookie > 0) {
237 		cookie = desc->async_tx.cookie;
238 
239 		dma_descriptor_unmap(&desc->async_tx);
240 		/* call the callback (must not sleep or submit new
241 		 * operations to this channel)
242 		 */
243 		dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
244 	}
245 
246 	/* run dependent operations */
247 	dma_run_dependencies(&desc->async_tx);
248 
249 	return cookie;
250 }
251 
252 static int
253 mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan)
254 {
255 	struct mv_xor_desc_slot *iter, *_iter;
256 
257 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
258 	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
259 				 node) {
260 
261 		if (async_tx_test_ack(&iter->async_tx)) {
262 			list_move_tail(&iter->node, &mv_chan->free_slots);
263 			if (!list_empty(&iter->sg_tx_list)) {
264 				list_splice_tail_init(&iter->sg_tx_list,
265 							&mv_chan->free_slots);
266 			}
267 		}
268 	}
269 	return 0;
270 }
271 
272 static int
273 mv_desc_clean_slot(struct mv_xor_desc_slot *desc,
274 		   struct mv_xor_chan *mv_chan)
275 {
276 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n",
277 		__func__, __LINE__, desc, desc->async_tx.flags);
278 
279 	/* the client is allowed to attach dependent operations
280 	 * until 'ack' is set
281 	 */
282 	if (!async_tx_test_ack(&desc->async_tx)) {
283 		/* move this slot to the completed_slots */
284 		list_move_tail(&desc->node, &mv_chan->completed_slots);
285 		if (!list_empty(&desc->sg_tx_list)) {
286 			list_splice_tail_init(&desc->sg_tx_list,
287 					      &mv_chan->completed_slots);
288 		}
289 	} else {
290 		list_move_tail(&desc->node, &mv_chan->free_slots);
291 		if (!list_empty(&desc->sg_tx_list)) {
292 			list_splice_tail_init(&desc->sg_tx_list,
293 					      &mv_chan->free_slots);
294 		}
295 	}
296 
297 	return 0;
298 }
299 
300 /* This function must be called with the mv_xor_chan spinlock held */
301 static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan)
302 {
303 	struct mv_xor_desc_slot *iter, *_iter;
304 	dma_cookie_t cookie = 0;
305 	int busy = mv_chan_is_busy(mv_chan);
306 	u32 current_desc = mv_chan_get_current_desc(mv_chan);
307 	int current_cleaned = 0;
308 	struct mv_xor_desc *hw_desc;
309 
310 	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
311 	dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc);
312 	mv_chan_clean_completed_slots(mv_chan);
313 
314 	/* free completed slots from the chain starting with
315 	 * the oldest descriptor
316 	 */
317 
318 	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
319 				 node) {
320 
321 		/* clean finished descriptors */
322 		hw_desc = iter->hw_desc;
323 		if (hw_desc->status & XOR_DESC_SUCCESS) {
324 			cookie = mv_desc_run_tx_complete_actions(iter, mv_chan,
325 								 cookie);
326 
327 			/* done processing desc, clean slot */
328 			mv_desc_clean_slot(iter, mv_chan);
329 
330 			/* break if we did cleaned the current */
331 			if (iter->async_tx.phys == current_desc) {
332 				current_cleaned = 1;
333 				break;
334 			}
335 		} else {
336 			if (iter->async_tx.phys == current_desc) {
337 				current_cleaned = 0;
338 				break;
339 			}
340 		}
341 	}
342 
343 	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
344 		if (current_cleaned) {
345 			/*
346 			 * current descriptor cleaned and removed, run
347 			 * from list head
348 			 */
349 			iter = list_entry(mv_chan->chain.next,
350 					  struct mv_xor_desc_slot,
351 					  node);
352 			mv_chan_start_new_chain(mv_chan, iter);
353 		} else {
354 			if (!list_is_last(&iter->node, &mv_chan->chain)) {
355 				/*
356 				 * descriptors are still waiting after
357 				 * current, trigger them
358 				 */
359 				iter = list_entry(iter->node.next,
360 						  struct mv_xor_desc_slot,
361 						  node);
362 				mv_chan_start_new_chain(mv_chan, iter);
363 			} else {
364 				/*
365 				 * some descriptors are still waiting
366 				 * to be cleaned
367 				 */
368 				tasklet_schedule(&mv_chan->irq_tasklet);
369 			}
370 		}
371 	}
372 
373 	if (cookie > 0)
374 		mv_chan->dmachan.completed_cookie = cookie;
375 }
376 
377 static void mv_xor_tasklet(unsigned long data)
378 {
379 	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
380 
381 	spin_lock_bh(&chan->lock);
382 	mv_chan_slot_cleanup(chan);
383 	spin_unlock_bh(&chan->lock);
384 }
385 
386 static struct mv_xor_desc_slot *
387 mv_chan_alloc_slot(struct mv_xor_chan *mv_chan)
388 {
389 	struct mv_xor_desc_slot *iter;
390 
391 	spin_lock_bh(&mv_chan->lock);
392 
393 	if (!list_empty(&mv_chan->free_slots)) {
394 		iter = list_first_entry(&mv_chan->free_slots,
395 					struct mv_xor_desc_slot,
396 					node);
397 
398 		list_move_tail(&iter->node, &mv_chan->allocated_slots);
399 
400 		spin_unlock_bh(&mv_chan->lock);
401 
402 		/* pre-ack descriptor */
403 		async_tx_ack(&iter->async_tx);
404 		iter->async_tx.cookie = -EBUSY;
405 
406 		return iter;
407 
408 	}
409 
410 	spin_unlock_bh(&mv_chan->lock);
411 
412 	/* try to free some slots if the allocation fails */
413 	tasklet_schedule(&mv_chan->irq_tasklet);
414 
415 	return NULL;
416 }
417 
418 /************************ DMA engine API functions ****************************/
419 static dma_cookie_t
420 mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
421 {
422 	struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
423 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
424 	struct mv_xor_desc_slot *old_chain_tail;
425 	dma_cookie_t cookie;
426 	int new_hw_chain = 1;
427 
428 	dev_dbg(mv_chan_to_devp(mv_chan),
429 		"%s sw_desc %p: async_tx %p\n",
430 		__func__, sw_desc, &sw_desc->async_tx);
431 
432 	spin_lock_bh(&mv_chan->lock);
433 	cookie = dma_cookie_assign(tx);
434 
435 	if (list_empty(&mv_chan->chain))
436 		list_move_tail(&sw_desc->node, &mv_chan->chain);
437 	else {
438 		new_hw_chain = 0;
439 
440 		old_chain_tail = list_entry(mv_chan->chain.prev,
441 					    struct mv_xor_desc_slot,
442 					    node);
443 		list_move_tail(&sw_desc->node, &mv_chan->chain);
444 
445 		dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n",
446 			&old_chain_tail->async_tx.phys);
447 
448 		/* fix up the hardware chain */
449 		mv_desc_set_next_desc(old_chain_tail, sw_desc->async_tx.phys);
450 
451 		/* if the channel is not busy */
452 		if (!mv_chan_is_busy(mv_chan)) {
453 			u32 current_desc = mv_chan_get_current_desc(mv_chan);
454 			/*
455 			 * and the curren desc is the end of the chain before
456 			 * the append, then we need to start the channel
457 			 */
458 			if (current_desc == old_chain_tail->async_tx.phys)
459 				new_hw_chain = 1;
460 		}
461 	}
462 
463 	if (new_hw_chain)
464 		mv_chan_start_new_chain(mv_chan, sw_desc);
465 
466 	spin_unlock_bh(&mv_chan->lock);
467 
468 	return cookie;
469 }
470 
471 /* returns the number of allocated descriptors */
472 static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
473 {
474 	void *virt_desc;
475 	dma_addr_t dma_desc;
476 	int idx;
477 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
478 	struct mv_xor_desc_slot *slot = NULL;
479 	int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE;
480 
481 	/* Allocate descriptor slots */
482 	idx = mv_chan->slots_allocated;
483 	while (idx < num_descs_in_pool) {
484 		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
485 		if (!slot) {
486 			dev_info(mv_chan_to_devp(mv_chan),
487 				 "channel only initialized %d descriptor slots",
488 				 idx);
489 			break;
490 		}
491 		virt_desc = mv_chan->dma_desc_pool_virt;
492 		slot->hw_desc = virt_desc + idx * MV_XOR_SLOT_SIZE;
493 
494 		dma_async_tx_descriptor_init(&slot->async_tx, chan);
495 		slot->async_tx.tx_submit = mv_xor_tx_submit;
496 		INIT_LIST_HEAD(&slot->node);
497 		INIT_LIST_HEAD(&slot->sg_tx_list);
498 		dma_desc = mv_chan->dma_desc_pool;
499 		slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE;
500 		slot->idx = idx++;
501 
502 		spin_lock_bh(&mv_chan->lock);
503 		mv_chan->slots_allocated = idx;
504 		list_add_tail(&slot->node, &mv_chan->free_slots);
505 		spin_unlock_bh(&mv_chan->lock);
506 	}
507 
508 	dev_dbg(mv_chan_to_devp(mv_chan),
509 		"allocated %d descriptor slots\n",
510 		mv_chan->slots_allocated);
511 
512 	return mv_chan->slots_allocated ? : -ENOMEM;
513 }
514 
515 /*
516  * Check if source or destination is an PCIe/IO address (non-SDRAM) and add
517  * a new MBus window if necessary. Use a cache for these check so that
518  * the MMIO mapped registers don't have to be accessed for this check
519  * to speed up this process.
520  */
521 static int mv_xor_add_io_win(struct mv_xor_chan *mv_chan, u32 addr)
522 {
523 	struct mv_xor_device *xordev = mv_chan->xordev;
524 	void __iomem *base = mv_chan->mmr_high_base;
525 	u32 win_enable;
526 	u32 size;
527 	u8 target, attr;
528 	int ret;
529 	int i;
530 
531 	/* Nothing needs to get done for the Armada 3700 */
532 	if (xordev->xor_type == XOR_ARMADA_37XX)
533 		return 0;
534 
535 	/*
536 	 * Loop over the cached windows to check, if the requested area
537 	 * is already mapped. If this the case, nothing needs to be done
538 	 * and we can return.
539 	 */
540 	for (i = 0; i < WINDOW_COUNT; i++) {
541 		if (addr >= xordev->win_start[i] &&
542 		    addr <= xordev->win_end[i]) {
543 			/* Window is already mapped */
544 			return 0;
545 		}
546 	}
547 
548 	/*
549 	 * The window is not mapped, so we need to create the new mapping
550 	 */
551 
552 	/* If no IO window is found that addr has to be located in SDRAM */
553 	ret = mvebu_mbus_get_io_win_info(addr, &size, &target, &attr);
554 	if (ret < 0)
555 		return 0;
556 
557 	/*
558 	 * Mask the base addr 'addr' according to 'size' read back from the
559 	 * MBus window. Otherwise we might end up with an address located
560 	 * somewhere in the middle of this area here.
561 	 */
562 	size -= 1;
563 	addr &= ~size;
564 
565 	/*
566 	 * Reading one of both enabled register is enough, as they are always
567 	 * programmed to the identical values
568 	 */
569 	win_enable = readl(base + WINDOW_BAR_ENABLE(0));
570 
571 	/* Set 'i' to the first free window to write the new values to */
572 	i = ffs(~win_enable) - 1;
573 	if (i >= WINDOW_COUNT)
574 		return -ENOMEM;
575 
576 	writel((addr & 0xffff0000) | (attr << 8) | target,
577 	       base + WINDOW_BASE(i));
578 	writel(size & 0xffff0000, base + WINDOW_SIZE(i));
579 
580 	/* Fill the caching variables for later use */
581 	xordev->win_start[i] = addr;
582 	xordev->win_end[i] = addr + size;
583 
584 	win_enable |= (1 << i);
585 	win_enable |= 3 << (16 + (2 * i));
586 	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
587 	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
588 
589 	return 0;
590 }
591 
592 static struct dma_async_tx_descriptor *
593 mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
594 		    unsigned int src_cnt, size_t len, unsigned long flags)
595 {
596 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
597 	struct mv_xor_desc_slot *sw_desc;
598 	int ret;
599 
600 	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
601 		return NULL;
602 
603 	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
604 
605 	dev_dbg(mv_chan_to_devp(mv_chan),
606 		"%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
607 		__func__, src_cnt, len, &dest, flags);
608 
609 	/* Check if a new window needs to get added for 'dest' */
610 	ret = mv_xor_add_io_win(mv_chan, dest);
611 	if (ret)
612 		return NULL;
613 
614 	sw_desc = mv_chan_alloc_slot(mv_chan);
615 	if (sw_desc) {
616 		sw_desc->type = DMA_XOR;
617 		sw_desc->async_tx.flags = flags;
618 		mv_desc_init(sw_desc, dest, len, flags);
619 		if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
620 			mv_desc_set_mode(sw_desc);
621 		while (src_cnt--) {
622 			/* Check if a new window needs to get added for 'src' */
623 			ret = mv_xor_add_io_win(mv_chan, src[src_cnt]);
624 			if (ret)
625 				return NULL;
626 			mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]);
627 		}
628 	}
629 
630 	dev_dbg(mv_chan_to_devp(mv_chan),
631 		"%s sw_desc %p async_tx %p \n",
632 		__func__, sw_desc, &sw_desc->async_tx);
633 	return sw_desc ? &sw_desc->async_tx : NULL;
634 }
635 
636 static struct dma_async_tx_descriptor *
637 mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
638 		size_t len, unsigned long flags)
639 {
640 	/*
641 	 * A MEMCPY operation is identical to an XOR operation with only
642 	 * a single source address.
643 	 */
644 	return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
645 }
646 
647 static struct dma_async_tx_descriptor *
648 mv_xor_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
649 {
650 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
651 	dma_addr_t src, dest;
652 	size_t len;
653 
654 	src = mv_chan->dummy_src_addr;
655 	dest = mv_chan->dummy_dst_addr;
656 	len = MV_XOR_MIN_BYTE_COUNT;
657 
658 	/*
659 	 * We implement the DMA_INTERRUPT operation as a minimum sized
660 	 * XOR operation with a single dummy source address.
661 	 */
662 	return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
663 }
664 
665 /**
666  * mv_xor_prep_dma_sg - prepare descriptors for a memory sg transaction
667  * @chan: DMA channel
668  * @dst_sg: Destination scatter list
669  * @dst_sg_len: Number of entries in destination scatter list
670  * @src_sg: Source scatter list
671  * @src_sg_len: Number of entries in source scatter list
672  * @flags: transfer ack flags
673  *
674  * Return: Async transaction descriptor on success and NULL on failure
675  */
676 static struct dma_async_tx_descriptor *
677 mv_xor_prep_dma_sg(struct dma_chan *chan, struct scatterlist *dst_sg,
678 		   unsigned int dst_sg_len, struct scatterlist *src_sg,
679 		   unsigned int src_sg_len, unsigned long flags)
680 {
681 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
682 	struct mv_xor_desc_slot *new;
683 	struct mv_xor_desc_slot *first = NULL;
684 	struct mv_xor_desc_slot *prev = NULL;
685 	size_t len, dst_avail, src_avail;
686 	dma_addr_t dma_dst, dma_src;
687 	int desc_cnt = 0;
688 	int ret;
689 
690 	dev_dbg(mv_chan_to_devp(mv_chan),
691 		"%s dst_sg_len: %d src_sg_len: %d flags: %ld\n",
692 		__func__, dst_sg_len, src_sg_len, flags);
693 
694 	dst_avail = sg_dma_len(dst_sg);
695 	src_avail = sg_dma_len(src_sg);
696 
697 	/* Run until we are out of scatterlist entries */
698 	while (true) {
699 		/* Allocate and populate the descriptor */
700 		desc_cnt++;
701 		new = mv_chan_alloc_slot(mv_chan);
702 		if (!new) {
703 			dev_err(mv_chan_to_devp(mv_chan),
704 				"Out of descriptors (desc_cnt=%d)!\n",
705 				desc_cnt);
706 			goto err;
707 		}
708 
709 		len = min_t(size_t, src_avail, dst_avail);
710 		len = min_t(size_t, len, MV_XOR_MAX_BYTE_COUNT);
711 		if (len == 0)
712 			goto fetch;
713 
714 		if (len < MV_XOR_MIN_BYTE_COUNT) {
715 			dev_err(mv_chan_to_devp(mv_chan),
716 				"Transfer size of %zu too small!\n", len);
717 			goto err;
718 		}
719 
720 		dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
721 			dst_avail;
722 		dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) -
723 			src_avail;
724 
725 		/* Check if a new window needs to get added for 'dst' */
726 		ret = mv_xor_add_io_win(mv_chan, dma_dst);
727 		if (ret)
728 			goto err;
729 
730 		/* Check if a new window needs to get added for 'src' */
731 		ret = mv_xor_add_io_win(mv_chan, dma_src);
732 		if (ret)
733 			goto err;
734 
735 		/* Populate the descriptor */
736 		mv_xor_config_sg_ll_desc(new, dma_src, dma_dst, len, prev);
737 		prev = new;
738 		dst_avail -= len;
739 		src_avail -= len;
740 
741 		if (!first)
742 			first = new;
743 		else
744 			list_move_tail(&new->node, &first->sg_tx_list);
745 
746 fetch:
747 		/* Fetch the next dst scatterlist entry */
748 		if (dst_avail == 0) {
749 			if (dst_sg_len == 0)
750 				break;
751 
752 			/* Fetch the next entry: if there are no more: done */
753 			dst_sg = sg_next(dst_sg);
754 			if (dst_sg == NULL)
755 				break;
756 
757 			dst_sg_len--;
758 			dst_avail = sg_dma_len(dst_sg);
759 		}
760 
761 		/* Fetch the next src scatterlist entry */
762 		if (src_avail == 0) {
763 			if (src_sg_len == 0)
764 				break;
765 
766 			/* Fetch the next entry: if there are no more: done */
767 			src_sg = sg_next(src_sg);
768 			if (src_sg == NULL)
769 				break;
770 
771 			src_sg_len--;
772 			src_avail = sg_dma_len(src_sg);
773 		}
774 	}
775 
776 	/* Set the EOD flag in the last descriptor */
777 	mv_xor_desc_config_eod(new);
778 	first->async_tx.flags = flags;
779 
780 	return &first->async_tx;
781 
782 err:
783 	/* Cleanup: Move all descriptors back into the free list */
784 	spin_lock_bh(&mv_chan->lock);
785 	mv_desc_clean_slot(first, mv_chan);
786 	spin_unlock_bh(&mv_chan->lock);
787 
788 	return NULL;
789 }
790 
791 static void mv_xor_free_chan_resources(struct dma_chan *chan)
792 {
793 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
794 	struct mv_xor_desc_slot *iter, *_iter;
795 	int in_use_descs = 0;
796 
797 	spin_lock_bh(&mv_chan->lock);
798 
799 	mv_chan_slot_cleanup(mv_chan);
800 
801 	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
802 					node) {
803 		in_use_descs++;
804 		list_move_tail(&iter->node, &mv_chan->free_slots);
805 	}
806 	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
807 				 node) {
808 		in_use_descs++;
809 		list_move_tail(&iter->node, &mv_chan->free_slots);
810 	}
811 	list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots,
812 				 node) {
813 		in_use_descs++;
814 		list_move_tail(&iter->node, &mv_chan->free_slots);
815 	}
816 	list_for_each_entry_safe_reverse(
817 		iter, _iter, &mv_chan->free_slots, node) {
818 		list_del(&iter->node);
819 		kfree(iter);
820 		mv_chan->slots_allocated--;
821 	}
822 
823 	dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n",
824 		__func__, mv_chan->slots_allocated);
825 	spin_unlock_bh(&mv_chan->lock);
826 
827 	if (in_use_descs)
828 		dev_err(mv_chan_to_devp(mv_chan),
829 			"freeing %d in use descriptors!\n", in_use_descs);
830 }
831 
832 /**
833  * mv_xor_status - poll the status of an XOR transaction
834  * @chan: XOR channel handle
835  * @cookie: XOR transaction identifier
836  * @txstate: XOR transactions state holder (or NULL)
837  */
838 static enum dma_status mv_xor_status(struct dma_chan *chan,
839 					  dma_cookie_t cookie,
840 					  struct dma_tx_state *txstate)
841 {
842 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
843 	enum dma_status ret;
844 
845 	ret = dma_cookie_status(chan, cookie, txstate);
846 	if (ret == DMA_COMPLETE)
847 		return ret;
848 
849 	spin_lock_bh(&mv_chan->lock);
850 	mv_chan_slot_cleanup(mv_chan);
851 	spin_unlock_bh(&mv_chan->lock);
852 
853 	return dma_cookie_status(chan, cookie, txstate);
854 }
855 
/* Log the channel's configuration, activation, interrupt and error registers */
static void mv_chan_dump_regs(struct mv_xor_chan *chan)
{
	struct device *dev = mv_chan_to_devp(chan);

	dev_err(dev, "config       0x%08x\n",
		readl_relaxed(XOR_CONFIG(chan)));
	dev_err(dev, "activation   0x%08x\n",
		readl_relaxed(XOR_ACTIVATION(chan)));
	dev_err(dev, "intr cause   0x%08x\n",
		readl_relaxed(XOR_INTR_CAUSE(chan)));
	dev_err(dev, "intr mask    0x%08x\n",
		readl_relaxed(XOR_INTR_MASK(chan)));
	dev_err(dev, "error cause  0x%08x\n",
		readl_relaxed(XOR_ERROR_CAUSE(chan)));
	dev_err(dev, "error addr   0x%08x\n",
		readl_relaxed(XOR_ERROR_ADDR(chan)));
}
878 
879 static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan,
880 					  u32 intr_cause)
881 {
882 	if (intr_cause & XOR_INT_ERR_DECODE) {
883 		dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n");
884 		return;
885 	}
886 
887 	dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n",
888 		chan->idx, intr_cause);
889 
890 	mv_chan_dump_regs(chan);
891 	WARN_ON(1);
892 }
893 
894 static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
895 {
896 	struct mv_xor_chan *chan = data;
897 	u32 intr_cause = mv_chan_get_intr_cause(chan);
898 
899 	dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause);
900 
901 	if (intr_cause & XOR_INTR_ERRORS)
902 		mv_chan_err_interrupt_handler(chan, intr_cause);
903 
904 	tasklet_schedule(&chan->irq_tasklet);
905 
906 	mv_chan_clear_eoc_cause(chan);
907 
908 	return IRQ_HANDLED;
909 }
910 
911 static void mv_xor_issue_pending(struct dma_chan *chan)
912 {
913 	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
914 
915 	if (mv_chan->pending >= MV_XOR_THRESHOLD) {
916 		mv_chan->pending = 0;
917 		mv_chan_activate(mv_chan);
918 	}
919 }
920 
921 /*
922  * Perform a transaction to verify the HW works.
923  */
924 
925 static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
926 {
927 	int i, ret;
928 	void *src, *dest;
929 	dma_addr_t src_dma, dest_dma;
930 	struct dma_chan *dma_chan;
931 	dma_cookie_t cookie;
932 	struct dma_async_tx_descriptor *tx;
933 	struct dmaengine_unmap_data *unmap;
934 	int err = 0;
935 
936 	src = kmalloc(sizeof(u8) * PAGE_SIZE, GFP_KERNEL);
937 	if (!src)
938 		return -ENOMEM;
939 
940 	dest = kzalloc(sizeof(u8) * PAGE_SIZE, GFP_KERNEL);
941 	if (!dest) {
942 		kfree(src);
943 		return -ENOMEM;
944 	}
945 
946 	/* Fill in src buffer */
947 	for (i = 0; i < PAGE_SIZE; i++)
948 		((u8 *) src)[i] = (u8)i;
949 
950 	dma_chan = &mv_chan->dmachan;
951 	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
952 		err = -ENODEV;
953 		goto out;
954 	}
955 
956 	unmap = dmaengine_get_unmap_data(dma_chan->device->dev, 2, GFP_KERNEL);
957 	if (!unmap) {
958 		err = -ENOMEM;
959 		goto free_resources;
960 	}
961 
962 	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
963 			       (size_t)src & ~PAGE_MASK, PAGE_SIZE,
964 			       DMA_TO_DEVICE);
965 	unmap->addr[0] = src_dma;
966 
967 	ret = dma_mapping_error(dma_chan->device->dev, src_dma);
968 	if (ret) {
969 		err = -ENOMEM;
970 		goto free_resources;
971 	}
972 	unmap->to_cnt = 1;
973 
974 	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
975 				(size_t)dest & ~PAGE_MASK, PAGE_SIZE,
976 				DMA_FROM_DEVICE);
977 	unmap->addr[1] = dest_dma;
978 
979 	ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
980 	if (ret) {
981 		err = -ENOMEM;
982 		goto free_resources;
983 	}
984 	unmap->from_cnt = 1;
985 	unmap->len = PAGE_SIZE;
986 
987 	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
988 				    PAGE_SIZE, 0);
989 	if (!tx) {
990 		dev_err(dma_chan->device->dev,
991 			"Self-test cannot prepare operation, disabling\n");
992 		err = -ENODEV;
993 		goto free_resources;
994 	}
995 
996 	cookie = mv_xor_tx_submit(tx);
997 	if (dma_submit_error(cookie)) {
998 		dev_err(dma_chan->device->dev,
999 			"Self-test submit error, disabling\n");
1000 		err = -ENODEV;
1001 		goto free_resources;
1002 	}
1003 
1004 	mv_xor_issue_pending(dma_chan);
1005 	async_tx_ack(tx);
1006 	msleep(1);
1007 
1008 	if (mv_xor_status(dma_chan, cookie, NULL) !=
1009 	    DMA_COMPLETE) {
1010 		dev_err(dma_chan->device->dev,
1011 			"Self-test copy timed out, disabling\n");
1012 		err = -ENODEV;
1013 		goto free_resources;
1014 	}
1015 
1016 	dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
1017 				PAGE_SIZE, DMA_FROM_DEVICE);
1018 	if (memcmp(src, dest, PAGE_SIZE)) {
1019 		dev_err(dma_chan->device->dev,
1020 			"Self-test copy failed compare, disabling\n");
1021 		err = -ENODEV;
1022 		goto free_resources;
1023 	}
1024 
1025 free_resources:
1026 	dmaengine_unmap_put(unmap);
1027 	mv_xor_free_chan_resources(dma_chan);
1028 out:
1029 	kfree(src);
1030 	kfree(dest);
1031 	return err;
1032 }
1033 
1034 #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
1035 static int
1036 mv_chan_xor_self_test(struct mv_xor_chan *mv_chan)
1037 {
1038 	int i, src_idx, ret;
1039 	struct page *dest;
1040 	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
1041 	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
1042 	dma_addr_t dest_dma;
1043 	struct dma_async_tx_descriptor *tx;
1044 	struct dmaengine_unmap_data *unmap;
1045 	struct dma_chan *dma_chan;
1046 	dma_cookie_t cookie;
1047 	u8 cmp_byte = 0;
1048 	u32 cmp_word;
1049 	int err = 0;
1050 	int src_count = MV_XOR_NUM_SRC_TEST;
1051 
1052 	for (src_idx = 0; src_idx < src_count; src_idx++) {
1053 		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
1054 		if (!xor_srcs[src_idx]) {
1055 			while (src_idx--)
1056 				__free_page(xor_srcs[src_idx]);
1057 			return -ENOMEM;
1058 		}
1059 	}
1060 
1061 	dest = alloc_page(GFP_KERNEL);
1062 	if (!dest) {
1063 		while (src_idx--)
1064 			__free_page(xor_srcs[src_idx]);
1065 		return -ENOMEM;
1066 	}
1067 
1068 	/* Fill in src buffers */
1069 	for (src_idx = 0; src_idx < src_count; src_idx++) {
1070 		u8 *ptr = page_address(xor_srcs[src_idx]);
1071 		for (i = 0; i < PAGE_SIZE; i++)
1072 			ptr[i] = (1 << src_idx);
1073 	}
1074 
1075 	for (src_idx = 0; src_idx < src_count; src_idx++)
1076 		cmp_byte ^= (u8) (1 << src_idx);
1077 
1078 	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
1079 		(cmp_byte << 8) | cmp_byte;
1080 
1081 	memset(page_address(dest), 0, PAGE_SIZE);
1082 
1083 	dma_chan = &mv_chan->dmachan;
1084 	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
1085 		err = -ENODEV;
1086 		goto out;
1087 	}
1088 
1089 	unmap = dmaengine_get_unmap_data(dma_chan->device->dev, src_count + 1,
1090 					 GFP_KERNEL);
1091 	if (!unmap) {
1092 		err = -ENOMEM;
1093 		goto free_resources;
1094 	}
1095 
1096 	/* test xor */
1097 	for (i = 0; i < src_count; i++) {
1098 		unmap->addr[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
1099 					      0, PAGE_SIZE, DMA_TO_DEVICE);
1100 		dma_srcs[i] = unmap->addr[i];
1101 		ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[i]);
1102 		if (ret) {
1103 			err = -ENOMEM;
1104 			goto free_resources;
1105 		}
1106 		unmap->to_cnt++;
1107 	}
1108 
1109 	unmap->addr[src_count] = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
1110 				      DMA_FROM_DEVICE);
1111 	dest_dma = unmap->addr[src_count];
1112 	ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[src_count]);
1113 	if (ret) {
1114 		err = -ENOMEM;
1115 		goto free_resources;
1116 	}
1117 	unmap->from_cnt = 1;
1118 	unmap->len = PAGE_SIZE;
1119 
1120 	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
1121 				 src_count, PAGE_SIZE, 0);
1122 	if (!tx) {
1123 		dev_err(dma_chan->device->dev,
1124 			"Self-test cannot prepare operation, disabling\n");
1125 		err = -ENODEV;
1126 		goto free_resources;
1127 	}
1128 
1129 	cookie = mv_xor_tx_submit(tx);
1130 	if (dma_submit_error(cookie)) {
1131 		dev_err(dma_chan->device->dev,
1132 			"Self-test submit error, disabling\n");
1133 		err = -ENODEV;
1134 		goto free_resources;
1135 	}
1136 
1137 	mv_xor_issue_pending(dma_chan);
1138 	async_tx_ack(tx);
1139 	msleep(8);
1140 
1141 	if (mv_xor_status(dma_chan, cookie, NULL) !=
1142 	    DMA_COMPLETE) {
1143 		dev_err(dma_chan->device->dev,
1144 			"Self-test xor timed out, disabling\n");
1145 		err = -ENODEV;
1146 		goto free_resources;
1147 	}
1148 
1149 	dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
1150 				PAGE_SIZE, DMA_FROM_DEVICE);
1151 	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
1152 		u32 *ptr = page_address(dest);
1153 		if (ptr[i] != cmp_word) {
1154 			dev_err(dma_chan->device->dev,
1155 				"Self-test xor failed compare, disabling. index %d, data %x, expected %x\n",
1156 				i, ptr[i], cmp_word);
1157 			err = -ENODEV;
1158 			goto free_resources;
1159 		}
1160 	}
1161 
1162 free_resources:
1163 	dmaengine_unmap_put(unmap);
1164 	mv_xor_free_chan_resources(dma_chan);
1165 out:
1166 	src_idx = src_count;
1167 	while (src_idx--)
1168 		__free_page(xor_srcs[src_idx]);
1169 	__free_page(dest);
1170 	return err;
1171 }
1172 
1173 static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
1174 {
1175 	struct dma_chan *chan, *_chan;
1176 	struct device *dev = mv_chan->dmadev.dev;
1177 
1178 	dma_async_device_unregister(&mv_chan->dmadev);
1179 
1180 	dma_free_coherent(dev, MV_XOR_POOL_SIZE,
1181 			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
1182 	dma_unmap_single(dev, mv_chan->dummy_src_addr,
1183 			 MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
1184 	dma_unmap_single(dev, mv_chan->dummy_dst_addr,
1185 			 MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
1186 
1187 	list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
1188 				 device_node) {
1189 		list_del(&chan->device_node);
1190 	}
1191 
1192 	free_irq(mv_chan->irq, mv_chan);
1193 
1194 	return 0;
1195 }
1196 
1197 static struct mv_xor_chan *
1198 mv_xor_channel_add(struct mv_xor_device *xordev,
1199 		   struct platform_device *pdev,
1200 		   int idx, dma_cap_mask_t cap_mask, int irq)
1201 {
1202 	int ret = 0;
1203 	struct mv_xor_chan *mv_chan;
1204 	struct dma_device *dma_dev;
1205 
1206 	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
1207 	if (!mv_chan)
1208 		return ERR_PTR(-ENOMEM);
1209 
1210 	mv_chan->idx = idx;
1211 	mv_chan->irq = irq;
1212 	if (xordev->xor_type == XOR_ORION)
1213 		mv_chan->op_in_desc = XOR_MODE_IN_REG;
1214 	else
1215 		mv_chan->op_in_desc = XOR_MODE_IN_DESC;
1216 
1217 	dma_dev = &mv_chan->dmadev;
1218 	mv_chan->xordev = xordev;
1219 
1220 	/*
1221 	 * These source and destination dummy buffers are used to implement
1222 	 * a DMA_INTERRUPT operation as a minimum-sized XOR operation.
1223 	 * Hence, we only need to map the buffers at initialization-time.
1224 	 */
1225 	mv_chan->dummy_src_addr = dma_map_single(dma_dev->dev,
1226 		mv_chan->dummy_src, MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
1227 	mv_chan->dummy_dst_addr = dma_map_single(dma_dev->dev,
1228 		mv_chan->dummy_dst, MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
1229 
1230 	/* allocate coherent memory for hardware descriptors
1231 	 * note: writecombine gives slightly better performance, but
1232 	 * requires that we explicitly flush the writes
1233 	 */
1234 	mv_chan->dma_desc_pool_virt =
1235 	  dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool,
1236 		       GFP_KERNEL);
1237 	if (!mv_chan->dma_desc_pool_virt)
1238 		return ERR_PTR(-ENOMEM);
1239 
1240 	/* discover transaction capabilites from the platform data */
1241 	dma_dev->cap_mask = cap_mask;
1242 
1243 	INIT_LIST_HEAD(&dma_dev->channels);
1244 
1245 	/* set base routines */
1246 	dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
1247 	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
1248 	dma_dev->device_tx_status = mv_xor_status;
1249 	dma_dev->device_issue_pending = mv_xor_issue_pending;
1250 	dma_dev->dev = &pdev->dev;
1251 
1252 	/* set prep routines based on capability */
1253 	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1254 		dma_dev->device_prep_dma_interrupt = mv_xor_prep_dma_interrupt;
1255 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
1256 		dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
1257 	if (dma_has_cap(DMA_SG, dma_dev->cap_mask))
1258 		dma_dev->device_prep_dma_sg = mv_xor_prep_dma_sg;
1259 	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
1260 		dma_dev->max_xor = 8;
1261 		dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
1262 	}
1263 
1264 	mv_chan->mmr_base = xordev->xor_base;
1265 	mv_chan->mmr_high_base = xordev->xor_high_base;
1266 	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
1267 		     mv_chan);
1268 
1269 	/* clear errors before enabling interrupts */
1270 	mv_chan_clear_err_status(mv_chan);
1271 
1272 	ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler,
1273 			  0, dev_name(&pdev->dev), mv_chan);
1274 	if (ret)
1275 		goto err_free_dma;
1276 
1277 	mv_chan_unmask_interrupts(mv_chan);
1278 
1279 	if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
1280 		mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_IN_DESC);
1281 	else
1282 		mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_XOR);
1283 
1284 	spin_lock_init(&mv_chan->lock);
1285 	INIT_LIST_HEAD(&mv_chan->chain);
1286 	INIT_LIST_HEAD(&mv_chan->completed_slots);
1287 	INIT_LIST_HEAD(&mv_chan->free_slots);
1288 	INIT_LIST_HEAD(&mv_chan->allocated_slots);
1289 	mv_chan->dmachan.device = dma_dev;
1290 	dma_cookie_init(&mv_chan->dmachan);
1291 
1292 	list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels);
1293 
1294 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
1295 		ret = mv_chan_memcpy_self_test(mv_chan);
1296 		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
1297 		if (ret)
1298 			goto err_free_irq;
1299 	}
1300 
1301 	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
1302 		ret = mv_chan_xor_self_test(mv_chan);
1303 		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1304 		if (ret)
1305 			goto err_free_irq;
1306 	}
1307 
1308 	dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s%s)\n",
1309 		 mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode",
1310 		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1311 		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
1312 		 dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "sg " : "",
1313 		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
1314 
1315 	dma_async_device_register(dma_dev);
1316 	return mv_chan;
1317 
1318 err_free_irq:
1319 	free_irq(mv_chan->irq, mv_chan);
1320 err_free_dma:
1321 	dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE,
1322 			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
1323 	return ERR_PTR(ret);
1324 }
1325 
1326 static void
1327 mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
1328 			 const struct mbus_dram_target_info *dram)
1329 {
1330 	void __iomem *base = xordev->xor_high_base;
1331 	u32 win_enable = 0;
1332 	int i;
1333 
1334 	for (i = 0; i < 8; i++) {
1335 		writel(0, base + WINDOW_BASE(i));
1336 		writel(0, base + WINDOW_SIZE(i));
1337 		if (i < 4)
1338 			writel(0, base + WINDOW_REMAP_HIGH(i));
1339 	}
1340 
1341 	for (i = 0; i < dram->num_cs; i++) {
1342 		const struct mbus_dram_window *cs = dram->cs + i;
1343 
1344 		writel((cs->base & 0xffff0000) |
1345 		       (cs->mbus_attr << 8) |
1346 		       dram->mbus_dram_target_id, base + WINDOW_BASE(i));
1347 		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
1348 
1349 		/* Fill the caching variables for later use */
1350 		xordev->win_start[i] = cs->base;
1351 		xordev->win_end[i] = cs->base + cs->size - 1;
1352 
1353 		win_enable |= (1 << i);
1354 		win_enable |= 3 << (16 + (2 * i));
1355 	}
1356 
1357 	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
1358 	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
1359 	writel(0, base + WINDOW_OVERRIDE_CTRL(0));
1360 	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
1361 }
1362 
1363 static void
1364 mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev)
1365 {
1366 	void __iomem *base = xordev->xor_high_base;
1367 	u32 win_enable = 0;
1368 	int i;
1369 
1370 	for (i = 0; i < 8; i++) {
1371 		writel(0, base + WINDOW_BASE(i));
1372 		writel(0, base + WINDOW_SIZE(i));
1373 		if (i < 4)
1374 			writel(0, base + WINDOW_REMAP_HIGH(i));
1375 	}
1376 	/*
1377 	 * For Armada3700 open default 4GB Mbus window. The dram
1378 	 * related configuration are done at AXIS level.
1379 	 */
1380 	writel(0xffff0000, base + WINDOW_SIZE(0));
1381 	win_enable |= 1;
1382 	win_enable |= 3 << 16;
1383 
1384 	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
1385 	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
1386 	writel(0, base + WINDOW_OVERRIDE_CTRL(0));
1387 	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
1388 }
1389 
1390 /*
1391  * Since this XOR driver is basically used only for RAID5, we don't
1392  * need to care about synchronizing ->suspend with DMA activity,
1393  * because the DMA engine will naturally be quiet due to the block
1394  * devices being suspended.
1395  */
1396 static int mv_xor_suspend(struct platform_device *pdev, pm_message_t state)
1397 {
1398 	struct mv_xor_device *xordev = platform_get_drvdata(pdev);
1399 	int i;
1400 
1401 	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
1402 		struct mv_xor_chan *mv_chan = xordev->channels[i];
1403 
1404 		if (!mv_chan)
1405 			continue;
1406 
1407 		mv_chan->saved_config_reg =
1408 			readl_relaxed(XOR_CONFIG(mv_chan));
1409 		mv_chan->saved_int_mask_reg =
1410 			readl_relaxed(XOR_INTR_MASK(mv_chan));
1411 	}
1412 
1413 	return 0;
1414 }
1415 
1416 static int mv_xor_resume(struct platform_device *dev)
1417 {
1418 	struct mv_xor_device *xordev = platform_get_drvdata(dev);
1419 	const struct mbus_dram_target_info *dram;
1420 	int i;
1421 
1422 	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
1423 		struct mv_xor_chan *mv_chan = xordev->channels[i];
1424 
1425 		if (!mv_chan)
1426 			continue;
1427 
1428 		writel_relaxed(mv_chan->saved_config_reg,
1429 			       XOR_CONFIG(mv_chan));
1430 		writel_relaxed(mv_chan->saved_int_mask_reg,
1431 			       XOR_INTR_MASK(mv_chan));
1432 	}
1433 
1434 	if (xordev->xor_type == XOR_ARMADA_37XX) {
1435 		mv_xor_conf_mbus_windows_a3700(xordev);
1436 		return 0;
1437 	}
1438 
1439 	dram = mv_mbus_dram_info();
1440 	if (dram)
1441 		mv_xor_conf_mbus_windows(xordev, dram);
1442 
1443 	return 0;
1444 }
1445 
1446 static const struct of_device_id mv_xor_dt_ids[] = {
1447 	{ .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION },
1448 	{ .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X },
1449 	{ .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX },
1450 	{},
1451 };
1452 
1453 static unsigned int mv_xor_engine_count;
1454 
1455 static int mv_xor_probe(struct platform_device *pdev)
1456 {
1457 	const struct mbus_dram_target_info *dram;
1458 	struct mv_xor_device *xordev;
1459 	struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
1460 	struct resource *res;
1461 	unsigned int max_engines, max_channels;
1462 	int i, ret;
1463 
1464 	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
1465 
1466 	xordev = devm_kzalloc(&pdev->dev, sizeof(*xordev), GFP_KERNEL);
1467 	if (!xordev)
1468 		return -ENOMEM;
1469 
1470 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1471 	if (!res)
1472 		return -ENODEV;
1473 
1474 	xordev->xor_base = devm_ioremap(&pdev->dev, res->start,
1475 					resource_size(res));
1476 	if (!xordev->xor_base)
1477 		return -EBUSY;
1478 
1479 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
1480 	if (!res)
1481 		return -ENODEV;
1482 
1483 	xordev->xor_high_base = devm_ioremap(&pdev->dev, res->start,
1484 					     resource_size(res));
1485 	if (!xordev->xor_high_base)
1486 		return -EBUSY;
1487 
1488 	platform_set_drvdata(pdev, xordev);
1489 
1490 
1491 	/*
1492 	 * We need to know which type of XOR device we use before
1493 	 * setting up. In non-dt case it can only be the legacy one.
1494 	 */
1495 	xordev->xor_type = XOR_ORION;
1496 	if (pdev->dev.of_node) {
1497 		const struct of_device_id *of_id =
1498 			of_match_device(mv_xor_dt_ids,
1499 					&pdev->dev);
1500 
1501 		xordev->xor_type = (uintptr_t)of_id->data;
1502 	}
1503 
1504 	/*
1505 	 * (Re-)program MBUS remapping windows if we are asked to.
1506 	 */
1507 	if (xordev->xor_type == XOR_ARMADA_37XX) {
1508 		mv_xor_conf_mbus_windows_a3700(xordev);
1509 	} else {
1510 		dram = mv_mbus_dram_info();
1511 		if (dram)
1512 			mv_xor_conf_mbus_windows(xordev, dram);
1513 	}
1514 
1515 	/* Not all platforms can gate the clock, so it is not
1516 	 * an error if the clock does not exists.
1517 	 */
1518 	xordev->clk = clk_get(&pdev->dev, NULL);
1519 	if (!IS_ERR(xordev->clk))
1520 		clk_prepare_enable(xordev->clk);
1521 
1522 	/*
1523 	 * We don't want to have more than one channel per CPU in
1524 	 * order for async_tx to perform well. So we limit the number
1525 	 * of engines and channels so that we take into account this
1526 	 * constraint. Note that we also want to use channels from
1527 	 * separate engines when possible.  For dual-CPU Armada 3700
1528 	 * SoC with single XOR engine allow using its both channels.
1529 	 */
1530 	max_engines = num_present_cpus();
1531 	if (xordev->xor_type == XOR_ARMADA_37XX)
1532 		max_channels =	num_present_cpus();
1533 	else
1534 		max_channels = min_t(unsigned int,
1535 				     MV_XOR_MAX_CHANNELS,
1536 				     DIV_ROUND_UP(num_present_cpus(), 2));
1537 
1538 	if (mv_xor_engine_count >= max_engines)
1539 		return 0;
1540 
1541 	if (pdev->dev.of_node) {
1542 		struct device_node *np;
1543 		int i = 0;
1544 
1545 		for_each_child_of_node(pdev->dev.of_node, np) {
1546 			struct mv_xor_chan *chan;
1547 			dma_cap_mask_t cap_mask;
1548 			int irq;
1549 
1550 			if (i >= max_channels)
1551 				continue;
1552 
1553 			dma_cap_zero(cap_mask);
1554 			dma_cap_set(DMA_MEMCPY, cap_mask);
1555 			dma_cap_set(DMA_SG, cap_mask);
1556 			dma_cap_set(DMA_XOR, cap_mask);
1557 			dma_cap_set(DMA_INTERRUPT, cap_mask);
1558 
1559 			irq = irq_of_parse_and_map(np, 0);
1560 			if (!irq) {
1561 				ret = -ENODEV;
1562 				goto err_channel_add;
1563 			}
1564 
1565 			chan = mv_xor_channel_add(xordev, pdev, i,
1566 						  cap_mask, irq);
1567 			if (IS_ERR(chan)) {
1568 				ret = PTR_ERR(chan);
1569 				irq_dispose_mapping(irq);
1570 				goto err_channel_add;
1571 			}
1572 
1573 			xordev->channels[i] = chan;
1574 			i++;
1575 		}
1576 	} else if (pdata && pdata->channels) {
1577 		for (i = 0; i < max_channels; i++) {
1578 			struct mv_xor_channel_data *cd;
1579 			struct mv_xor_chan *chan;
1580 			int irq;
1581 
1582 			cd = &pdata->channels[i];
1583 			if (!cd) {
1584 				ret = -ENODEV;
1585 				goto err_channel_add;
1586 			}
1587 
1588 			irq = platform_get_irq(pdev, i);
1589 			if (irq < 0) {
1590 				ret = irq;
1591 				goto err_channel_add;
1592 			}
1593 
1594 			chan = mv_xor_channel_add(xordev, pdev, i,
1595 						  cd->cap_mask, irq);
1596 			if (IS_ERR(chan)) {
1597 				ret = PTR_ERR(chan);
1598 				goto err_channel_add;
1599 			}
1600 
1601 			xordev->channels[i] = chan;
1602 		}
1603 	}
1604 
1605 	return 0;
1606 
1607 err_channel_add:
1608 	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++)
1609 		if (xordev->channels[i]) {
1610 			mv_xor_channel_remove(xordev->channels[i]);
1611 			if (pdev->dev.of_node)
1612 				irq_dispose_mapping(xordev->channels[i]->irq);
1613 		}
1614 
1615 	if (!IS_ERR(xordev->clk)) {
1616 		clk_disable_unprepare(xordev->clk);
1617 		clk_put(xordev->clk);
1618 	}
1619 
1620 	return ret;
1621 }
1622 
1623 static struct platform_driver mv_xor_driver = {
1624 	.probe		= mv_xor_probe,
1625 	.suspend        = mv_xor_suspend,
1626 	.resume         = mv_xor_resume,
1627 	.driver		= {
1628 		.name	        = MV_XOR_NAME,
1629 		.of_match_table = of_match_ptr(mv_xor_dt_ids),
1630 	},
1631 };
1632 
1633 builtin_platform_driver(mv_xor_driver);
1634 
1635 /*
1636 MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
1637 MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
1638 MODULE_LICENSE("GPL");
1639 */
1640