xref: /linux/drivers/dma/ste_dma40.c (revision 765532c8aaac624b5f8687af6d319c6a1138a257)
1 /*
2  * Copyright (C) ST-Ericsson SA 2007-2010
3  * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
4  * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
5  * License terms: GNU General Public License (GPL) version 2
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
10 #include <linux/dmaengine.h>
11 #include <linux/platform_device.h>
12 #include <linux/clk.h>
13 #include <linux/delay.h>
14 #include <linux/err.h>
15 
16 #include <plat/ste_dma40.h>
17 
18 #include "ste_dma40_ll.h"
19 
#define D40_NAME "dma40"

/* Value of d40_chan.log_num that marks a channel as physical */
#define D40_PHY_CHAN (-1)

/*
 * For masking out/in 2 bit channel positions.
 * The argument is parenthesized so that expressions such as "n + 1" are
 * evaluated as a whole, and the mask is unsigned so that shifting it into
 * bit 31 is well defined.
 */
#define D40_CHAN_POS(chan)  (2 * ((chan) / 2))
#define D40_CHAN_POS_MASK(chan) (0x3U << D40_CHAN_POS(chan))

/* Maximum iterations taken before giving up suspending a channel */
#define D40_SUSPEND_MAX_IT 500

/* Hardware requirement on LCLA alignment */
#define LCLA_ALIGNMENT 0x40000

/* Max number of links per event group */
#define D40_LCLA_LINK_PER_EVENT_GRP 128
#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP

/* Attempts before giving up to trying to get pages that are aligned */
#define MAX_LCLA_ALLOC_ATTEMPTS 256

/*
 * Bit markings for allocation map.
 * Unsigned constants: (1 << 31) overflows a signed int.
 */
#define D40_ALLOC_FREE		(1U << 31)
#define D40_ALLOC_PHY		(1U << 30)
#define D40_ALLOC_LOG_FREE	0

/* Hardware designer of the block */
#define D40_HW_DESIGNER 0x8
48 
/**
 * enum d40_command - The different commands and/or statuses.
 *
 * @D40_DMA_STOP: DMA channel command STOP or status STOPPED,
 * @D40_DMA_RUN: The DMA channel is RUNNING or the command RUN.
 * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible.
 * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED.
 *
 * The values are the 2 bit codes that d40_channel_execute_command() writes
 * to, and reads back from, the per-channel field of the active register.
 */
enum d40_command {
	D40_DMA_STOP		= 0,
	D40_DMA_RUN		= 1,
	D40_DMA_SUSPEND_REQ	= 2,
	D40_DMA_SUSPENDED	= 3
};
63 
/**
 * struct d40_lli_pool - Structure for keeping LLIs in memory
 *
 * @base: Pointer to the kmalloc'ed memory area used when pre_alloc_lli is
 * not large enough, i.e. for anything bigger than the most common case of
 * 1 dst and 1 src. NULL if pre_alloc_lli is used.
 * @size: The size in bytes of the memory at base or the size of pre_alloc_lli.
 * @pre_alloc_lli: Pre allocated area for the most common case of transfers,
 * one buffer to one buffer.
 */
struct d40_lli_pool {
	void	*base;
	int	 size;
	/* Space for dst and src, plus an extra for padding */
	u8	 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
};
80 
/**
 * struct d40_desc - A descriptor is one DMA job.
 *
 * @lli_phy: LLI settings for physical channel. Both src and dst
 * point into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if
 * lli_len equals one.
 * @lli_log: Same as above but for logical channels.
 * @lli_pool: The pool with two entries pre-allocated.
 * @lli_len: Number of llis of current descriptor.
 * @lli_current: Number of transferred llis.
 * @lcla_alloc: Number of LCLA entries allocated.
 * @txd: DMA engine struct. Used for among other things for communication
 * during a transfer.
 * @node: List entry.
 * @is_in_client_list: true if the client owns this descriptor.
 * @is_hw_linked: true if this job has been linked in hardware to the
 * previous one, so it is continued automatically and must not be loaded
 * and started again.
 *
 * This descriptor is used for both logical and physical transfers.
 */
struct d40_desc {
	/* LLI physical */
	struct d40_phy_lli_bidir	 lli_phy;
	/* LLI logical */
	struct d40_log_lli_bidir	 lli_log;

	struct d40_lli_pool		 lli_pool;
	int				 lli_len;
	int				 lli_current;
	int				 lcla_alloc;

	struct dma_async_tx_descriptor	 txd;
	struct list_head		 node;

	bool				 is_in_client_list;
	bool				 is_hw_linked;
};
118 
/**
 * struct d40_lcla_pool - LCLA pool settings and data.
 *
 * @base: The virtual address of LCLA. 18 bit aligned.
 * @base_unaligned: The original kmalloc pointer, if kmalloc is used.
 * This pointer is only there for clean-up on error.
 * @pages: The number of pages needed for all physical channels.
 * Only used later for clean-up on error
 * @lock: Lock to protect the content in this struct.
 * @alloc_map: Big map over which LCLA entry is owned by which job. An
 * entry is NULL when the LCLA slot is free.
 */
struct d40_lcla_pool {
	void		*base;
	void		*base_unaligned;
	int		 pages;
	spinlock_t	 lock;
	struct d40_desc	**alloc_map;
};
137 
/**
 * struct d40_phy_res - struct for handling eventlines mapped to physical
 * channels.
 *
 * @lock: A lock protecting this entity.
 * @num: The physical channel number of this entity.
 * @allocated_src: Bit mapped to show which src event lines are mapped to
 * this physical channel. Can also be free or physically allocated.
 * @allocated_dst: Same as for src but is dst.
 * allocated_dst and allocated_src use the D40_ALLOC* defines as well as
 * event line number.
 */
struct d40_phy_res {
	spinlock_t lock;
	int	   num;
	u32	   allocated_src;
	u32	   allocated_dst;
};
156 
/* Defined further down; d40_chan only needs a pointer to it. */
struct d40_base;

/**
 * struct d40_chan - Struct that describes a channel.
 *
 * @lock: A spinlock to protect this struct.
 * @log_num: The logical number, if any of this channel. Equals D40_PHY_CHAN
 * when the channel is a physical one.
 * @completed: Starts with 1, after first interrupt it is set to dma engine's
 * current cookie.
 * @pending_tx: The number of pending transfers. Used between interrupt handler
 * and tasklet.
 * @busy: Set to true when transfer is ongoing on this channel.
 * @phy_chan: Pointer to physical channel which this instance runs on. If this
 * pointer is NULL, then the channel is not allocated.
 * @chan: DMA engine handle.
 * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
 * transfer and call client callback.
 * @client: Client owned descriptor list.
 * @active: Active descriptor.
 * @queue: Queued jobs.
 * @dma_cfg: The client configuration of this dma channel.
 * @configured: whether the dma_cfg configuration is valid
 * @base: Pointer to the device instance struct.
 * @src_def_cfg: Default cfg register setting for src.
 * @dst_def_cfg: Default cfg register setting for dst.
 * @log_def: Default logical channel settings.
 * @lcpa: Pointer to dst and src lcpa settings.
 * @runtime_addr: Address used for runtime reconfiguration (its users are
 * outside this part of the file - TODO confirm exact meaning).
 * @runtime_direction: Transfer direction used for runtime reconfiguration
 * (its users are outside this part of the file - TODO confirm).
 *
 * This struct can either "be" a logical or a physical channel.
 */
struct d40_chan {
	spinlock_t			 lock;
	int				 log_num;
	/* ID of the most recent completed transfer */
	int				 completed;
	int				 pending_tx;
	bool				 busy;
	struct d40_phy_res		*phy_chan;
	struct dma_chan			 chan;
	struct tasklet_struct		 tasklet;
	struct list_head		 client;
	struct list_head		 active;
	struct list_head		 queue;
	struct stedma40_chan_cfg	 dma_cfg;
	bool				 configured;
	struct d40_base			*base;
	/* Default register configurations */
	u32				 src_def_cfg;
	u32				 dst_def_cfg;
	struct d40_def_lcsp		 log_def;
	struct d40_log_lli_full		*lcpa;
	/* Runtime reconfiguration */
	dma_addr_t			runtime_addr;
	enum dma_data_direction		runtime_direction;
};
213 
/**
 * struct d40_base - The big global struct, one for each probe'd instance.
 *
 * @interrupt_lock: Lock used to make sure one interrupt is handled at a time.
 * @execmd_lock: Lock for execute command usage since several channels share
 * the same physical register.
 * @dev: The device structure.
 * @virtbase: The virtual base address of the DMA's register.
 * @rev: silicon revision detected.
 * @clk: Pointer to the DMA clock structure.
 * @phy_start: Physical memory start of the DMA registers.
 * @phy_size: Size of the DMA register map.
 * @irq: The IRQ number.
 * @num_phy_chans: The number of physical channels. Read from HW. This
 * is the number of available channels for this driver, not counting "Secure
 * mode" allocated physical channels.
 * @num_log_chans: The number of logical channels. Calculated from
 * num_phy_chans.
 * @dma_both: dma_device channels that can do both memcpy and slave transfers.
 * @dma_slave: dma_device channels that can only do slave transfers.
 * @dma_memcpy: dma_device channels that can only do memcpy transfers.
 * @phy_chans: The physical channel instances; lookup_phy_chans points into
 * this array.
 * @log_chans: Room for all possible logical channels in system.
 * @lookup_log_chans: Used to map interrupt number to logical channel. Points
 * to log_chans entries.
 * @lookup_phy_chans: Used to map interrupt number to physical channel. Points
 * to phy_chans entries.
 * @plat_data: Pointer to provided platform_data which is the driver
 * configuration.
 * @phy_res: Vector containing all physical channels.
 * @lcla_pool: lcla pool settings and data.
 * @lcpa_base: The virtual mapped address of LCPA.
 * @phy_lcpa: The physical address of the LCPA.
 * @lcpa_size: The size of the LCPA area.
 * @desc_slab: cache for descriptors.
 */
struct d40_base {
	spinlock_t			 interrupt_lock;
	spinlock_t			 execmd_lock;
	struct device			 *dev;
	void __iomem			 *virtbase;
	u8				  rev:4;
	struct clk			 *clk;
	phys_addr_t			  phy_start;
	resource_size_t			  phy_size;
	int				  irq;
	int				  num_phy_chans;
	int				  num_log_chans;
	struct dma_device		  dma_both;
	struct dma_device		  dma_slave;
	struct dma_device		  dma_memcpy;
	struct d40_chan			 *phy_chans;
	struct d40_chan			 *log_chans;
	struct d40_chan			**lookup_log_chans;
	struct d40_chan			**lookup_phy_chans;
	struct stedma40_platform_data	 *plat_data;
	/* Physical half channels */
	struct d40_phy_res		 *phy_res;
	struct d40_lcla_pool		  lcla_pool;
	void				 *lcpa_base;
	dma_addr_t			  phy_lcpa;
	resource_size_t			  lcpa_size;
	struct kmem_cache		 *desc_slab;
};
277 
/**
 * struct d40_interrupt_lookup - lookup table for interrupt handler
 *
 * @src: Interrupt mask register.
 * @clr: Interrupt clear register.
 * @is_error: true if this is an error interrupt.
 * @offset: start delta in the lookup_log_chans in d40_base. If equal to
 * D40_PHY_CHAN, the lookup_phy_chans shall be used instead.
 */
struct d40_interrupt_lookup {
	u32 src;
	u32 clr;
	bool is_error;
	int offset;
};
293 
/**
 * struct d40_reg_val - simple lookup struct pairing a register with a value
 *
 * @reg: The register.
 * @val: The value that belongs to the register in reg.
 */
struct d40_reg_val {
	unsigned int reg;
	unsigned int val;
};
304 
305 static int d40_pool_lli_alloc(struct d40_desc *d40d,
306 			      int lli_len, bool is_log)
307 {
308 	u32 align;
309 	void *base;
310 
311 	if (is_log)
312 		align = sizeof(struct d40_log_lli);
313 	else
314 		align = sizeof(struct d40_phy_lli);
315 
316 	if (lli_len == 1) {
317 		base = d40d->lli_pool.pre_alloc_lli;
318 		d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli);
319 		d40d->lli_pool.base = NULL;
320 	} else {
321 		d40d->lli_pool.size = ALIGN(lli_len * 2 * align, align);
322 
323 		base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT);
324 		d40d->lli_pool.base = base;
325 
326 		if (d40d->lli_pool.base == NULL)
327 			return -ENOMEM;
328 	}
329 
330 	if (is_log) {
331 		d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base,
332 					      align);
333 		d40d->lli_log.dst = PTR_ALIGN(d40d->lli_log.src + lli_len,
334 					      align);
335 	} else {
336 		d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base,
337 					      align);
338 		d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len,
339 					      align);
340 	}
341 
342 	return 0;
343 }
344 
345 static void d40_pool_lli_free(struct d40_desc *d40d)
346 {
347 	kfree(d40d->lli_pool.base);
348 	d40d->lli_pool.base = NULL;
349 	d40d->lli_pool.size = 0;
350 	d40d->lli_log.src = NULL;
351 	d40d->lli_log.dst = NULL;
352 	d40d->lli_phy.src = NULL;
353 	d40d->lli_phy.dst = NULL;
354 }
355 
356 static int d40_lcla_alloc_one(struct d40_chan *d40c,
357 			      struct d40_desc *d40d)
358 {
359 	unsigned long flags;
360 	int i;
361 	int ret = -EINVAL;
362 	int p;
363 
364 	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
365 
366 	p = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP;
367 
368 	/*
369 	 * Allocate both src and dst at the same time, therefore the half
370 	 * start on 1 since 0 can't be used since zero is used as end marker.
371 	 */
372 	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
373 		if (!d40c->base->lcla_pool.alloc_map[p + i]) {
374 			d40c->base->lcla_pool.alloc_map[p + i] = d40d;
375 			d40d->lcla_alloc++;
376 			ret = i;
377 			break;
378 		}
379 	}
380 
381 	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
382 
383 	return ret;
384 }
385 
386 static int d40_lcla_free_all(struct d40_chan *d40c,
387 			     struct d40_desc *d40d)
388 {
389 	unsigned long flags;
390 	int i;
391 	int ret = -EINVAL;
392 
393 	if (d40c->log_num == D40_PHY_CHAN)
394 		return 0;
395 
396 	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
397 
398 	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
399 		if (d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num *
400 						    D40_LCLA_LINK_PER_EVENT_GRP + i] == d40d) {
401 			d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num *
402 							D40_LCLA_LINK_PER_EVENT_GRP + i] = NULL;
403 			d40d->lcla_alloc--;
404 			if (d40d->lcla_alloc == 0) {
405 				ret = 0;
406 				break;
407 			}
408 		}
409 	}
410 
411 	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
412 
413 	return ret;
414 
415 }
416 
417 static void d40_desc_remove(struct d40_desc *d40d)
418 {
419 	list_del(&d40d->node);
420 }
421 
422 static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
423 {
424 	struct d40_desc *desc = NULL;
425 
426 	if (!list_empty(&d40c->client)) {
427 		struct d40_desc *d;
428 		struct d40_desc *_d;
429 
430 		list_for_each_entry_safe(d, _d, &d40c->client, node)
431 			if (async_tx_test_ack(&d->txd)) {
432 				d40_pool_lli_free(d);
433 				d40_desc_remove(d);
434 				desc = d;
435 				memset(desc, 0, sizeof(*desc));
436 				break;
437 			}
438 	}
439 
440 	if (!desc)
441 		desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT);
442 
443 	if (desc)
444 		INIT_LIST_HEAD(&desc->node);
445 
446 	return desc;
447 }
448 
449 static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
450 {
451 
452 	d40_lcla_free_all(d40c, d40d);
453 	kmem_cache_free(d40c->base->desc_slab, d40d);
454 }
455 
456 static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc)
457 {
458 	list_add_tail(&desc->node, &d40c->active);
459 }
460 
461 static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
462 {
463 	int curr_lcla = -EINVAL, next_lcla;
464 
465 	if (d40c->log_num == D40_PHY_CHAN) {
466 		d40_phy_lli_write(d40c->base->virtbase,
467 				  d40c->phy_chan->num,
468 				  d40d->lli_phy.dst,
469 				  d40d->lli_phy.src);
470 		d40d->lli_current = d40d->lli_len;
471 	} else {
472 
473 		if ((d40d->lli_len - d40d->lli_current) > 1)
474 			curr_lcla = d40_lcla_alloc_one(d40c, d40d);
475 
476 		d40_log_lli_lcpa_write(d40c->lcpa,
477 				       &d40d->lli_log.dst[d40d->lli_current],
478 				       &d40d->lli_log.src[d40d->lli_current],
479 				       curr_lcla);
480 
481 		d40d->lli_current++;
482 		for (; d40d->lli_current < d40d->lli_len; d40d->lli_current++) {
483 			struct d40_log_lli *lcla;
484 
485 			if (d40d->lli_current + 1 < d40d->lli_len)
486 				next_lcla = d40_lcla_alloc_one(d40c, d40d);
487 			else
488 				next_lcla = -EINVAL;
489 
490 			lcla = d40c->base->lcla_pool.base +
491 				d40c->phy_chan->num * 1024 +
492 				8 * curr_lcla * 2;
493 
494 			d40_log_lli_lcla_write(lcla,
495 					       &d40d->lli_log.dst[d40d->lli_current],
496 					       &d40d->lli_log.src[d40d->lli_current],
497 					       next_lcla);
498 
499 			(void) dma_map_single(d40c->base->dev, lcla,
500 					      2 * sizeof(struct d40_log_lli),
501 					      DMA_TO_DEVICE);
502 
503 			curr_lcla = next_lcla;
504 
505 			if (curr_lcla == -EINVAL) {
506 				d40d->lli_current++;
507 				break;
508 			}
509 
510 		}
511 	}
512 }
513 
514 static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
515 {
516 	struct d40_desc *d;
517 
518 	if (list_empty(&d40c->active))
519 		return NULL;
520 
521 	d = list_first_entry(&d40c->active,
522 			     struct d40_desc,
523 			     node);
524 	return d;
525 }
526 
527 static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
528 {
529 	list_add_tail(&desc->node, &d40c->queue);
530 }
531 
532 static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
533 {
534 	struct d40_desc *d;
535 
536 	if (list_empty(&d40c->queue))
537 		return NULL;
538 
539 	d = list_first_entry(&d40c->queue,
540 			     struct d40_desc,
541 			     node);
542 	return d;
543 }
544 
545 static struct d40_desc *d40_last_queued(struct d40_chan *d40c)
546 {
547 	struct d40_desc *d;
548 
549 	if (list_empty(&d40c->queue))
550 		return NULL;
551 	list_for_each_entry(d, &d40c->queue, node)
552 		if (list_is_last(&d->node, &d40c->queue))
553 			break;
554 	return d;
555 }
556 
557 /* Support functions for logical channels */
558 
559 
560 static int d40_channel_execute_command(struct d40_chan *d40c,
561 				       enum d40_command command)
562 {
563 	u32 status;
564 	int i;
565 	void __iomem *active_reg;
566 	int ret = 0;
567 	unsigned long flags;
568 	u32 wmask;
569 
570 	spin_lock_irqsave(&d40c->base->execmd_lock, flags);
571 
572 	if (d40c->phy_chan->num % 2 == 0)
573 		active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
574 	else
575 		active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
576 
577 	if (command == D40_DMA_SUSPEND_REQ) {
578 		status = (readl(active_reg) &
579 			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
580 			D40_CHAN_POS(d40c->phy_chan->num);
581 
582 		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
583 			goto done;
584 	}
585 
586 	wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num));
587 	writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)),
588 	       active_reg);
589 
590 	if (command == D40_DMA_SUSPEND_REQ) {
591 
592 		for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) {
593 			status = (readl(active_reg) &
594 				  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
595 				D40_CHAN_POS(d40c->phy_chan->num);
596 
597 			cpu_relax();
598 			/*
599 			 * Reduce the number of bus accesses while
600 			 * waiting for the DMA to suspend.
601 			 */
602 			udelay(3);
603 
604 			if (status == D40_DMA_STOP ||
605 			    status == D40_DMA_SUSPENDED)
606 				break;
607 		}
608 
609 		if (i == D40_SUSPEND_MAX_IT) {
610 			dev_err(&d40c->chan.dev->device,
611 				"[%s]: unable to suspend the chl %d (log: %d) status %x\n",
612 				__func__, d40c->phy_chan->num, d40c->log_num,
613 				status);
614 			dump_stack();
615 			ret = -EBUSY;
616 		}
617 
618 	}
619 done:
620 	spin_unlock_irqrestore(&d40c->base->execmd_lock, flags);
621 	return ret;
622 }
623 
624 static void d40_term_all(struct d40_chan *d40c)
625 {
626 	struct d40_desc *d40d;
627 
628 	/* Release active descriptors */
629 	while ((d40d = d40_first_active_get(d40c))) {
630 		d40_desc_remove(d40d);
631 		d40_desc_free(d40c, d40d);
632 	}
633 
634 	/* Release queued descriptors waiting for transfer */
635 	while ((d40d = d40_first_queued(d40c))) {
636 		d40_desc_remove(d40d);
637 		d40_desc_free(d40c, d40d);
638 	}
639 
640 
641 	d40c->pending_tx = 0;
642 	d40c->busy = false;
643 }
644 
645 static void d40_config_set_event(struct d40_chan *d40c, bool do_enable)
646 {
647 	u32 val;
648 	unsigned long flags;
649 
650 	/* Notice, that disable requires the physical channel to be stopped */
651 	if (do_enable)
652 		val = D40_ACTIVATE_EVENTLINE;
653 	else
654 		val = D40_DEACTIVATE_EVENTLINE;
655 
656 	spin_lock_irqsave(&d40c->phy_chan->lock, flags);
657 
658 	/* Enable event line connected to device (or memcpy) */
659 	if ((d40c->dma_cfg.dir ==  STEDMA40_PERIPH_TO_MEM) ||
660 	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) {
661 		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
662 
663 		writel((val << D40_EVENTLINE_POS(event)) |
664 		       ~D40_EVENTLINE_MASK(event),
665 		       d40c->base->virtbase + D40_DREG_PCBASE +
666 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
667 		       D40_CHAN_REG_SSLNK);
668 	}
669 	if (d40c->dma_cfg.dir !=  STEDMA40_PERIPH_TO_MEM) {
670 		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
671 
672 		writel((val << D40_EVENTLINE_POS(event)) |
673 		       ~D40_EVENTLINE_MASK(event),
674 		       d40c->base->virtbase + D40_DREG_PCBASE +
675 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
676 		       D40_CHAN_REG_SDLNK);
677 	}
678 
679 	spin_unlock_irqrestore(&d40c->phy_chan->lock, flags);
680 }
681 
682 static u32 d40_chan_has_events(struct d40_chan *d40c)
683 {
684 	u32 val;
685 
686 	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
687 		    d40c->phy_chan->num * D40_DREG_PCDELTA +
688 		    D40_CHAN_REG_SSLNK);
689 
690 	val |= readl(d40c->base->virtbase + D40_DREG_PCBASE +
691 		     d40c->phy_chan->num * D40_DREG_PCDELTA +
692 		     D40_CHAN_REG_SDLNK);
693 	return val;
694 }
695 
696 static u32 d40_get_prmo(struct d40_chan *d40c)
697 {
698 	static const unsigned int phy_map[] = {
699 		[STEDMA40_PCHAN_BASIC_MODE]
700 			= D40_DREG_PRMO_PCHAN_BASIC,
701 		[STEDMA40_PCHAN_MODULO_MODE]
702 			= D40_DREG_PRMO_PCHAN_MODULO,
703 		[STEDMA40_PCHAN_DOUBLE_DST_MODE]
704 			= D40_DREG_PRMO_PCHAN_DOUBLE_DST,
705 	};
706 	static const unsigned int log_map[] = {
707 		[STEDMA40_LCHAN_SRC_PHY_DST_LOG]
708 			= D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG,
709 		[STEDMA40_LCHAN_SRC_LOG_DST_PHY]
710 			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY,
711 		[STEDMA40_LCHAN_SRC_LOG_DST_LOG]
712 			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG,
713 	};
714 
715 	if (d40c->log_num == D40_PHY_CHAN)
716 		return phy_map[d40c->dma_cfg.mode_opt];
717 	else
718 		return log_map[d40c->dma_cfg.mode_opt];
719 }
720 
721 static void d40_config_write(struct d40_chan *d40c)
722 {
723 	u32 addr_base;
724 	u32 var;
725 
726 	/* Odd addresses are even addresses + 4 */
727 	addr_base = (d40c->phy_chan->num % 2) * 4;
728 	/* Setup channel mode to logical or physical */
729 	var = ((u32)(d40c->log_num != D40_PHY_CHAN) + 1) <<
730 		D40_CHAN_POS(d40c->phy_chan->num);
731 	writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base);
732 
733 	/* Setup operational mode option register */
734 	var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num);
735 
736 	writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base);
737 
738 	if (d40c->log_num != D40_PHY_CHAN) {
739 		/* Set default config for CFG reg */
740 		writel(d40c->src_def_cfg,
741 		       d40c->base->virtbase + D40_DREG_PCBASE +
742 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
743 		       D40_CHAN_REG_SSCFG);
744 		writel(d40c->dst_def_cfg,
745 		       d40c->base->virtbase + D40_DREG_PCBASE +
746 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
747 		       D40_CHAN_REG_SDCFG);
748 
749 		/* Set LIDX for lcla */
750 		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
751 		       D40_SREG_ELEM_LOG_LIDX_MASK,
752 		       d40c->base->virtbase + D40_DREG_PCBASE +
753 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
754 		       D40_CHAN_REG_SDELT);
755 
756 		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
757 		       D40_SREG_ELEM_LOG_LIDX_MASK,
758 		       d40c->base->virtbase + D40_DREG_PCBASE +
759 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
760 		       D40_CHAN_REG_SSELT);
761 
762 	}
763 }
764 
765 static u32 d40_residue(struct d40_chan *d40c)
766 {
767 	u32 num_elt;
768 
769 	if (d40c->log_num != D40_PHY_CHAN)
770 		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
771 			>> D40_MEM_LCSP2_ECNT_POS;
772 	else
773 		num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE +
774 				 d40c->phy_chan->num * D40_DREG_PCDELTA +
775 				 D40_CHAN_REG_SDELT) &
776 			   D40_SREG_ELEM_PHY_ECNT_MASK) >>
777 			D40_SREG_ELEM_PHY_ECNT_POS;
778 	return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);
779 }
780 
781 static bool d40_tx_is_linked(struct d40_chan *d40c)
782 {
783 	bool is_link;
784 
785 	if (d40c->log_num != D40_PHY_CHAN)
786 		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
787 	else
788 		is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE +
789 				d40c->phy_chan->num * D40_DREG_PCDELTA +
790 				D40_CHAN_REG_SDLNK) &
791 			D40_SREG_LNK_PHYS_LNK_MASK;
792 	return is_link;
793 }
794 
795 static int d40_pause(struct dma_chan *chan)
796 {
797 	struct d40_chan *d40c =
798 		container_of(chan, struct d40_chan, chan);
799 	int res = 0;
800 	unsigned long flags;
801 
802 	if (!d40c->busy)
803 		return 0;
804 
805 	spin_lock_irqsave(&d40c->lock, flags);
806 
807 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
808 	if (res == 0) {
809 		if (d40c->log_num != D40_PHY_CHAN) {
810 			d40_config_set_event(d40c, false);
811 			/* Resume the other logical channels if any */
812 			if (d40_chan_has_events(d40c))
813 				res = d40_channel_execute_command(d40c,
814 								  D40_DMA_RUN);
815 		}
816 	}
817 
818 	spin_unlock_irqrestore(&d40c->lock, flags);
819 	return res;
820 }
821 
822 static int d40_resume(struct dma_chan *chan)
823 {
824 	struct d40_chan *d40c =
825 		container_of(chan, struct d40_chan, chan);
826 	int res = 0;
827 	unsigned long flags;
828 
829 	if (!d40c->busy)
830 		return 0;
831 
832 	spin_lock_irqsave(&d40c->lock, flags);
833 
834 	if (d40c->base->rev == 0)
835 		if (d40c->log_num != D40_PHY_CHAN) {
836 			res = d40_channel_execute_command(d40c,
837 							  D40_DMA_SUSPEND_REQ);
838 			goto no_suspend;
839 		}
840 
841 	/* If bytes left to transfer or linked tx resume job */
842 	if (d40_residue(d40c) || d40_tx_is_linked(d40c)) {
843 
844 		if (d40c->log_num != D40_PHY_CHAN)
845 			d40_config_set_event(d40c, true);
846 
847 		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
848 	}
849 
850 no_suspend:
851 	spin_unlock_irqrestore(&d40c->lock, flags);
852 	return res;
853 }
854 
/*
 * d40_tx_submit_log() - submit-time hook for logical channels.
 *
 * Called from d40_tx_submit() with the channel lock held. Currently a stub:
 * unlike d40_tx_submit_phy(), no linking to previous jobs is done.
 */
static void d40_tx_submit_log(struct d40_chan *d40c, struct d40_desc *d40d)
{
	/* TODO: Write */
}
859 
860 static void d40_tx_submit_phy(struct d40_chan *d40c, struct d40_desc *d40d)
861 {
862 	struct d40_desc *d40d_prev = NULL;
863 	int i;
864 	u32 val;
865 
866 	if (!list_empty(&d40c->queue))
867 		d40d_prev = d40_last_queued(d40c);
868 	else if (!list_empty(&d40c->active))
869 		d40d_prev = d40_first_active_get(d40c);
870 
871 	if (!d40d_prev)
872 		return;
873 
874 	/* Here we try to join this job with previous jobs */
875 	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
876 		    d40c->phy_chan->num * D40_DREG_PCDELTA +
877 		    D40_CHAN_REG_SSLNK);
878 
879 	/* Figure out which link we're currently transmitting */
880 	for (i = 0; i < d40d_prev->lli_len; i++)
881 		if (val == d40d_prev->lli_phy.src[i].reg_lnk)
882 			break;
883 
884 	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
885 		    d40c->phy_chan->num * D40_DREG_PCDELTA +
886 		    D40_CHAN_REG_SSELT) >> D40_SREG_ELEM_LOG_ECNT_POS;
887 
888 	if (i == (d40d_prev->lli_len - 1) && val > 0) {
889 		/* Change the current one */
890 		writel(virt_to_phys(d40d->lli_phy.src),
891 		       d40c->base->virtbase + D40_DREG_PCBASE +
892 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
893 		       D40_CHAN_REG_SSLNK);
894 		writel(virt_to_phys(d40d->lli_phy.dst),
895 		       d40c->base->virtbase + D40_DREG_PCBASE +
896 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
897 		       D40_CHAN_REG_SDLNK);
898 
899 		d40d->is_hw_linked = true;
900 
901 	} else if (i < d40d_prev->lli_len) {
902 		(void) dma_unmap_single(d40c->base->dev,
903 					virt_to_phys(d40d_prev->lli_phy.src),
904 					d40d_prev->lli_pool.size,
905 					DMA_TO_DEVICE);
906 
907 		/* Keep the settings */
908 		val = d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk &
909 			~D40_SREG_LNK_PHYS_LNK_MASK;
910 		d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk =
911 			val | virt_to_phys(d40d->lli_phy.src);
912 
913 		val = d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk &
914 			~D40_SREG_LNK_PHYS_LNK_MASK;
915 		d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk =
916 			val | virt_to_phys(d40d->lli_phy.dst);
917 
918 		(void) dma_map_single(d40c->base->dev,
919 				      d40d_prev->lli_phy.src,
920 				      d40d_prev->lli_pool.size,
921 				      DMA_TO_DEVICE);
922 		d40d->is_hw_linked = true;
923 	}
924 }
925 
926 static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
927 {
928 	struct d40_chan *d40c = container_of(tx->chan,
929 					     struct d40_chan,
930 					     chan);
931 	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
932 	unsigned long flags;
933 
934 	(void) d40_pause(&d40c->chan);
935 
936 	spin_lock_irqsave(&d40c->lock, flags);
937 
938 	d40c->chan.cookie++;
939 
940 	if (d40c->chan.cookie < 0)
941 		d40c->chan.cookie = 1;
942 
943 	d40d->txd.cookie = d40c->chan.cookie;
944 
945 	if (d40c->log_num == D40_PHY_CHAN)
946 		d40_tx_submit_phy(d40c, d40d);
947 	else
948 		d40_tx_submit_log(d40c, d40d);
949 
950 	d40_desc_queue(d40c, d40d);
951 
952 	spin_unlock_irqrestore(&d40c->lock, flags);
953 
954 	(void) d40_resume(&d40c->chan);
955 
956 	return tx->cookie;
957 }
958 
959 static int d40_start(struct d40_chan *d40c)
960 {
961 	if (d40c->base->rev == 0) {
962 		int err;
963 
964 		if (d40c->log_num != D40_PHY_CHAN) {
965 			err = d40_channel_execute_command(d40c,
966 							  D40_DMA_SUSPEND_REQ);
967 			if (err)
968 				return err;
969 		}
970 	}
971 
972 	if (d40c->log_num != D40_PHY_CHAN)
973 		d40_config_set_event(d40c, true);
974 
975 	return d40_channel_execute_command(d40c, D40_DMA_RUN);
976 }
977 
978 static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
979 {
980 	struct d40_desc *d40d;
981 	int err;
982 
983 	/* Start queued jobs, if any */
984 	d40d = d40_first_queued(d40c);
985 
986 	if (d40d != NULL) {
987 		d40c->busy = true;
988 
989 		/* Remove from queue */
990 		d40_desc_remove(d40d);
991 
992 		/* Add to active queue */
993 		d40_desc_submit(d40c, d40d);
994 
995 		/*
996 		 * If this job is already linked in hw,
997 		 * do not submit it.
998 		 */
999 
1000 		if (!d40d->is_hw_linked) {
1001 			/* Initiate DMA job */
1002 			d40_desc_load(d40c, d40d);
1003 
1004 			/* Start dma job */
1005 			err = d40_start(d40c);
1006 
1007 			if (err)
1008 				return NULL;
1009 		}
1010 	}
1011 
1012 	return d40d;
1013 }
1014 
1015 /* called from interrupt context */
1016 static void dma_tc_handle(struct d40_chan *d40c)
1017 {
1018 	struct d40_desc *d40d;
1019 
1020 	/* Get first active entry from list */
1021 	d40d = d40_first_active_get(d40c);
1022 
1023 	if (d40d == NULL)
1024 		return;
1025 
1026 	d40_lcla_free_all(d40c, d40d);
1027 
1028 	if (d40d->lli_current < d40d->lli_len) {
1029 		d40_desc_load(d40c, d40d);
1030 		/* Start dma job */
1031 		(void) d40_start(d40c);
1032 		return;
1033 	}
1034 
1035 	if (d40_queue_start(d40c) == NULL)
1036 		d40c->busy = false;
1037 
1038 	d40c->pending_tx++;
1039 	tasklet_schedule(&d40c->tasklet);
1040 
1041 }
1042 
1043 static void dma_tasklet(unsigned long data)
1044 {
1045 	struct d40_chan *d40c = (struct d40_chan *) data;
1046 	struct d40_desc *d40d;
1047 	unsigned long flags;
1048 	dma_async_tx_callback callback;
1049 	void *callback_param;
1050 
1051 	spin_lock_irqsave(&d40c->lock, flags);
1052 
1053 	/* Get first active entry from list */
1054 	d40d = d40_first_active_get(d40c);
1055 
1056 	if (d40d == NULL)
1057 		goto err;
1058 
1059 	d40c->completed = d40d->txd.cookie;
1060 
1061 	/*
1062 	 * If terminating a channel pending_tx is set to zero.
1063 	 * This prevents any finished active jobs to return to the client.
1064 	 */
1065 	if (d40c->pending_tx == 0) {
1066 		spin_unlock_irqrestore(&d40c->lock, flags);
1067 		return;
1068 	}
1069 
1070 	/* Callback to client */
1071 	callback = d40d->txd.callback;
1072 	callback_param = d40d->txd.callback_param;
1073 
1074 	if (async_tx_test_ack(&d40d->txd)) {
1075 		d40_pool_lli_free(d40d);
1076 		d40_desc_remove(d40d);
1077 		d40_desc_free(d40c, d40d);
1078 	} else {
1079 		if (!d40d->is_in_client_list) {
1080 			d40_desc_remove(d40d);
1081 			d40_lcla_free_all(d40c, d40d);
1082 			list_add_tail(&d40d->node, &d40c->client);
1083 			d40d->is_in_client_list = true;
1084 		}
1085 	}
1086 
1087 	d40c->pending_tx--;
1088 
1089 	if (d40c->pending_tx)
1090 		tasklet_schedule(&d40c->tasklet);
1091 
1092 	spin_unlock_irqrestore(&d40c->lock, flags);
1093 
1094 	if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT))
1095 		callback(callback_param);
1096 
1097 	return;
1098 
1099  err:
1100 	/* Rescue manouver if receiving double interrupts */
1101 	if (d40c->pending_tx > 0)
1102 		d40c->pending_tx--;
1103 	spin_unlock_irqrestore(&d40c->lock, flags);
1104 }
1105 
1106 static irqreturn_t d40_handle_interrupt(int irq, void *data)
1107 {
1108 	static const struct d40_interrupt_lookup il[] = {
1109 		{D40_DREG_LCTIS0, D40_DREG_LCICR0, false,  0},
1110 		{D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32},
1111 		{D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64},
1112 		{D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96},
1113 		{D40_DREG_LCEIS0, D40_DREG_LCICR0, true,   0},
1114 		{D40_DREG_LCEIS1, D40_DREG_LCICR1, true,  32},
1115 		{D40_DREG_LCEIS2, D40_DREG_LCICR2, true,  64},
1116 		{D40_DREG_LCEIS3, D40_DREG_LCICR3, true,  96},
1117 		{D40_DREG_PCTIS,  D40_DREG_PCICR,  false, D40_PHY_CHAN},
1118 		{D40_DREG_PCEIS,  D40_DREG_PCICR,  true,  D40_PHY_CHAN},
1119 	};
1120 
1121 	int i;
1122 	u32 regs[ARRAY_SIZE(il)];
1123 	u32 idx;
1124 	u32 row;
1125 	long chan = -1;
1126 	struct d40_chan *d40c;
1127 	unsigned long flags;
1128 	struct d40_base *base = data;
1129 
1130 	spin_lock_irqsave(&base->interrupt_lock, flags);
1131 
1132 	/* Read interrupt status of both logical and physical channels */
1133 	for (i = 0; i < ARRAY_SIZE(il); i++)
1134 		regs[i] = readl(base->virtbase + il[i].src);
1135 
1136 	for (;;) {
1137 
1138 		chan = find_next_bit((unsigned long *)regs,
1139 				     BITS_PER_LONG * ARRAY_SIZE(il), chan + 1);
1140 
1141 		/* No more set bits found? */
1142 		if (chan == BITS_PER_LONG * ARRAY_SIZE(il))
1143 			break;
1144 
1145 		row = chan / BITS_PER_LONG;
1146 		idx = chan & (BITS_PER_LONG - 1);
1147 
1148 		/* ACK interrupt */
1149 		writel(1 << idx, base->virtbase + il[row].clr);
1150 
1151 		if (il[row].offset == D40_PHY_CHAN)
1152 			d40c = base->lookup_phy_chans[idx];
1153 		else
1154 			d40c = base->lookup_log_chans[il[row].offset + idx];
1155 		spin_lock(&d40c->lock);
1156 
1157 		if (!il[row].is_error)
1158 			dma_tc_handle(d40c);
1159 		else
1160 			dev_err(base->dev,
1161 				"[%s] IRQ chan: %ld offset %d idx %d\n",
1162 				__func__, chan, il[row].offset, idx);
1163 
1164 		spin_unlock(&d40c->lock);
1165 	}
1166 
1167 	spin_unlock_irqrestore(&base->interrupt_lock, flags);
1168 
1169 	return IRQ_HANDLED;
1170 }
1171 
1172 static int d40_validate_conf(struct d40_chan *d40c,
1173 			     struct stedma40_chan_cfg *conf)
1174 {
1175 	int res = 0;
1176 	u32 dst_event_group = D40_TYPE_TO_GROUP(conf->dst_dev_type);
1177 	u32 src_event_group = D40_TYPE_TO_GROUP(conf->src_dev_type);
1178 	bool is_log = conf->mode == STEDMA40_MODE_LOGICAL;
1179 
1180 	if (!conf->dir) {
1181 		dev_err(&d40c->chan.dev->device, "[%s] Invalid direction.\n",
1182 			__func__);
1183 		res = -EINVAL;
1184 	}
1185 
1186 	if (conf->dst_dev_type != STEDMA40_DEV_DST_MEMORY &&
1187 	    d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 &&
1188 	    d40c->runtime_addr == 0) {
1189 
1190 		dev_err(&d40c->chan.dev->device,
1191 			"[%s] Invalid TX channel address (%d)\n",
1192 			__func__, conf->dst_dev_type);
1193 		res = -EINVAL;
1194 	}
1195 
1196 	if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY &&
1197 	    d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 &&
1198 	    d40c->runtime_addr == 0) {
1199 		dev_err(&d40c->chan.dev->device,
1200 			"[%s] Invalid RX channel address (%d)\n",
1201 			__func__, conf->src_dev_type);
1202 		res = -EINVAL;
1203 	}
1204 
1205 	if (conf->dir == STEDMA40_MEM_TO_PERIPH &&
1206 	    dst_event_group == STEDMA40_DEV_DST_MEMORY) {
1207 		dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n",
1208 			__func__);
1209 		res = -EINVAL;
1210 	}
1211 
1212 	if (conf->dir == STEDMA40_PERIPH_TO_MEM &&
1213 	    src_event_group == STEDMA40_DEV_SRC_MEMORY) {
1214 		dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n",
1215 			__func__);
1216 		res = -EINVAL;
1217 	}
1218 
1219 	if (src_event_group == STEDMA40_DEV_SRC_MEMORY &&
1220 	    dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) {
1221 		dev_err(&d40c->chan.dev->device,
1222 			"[%s] No event line\n", __func__);
1223 		res = -EINVAL;
1224 	}
1225 
1226 	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH &&
1227 	    (src_event_group != dst_event_group)) {
1228 		dev_err(&d40c->chan.dev->device,
1229 			"[%s] Invalid event group\n", __func__);
1230 		res = -EINVAL;
1231 	}
1232 
1233 	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH) {
1234 		/*
1235 		 * DMAC HW supports it. Will be added to this driver,
1236 		 * in case any dma client requires it.
1237 		 */
1238 		dev_err(&d40c->chan.dev->device,
1239 			"[%s] periph to periph not supported\n",
1240 			__func__);
1241 		res = -EINVAL;
1242 	}
1243 
1244 	return res;
1245 }
1246 
1247 static bool d40_alloc_mask_set(struct d40_phy_res *phy, bool is_src,
1248 			       int log_event_line, bool is_log)
1249 {
1250 	unsigned long flags;
1251 	spin_lock_irqsave(&phy->lock, flags);
1252 	if (!is_log) {
1253 		/* Physical interrupts are masked per physical full channel */
1254 		if (phy->allocated_src == D40_ALLOC_FREE &&
1255 		    phy->allocated_dst == D40_ALLOC_FREE) {
1256 			phy->allocated_dst = D40_ALLOC_PHY;
1257 			phy->allocated_src = D40_ALLOC_PHY;
1258 			goto found;
1259 		} else
1260 			goto not_found;
1261 	}
1262 
1263 	/* Logical channel */
1264 	if (is_src) {
1265 		if (phy->allocated_src == D40_ALLOC_PHY)
1266 			goto not_found;
1267 
1268 		if (phy->allocated_src == D40_ALLOC_FREE)
1269 			phy->allocated_src = D40_ALLOC_LOG_FREE;
1270 
1271 		if (!(phy->allocated_src & (1 << log_event_line))) {
1272 			phy->allocated_src |= 1 << log_event_line;
1273 			goto found;
1274 		} else
1275 			goto not_found;
1276 	} else {
1277 		if (phy->allocated_dst == D40_ALLOC_PHY)
1278 			goto not_found;
1279 
1280 		if (phy->allocated_dst == D40_ALLOC_FREE)
1281 			phy->allocated_dst = D40_ALLOC_LOG_FREE;
1282 
1283 		if (!(phy->allocated_dst & (1 << log_event_line))) {
1284 			phy->allocated_dst |= 1 << log_event_line;
1285 			goto found;
1286 		} else
1287 			goto not_found;
1288 	}
1289 
1290 not_found:
1291 	spin_unlock_irqrestore(&phy->lock, flags);
1292 	return false;
1293 found:
1294 	spin_unlock_irqrestore(&phy->lock, flags);
1295 	return true;
1296 }
1297 
1298 static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src,
1299 			       int log_event_line)
1300 {
1301 	unsigned long flags;
1302 	bool is_free = false;
1303 
1304 	spin_lock_irqsave(&phy->lock, flags);
1305 	if (!log_event_line) {
1306 		phy->allocated_dst = D40_ALLOC_FREE;
1307 		phy->allocated_src = D40_ALLOC_FREE;
1308 		is_free = true;
1309 		goto out;
1310 	}
1311 
1312 	/* Logical channel */
1313 	if (is_src) {
1314 		phy->allocated_src &= ~(1 << log_event_line);
1315 		if (phy->allocated_src == D40_ALLOC_LOG_FREE)
1316 			phy->allocated_src = D40_ALLOC_FREE;
1317 	} else {
1318 		phy->allocated_dst &= ~(1 << log_event_line);
1319 		if (phy->allocated_dst == D40_ALLOC_LOG_FREE)
1320 			phy->allocated_dst = D40_ALLOC_FREE;
1321 	}
1322 
1323 	is_free = ((phy->allocated_src | phy->allocated_dst) ==
1324 		   D40_ALLOC_FREE);
1325 
1326 out:
1327 	spin_unlock_irqrestore(&phy->lock, flags);
1328 
1329 	return is_free;
1330 }
1331 
1332 static int d40_allocate_channel(struct d40_chan *d40c)
1333 {
1334 	int dev_type;
1335 	int event_group;
1336 	int event_line;
1337 	struct d40_phy_res *phys;
1338 	int i;
1339 	int j;
1340 	int log_num;
1341 	bool is_src;
1342 	bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL;
1343 
1344 	phys = d40c->base->phy_res;
1345 
1346 	if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
1347 		dev_type = d40c->dma_cfg.src_dev_type;
1348 		log_num = 2 * dev_type;
1349 		is_src = true;
1350 	} else if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
1351 		   d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1352 		/* dst event lines are used for logical memcpy */
1353 		dev_type = d40c->dma_cfg.dst_dev_type;
1354 		log_num = 2 * dev_type + 1;
1355 		is_src = false;
1356 	} else
1357 		return -EINVAL;
1358 
1359 	event_group = D40_TYPE_TO_GROUP(dev_type);
1360 	event_line = D40_TYPE_TO_EVENT(dev_type);
1361 
1362 	if (!is_log) {
1363 		if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1364 			/* Find physical half channel */
1365 			for (i = 0; i < d40c->base->num_phy_chans; i++) {
1366 
1367 				if (d40_alloc_mask_set(&phys[i], is_src,
1368 						       0, is_log))
1369 					goto found_phy;
1370 			}
1371 		} else
1372 			for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
1373 				int phy_num = j  + event_group * 2;
1374 				for (i = phy_num; i < phy_num + 2; i++) {
1375 					if (d40_alloc_mask_set(&phys[i],
1376 							       is_src,
1377 							       0,
1378 							       is_log))
1379 						goto found_phy;
1380 				}
1381 			}
1382 		return -EINVAL;
1383 found_phy:
1384 		d40c->phy_chan = &phys[i];
1385 		d40c->log_num = D40_PHY_CHAN;
1386 		goto out;
1387 	}
1388 	if (dev_type == -1)
1389 		return -EINVAL;
1390 
1391 	/* Find logical channel */
1392 	for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
1393 		int phy_num = j + event_group * 2;
1394 		/*
1395 		 * Spread logical channels across all available physical rather
1396 		 * than pack every logical channel at the first available phy
1397 		 * channels.
1398 		 */
1399 		if (is_src) {
1400 			for (i = phy_num; i < phy_num + 2; i++) {
1401 				if (d40_alloc_mask_set(&phys[i], is_src,
1402 						       event_line, is_log))
1403 					goto found_log;
1404 			}
1405 		} else {
1406 			for (i = phy_num + 1; i >= phy_num; i--) {
1407 				if (d40_alloc_mask_set(&phys[i], is_src,
1408 						       event_line, is_log))
1409 					goto found_log;
1410 			}
1411 		}
1412 	}
1413 	return -EINVAL;
1414 
1415 found_log:
1416 	d40c->phy_chan = &phys[i];
1417 	d40c->log_num = log_num;
1418 out:
1419 
1420 	if (is_log)
1421 		d40c->base->lookup_log_chans[d40c->log_num] = d40c;
1422 	else
1423 		d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c;
1424 
1425 	return 0;
1426 
1427 }
1428 
1429 static int d40_config_memcpy(struct d40_chan *d40c)
1430 {
1431 	dma_cap_mask_t cap = d40c->chan.device->cap_mask;
1432 
1433 	if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) {
1434 		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_log;
1435 		d40c->dma_cfg.src_dev_type = STEDMA40_DEV_SRC_MEMORY;
1436 		d40c->dma_cfg.dst_dev_type = d40c->base->plat_data->
1437 			memcpy[d40c->chan.chan_id];
1438 
1439 	} else if (dma_has_cap(DMA_MEMCPY, cap) &&
1440 		   dma_has_cap(DMA_SLAVE, cap)) {
1441 		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy;
1442 	} else {
1443 		dev_err(&d40c->chan.dev->device, "[%s] No memcpy\n",
1444 			__func__);
1445 		return -EINVAL;
1446 	}
1447 
1448 	return 0;
1449 }
1450 
1451 
1452 static int d40_free_dma(struct d40_chan *d40c)
1453 {
1454 
1455 	int res = 0;
1456 	u32 event;
1457 	struct d40_phy_res *phy = d40c->phy_chan;
1458 	bool is_src;
1459 	struct d40_desc *d;
1460 	struct d40_desc *_d;
1461 
1462 
1463 	/* Terminate all queued and active transfers */
1464 	d40_term_all(d40c);
1465 
1466 	/* Release client owned descriptors */
1467 	if (!list_empty(&d40c->client))
1468 		list_for_each_entry_safe(d, _d, &d40c->client, node) {
1469 			d40_pool_lli_free(d);
1470 			d40_desc_remove(d);
1471 			d40_desc_free(d40c, d);
1472 		}
1473 
1474 	if (phy == NULL) {
1475 		dev_err(&d40c->chan.dev->device, "[%s] phy == null\n",
1476 			__func__);
1477 		return -EINVAL;
1478 	}
1479 
1480 	if (phy->allocated_src == D40_ALLOC_FREE &&
1481 	    phy->allocated_dst == D40_ALLOC_FREE) {
1482 		dev_err(&d40c->chan.dev->device, "[%s] channel already free\n",
1483 			__func__);
1484 		return -EINVAL;
1485 	}
1486 
1487 	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
1488 	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1489 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
1490 		is_src = false;
1491 	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
1492 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
1493 		is_src = true;
1494 	} else {
1495 		dev_err(&d40c->chan.dev->device,
1496 			"[%s] Unknown direction\n", __func__);
1497 		return -EINVAL;
1498 	}
1499 
1500 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
1501 	if (res) {
1502 		dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n",
1503 			__func__);
1504 		return res;
1505 	}
1506 
1507 	if (d40c->log_num != D40_PHY_CHAN) {
1508 		/* Release logical channel, deactivate the event line */
1509 
1510 		d40_config_set_event(d40c, false);
1511 		d40c->base->lookup_log_chans[d40c->log_num] = NULL;
1512 
1513 		/*
1514 		 * Check if there are more logical allocation
1515 		 * on this phy channel.
1516 		 */
1517 		if (!d40_alloc_mask_free(phy, is_src, event)) {
1518 			/* Resume the other logical channels if any */
1519 			if (d40_chan_has_events(d40c)) {
1520 				res = d40_channel_execute_command(d40c,
1521 								  D40_DMA_RUN);
1522 				if (res) {
1523 					dev_err(&d40c->chan.dev->device,
1524 						"[%s] Executing RUN command\n",
1525 						__func__);
1526 					return res;
1527 				}
1528 			}
1529 			return 0;
1530 		}
1531 	} else {
1532 		(void) d40_alloc_mask_free(phy, is_src, 0);
1533 	}
1534 
1535 	/* Release physical channel */
1536 	res = d40_channel_execute_command(d40c, D40_DMA_STOP);
1537 	if (res) {
1538 		dev_err(&d40c->chan.dev->device,
1539 			"[%s] Failed to stop channel\n", __func__);
1540 		return res;
1541 	}
1542 	d40c->phy_chan = NULL;
1543 	d40c->configured = false;
1544 	d40c->base->lookup_phy_chans[phy->num] = NULL;
1545 
1546 	return 0;
1547 }
1548 
1549 static bool d40_is_paused(struct d40_chan *d40c)
1550 {
1551 	bool is_paused = false;
1552 	unsigned long flags;
1553 	void __iomem *active_reg;
1554 	u32 status;
1555 	u32 event;
1556 
1557 	spin_lock_irqsave(&d40c->lock, flags);
1558 
1559 	if (d40c->log_num == D40_PHY_CHAN) {
1560 		if (d40c->phy_chan->num % 2 == 0)
1561 			active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
1562 		else
1563 			active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
1564 
1565 		status = (readl(active_reg) &
1566 			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
1567 			D40_CHAN_POS(d40c->phy_chan->num);
1568 		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
1569 			is_paused = true;
1570 
1571 		goto _exit;
1572 	}
1573 
1574 	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
1575 	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1576 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
1577 		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
1578 			       d40c->phy_chan->num * D40_DREG_PCDELTA +
1579 			       D40_CHAN_REG_SDLNK);
1580 	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
1581 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
1582 		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
1583 			       d40c->phy_chan->num * D40_DREG_PCDELTA +
1584 			       D40_CHAN_REG_SSLNK);
1585 	} else {
1586 		dev_err(&d40c->chan.dev->device,
1587 			"[%s] Unknown direction\n", __func__);
1588 		goto _exit;
1589 	}
1590 
1591 	status = (status & D40_EVENTLINE_MASK(event)) >>
1592 		D40_EVENTLINE_POS(event);
1593 
1594 	if (status != D40_DMA_RUN)
1595 		is_paused = true;
1596 _exit:
1597 	spin_unlock_irqrestore(&d40c->lock, flags);
1598 	return is_paused;
1599 
1600 }
1601 
1602 
1603 static u32 stedma40_residue(struct dma_chan *chan)
1604 {
1605 	struct d40_chan *d40c =
1606 		container_of(chan, struct d40_chan, chan);
1607 	u32 bytes_left;
1608 	unsigned long flags;
1609 
1610 	spin_lock_irqsave(&d40c->lock, flags);
1611 	bytes_left = d40_residue(d40c);
1612 	spin_unlock_irqrestore(&d40c->lock, flags);
1613 
1614 	return bytes_left;
1615 }
1616 
1617 struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
1618 						   struct scatterlist *sgl_dst,
1619 						   struct scatterlist *sgl_src,
1620 						   unsigned int sgl_len,
1621 						   unsigned long dma_flags)
1622 {
1623 	int res;
1624 	struct d40_desc *d40d;
1625 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
1626 					     chan);
1627 	unsigned long flags;
1628 
1629 	if (d40c->phy_chan == NULL) {
1630 		dev_err(&d40c->chan.dev->device,
1631 			"[%s] Unallocated channel.\n", __func__);
1632 		return ERR_PTR(-EINVAL);
1633 	}
1634 
1635 	spin_lock_irqsave(&d40c->lock, flags);
1636 	d40d = d40_desc_get(d40c);
1637 
1638 	if (d40d == NULL)
1639 		goto err;
1640 
1641 	d40d->lli_len = sgl_len;
1642 	d40d->lli_current = 0;
1643 	d40d->txd.flags = dma_flags;
1644 
1645 	if (d40c->log_num != D40_PHY_CHAN) {
1646 
1647 		if (d40_pool_lli_alloc(d40d, sgl_len, true) < 0) {
1648 			dev_err(&d40c->chan.dev->device,
1649 				"[%s] Out of memory\n", __func__);
1650 			goto err;
1651 		}
1652 
1653 		(void) d40_log_sg_to_lli(sgl_src,
1654 					 sgl_len,
1655 					 d40d->lli_log.src,
1656 					 d40c->log_def.lcsp1,
1657 					 d40c->dma_cfg.src_info.data_width);
1658 
1659 		(void) d40_log_sg_to_lli(sgl_dst,
1660 					 sgl_len,
1661 					 d40d->lli_log.dst,
1662 					 d40c->log_def.lcsp3,
1663 					 d40c->dma_cfg.dst_info.data_width);
1664 	} else {
1665 		if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) {
1666 			dev_err(&d40c->chan.dev->device,
1667 				"[%s] Out of memory\n", __func__);
1668 			goto err;
1669 		}
1670 
1671 		res = d40_phy_sg_to_lli(sgl_src,
1672 					sgl_len,
1673 					0,
1674 					d40d->lli_phy.src,
1675 					virt_to_phys(d40d->lli_phy.src),
1676 					d40c->src_def_cfg,
1677 					d40c->dma_cfg.src_info.data_width,
1678 					d40c->dma_cfg.src_info.psize);
1679 
1680 		if (res < 0)
1681 			goto err;
1682 
1683 		res = d40_phy_sg_to_lli(sgl_dst,
1684 					sgl_len,
1685 					0,
1686 					d40d->lli_phy.dst,
1687 					virt_to_phys(d40d->lli_phy.dst),
1688 					d40c->dst_def_cfg,
1689 					d40c->dma_cfg.dst_info.data_width,
1690 					d40c->dma_cfg.dst_info.psize);
1691 
1692 		if (res < 0)
1693 			goto err;
1694 
1695 		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
1696 				      d40d->lli_pool.size, DMA_TO_DEVICE);
1697 	}
1698 
1699 	dma_async_tx_descriptor_init(&d40d->txd, chan);
1700 
1701 	d40d->txd.tx_submit = d40_tx_submit;
1702 
1703 	spin_unlock_irqrestore(&d40c->lock, flags);
1704 
1705 	return &d40d->txd;
1706 err:
1707 	if (d40d)
1708 		d40_desc_free(d40c, d40d);
1709 	spin_unlock_irqrestore(&d40c->lock, flags);
1710 	return NULL;
1711 }
1712 EXPORT_SYMBOL(stedma40_memcpy_sg);
1713 
1714 bool stedma40_filter(struct dma_chan *chan, void *data)
1715 {
1716 	struct stedma40_chan_cfg *info = data;
1717 	struct d40_chan *d40c =
1718 		container_of(chan, struct d40_chan, chan);
1719 	int err;
1720 
1721 	if (data) {
1722 		err = d40_validate_conf(d40c, info);
1723 		if (!err)
1724 			d40c->dma_cfg = *info;
1725 	} else
1726 		err = d40_config_memcpy(d40c);
1727 
1728 	if (!err)
1729 		d40c->configured = true;
1730 
1731 	return err == 0;
1732 }
1733 EXPORT_SYMBOL(stedma40_filter);
1734 
1735 /* DMA ENGINE functions */
1736 static int d40_alloc_chan_resources(struct dma_chan *chan)
1737 {
1738 	int err;
1739 	unsigned long flags;
1740 	struct d40_chan *d40c =
1741 		container_of(chan, struct d40_chan, chan);
1742 	bool is_free_phy;
1743 	spin_lock_irqsave(&d40c->lock, flags);
1744 
1745 	d40c->completed = chan->cookie = 1;
1746 
1747 	/* If no dma configuration is set use default configuration (memcpy) */
1748 	if (!d40c->configured) {
1749 		err = d40_config_memcpy(d40c);
1750 		if (err) {
1751 			dev_err(&d40c->chan.dev->device,
1752 				"[%s] Failed to configure memcpy channel\n",
1753 				__func__);
1754 			goto fail;
1755 		}
1756 	}
1757 	is_free_phy = (d40c->phy_chan == NULL);
1758 
1759 	err = d40_allocate_channel(d40c);
1760 	if (err) {
1761 		dev_err(&d40c->chan.dev->device,
1762 			"[%s] Failed to allocate channel\n", __func__);
1763 		goto fail;
1764 	}
1765 
1766 	/* Fill in basic CFG register values */
1767 	d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg,
1768 		    &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN);
1769 
1770 	if (d40c->log_num != D40_PHY_CHAN) {
1771 		d40_log_cfg(&d40c->dma_cfg,
1772 			    &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
1773 
1774 		if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM)
1775 			d40c->lcpa = d40c->base->lcpa_base +
1776 			  d40c->dma_cfg.src_dev_type * D40_LCPA_CHAN_SIZE;
1777 		else
1778 			d40c->lcpa = d40c->base->lcpa_base +
1779 			  d40c->dma_cfg.dst_dev_type *
1780 			  D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA;
1781 	}
1782 
1783 	/*
1784 	 * Only write channel configuration to the DMA if the physical
1785 	 * resource is free. In case of multiple logical channels
1786 	 * on the same physical resource, only the first write is necessary.
1787 	 */
1788 	if (is_free_phy)
1789 		d40_config_write(d40c);
1790 fail:
1791 	spin_unlock_irqrestore(&d40c->lock, flags);
1792 	return err;
1793 }
1794 
1795 static void d40_free_chan_resources(struct dma_chan *chan)
1796 {
1797 	struct d40_chan *d40c =
1798 		container_of(chan, struct d40_chan, chan);
1799 	int err;
1800 	unsigned long flags;
1801 
1802 	if (d40c->phy_chan == NULL) {
1803 		dev_err(&d40c->chan.dev->device,
1804 			"[%s] Cannot free unallocated channel\n", __func__);
1805 		return;
1806 	}
1807 
1808 
1809 	spin_lock_irqsave(&d40c->lock, flags);
1810 
1811 	err = d40_free_dma(d40c);
1812 
1813 	if (err)
1814 		dev_err(&d40c->chan.dev->device,
1815 			"[%s] Failed to free channel\n", __func__);
1816 	spin_unlock_irqrestore(&d40c->lock, flags);
1817 }
1818 
1819 static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
1820 						       dma_addr_t dst,
1821 						       dma_addr_t src,
1822 						       size_t size,
1823 						       unsigned long dma_flags)
1824 {
1825 	struct d40_desc *d40d;
1826 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
1827 					     chan);
1828 	unsigned long flags;
1829 	int err = 0;
1830 
1831 	if (d40c->phy_chan == NULL) {
1832 		dev_err(&d40c->chan.dev->device,
1833 			"[%s] Channel is not allocated.\n", __func__);
1834 		return ERR_PTR(-EINVAL);
1835 	}
1836 
1837 	spin_lock_irqsave(&d40c->lock, flags);
1838 	d40d = d40_desc_get(d40c);
1839 
1840 	if (d40d == NULL) {
1841 		dev_err(&d40c->chan.dev->device,
1842 			"[%s] Descriptor is NULL\n", __func__);
1843 		goto err;
1844 	}
1845 
1846 	d40d->txd.flags = dma_flags;
1847 
1848 	dma_async_tx_descriptor_init(&d40d->txd, chan);
1849 
1850 	d40d->txd.tx_submit = d40_tx_submit;
1851 
1852 	if (d40c->log_num != D40_PHY_CHAN) {
1853 
1854 		if (d40_pool_lli_alloc(d40d, 1, true) < 0) {
1855 			dev_err(&d40c->chan.dev->device,
1856 				"[%s] Out of memory\n", __func__);
1857 			goto err;
1858 		}
1859 		d40d->lli_len = 1;
1860 		d40d->lli_current = 0;
1861 
1862 		d40_log_fill_lli(d40d->lli_log.src,
1863 				 src,
1864 				 size,
1865 				 d40c->log_def.lcsp1,
1866 				 d40c->dma_cfg.src_info.data_width,
1867 				 true);
1868 
1869 		d40_log_fill_lli(d40d->lli_log.dst,
1870 				 dst,
1871 				 size,
1872 				 d40c->log_def.lcsp3,
1873 				 d40c->dma_cfg.dst_info.data_width,
1874 				 true);
1875 
1876 	} else {
1877 
1878 		if (d40_pool_lli_alloc(d40d, 1, false) < 0) {
1879 			dev_err(&d40c->chan.dev->device,
1880 				"[%s] Out of memory\n", __func__);
1881 			goto err;
1882 		}
1883 
1884 		err = d40_phy_fill_lli(d40d->lli_phy.src,
1885 				       src,
1886 				       size,
1887 				       d40c->dma_cfg.src_info.psize,
1888 				       0,
1889 				       d40c->src_def_cfg,
1890 				       true,
1891 				       d40c->dma_cfg.src_info.data_width,
1892 				       false);
1893 		if (err)
1894 			goto err_fill_lli;
1895 
1896 		err = d40_phy_fill_lli(d40d->lli_phy.dst,
1897 				       dst,
1898 				       size,
1899 				       d40c->dma_cfg.dst_info.psize,
1900 				       0,
1901 				       d40c->dst_def_cfg,
1902 				       true,
1903 				       d40c->dma_cfg.dst_info.data_width,
1904 				       false);
1905 
1906 		if (err)
1907 			goto err_fill_lli;
1908 
1909 		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
1910 				      d40d->lli_pool.size, DMA_TO_DEVICE);
1911 	}
1912 
1913 	spin_unlock_irqrestore(&d40c->lock, flags);
1914 	return &d40d->txd;
1915 
1916 err_fill_lli:
1917 	dev_err(&d40c->chan.dev->device,
1918 		"[%s] Failed filling in PHY LLI\n", __func__);
1919 err:
1920 	if (d40d)
1921 		d40_desc_free(d40c, d40d);
1922 	spin_unlock_irqrestore(&d40c->lock, flags);
1923 	return NULL;
1924 }
1925 
1926 static struct dma_async_tx_descriptor *
1927 d40_prep_sg(struct dma_chan *chan,
1928 	    struct scatterlist *dst_sg, unsigned int dst_nents,
1929 	    struct scatterlist *src_sg, unsigned int src_nents,
1930 	    unsigned long dma_flags)
1931 {
1932 	if (dst_nents != src_nents)
1933 		return NULL;
1934 
1935 	return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags);
1936 }
1937 
1938 static int d40_prep_slave_sg_log(struct d40_desc *d40d,
1939 				 struct d40_chan *d40c,
1940 				 struct scatterlist *sgl,
1941 				 unsigned int sg_len,
1942 				 enum dma_data_direction direction,
1943 				 unsigned long dma_flags)
1944 {
1945 	dma_addr_t dev_addr = 0;
1946 	int total_size;
1947 
1948 	if (d40_pool_lli_alloc(d40d, sg_len, true) < 0) {
1949 		dev_err(&d40c->chan.dev->device,
1950 			"[%s] Out of memory\n", __func__);
1951 		return -ENOMEM;
1952 	}
1953 
1954 	d40d->lli_len = sg_len;
1955 	d40d->lli_current = 0;
1956 
1957 	if (direction == DMA_FROM_DEVICE)
1958 		if (d40c->runtime_addr)
1959 			dev_addr = d40c->runtime_addr;
1960 		else
1961 			dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type];
1962 	else if (direction == DMA_TO_DEVICE)
1963 		if (d40c->runtime_addr)
1964 			dev_addr = d40c->runtime_addr;
1965 		else
1966 			dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type];
1967 
1968 	else
1969 		return -EINVAL;
1970 
1971 	total_size = d40_log_sg_to_dev(sgl, sg_len,
1972 				       &d40d->lli_log,
1973 				       &d40c->log_def,
1974 				       d40c->dma_cfg.src_info.data_width,
1975 				       d40c->dma_cfg.dst_info.data_width,
1976 				       direction,
1977 				       dev_addr);
1978 
1979 	if (total_size < 0)
1980 		return -EINVAL;
1981 
1982 	return 0;
1983 }
1984 
1985 static int d40_prep_slave_sg_phy(struct d40_desc *d40d,
1986 				 struct d40_chan *d40c,
1987 				 struct scatterlist *sgl,
1988 				 unsigned int sgl_len,
1989 				 enum dma_data_direction direction,
1990 				 unsigned long dma_flags)
1991 {
1992 	dma_addr_t src_dev_addr;
1993 	dma_addr_t dst_dev_addr;
1994 	int res;
1995 
1996 	if (d40_pool_lli_alloc(d40d, sgl_len, false) < 0) {
1997 		dev_err(&d40c->chan.dev->device,
1998 			"[%s] Out of memory\n", __func__);
1999 		return -ENOMEM;
2000 	}
2001 
2002 	d40d->lli_len = sgl_len;
2003 	d40d->lli_current = 0;
2004 
2005 	if (direction == DMA_FROM_DEVICE) {
2006 		dst_dev_addr = 0;
2007 		if (d40c->runtime_addr)
2008 			src_dev_addr = d40c->runtime_addr;
2009 		else
2010 			src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type];
2011 	} else if (direction == DMA_TO_DEVICE) {
2012 		if (d40c->runtime_addr)
2013 			dst_dev_addr = d40c->runtime_addr;
2014 		else
2015 			dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type];
2016 		src_dev_addr = 0;
2017 	} else
2018 		return -EINVAL;
2019 
2020 	res = d40_phy_sg_to_lli(sgl,
2021 				sgl_len,
2022 				src_dev_addr,
2023 				d40d->lli_phy.src,
2024 				virt_to_phys(d40d->lli_phy.src),
2025 				d40c->src_def_cfg,
2026 				d40c->dma_cfg.src_info.data_width,
2027 				d40c->dma_cfg.src_info.psize);
2028 	if (res < 0)
2029 		return res;
2030 
2031 	res = d40_phy_sg_to_lli(sgl,
2032 				sgl_len,
2033 				dst_dev_addr,
2034 				d40d->lli_phy.dst,
2035 				virt_to_phys(d40d->lli_phy.dst),
2036 				d40c->dst_def_cfg,
2037 				d40c->dma_cfg.dst_info.data_width,
2038 				d40c->dma_cfg.dst_info.psize);
2039 	if (res < 0)
2040 		return res;
2041 
2042 	(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
2043 			      d40d->lli_pool.size, DMA_TO_DEVICE);
2044 	return 0;
2045 }
2046 
2047 static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
2048 							 struct scatterlist *sgl,
2049 							 unsigned int sg_len,
2050 							 enum dma_data_direction direction,
2051 							 unsigned long dma_flags)
2052 {
2053 	struct d40_desc *d40d;
2054 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
2055 					     chan);
2056 	unsigned long flags;
2057 	int err;
2058 
2059 	if (d40c->phy_chan == NULL) {
2060 		dev_err(&d40c->chan.dev->device,
2061 			"[%s] Cannot prepare unallocated channel\n", __func__);
2062 		return ERR_PTR(-EINVAL);
2063 	}
2064 
2065 	spin_lock_irqsave(&d40c->lock, flags);
2066 	d40d = d40_desc_get(d40c);
2067 
2068 	if (d40d == NULL)
2069 		goto err;
2070 
2071 	if (d40c->log_num != D40_PHY_CHAN)
2072 		err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len,
2073 					    direction, dma_flags);
2074 	else
2075 		err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len,
2076 					    direction, dma_flags);
2077 	if (err) {
2078 		dev_err(&d40c->chan.dev->device,
2079 			"[%s] Failed to prepare %s slave sg job: %d\n",
2080 			__func__,
2081 			d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err);
2082 		goto err;
2083 	}
2084 
2085 	d40d->txd.flags = dma_flags;
2086 
2087 	dma_async_tx_descriptor_init(&d40d->txd, chan);
2088 
2089 	d40d->txd.tx_submit = d40_tx_submit;
2090 
2091 	spin_unlock_irqrestore(&d40c->lock, flags);
2092 	return &d40d->txd;
2093 
2094 err:
2095 	if (d40d)
2096 		d40_desc_free(d40c, d40d);
2097 	spin_unlock_irqrestore(&d40c->lock, flags);
2098 	return NULL;
2099 }
2100 
2101 static enum dma_status d40_tx_status(struct dma_chan *chan,
2102 				     dma_cookie_t cookie,
2103 				     struct dma_tx_state *txstate)
2104 {
2105 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2106 	dma_cookie_t last_used;
2107 	dma_cookie_t last_complete;
2108 	int ret;
2109 
2110 	if (d40c->phy_chan == NULL) {
2111 		dev_err(&d40c->chan.dev->device,
2112 			"[%s] Cannot read status of unallocated channel\n",
2113 			__func__);
2114 		return -EINVAL;
2115 	}
2116 
2117 	last_complete = d40c->completed;
2118 	last_used = chan->cookie;
2119 
2120 	if (d40_is_paused(d40c))
2121 		ret = DMA_PAUSED;
2122 	else
2123 		ret = dma_async_is_complete(cookie, last_complete, last_used);
2124 
2125 	dma_set_tx_state(txstate, last_complete, last_used,
2126 			 stedma40_residue(chan));
2127 
2128 	return ret;
2129 }
2130 
2131 static void d40_issue_pending(struct dma_chan *chan)
2132 {
2133 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2134 	unsigned long flags;
2135 
2136 	if (d40c->phy_chan == NULL) {
2137 		dev_err(&d40c->chan.dev->device,
2138 			"[%s] Channel is not allocated!\n", __func__);
2139 		return;
2140 	}
2141 
2142 	spin_lock_irqsave(&d40c->lock, flags);
2143 
2144 	/* Busy means that pending jobs are already being processed */
2145 	if (!d40c->busy)
2146 		(void) d40_queue_start(d40c);
2147 
2148 	spin_unlock_irqrestore(&d40c->lock, flags);
2149 }
2150 
2151 /* Runtime reconfiguration extension */
2152 static void d40_set_runtime_config(struct dma_chan *chan,
2153 			       struct dma_slave_config *config)
2154 {
2155 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2156 	struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
2157 	enum dma_slave_buswidth config_addr_width;
2158 	dma_addr_t config_addr;
2159 	u32 config_maxburst;
2160 	enum stedma40_periph_data_width addr_width;
2161 	int psize;
2162 
2163 	if (config->direction == DMA_FROM_DEVICE) {
2164 		dma_addr_t dev_addr_rx =
2165 			d40c->base->plat_data->dev_rx[cfg->src_dev_type];
2166 
2167 		config_addr = config->src_addr;
2168 		if (dev_addr_rx)
2169 			dev_dbg(d40c->base->dev,
2170 				"channel has a pre-wired RX address %08x "
2171 				"overriding with %08x\n",
2172 				dev_addr_rx, config_addr);
2173 		if (cfg->dir != STEDMA40_PERIPH_TO_MEM)
2174 			dev_dbg(d40c->base->dev,
2175 				"channel was not configured for peripheral "
2176 				"to memory transfer (%d) overriding\n",
2177 				cfg->dir);
2178 		cfg->dir = STEDMA40_PERIPH_TO_MEM;
2179 
2180 		config_addr_width = config->src_addr_width;
2181 		config_maxburst = config->src_maxburst;
2182 
2183 	} else if (config->direction == DMA_TO_DEVICE) {
2184 		dma_addr_t dev_addr_tx =
2185 			d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
2186 
2187 		config_addr = config->dst_addr;
2188 		if (dev_addr_tx)
2189 			dev_dbg(d40c->base->dev,
2190 				"channel has a pre-wired TX address %08x "
2191 				"overriding with %08x\n",
2192 				dev_addr_tx, config_addr);
2193 		if (cfg->dir != STEDMA40_MEM_TO_PERIPH)
2194 			dev_dbg(d40c->base->dev,
2195 				"channel was not configured for memory "
2196 				"to peripheral transfer (%d) overriding\n",
2197 				cfg->dir);
2198 		cfg->dir = STEDMA40_MEM_TO_PERIPH;
2199 
2200 		config_addr_width = config->dst_addr_width;
2201 		config_maxburst = config->dst_maxburst;
2202 
2203 	} else {
2204 		dev_err(d40c->base->dev,
2205 			"unrecognized channel direction %d\n",
2206 			config->direction);
2207 		return;
2208 	}
2209 
2210 	switch (config_addr_width) {
2211 	case DMA_SLAVE_BUSWIDTH_1_BYTE:
2212 		addr_width = STEDMA40_BYTE_WIDTH;
2213 		break;
2214 	case DMA_SLAVE_BUSWIDTH_2_BYTES:
2215 		addr_width = STEDMA40_HALFWORD_WIDTH;
2216 		break;
2217 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
2218 		addr_width = STEDMA40_WORD_WIDTH;
2219 		break;
2220 	case DMA_SLAVE_BUSWIDTH_8_BYTES:
2221 		addr_width = STEDMA40_DOUBLEWORD_WIDTH;
2222 		break;
2223 	default:
2224 		dev_err(d40c->base->dev,
2225 			"illegal peripheral address width "
2226 			"requested (%d)\n",
2227 			config->src_addr_width);
2228 		return;
2229 	}
2230 
2231 	if (d40c->log_num != D40_PHY_CHAN) {
2232 		if (config_maxburst >= 16)
2233 			psize = STEDMA40_PSIZE_LOG_16;
2234 		else if (config_maxburst >= 8)
2235 			psize = STEDMA40_PSIZE_LOG_8;
2236 		else if (config_maxburst >= 4)
2237 			psize = STEDMA40_PSIZE_LOG_4;
2238 		else
2239 			psize = STEDMA40_PSIZE_LOG_1;
2240 	} else {
2241 		if (config_maxburst >= 16)
2242 			psize = STEDMA40_PSIZE_PHY_16;
2243 		else if (config_maxburst >= 8)
2244 			psize = STEDMA40_PSIZE_PHY_8;
2245 		else if (config_maxburst >= 4)
2246 			psize = STEDMA40_PSIZE_PHY_4;
2247 		else
2248 			psize = STEDMA40_PSIZE_PHY_1;
2249 	}
2250 
2251 	/* Set up all the endpoint configs */
2252 	cfg->src_info.data_width = addr_width;
2253 	cfg->src_info.psize = psize;
2254 	cfg->src_info.big_endian = false;
2255 	cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
2256 	cfg->dst_info.data_width = addr_width;
2257 	cfg->dst_info.psize = psize;
2258 	cfg->dst_info.big_endian = false;
2259 	cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
2260 
2261 	/* Fill in register values */
2262 	if (d40c->log_num != D40_PHY_CHAN)
2263 		d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
2264 	else
2265 		d40_phy_cfg(cfg, &d40c->src_def_cfg,
2266 			    &d40c->dst_def_cfg, false);
2267 
2268 	/* These settings will take precedence later */
2269 	d40c->runtime_addr = config_addr;
2270 	d40c->runtime_direction = config->direction;
2271 	dev_dbg(d40c->base->dev,
2272 		"configured channel %s for %s, data width %d, "
2273 		"maxburst %d bytes, LE, no flow control\n",
2274 		dma_chan_name(chan),
2275 		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
2276 		config_addr_width,
2277 		config_maxburst);
2278 }
2279 
2280 static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
2281 		       unsigned long arg)
2282 {
2283 	unsigned long flags;
2284 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2285 
2286 	if (d40c->phy_chan == NULL) {
2287 		dev_err(&d40c->chan.dev->device,
2288 			"[%s] Channel is not allocated!\n", __func__);
2289 		return -EINVAL;
2290 	}
2291 
2292 	switch (cmd) {
2293 	case DMA_TERMINATE_ALL:
2294 		spin_lock_irqsave(&d40c->lock, flags);
2295 		d40_term_all(d40c);
2296 		spin_unlock_irqrestore(&d40c->lock, flags);
2297 		return 0;
2298 	case DMA_PAUSE:
2299 		return d40_pause(chan);
2300 	case DMA_RESUME:
2301 		return d40_resume(chan);
2302 	case DMA_SLAVE_CONFIG:
2303 		d40_set_runtime_config(chan,
2304 			(struct dma_slave_config *) arg);
2305 		return 0;
2306 	default:
2307 		break;
2308 	}
2309 
2310 	/* Other commands are unimplemented */
2311 	return -ENXIO;
2312 }
2313 
2314 /* Initialization functions */
2315 
2316 static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
2317 				 struct d40_chan *chans, int offset,
2318 				 int num_chans)
2319 {
2320 	int i = 0;
2321 	struct d40_chan *d40c;
2322 
2323 	INIT_LIST_HEAD(&dma->channels);
2324 
2325 	for (i = offset; i < offset + num_chans; i++) {
2326 		d40c = &chans[i];
2327 		d40c->base = base;
2328 		d40c->chan.device = dma;
2329 
2330 		spin_lock_init(&d40c->lock);
2331 
2332 		d40c->log_num = D40_PHY_CHAN;
2333 
2334 		INIT_LIST_HEAD(&d40c->active);
2335 		INIT_LIST_HEAD(&d40c->queue);
2336 		INIT_LIST_HEAD(&d40c->client);
2337 
2338 		tasklet_init(&d40c->tasklet, dma_tasklet,
2339 			     (unsigned long) d40c);
2340 
2341 		list_add_tail(&d40c->chan.device_node,
2342 			      &dma->channels);
2343 	}
2344 }
2345 
2346 static int __init d40_dmaengine_init(struct d40_base *base,
2347 				     int num_reserved_chans)
2348 {
2349 	int err ;
2350 
2351 	d40_chan_init(base, &base->dma_slave, base->log_chans,
2352 		      0, base->num_log_chans);
2353 
2354 	dma_cap_zero(base->dma_slave.cap_mask);
2355 	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
2356 
2357 	base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources;
2358 	base->dma_slave.device_free_chan_resources = d40_free_chan_resources;
2359 	base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy;
2360 	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
2361 	base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg;
2362 	base->dma_slave.device_tx_status = d40_tx_status;
2363 	base->dma_slave.device_issue_pending = d40_issue_pending;
2364 	base->dma_slave.device_control = d40_control;
2365 	base->dma_slave.dev = base->dev;
2366 
2367 	err = dma_async_device_register(&base->dma_slave);
2368 
2369 	if (err) {
2370 		dev_err(base->dev,
2371 			"[%s] Failed to register slave channels\n",
2372 			__func__);
2373 		goto failure1;
2374 	}
2375 
2376 	d40_chan_init(base, &base->dma_memcpy, base->log_chans,
2377 		      base->num_log_chans, base->plat_data->memcpy_len);
2378 
2379 	dma_cap_zero(base->dma_memcpy.cap_mask);
2380 	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
2381 	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
2382 
2383 	base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources;
2384 	base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources;
2385 	base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy;
2386 	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
2387 	base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg;
2388 	base->dma_memcpy.device_tx_status = d40_tx_status;
2389 	base->dma_memcpy.device_issue_pending = d40_issue_pending;
2390 	base->dma_memcpy.device_control = d40_control;
2391 	base->dma_memcpy.dev = base->dev;
2392 	/*
2393 	 * This controller can only access address at even
2394 	 * 32bit boundaries, i.e. 2^2
2395 	 */
2396 	base->dma_memcpy.copy_align = 2;
2397 
2398 	err = dma_async_device_register(&base->dma_memcpy);
2399 
2400 	if (err) {
2401 		dev_err(base->dev,
2402 			"[%s] Failed to regsiter memcpy only channels\n",
2403 			__func__);
2404 		goto failure2;
2405 	}
2406 
2407 	d40_chan_init(base, &base->dma_both, base->phy_chans,
2408 		      0, num_reserved_chans);
2409 
2410 	dma_cap_zero(base->dma_both.cap_mask);
2411 	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
2412 	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
2413 	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
2414 
2415 	base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources;
2416 	base->dma_both.device_free_chan_resources = d40_free_chan_resources;
2417 	base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy;
2418 	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
2419 	base->dma_both.device_prep_slave_sg = d40_prep_slave_sg;
2420 	base->dma_both.device_tx_status = d40_tx_status;
2421 	base->dma_both.device_issue_pending = d40_issue_pending;
2422 	base->dma_both.device_control = d40_control;
2423 	base->dma_both.dev = base->dev;
2424 	base->dma_both.copy_align = 2;
2425 	err = dma_async_device_register(&base->dma_both);
2426 
2427 	if (err) {
2428 		dev_err(base->dev,
2429 			"[%s] Failed to register logical and physical capable channels\n",
2430 			__func__);
2431 		goto failure3;
2432 	}
2433 	return 0;
2434 failure3:
2435 	dma_async_device_unregister(&base->dma_memcpy);
2436 failure2:
2437 	dma_async_device_unregister(&base->dma_slave);
2438 failure1:
2439 	return err;
2440 }
2441 
2442 /* Initialization functions. */
2443 
2444 static int __init d40_phy_res_init(struct d40_base *base)
2445 {
2446 	int i;
2447 	int num_phy_chans_avail = 0;
2448 	u32 val[2];
2449 	int odd_even_bit = -2;
2450 
2451 	val[0] = readl(base->virtbase + D40_DREG_PRSME);
2452 	val[1] = readl(base->virtbase + D40_DREG_PRSMO);
2453 
2454 	for (i = 0; i < base->num_phy_chans; i++) {
2455 		base->phy_res[i].num = i;
2456 		odd_even_bit += 2 * ((i % 2) == 0);
2457 		if (((val[i % 2] >> odd_even_bit) & 3) == 1) {
2458 			/* Mark security only channels as occupied */
2459 			base->phy_res[i].allocated_src = D40_ALLOC_PHY;
2460 			base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
2461 		} else {
2462 			base->phy_res[i].allocated_src = D40_ALLOC_FREE;
2463 			base->phy_res[i].allocated_dst = D40_ALLOC_FREE;
2464 			num_phy_chans_avail++;
2465 		}
2466 		spin_lock_init(&base->phy_res[i].lock);
2467 	}
2468 
2469 	/* Mark disabled channels as occupied */
2470 	for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) {
2471 		int chan = base->plat_data->disabled_channels[i];
2472 
2473 		base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
2474 		base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
2475 		num_phy_chans_avail--;
2476 	}
2477 
2478 	dev_info(base->dev, "%d of %d physical DMA channels available\n",
2479 		 num_phy_chans_avail, base->num_phy_chans);
2480 
2481 	/* Verify settings extended vs standard */
2482 	val[0] = readl(base->virtbase + D40_DREG_PRTYP);
2483 
2484 	for (i = 0; i < base->num_phy_chans; i++) {
2485 
2486 		if (base->phy_res[i].allocated_src == D40_ALLOC_FREE &&
2487 		    (val[0] & 0x3) != 1)
2488 			dev_info(base->dev,
2489 				 "[%s] INFO: channel %d is misconfigured (%d)\n",
2490 				 __func__, i, val[0] & 0x3);
2491 
2492 		val[0] = val[0] >> 2;
2493 	}
2494 
2495 	return num_phy_chans_avail;
2496 }
2497 
2498 static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
2499 {
2500 	static const struct d40_reg_val dma_id_regs[] = {
2501 		/* Peripheral Id */
2502 		{ .reg = D40_DREG_PERIPHID0, .val = 0x0040},
2503 		{ .reg = D40_DREG_PERIPHID1, .val = 0x0000},
2504 		/*
2505 		 * D40_DREG_PERIPHID2 Depends on HW revision:
2506 		 *  MOP500/HREF ED has 0x0008,
2507 		 *  ? has 0x0018,
2508 		 *  HREF V1 has 0x0028
2509 		 */
2510 		{ .reg = D40_DREG_PERIPHID3, .val = 0x0000},
2511 
2512 		/* PCell Id */
2513 		{ .reg = D40_DREG_CELLID0, .val = 0x000d},
2514 		{ .reg = D40_DREG_CELLID1, .val = 0x00f0},
2515 		{ .reg = D40_DREG_CELLID2, .val = 0x0005},
2516 		{ .reg = D40_DREG_CELLID3, .val = 0x00b1}
2517 	};
2518 	struct stedma40_platform_data *plat_data;
2519 	struct clk *clk = NULL;
2520 	void __iomem *virtbase = NULL;
2521 	struct resource *res = NULL;
2522 	struct d40_base *base = NULL;
2523 	int num_log_chans = 0;
2524 	int num_phy_chans;
2525 	int i;
2526 	u32 val;
2527 	u32 rev;
2528 
2529 	clk = clk_get(&pdev->dev, NULL);
2530 
2531 	if (IS_ERR(clk)) {
2532 		dev_err(&pdev->dev, "[%s] No matching clock found\n",
2533 			__func__);
2534 		goto failure;
2535 	}
2536 
2537 	clk_enable(clk);
2538 
2539 	/* Get IO for DMAC base address */
2540 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
2541 	if (!res)
2542 		goto failure;
2543 
2544 	if (request_mem_region(res->start, resource_size(res),
2545 			       D40_NAME " I/O base") == NULL)
2546 		goto failure;
2547 
2548 	virtbase = ioremap(res->start, resource_size(res));
2549 	if (!virtbase)
2550 		goto failure;
2551 
2552 	/* HW version check */
2553 	for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) {
2554 		if (dma_id_regs[i].val !=
2555 		    readl(virtbase + dma_id_regs[i].reg)) {
2556 			dev_err(&pdev->dev,
2557 				"[%s] Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n",
2558 				__func__,
2559 				dma_id_regs[i].val,
2560 				dma_id_regs[i].reg,
2561 				readl(virtbase + dma_id_regs[i].reg));
2562 			goto failure;
2563 		}
2564 	}
2565 
2566 	/* Get silicon revision and designer */
2567 	val = readl(virtbase + D40_DREG_PERIPHID2);
2568 
2569 	if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) !=
2570 	    D40_HW_DESIGNER) {
2571 		dev_err(&pdev->dev,
2572 			"[%s] Unknown designer! Got %x wanted %x\n",
2573 			__func__, val & D40_DREG_PERIPHID2_DESIGNER_MASK,
2574 			D40_HW_DESIGNER);
2575 		goto failure;
2576 	}
2577 
2578 	rev = (val & D40_DREG_PERIPHID2_REV_MASK) >>
2579 		D40_DREG_PERIPHID2_REV_POS;
2580 
2581 	/* The number of physical channels on this HW */
2582 	num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
2583 
2584 	dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n",
2585 		 rev, res->start);
2586 
2587 	plat_data = pdev->dev.platform_data;
2588 
2589 	/* Count the number of logical channels in use */
2590 	for (i = 0; i < plat_data->dev_len; i++)
2591 		if (plat_data->dev_rx[i] != 0)
2592 			num_log_chans++;
2593 
2594 	for (i = 0; i < plat_data->dev_len; i++)
2595 		if (plat_data->dev_tx[i] != 0)
2596 			num_log_chans++;
2597 
2598 	base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
2599 		       (num_phy_chans + num_log_chans + plat_data->memcpy_len) *
2600 		       sizeof(struct d40_chan), GFP_KERNEL);
2601 
2602 	if (base == NULL) {
2603 		dev_err(&pdev->dev, "[%s] Out of memory\n", __func__);
2604 		goto failure;
2605 	}
2606 
2607 	base->rev = rev;
2608 	base->clk = clk;
2609 	base->num_phy_chans = num_phy_chans;
2610 	base->num_log_chans = num_log_chans;
2611 	base->phy_start = res->start;
2612 	base->phy_size = resource_size(res);
2613 	base->virtbase = virtbase;
2614 	base->plat_data = plat_data;
2615 	base->dev = &pdev->dev;
2616 	base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4);
2617 	base->log_chans = &base->phy_chans[num_phy_chans];
2618 
2619 	base->phy_res = kzalloc(num_phy_chans * sizeof(struct d40_phy_res),
2620 				GFP_KERNEL);
2621 	if (!base->phy_res)
2622 		goto failure;
2623 
2624 	base->lookup_phy_chans = kzalloc(num_phy_chans *
2625 					 sizeof(struct d40_chan *),
2626 					 GFP_KERNEL);
2627 	if (!base->lookup_phy_chans)
2628 		goto failure;
2629 
2630 	if (num_log_chans + plat_data->memcpy_len) {
2631 		/*
2632 		 * The max number of logical channels are event lines for all
2633 		 * src devices and dst devices
2634 		 */
2635 		base->lookup_log_chans = kzalloc(plat_data->dev_len * 2 *
2636 						 sizeof(struct d40_chan *),
2637 						 GFP_KERNEL);
2638 		if (!base->lookup_log_chans)
2639 			goto failure;
2640 	}
2641 
2642 	base->lcla_pool.alloc_map = kzalloc(num_phy_chans *
2643 					    sizeof(struct d40_desc *) *
2644 					    D40_LCLA_LINK_PER_EVENT_GRP,
2645 					    GFP_KERNEL);
2646 	if (!base->lcla_pool.alloc_map)
2647 		goto failure;
2648 
2649 	base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc),
2650 					    0, SLAB_HWCACHE_ALIGN,
2651 					    NULL);
2652 	if (base->desc_slab == NULL)
2653 		goto failure;
2654 
2655 	return base;
2656 
2657 failure:
2658 	if (!IS_ERR(clk)) {
2659 		clk_disable(clk);
2660 		clk_put(clk);
2661 	}
2662 	if (virtbase)
2663 		iounmap(virtbase);
2664 	if (res)
2665 		release_mem_region(res->start,
2666 				   resource_size(res));
2667 	if (virtbase)
2668 		iounmap(virtbase);
2669 
2670 	if (base) {
2671 		kfree(base->lcla_pool.alloc_map);
2672 		kfree(base->lookup_log_chans);
2673 		kfree(base->lookup_phy_chans);
2674 		kfree(base->phy_res);
2675 		kfree(base);
2676 	}
2677 
2678 	return NULL;
2679 }
2680 
2681 static void __init d40_hw_init(struct d40_base *base)
2682 {
2683 
2684 	static const struct d40_reg_val dma_init_reg[] = {
2685 		/* Clock every part of the DMA block from start */
2686 		{ .reg = D40_DREG_GCC,    .val = 0x0000ff01},
2687 
2688 		/* Interrupts on all logical channels */
2689 		{ .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF},
2690 		{ .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF},
2691 		{ .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF},
2692 		{ .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF},
2693 		{ .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF},
2694 		{ .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF},
2695 		{ .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF},
2696 		{ .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF},
2697 		{ .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF},
2698 		{ .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF},
2699 		{ .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF},
2700 		{ .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF}
2701 	};
2702 	int i;
2703 	u32 prmseo[2] = {0, 0};
2704 	u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF};
2705 	u32 pcmis = 0;
2706 	u32 pcicr = 0;
2707 
2708 	for (i = 0; i < ARRAY_SIZE(dma_init_reg); i++)
2709 		writel(dma_init_reg[i].val,
2710 		       base->virtbase + dma_init_reg[i].reg);
2711 
2712 	/* Configure all our dma channels to default settings */
2713 	for (i = 0; i < base->num_phy_chans; i++) {
2714 
2715 		activeo[i % 2] = activeo[i % 2] << 2;
2716 
2717 		if (base->phy_res[base->num_phy_chans - i - 1].allocated_src
2718 		    == D40_ALLOC_PHY) {
2719 			activeo[i % 2] |= 3;
2720 			continue;
2721 		}
2722 
2723 		/* Enable interrupt # */
2724 		pcmis = (pcmis << 1) | 1;
2725 
2726 		/* Clear interrupt # */
2727 		pcicr = (pcicr << 1) | 1;
2728 
2729 		/* Set channel to physical mode */
2730 		prmseo[i % 2] = prmseo[i % 2] << 2;
2731 		prmseo[i % 2] |= 1;
2732 
2733 	}
2734 
2735 	writel(prmseo[1], base->virtbase + D40_DREG_PRMSE);
2736 	writel(prmseo[0], base->virtbase + D40_DREG_PRMSO);
2737 	writel(activeo[1], base->virtbase + D40_DREG_ACTIVE);
2738 	writel(activeo[0], base->virtbase + D40_DREG_ACTIVO);
2739 
2740 	/* Write which interrupt to enable */
2741 	writel(pcmis, base->virtbase + D40_DREG_PCMIS);
2742 
2743 	/* Write which interrupt to clear */
2744 	writel(pcicr, base->virtbase + D40_DREG_PCICR);
2745 
2746 }
2747 
2748 static int __init d40_lcla_allocate(struct d40_base *base)
2749 {
2750 	unsigned long *page_list;
2751 	int i, j;
2752 	int ret = 0;
2753 
2754 	/*
2755 	 * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned,
2756 	 * To full fill this hardware requirement without wasting 256 kb
2757 	 * we allocate pages until we get an aligned one.
2758 	 */
2759 	page_list = kmalloc(sizeof(unsigned long) * MAX_LCLA_ALLOC_ATTEMPTS,
2760 			    GFP_KERNEL);
2761 
2762 	if (!page_list) {
2763 		ret = -ENOMEM;
2764 		goto failure;
2765 	}
2766 
2767 	/* Calculating how many pages that are required */
2768 	base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE;
2769 
2770 	for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
2771 		page_list[i] = __get_free_pages(GFP_KERNEL,
2772 						base->lcla_pool.pages);
2773 		if (!page_list[i]) {
2774 
2775 			dev_err(base->dev,
2776 				"[%s] Failed to allocate %d pages.\n",
2777 				__func__, base->lcla_pool.pages);
2778 
2779 			for (j = 0; j < i; j++)
2780 				free_pages(page_list[j], base->lcla_pool.pages);
2781 			goto failure;
2782 		}
2783 
2784 		if ((virt_to_phys((void *)page_list[i]) &
2785 		     (LCLA_ALIGNMENT - 1)) == 0)
2786 			break;
2787 	}
2788 
2789 	for (j = 0; j < i; j++)
2790 		free_pages(page_list[j], base->lcla_pool.pages);
2791 
2792 	if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
2793 		base->lcla_pool.base = (void *)page_list[i];
2794 	} else {
2795 		/*
2796 		 * After many attempts and no succees with finding the correct
2797 		 * alignment, try with allocating a big buffer.
2798 		 */
2799 		dev_warn(base->dev,
2800 			 "[%s] Failed to get %d pages @ 18 bit align.\n",
2801 			 __func__, base->lcla_pool.pages);
2802 		base->lcla_pool.base_unaligned = kmalloc(SZ_1K *
2803 							 base->num_phy_chans +
2804 							 LCLA_ALIGNMENT,
2805 							 GFP_KERNEL);
2806 		if (!base->lcla_pool.base_unaligned) {
2807 			ret = -ENOMEM;
2808 			goto failure;
2809 		}
2810 
2811 		base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned,
2812 						 LCLA_ALIGNMENT);
2813 	}
2814 
2815 	writel(virt_to_phys(base->lcla_pool.base),
2816 	       base->virtbase + D40_DREG_LCLA);
2817 failure:
2818 	kfree(page_list);
2819 	return ret;
2820 }
2821 
2822 static int __init d40_probe(struct platform_device *pdev)
2823 {
2824 	int err;
2825 	int ret = -ENOENT;
2826 	struct d40_base *base;
2827 	struct resource *res = NULL;
2828 	int num_reserved_chans;
2829 	u32 val;
2830 
2831 	base = d40_hw_detect_init(pdev);
2832 
2833 	if (!base)
2834 		goto failure;
2835 
2836 	num_reserved_chans = d40_phy_res_init(base);
2837 
2838 	platform_set_drvdata(pdev, base);
2839 
2840 	spin_lock_init(&base->interrupt_lock);
2841 	spin_lock_init(&base->execmd_lock);
2842 
2843 	/* Get IO for logical channel parameter address */
2844 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa");
2845 	if (!res) {
2846 		ret = -ENOENT;
2847 		dev_err(&pdev->dev,
2848 			"[%s] No \"lcpa\" memory resource\n",
2849 			__func__);
2850 		goto failure;
2851 	}
2852 	base->lcpa_size = resource_size(res);
2853 	base->phy_lcpa = res->start;
2854 
2855 	if (request_mem_region(res->start, resource_size(res),
2856 			       D40_NAME " I/O lcpa") == NULL) {
2857 		ret = -EBUSY;
2858 		dev_err(&pdev->dev,
2859 			"[%s] Failed to request LCPA region 0x%x-0x%x\n",
2860 			__func__, res->start, res->end);
2861 		goto failure;
2862 	}
2863 
2864 	/* We make use of ESRAM memory for this. */
2865 	val = readl(base->virtbase + D40_DREG_LCPA);
2866 	if (res->start != val && val != 0) {
2867 		dev_warn(&pdev->dev,
2868 			 "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n",
2869 			 __func__, val, res->start);
2870 	} else
2871 		writel(res->start, base->virtbase + D40_DREG_LCPA);
2872 
2873 	base->lcpa_base = ioremap(res->start, resource_size(res));
2874 	if (!base->lcpa_base) {
2875 		ret = -ENOMEM;
2876 		dev_err(&pdev->dev,
2877 			"[%s] Failed to ioremap LCPA region\n",
2878 			__func__);
2879 		goto failure;
2880 	}
2881 
2882 	ret = d40_lcla_allocate(base);
2883 	if (ret) {
2884 		dev_err(&pdev->dev, "[%s] Failed to allocate LCLA area\n",
2885 			__func__);
2886 		goto failure;
2887 	}
2888 
2889 	spin_lock_init(&base->lcla_pool.lock);
2890 
2891 	base->irq = platform_get_irq(pdev, 0);
2892 
2893 	ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
2894 
2895 	if (ret) {
2896 		dev_err(&pdev->dev, "[%s] No IRQ defined\n", __func__);
2897 		goto failure;
2898 	}
2899 
2900 	err = d40_dmaengine_init(base, num_reserved_chans);
2901 	if (err)
2902 		goto failure;
2903 
2904 	d40_hw_init(base);
2905 
2906 	dev_info(base->dev, "initialized\n");
2907 	return 0;
2908 
2909 failure:
2910 	if (base) {
2911 		if (base->desc_slab)
2912 			kmem_cache_destroy(base->desc_slab);
2913 		if (base->virtbase)
2914 			iounmap(base->virtbase);
2915 		if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
2916 			free_pages((unsigned long)base->lcla_pool.base,
2917 				   base->lcla_pool.pages);
2918 
2919 		kfree(base->lcla_pool.base_unaligned);
2920 
2921 		if (base->phy_lcpa)
2922 			release_mem_region(base->phy_lcpa,
2923 					   base->lcpa_size);
2924 		if (base->phy_start)
2925 			release_mem_region(base->phy_start,
2926 					   base->phy_size);
2927 		if (base->clk) {
2928 			clk_disable(base->clk);
2929 			clk_put(base->clk);
2930 		}
2931 
2932 		kfree(base->lcla_pool.alloc_map);
2933 		kfree(base->lookup_log_chans);
2934 		kfree(base->lookup_phy_chans);
2935 		kfree(base->phy_res);
2936 		kfree(base);
2937 	}
2938 
2939 	dev_err(&pdev->dev, "[%s] probe failed\n", __func__);
2940 	return ret;
2941 }
2942 
2943 static struct platform_driver d40_driver = {
2944 	.driver = {
2945 		.owner = THIS_MODULE,
2946 		.name  = D40_NAME,
2947 	},
2948 };
2949 
2950 int __init stedma40_init(void)
2951 {
2952 	return platform_driver_probe(&d40_driver, d40_probe);
2953 }
2954 arch_initcall(stedma40_init);
2955