xref: /linux/drivers/dma/ste_dma40.c (revision c9895ed5a84dc3cbc86a9d6d5656d8c187f53380)
1 /*
2  * Copyright (C) Ericsson AB 2007-2008
3  * Copyright (C) ST-Ericsson SA 2008-2010
4  * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
5  * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
6  * License terms: GNU General Public License (GPL) version 2
7  */
8 
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/dmaengine.h>
12 #include <linux/platform_device.h>
13 #include <linux/clk.h>
14 #include <linux/delay.h>
15 #include <linux/err.h>
16 
17 #include <plat/ste_dma40.h>
18 
19 #include "ste_dma40_ll.h"
20 
21 #define D40_NAME "dma40"
22 
23 #define D40_PHY_CHAN -1
24 
25 /* For masking out/in 2 bit channel positions */
26 #define D40_CHAN_POS(chan)  (2 * (chan / 2))
27 #define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan))
28 
29 /* Maximum iterations taken before giving up suspending a channel */
30 #define D40_SUSPEND_MAX_IT 500
31 
32 /* Hardware requirement on LCLA alignment */
33 #define LCLA_ALIGNMENT 0x40000
34 
35 /* Max number of links per event group */
36 #define D40_LCLA_LINK_PER_EVENT_GRP 128
37 #define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP
38 
39 /* Attempts before giving up to trying to get pages that are aligned */
40 #define MAX_LCLA_ALLOC_ATTEMPTS 256
41 
42 /* Bit markings for allocation map */
43 #define D40_ALLOC_FREE		(1 << 31)
44 #define D40_ALLOC_PHY		(1 << 30)
45 #define D40_ALLOC_LOG_FREE	0
46 
47 /* Hardware designer of the block */
48 #define D40_HW_DESIGNER 0x8
49 
50 /**
51  * enum 40_command - The different commands and/or statuses.
52  *
53  * @D40_DMA_STOP: DMA channel command STOP or status STOPPED,
54  * @D40_DMA_RUN: The DMA channel is RUNNING of the command RUN.
55  * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible.
56  * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED.
57  */
58 enum d40_command {
59 	D40_DMA_STOP		= 0,
60 	D40_DMA_RUN		= 1,
61 	D40_DMA_SUSPEND_REQ	= 2,
62 	D40_DMA_SUSPENDED	= 3
63 };
64 
65 /**
66  * struct d40_lli_pool - Structure for keeping LLIs in memory
67  *
68  * @base: Pointer to memory area when the pre_alloc_lli's are not large
69  * enough, IE bigger than the most common case, 1 dst and 1 src. NULL if
70  * pre_alloc_lli is used.
71  * @size: The size in bytes of the memory at base or the size of pre_alloc_lli.
72  * @pre_alloc_lli: Pre allocated area for the most common case of transfers,
73  * one buffer to one buffer.
74  */
75 struct d40_lli_pool {
76 	void	*base;
77 	int	 size;
78 	/* Space for dst and src, plus an extra for padding */
79 	u8	 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
80 };
81 
82 /**
83  * struct d40_desc - A descriptor is one DMA job.
84  *
85  * @lli_phy: LLI settings for physical channel. Both src and dst=
86  * points into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if
87  * lli_len equals one.
88  * @lli_log: Same as above but for logical channels.
89  * @lli_pool: The pool with two entries pre-allocated.
90  * @lli_len: Number of llis of current descriptor.
91  * @lli_current: Number of transfered llis.
92  * @lcla_alloc: Number of LCLA entries allocated.
93  * @txd: DMA engine struct. Used for among other things for communication
94  * during a transfer.
95  * @node: List entry.
96  * @is_in_client_list: true if the client owns this descriptor.
97  * @is_hw_linked: true if this job will automatically be continued for
98  * the previous one.
99  *
100  * This descriptor is used for both logical and physical transfers.
101  */
102 struct d40_desc {
103 	/* LLI physical */
104 	struct d40_phy_lli_bidir	 lli_phy;
105 	/* LLI logical */
106 	struct d40_log_lli_bidir	 lli_log;
107 
108 	struct d40_lli_pool		 lli_pool;
109 	int				 lli_len;
110 	int				 lli_current;
111 	int				 lcla_alloc;
112 
113 	struct dma_async_tx_descriptor	 txd;
114 	struct list_head		 node;
115 
116 	bool				 is_in_client_list;
117 	bool				 is_hw_linked;
118 };
119 
120 /**
121  * struct d40_lcla_pool - LCLA pool settings and data.
122  *
123  * @base: The virtual address of LCLA. 18 bit aligned.
124  * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used.
125  * This pointer is only there for clean-up on error.
126  * @pages: The number of pages needed for all physical channels.
127  * Only used later for clean-up on error
128  * @lock: Lock to protect the content in this struct.
129  * @alloc_map: big map over which LCLA entry is own by which job.
130  */
131 struct d40_lcla_pool {
132 	void		*base;
133 	void		*base_unaligned;
134 	int		 pages;
135 	spinlock_t	 lock;
136 	struct d40_desc	**alloc_map;
137 };
138 
139 /**
140  * struct d40_phy_res - struct for handling eventlines mapped to physical
141  * channels.
142  *
143  * @lock: A lock protection this entity.
144  * @num: The physical channel number of this entity.
145  * @allocated_src: Bit mapped to show which src event line's are mapped to
146  * this physical channel. Can also be free or physically allocated.
147  * @allocated_dst: Same as for src but is dst.
148  * allocated_dst and allocated_src uses the D40_ALLOC* defines as well as
149  * event line number.
150  */
151 struct d40_phy_res {
152 	spinlock_t lock;
153 	int	   num;
154 	u32	   allocated_src;
155 	u32	   allocated_dst;
156 };
157 
158 struct d40_base;
159 
160 /**
161  * struct d40_chan - Struct that describes a channel.
162  *
163  * @lock: A spinlock to protect this struct.
164  * @log_num: The logical number, if any of this channel.
165  * @completed: Starts with 1, after first interrupt it is set to dma engine's
166  * current cookie.
167  * @pending_tx: The number of pending transfers. Used between interrupt handler
168  * and tasklet.
169  * @busy: Set to true when transfer is ongoing on this channel.
170  * @phy_chan: Pointer to physical channel which this instance runs on. If this
171  * point is NULL, then the channel is not allocated.
172  * @chan: DMA engine handle.
173  * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
174  * transfer and call client callback.
175  * @client: Cliented owned descriptor list.
176  * @active: Active descriptor.
177  * @queue: Queued jobs.
178  * @dma_cfg: The client configuration of this dma channel.
179  * @configured: whether the dma_cfg configuration is valid
180  * @base: Pointer to the device instance struct.
181  * @src_def_cfg: Default cfg register setting for src.
182  * @dst_def_cfg: Default cfg register setting for dst.
183  * @log_def: Default logical channel settings.
184  * @lcla: Space for one dst src pair for logical channel transfers.
185  * @lcpa: Pointer to dst and src lcpa settings.
186  *
187  * This struct can either "be" a logical or a physical channel.
188  */
189 struct d40_chan {
190 	spinlock_t			 lock;
191 	int				 log_num;
192 	/* ID of the most recent completed transfer */
193 	int				 completed;
194 	int				 pending_tx;
195 	bool				 busy;
196 	struct d40_phy_res		*phy_chan;
197 	struct dma_chan			 chan;
198 	struct tasklet_struct		 tasklet;
199 	struct list_head		 client;
200 	struct list_head		 active;
201 	struct list_head		 queue;
202 	struct stedma40_chan_cfg	 dma_cfg;
203 	bool				 configured;
204 	struct d40_base			*base;
205 	/* Default register configurations */
206 	u32				 src_def_cfg;
207 	u32				 dst_def_cfg;
208 	struct d40_def_lcsp		 log_def;
209 	struct d40_log_lli_full		*lcpa;
210 	/* Runtime reconfiguration */
211 	dma_addr_t			runtime_addr;
212 	enum dma_data_direction		runtime_direction;
213 };
214 
215 /**
216  * struct d40_base - The big global struct, one for each probe'd instance.
217  *
218  * @interrupt_lock: Lock used to make sure one interrupt is handle a time.
219  * @execmd_lock: Lock for execute command usage since several channels share
220  * the same physical register.
221  * @dev: The device structure.
222  * @virtbase: The virtual base address of the DMA's register.
223  * @rev: silicon revision detected.
224  * @clk: Pointer to the DMA clock structure.
225  * @phy_start: Physical memory start of the DMA registers.
226  * @phy_size: Size of the DMA register map.
227  * @irq: The IRQ number.
228  * @num_phy_chans: The number of physical channels. Read from HW. This
229  * is the number of available channels for this driver, not counting "Secure
230  * mode" allocated physical channels.
231  * @num_log_chans: The number of logical channels. Calculated from
232  * num_phy_chans.
233  * @dma_both: dma_device channels that can do both memcpy and slave transfers.
234  * @dma_slave: dma_device channels that can do only do slave transfers.
235  * @dma_memcpy: dma_device channels that can do only do memcpy transfers.
236  * @log_chans: Room for all possible logical channels in system.
237  * @lookup_log_chans: Used to map interrupt number to logical channel. Points
238  * to log_chans entries.
239  * @lookup_phy_chans: Used to map interrupt number to physical channel. Points
240  * to phy_chans entries.
241  * @plat_data: Pointer to provided platform_data which is the driver
242  * configuration.
243  * @phy_res: Vector containing all physical channels.
244  * @lcla_pool: lcla pool settings and data.
245  * @lcpa_base: The virtual mapped address of LCPA.
246  * @phy_lcpa: The physical address of the LCPA.
247  * @lcpa_size: The size of the LCPA area.
248  * @desc_slab: cache for descriptors.
249  */
250 struct d40_base {
251 	spinlock_t			 interrupt_lock;
252 	spinlock_t			 execmd_lock;
253 	struct device			 *dev;
254 	void __iomem			 *virtbase;
255 	u8				  rev:4;
256 	struct clk			 *clk;
257 	phys_addr_t			  phy_start;
258 	resource_size_t			  phy_size;
259 	int				  irq;
260 	int				  num_phy_chans;
261 	int				  num_log_chans;
262 	struct dma_device		  dma_both;
263 	struct dma_device		  dma_slave;
264 	struct dma_device		  dma_memcpy;
265 	struct d40_chan			 *phy_chans;
266 	struct d40_chan			 *log_chans;
267 	struct d40_chan			**lookup_log_chans;
268 	struct d40_chan			**lookup_phy_chans;
269 	struct stedma40_platform_data	 *plat_data;
270 	/* Physical half channels */
271 	struct d40_phy_res		 *phy_res;
272 	struct d40_lcla_pool		  lcla_pool;
273 	void				 *lcpa_base;
274 	dma_addr_t			  phy_lcpa;
275 	resource_size_t			  lcpa_size;
276 	struct kmem_cache		 *desc_slab;
277 };
278 
279 /**
280  * struct d40_interrupt_lookup - lookup table for interrupt handler
281  *
282  * @src: Interrupt mask register.
283  * @clr: Interrupt clear register.
284  * @is_error: true if this is an error interrupt.
285  * @offset: start delta in the lookup_log_chans in d40_base. If equals to
286  * D40_PHY_CHAN, the lookup_phy_chans shall be used instead.
287  */
288 struct d40_interrupt_lookup {
289 	u32 src;
290 	u32 clr;
291 	bool is_error;
292 	int offset;
293 };
294 
295 /**
296  * struct d40_reg_val - simple lookup struct
297  *
298  * @reg: The register.
299  * @val: The value that belongs to the register in reg.
300  */
301 struct d40_reg_val {
302 	unsigned int reg;
303 	unsigned int val;
304 };
305 
306 static int d40_pool_lli_alloc(struct d40_desc *d40d,
307 			      int lli_len, bool is_log)
308 {
309 	u32 align;
310 	void *base;
311 
312 	if (is_log)
313 		align = sizeof(struct d40_log_lli);
314 	else
315 		align = sizeof(struct d40_phy_lli);
316 
317 	if (lli_len == 1) {
318 		base = d40d->lli_pool.pre_alloc_lli;
319 		d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli);
320 		d40d->lli_pool.base = NULL;
321 	} else {
322 		d40d->lli_pool.size = ALIGN(lli_len * 2 * align, align);
323 
324 		base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT);
325 		d40d->lli_pool.base = base;
326 
327 		if (d40d->lli_pool.base == NULL)
328 			return -ENOMEM;
329 	}
330 
331 	if (is_log) {
332 		d40d->lli_log.src = PTR_ALIGN((struct d40_log_lli *) base,
333 					      align);
334 		d40d->lli_log.dst = PTR_ALIGN(d40d->lli_log.src + lli_len,
335 					      align);
336 	} else {
337 		d40d->lli_phy.src = PTR_ALIGN((struct d40_phy_lli *)base,
338 					      align);
339 		d40d->lli_phy.dst = PTR_ALIGN(d40d->lli_phy.src + lli_len,
340 					      align);
341 	}
342 
343 	return 0;
344 }
345 
346 static void d40_pool_lli_free(struct d40_desc *d40d)
347 {
348 	kfree(d40d->lli_pool.base);
349 	d40d->lli_pool.base = NULL;
350 	d40d->lli_pool.size = 0;
351 	d40d->lli_log.src = NULL;
352 	d40d->lli_log.dst = NULL;
353 	d40d->lli_phy.src = NULL;
354 	d40d->lli_phy.dst = NULL;
355 }
356 
357 static int d40_lcla_alloc_one(struct d40_chan *d40c,
358 			      struct d40_desc *d40d)
359 {
360 	unsigned long flags;
361 	int i;
362 	int ret = -EINVAL;
363 	int p;
364 
365 	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
366 
367 	p = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP;
368 
369 	/*
370 	 * Allocate both src and dst at the same time, therefore the half
371 	 * start on 1 since 0 can't be used since zero is used as end marker.
372 	 */
373 	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
374 		if (!d40c->base->lcla_pool.alloc_map[p + i]) {
375 			d40c->base->lcla_pool.alloc_map[p + i] = d40d;
376 			d40d->lcla_alloc++;
377 			ret = i;
378 			break;
379 		}
380 	}
381 
382 	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
383 
384 	return ret;
385 }
386 
387 static int d40_lcla_free_all(struct d40_chan *d40c,
388 			     struct d40_desc *d40d)
389 {
390 	unsigned long flags;
391 	int i;
392 	int ret = -EINVAL;
393 
394 	if (d40c->log_num == D40_PHY_CHAN)
395 		return 0;
396 
397 	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
398 
399 	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
400 		if (d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num *
401 						    D40_LCLA_LINK_PER_EVENT_GRP + i] == d40d) {
402 			d40c->base->lcla_pool.alloc_map[d40c->phy_chan->num *
403 							D40_LCLA_LINK_PER_EVENT_GRP + i] = NULL;
404 			d40d->lcla_alloc--;
405 			if (d40d->lcla_alloc == 0) {
406 				ret = 0;
407 				break;
408 			}
409 		}
410 	}
411 
412 	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
413 
414 	return ret;
415 
416 }
417 
418 static void d40_desc_remove(struct d40_desc *d40d)
419 {
420 	list_del(&d40d->node);
421 }
422 
423 static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
424 {
425 	struct d40_desc *desc = NULL;
426 
427 	if (!list_empty(&d40c->client)) {
428 		struct d40_desc *d;
429 		struct d40_desc *_d;
430 
431 		list_for_each_entry_safe(d, _d, &d40c->client, node)
432 			if (async_tx_test_ack(&d->txd)) {
433 				d40_pool_lli_free(d);
434 				d40_desc_remove(d);
435 				desc = d;
436 				memset(desc, 0, sizeof(*desc));
437 				break;
438 			}
439 	}
440 
441 	if (!desc)
442 		desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT);
443 
444 	if (desc)
445 		INIT_LIST_HEAD(&desc->node);
446 
447 	return desc;
448 }
449 
450 static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
451 {
452 
453 	d40_lcla_free_all(d40c, d40d);
454 	kmem_cache_free(d40c->base->desc_slab, d40d);
455 }
456 
457 static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc)
458 {
459 	list_add_tail(&desc->node, &d40c->active);
460 }
461 
462 static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
463 {
464 	int curr_lcla = -EINVAL, next_lcla;
465 
466 	if (d40c->log_num == D40_PHY_CHAN) {
467 		d40_phy_lli_write(d40c->base->virtbase,
468 				  d40c->phy_chan->num,
469 				  d40d->lli_phy.dst,
470 				  d40d->lli_phy.src);
471 		d40d->lli_current = d40d->lli_len;
472 	} else {
473 
474 		if ((d40d->lli_len - d40d->lli_current) > 1)
475 			curr_lcla = d40_lcla_alloc_one(d40c, d40d);
476 
477 		d40_log_lli_lcpa_write(d40c->lcpa,
478 				       &d40d->lli_log.dst[d40d->lli_current],
479 				       &d40d->lli_log.src[d40d->lli_current],
480 				       curr_lcla);
481 
482 		d40d->lli_current++;
483 		for (; d40d->lli_current < d40d->lli_len; d40d->lli_current++) {
484 			struct d40_log_lli *lcla;
485 
486 			if (d40d->lli_current + 1 < d40d->lli_len)
487 				next_lcla = d40_lcla_alloc_one(d40c, d40d);
488 			else
489 				next_lcla = -EINVAL;
490 
491 			lcla = d40c->base->lcla_pool.base +
492 				d40c->phy_chan->num * 1024 +
493 				8 * curr_lcla * 2;
494 
495 			d40_log_lli_lcla_write(lcla,
496 					       &d40d->lli_log.dst[d40d->lli_current],
497 					       &d40d->lli_log.src[d40d->lli_current],
498 					       next_lcla);
499 
500 			(void) dma_map_single(d40c->base->dev, lcla,
501 					      2 * sizeof(struct d40_log_lli),
502 					      DMA_TO_DEVICE);
503 
504 			curr_lcla = next_lcla;
505 
506 			if (curr_lcla == -EINVAL) {
507 				d40d->lli_current++;
508 				break;
509 			}
510 
511 		}
512 	}
513 }
514 
515 static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
516 {
517 	struct d40_desc *d;
518 
519 	if (list_empty(&d40c->active))
520 		return NULL;
521 
522 	d = list_first_entry(&d40c->active,
523 			     struct d40_desc,
524 			     node);
525 	return d;
526 }
527 
528 static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
529 {
530 	list_add_tail(&desc->node, &d40c->queue);
531 }
532 
533 static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
534 {
535 	struct d40_desc *d;
536 
537 	if (list_empty(&d40c->queue))
538 		return NULL;
539 
540 	d = list_first_entry(&d40c->queue,
541 			     struct d40_desc,
542 			     node);
543 	return d;
544 }
545 
546 static struct d40_desc *d40_last_queued(struct d40_chan *d40c)
547 {
548 	struct d40_desc *d;
549 
550 	if (list_empty(&d40c->queue))
551 		return NULL;
552 	list_for_each_entry(d, &d40c->queue, node)
553 		if (list_is_last(&d->node, &d40c->queue))
554 			break;
555 	return d;
556 }
557 
558 static int d40_psize_2_burst_size(bool is_log, int psize)
559 {
560 	if (is_log) {
561 		if (psize == STEDMA40_PSIZE_LOG_1)
562 			return 1;
563 	} else {
564 		if (psize == STEDMA40_PSIZE_PHY_1)
565 			return 1;
566 	}
567 
568 	return 2 << psize;
569 }
570 
571 /*
572  * The dma only supports transmitting packages up to
573  * STEDMA40_MAX_SEG_SIZE << data_width. Calculate the total number of
574  * dma elements required to send the entire sg list
575  */
576 static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2)
577 {
578 	int dmalen;
579 	u32 max_w = max(data_width1, data_width2);
580 	u32 min_w = min(data_width1, data_width2);
581 	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE << min_w, 1 << max_w);
582 
583 	if (seg_max > STEDMA40_MAX_SEG_SIZE)
584 		seg_max -= (1 << max_w);
585 
586 	if (!IS_ALIGNED(size, 1 << max_w))
587 		return -EINVAL;
588 
589 	if (size <= seg_max)
590 		dmalen = 1;
591 	else {
592 		dmalen = size / seg_max;
593 		if (dmalen * seg_max < size)
594 			dmalen++;
595 	}
596 	return dmalen;
597 }
598 
599 static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len,
600 			   u32 data_width1, u32 data_width2)
601 {
602 	struct scatterlist *sg;
603 	int i;
604 	int len = 0;
605 	int ret;
606 
607 	for_each_sg(sgl, sg, sg_len, i) {
608 		ret = d40_size_2_dmalen(sg_dma_len(sg),
609 					data_width1, data_width2);
610 		if (ret < 0)
611 			return ret;
612 		len += ret;
613 	}
614 	return len;
615 }
616 
617 /* Support functions for logical channels */
618 
619 static int d40_channel_execute_command(struct d40_chan *d40c,
620 				       enum d40_command command)
621 {
622 	u32 status;
623 	int i;
624 	void __iomem *active_reg;
625 	int ret = 0;
626 	unsigned long flags;
627 	u32 wmask;
628 
629 	spin_lock_irqsave(&d40c->base->execmd_lock, flags);
630 
631 	if (d40c->phy_chan->num % 2 == 0)
632 		active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
633 	else
634 		active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
635 
636 	if (command == D40_DMA_SUSPEND_REQ) {
637 		status = (readl(active_reg) &
638 			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
639 			D40_CHAN_POS(d40c->phy_chan->num);
640 
641 		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
642 			goto done;
643 	}
644 
645 	wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num));
646 	writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)),
647 	       active_reg);
648 
649 	if (command == D40_DMA_SUSPEND_REQ) {
650 
651 		for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) {
652 			status = (readl(active_reg) &
653 				  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
654 				D40_CHAN_POS(d40c->phy_chan->num);
655 
656 			cpu_relax();
657 			/*
658 			 * Reduce the number of bus accesses while
659 			 * waiting for the DMA to suspend.
660 			 */
661 			udelay(3);
662 
663 			if (status == D40_DMA_STOP ||
664 			    status == D40_DMA_SUSPENDED)
665 				break;
666 		}
667 
668 		if (i == D40_SUSPEND_MAX_IT) {
669 			dev_err(&d40c->chan.dev->device,
670 				"[%s]: unable to suspend the chl %d (log: %d) status %x\n",
671 				__func__, d40c->phy_chan->num, d40c->log_num,
672 				status);
673 			dump_stack();
674 			ret = -EBUSY;
675 		}
676 
677 	}
678 done:
679 	spin_unlock_irqrestore(&d40c->base->execmd_lock, flags);
680 	return ret;
681 }
682 
683 static void d40_term_all(struct d40_chan *d40c)
684 {
685 	struct d40_desc *d40d;
686 
687 	/* Release active descriptors */
688 	while ((d40d = d40_first_active_get(d40c))) {
689 		d40_desc_remove(d40d);
690 		d40_desc_free(d40c, d40d);
691 	}
692 
693 	/* Release queued descriptors waiting for transfer */
694 	while ((d40d = d40_first_queued(d40c))) {
695 		d40_desc_remove(d40d);
696 		d40_desc_free(d40c, d40d);
697 	}
698 
699 
700 	d40c->pending_tx = 0;
701 	d40c->busy = false;
702 }
703 
704 static void d40_config_set_event(struct d40_chan *d40c, bool do_enable)
705 {
706 	u32 val;
707 	unsigned long flags;
708 
709 	/* Notice, that disable requires the physical channel to be stopped */
710 	if (do_enable)
711 		val = D40_ACTIVATE_EVENTLINE;
712 	else
713 		val = D40_DEACTIVATE_EVENTLINE;
714 
715 	spin_lock_irqsave(&d40c->phy_chan->lock, flags);
716 
717 	/* Enable event line connected to device (or memcpy) */
718 	if ((d40c->dma_cfg.dir ==  STEDMA40_PERIPH_TO_MEM) ||
719 	    (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_PERIPH)) {
720 		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
721 
722 		writel((val << D40_EVENTLINE_POS(event)) |
723 		       ~D40_EVENTLINE_MASK(event),
724 		       d40c->base->virtbase + D40_DREG_PCBASE +
725 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
726 		       D40_CHAN_REG_SSLNK);
727 	}
728 	if (d40c->dma_cfg.dir !=  STEDMA40_PERIPH_TO_MEM) {
729 		u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
730 
731 		writel((val << D40_EVENTLINE_POS(event)) |
732 		       ~D40_EVENTLINE_MASK(event),
733 		       d40c->base->virtbase + D40_DREG_PCBASE +
734 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
735 		       D40_CHAN_REG_SDLNK);
736 	}
737 
738 	spin_unlock_irqrestore(&d40c->phy_chan->lock, flags);
739 }
740 
741 static u32 d40_chan_has_events(struct d40_chan *d40c)
742 {
743 	u32 val;
744 
745 	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
746 		    d40c->phy_chan->num * D40_DREG_PCDELTA +
747 		    D40_CHAN_REG_SSLNK);
748 
749 	val |= readl(d40c->base->virtbase + D40_DREG_PCBASE +
750 		     d40c->phy_chan->num * D40_DREG_PCDELTA +
751 		     D40_CHAN_REG_SDLNK);
752 	return val;
753 }
754 
755 static u32 d40_get_prmo(struct d40_chan *d40c)
756 {
757 	static const unsigned int phy_map[] = {
758 		[STEDMA40_PCHAN_BASIC_MODE]
759 			= D40_DREG_PRMO_PCHAN_BASIC,
760 		[STEDMA40_PCHAN_MODULO_MODE]
761 			= D40_DREG_PRMO_PCHAN_MODULO,
762 		[STEDMA40_PCHAN_DOUBLE_DST_MODE]
763 			= D40_DREG_PRMO_PCHAN_DOUBLE_DST,
764 	};
765 	static const unsigned int log_map[] = {
766 		[STEDMA40_LCHAN_SRC_PHY_DST_LOG]
767 			= D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG,
768 		[STEDMA40_LCHAN_SRC_LOG_DST_PHY]
769 			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY,
770 		[STEDMA40_LCHAN_SRC_LOG_DST_LOG]
771 			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG,
772 	};
773 
774 	if (d40c->log_num == D40_PHY_CHAN)
775 		return phy_map[d40c->dma_cfg.mode_opt];
776 	else
777 		return log_map[d40c->dma_cfg.mode_opt];
778 }
779 
780 static void d40_config_write(struct d40_chan *d40c)
781 {
782 	u32 addr_base;
783 	u32 var;
784 
785 	/* Odd addresses are even addresses + 4 */
786 	addr_base = (d40c->phy_chan->num % 2) * 4;
787 	/* Setup channel mode to logical or physical */
788 	var = ((u32)(d40c->log_num != D40_PHY_CHAN) + 1) <<
789 		D40_CHAN_POS(d40c->phy_chan->num);
790 	writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base);
791 
792 	/* Setup operational mode option register */
793 	var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num);
794 
795 	writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base);
796 
797 	if (d40c->log_num != D40_PHY_CHAN) {
798 		/* Set default config for CFG reg */
799 		writel(d40c->src_def_cfg,
800 		       d40c->base->virtbase + D40_DREG_PCBASE +
801 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
802 		       D40_CHAN_REG_SSCFG);
803 		writel(d40c->dst_def_cfg,
804 		       d40c->base->virtbase + D40_DREG_PCBASE +
805 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
806 		       D40_CHAN_REG_SDCFG);
807 
808 		/* Set LIDX for lcla */
809 		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
810 		       D40_SREG_ELEM_LOG_LIDX_MASK,
811 		       d40c->base->virtbase + D40_DREG_PCBASE +
812 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
813 		       D40_CHAN_REG_SDELT);
814 
815 		writel((d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS) &
816 		       D40_SREG_ELEM_LOG_LIDX_MASK,
817 		       d40c->base->virtbase + D40_DREG_PCBASE +
818 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
819 		       D40_CHAN_REG_SSELT);
820 
821 	}
822 }
823 
824 static u32 d40_residue(struct d40_chan *d40c)
825 {
826 	u32 num_elt;
827 
828 	if (d40c->log_num != D40_PHY_CHAN)
829 		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
830 			>> D40_MEM_LCSP2_ECNT_POS;
831 	else
832 		num_elt = (readl(d40c->base->virtbase + D40_DREG_PCBASE +
833 				 d40c->phy_chan->num * D40_DREG_PCDELTA +
834 				 D40_CHAN_REG_SDELT) &
835 			   D40_SREG_ELEM_PHY_ECNT_MASK) >>
836 			D40_SREG_ELEM_PHY_ECNT_POS;
837 	return num_elt * (1 << d40c->dma_cfg.dst_info.data_width);
838 }
839 
840 static bool d40_tx_is_linked(struct d40_chan *d40c)
841 {
842 	bool is_link;
843 
844 	if (d40c->log_num != D40_PHY_CHAN)
845 		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
846 	else
847 		is_link = readl(d40c->base->virtbase + D40_DREG_PCBASE +
848 				d40c->phy_chan->num * D40_DREG_PCDELTA +
849 				D40_CHAN_REG_SDLNK) &
850 			D40_SREG_LNK_PHYS_LNK_MASK;
851 	return is_link;
852 }
853 
854 static int d40_pause(struct dma_chan *chan)
855 {
856 	struct d40_chan *d40c =
857 		container_of(chan, struct d40_chan, chan);
858 	int res = 0;
859 	unsigned long flags;
860 
861 	if (!d40c->busy)
862 		return 0;
863 
864 	spin_lock_irqsave(&d40c->lock, flags);
865 
866 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
867 	if (res == 0) {
868 		if (d40c->log_num != D40_PHY_CHAN) {
869 			d40_config_set_event(d40c, false);
870 			/* Resume the other logical channels if any */
871 			if (d40_chan_has_events(d40c))
872 				res = d40_channel_execute_command(d40c,
873 								  D40_DMA_RUN);
874 		}
875 	}
876 
877 	spin_unlock_irqrestore(&d40c->lock, flags);
878 	return res;
879 }
880 
881 static int d40_resume(struct dma_chan *chan)
882 {
883 	struct d40_chan *d40c =
884 		container_of(chan, struct d40_chan, chan);
885 	int res = 0;
886 	unsigned long flags;
887 
888 	if (!d40c->busy)
889 		return 0;
890 
891 	spin_lock_irqsave(&d40c->lock, flags);
892 
893 	if (d40c->base->rev == 0)
894 		if (d40c->log_num != D40_PHY_CHAN) {
895 			res = d40_channel_execute_command(d40c,
896 							  D40_DMA_SUSPEND_REQ);
897 			goto no_suspend;
898 		}
899 
900 	/* If bytes left to transfer or linked tx resume job */
901 	if (d40_residue(d40c) || d40_tx_is_linked(d40c)) {
902 
903 		if (d40c->log_num != D40_PHY_CHAN)
904 			d40_config_set_event(d40c, true);
905 
906 		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
907 	}
908 
909 no_suspend:
910 	spin_unlock_irqrestore(&d40c->lock, flags);
911 	return res;
912 }
913 
914 static void d40_tx_submit_log(struct d40_chan *d40c, struct d40_desc *d40d)
915 {
916 	/* TODO: Write */
917 }
918 
919 static void d40_tx_submit_phy(struct d40_chan *d40c, struct d40_desc *d40d)
920 {
921 	struct d40_desc *d40d_prev = NULL;
922 	int i;
923 	u32 val;
924 
925 	if (!list_empty(&d40c->queue))
926 		d40d_prev = d40_last_queued(d40c);
927 	else if (!list_empty(&d40c->active))
928 		d40d_prev = d40_first_active_get(d40c);
929 
930 	if (!d40d_prev)
931 		return;
932 
933 	/* Here we try to join this job with previous jobs */
934 	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
935 		    d40c->phy_chan->num * D40_DREG_PCDELTA +
936 		    D40_CHAN_REG_SSLNK);
937 
938 	/* Figure out which link we're currently transmitting */
939 	for (i = 0; i < d40d_prev->lli_len; i++)
940 		if (val == d40d_prev->lli_phy.src[i].reg_lnk)
941 			break;
942 
943 	val = readl(d40c->base->virtbase + D40_DREG_PCBASE +
944 		    d40c->phy_chan->num * D40_DREG_PCDELTA +
945 		    D40_CHAN_REG_SSELT) >> D40_SREG_ELEM_LOG_ECNT_POS;
946 
947 	if (i == (d40d_prev->lli_len - 1) && val > 0) {
948 		/* Change the current one */
949 		writel(virt_to_phys(d40d->lli_phy.src),
950 		       d40c->base->virtbase + D40_DREG_PCBASE +
951 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
952 		       D40_CHAN_REG_SSLNK);
953 		writel(virt_to_phys(d40d->lli_phy.dst),
954 		       d40c->base->virtbase + D40_DREG_PCBASE +
955 		       d40c->phy_chan->num * D40_DREG_PCDELTA +
956 		       D40_CHAN_REG_SDLNK);
957 
958 		d40d->is_hw_linked = true;
959 
960 	} else if (i < d40d_prev->lli_len) {
961 		(void) dma_unmap_single(d40c->base->dev,
962 					virt_to_phys(d40d_prev->lli_phy.src),
963 					d40d_prev->lli_pool.size,
964 					DMA_TO_DEVICE);
965 
966 		/* Keep the settings */
967 		val = d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk &
968 			~D40_SREG_LNK_PHYS_LNK_MASK;
969 		d40d_prev->lli_phy.src[d40d_prev->lli_len - 1].reg_lnk =
970 			val | virt_to_phys(d40d->lli_phy.src);
971 
972 		val = d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk &
973 			~D40_SREG_LNK_PHYS_LNK_MASK;
974 		d40d_prev->lli_phy.dst[d40d_prev->lli_len - 1].reg_lnk =
975 			val | virt_to_phys(d40d->lli_phy.dst);
976 
977 		(void) dma_map_single(d40c->base->dev,
978 				      d40d_prev->lli_phy.src,
979 				      d40d_prev->lli_pool.size,
980 				      DMA_TO_DEVICE);
981 		d40d->is_hw_linked = true;
982 	}
983 }
984 
985 static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
986 {
987 	struct d40_chan *d40c = container_of(tx->chan,
988 					     struct d40_chan,
989 					     chan);
990 	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
991 	unsigned long flags;
992 
993 	(void) d40_pause(&d40c->chan);
994 
995 	spin_lock_irqsave(&d40c->lock, flags);
996 
997 	d40c->chan.cookie++;
998 
999 	if (d40c->chan.cookie < 0)
1000 		d40c->chan.cookie = 1;
1001 
1002 	d40d->txd.cookie = d40c->chan.cookie;
1003 
1004 	if (d40c->log_num == D40_PHY_CHAN)
1005 		d40_tx_submit_phy(d40c, d40d);
1006 	else
1007 		d40_tx_submit_log(d40c, d40d);
1008 
1009 	d40_desc_queue(d40c, d40d);
1010 
1011 	spin_unlock_irqrestore(&d40c->lock, flags);
1012 
1013 	(void) d40_resume(&d40c->chan);
1014 
1015 	return tx->cookie;
1016 }
1017 
1018 static int d40_start(struct d40_chan *d40c)
1019 {
1020 	if (d40c->base->rev == 0) {
1021 		int err;
1022 
1023 		if (d40c->log_num != D40_PHY_CHAN) {
1024 			err = d40_channel_execute_command(d40c,
1025 							  D40_DMA_SUSPEND_REQ);
1026 			if (err)
1027 				return err;
1028 		}
1029 	}
1030 
1031 	if (d40c->log_num != D40_PHY_CHAN)
1032 		d40_config_set_event(d40c, true);
1033 
1034 	return d40_channel_execute_command(d40c, D40_DMA_RUN);
1035 }
1036 
1037 static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
1038 {
1039 	struct d40_desc *d40d;
1040 	int err;
1041 
1042 	/* Start queued jobs, if any */
1043 	d40d = d40_first_queued(d40c);
1044 
1045 	if (d40d != NULL) {
1046 		d40c->busy = true;
1047 
1048 		/* Remove from queue */
1049 		d40_desc_remove(d40d);
1050 
1051 		/* Add to active queue */
1052 		d40_desc_submit(d40c, d40d);
1053 
1054 		/*
1055 		 * If this job is already linked in hw,
1056 		 * do not submit it.
1057 		 */
1058 
1059 		if (!d40d->is_hw_linked) {
1060 			/* Initiate DMA job */
1061 			d40_desc_load(d40c, d40d);
1062 
1063 			/* Start dma job */
1064 			err = d40_start(d40c);
1065 
1066 			if (err)
1067 				return NULL;
1068 		}
1069 	}
1070 
1071 	return d40d;
1072 }
1073 
1074 /* called from interrupt context */
1075 static void dma_tc_handle(struct d40_chan *d40c)
1076 {
1077 	struct d40_desc *d40d;
1078 
1079 	/* Get first active entry from list */
1080 	d40d = d40_first_active_get(d40c);
1081 
1082 	if (d40d == NULL)
1083 		return;
1084 
1085 	d40_lcla_free_all(d40c, d40d);
1086 
1087 	if (d40d->lli_current < d40d->lli_len) {
1088 		d40_desc_load(d40c, d40d);
1089 		/* Start dma job */
1090 		(void) d40_start(d40c);
1091 		return;
1092 	}
1093 
1094 	if (d40_queue_start(d40c) == NULL)
1095 		d40c->busy = false;
1096 
1097 	d40c->pending_tx++;
1098 	tasklet_schedule(&d40c->tasklet);
1099 
1100 }
1101 
1102 static void dma_tasklet(unsigned long data)
1103 {
1104 	struct d40_chan *d40c = (struct d40_chan *) data;
1105 	struct d40_desc *d40d;
1106 	unsigned long flags;
1107 	dma_async_tx_callback callback;
1108 	void *callback_param;
1109 
1110 	spin_lock_irqsave(&d40c->lock, flags);
1111 
1112 	/* Get first active entry from list */
1113 	d40d = d40_first_active_get(d40c);
1114 
1115 	if (d40d == NULL)
1116 		goto err;
1117 
1118 	d40c->completed = d40d->txd.cookie;
1119 
1120 	/*
1121 	 * If terminating a channel pending_tx is set to zero.
1122 	 * This prevents any finished active jobs to return to the client.
1123 	 */
1124 	if (d40c->pending_tx == 0) {
1125 		spin_unlock_irqrestore(&d40c->lock, flags);
1126 		return;
1127 	}
1128 
1129 	/* Callback to client */
1130 	callback = d40d->txd.callback;
1131 	callback_param = d40d->txd.callback_param;
1132 
1133 	if (async_tx_test_ack(&d40d->txd)) {
1134 		d40_pool_lli_free(d40d);
1135 		d40_desc_remove(d40d);
1136 		d40_desc_free(d40c, d40d);
1137 	} else {
1138 		if (!d40d->is_in_client_list) {
1139 			d40_desc_remove(d40d);
1140 			d40_lcla_free_all(d40c, d40d);
1141 			list_add_tail(&d40d->node, &d40c->client);
1142 			d40d->is_in_client_list = true;
1143 		}
1144 	}
1145 
1146 	d40c->pending_tx--;
1147 
1148 	if (d40c->pending_tx)
1149 		tasklet_schedule(&d40c->tasklet);
1150 
1151 	spin_unlock_irqrestore(&d40c->lock, flags);
1152 
1153 	if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT))
1154 		callback(callback_param);
1155 
1156 	return;
1157 
1158  err:
1159 	/* Rescue manouver if receiving double interrupts */
1160 	if (d40c->pending_tx > 0)
1161 		d40c->pending_tx--;
1162 	spin_unlock_irqrestore(&d40c->lock, flags);
1163 }
1164 
1165 static irqreturn_t d40_handle_interrupt(int irq, void *data)
1166 {
1167 	static const struct d40_interrupt_lookup il[] = {
1168 		{D40_DREG_LCTIS0, D40_DREG_LCICR0, false,  0},
1169 		{D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32},
1170 		{D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64},
1171 		{D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96},
1172 		{D40_DREG_LCEIS0, D40_DREG_LCICR0, true,   0},
1173 		{D40_DREG_LCEIS1, D40_DREG_LCICR1, true,  32},
1174 		{D40_DREG_LCEIS2, D40_DREG_LCICR2, true,  64},
1175 		{D40_DREG_LCEIS3, D40_DREG_LCICR3, true,  96},
1176 		{D40_DREG_PCTIS,  D40_DREG_PCICR,  false, D40_PHY_CHAN},
1177 		{D40_DREG_PCEIS,  D40_DREG_PCICR,  true,  D40_PHY_CHAN},
1178 	};
1179 
1180 	int i;
1181 	u32 regs[ARRAY_SIZE(il)];
1182 	u32 idx;
1183 	u32 row;
1184 	long chan = -1;
1185 	struct d40_chan *d40c;
1186 	unsigned long flags;
1187 	struct d40_base *base = data;
1188 
1189 	spin_lock_irqsave(&base->interrupt_lock, flags);
1190 
1191 	/* Read interrupt status of both logical and physical channels */
1192 	for (i = 0; i < ARRAY_SIZE(il); i++)
1193 		regs[i] = readl(base->virtbase + il[i].src);
1194 
1195 	for (;;) {
1196 
1197 		chan = find_next_bit((unsigned long *)regs,
1198 				     BITS_PER_LONG * ARRAY_SIZE(il), chan + 1);
1199 
1200 		/* No more set bits found? */
1201 		if (chan == BITS_PER_LONG * ARRAY_SIZE(il))
1202 			break;
1203 
1204 		row = chan / BITS_PER_LONG;
1205 		idx = chan & (BITS_PER_LONG - 1);
1206 
1207 		/* ACK interrupt */
1208 		writel(1 << idx, base->virtbase + il[row].clr);
1209 
1210 		if (il[row].offset == D40_PHY_CHAN)
1211 			d40c = base->lookup_phy_chans[idx];
1212 		else
1213 			d40c = base->lookup_log_chans[il[row].offset + idx];
1214 		spin_lock(&d40c->lock);
1215 
1216 		if (!il[row].is_error)
1217 			dma_tc_handle(d40c);
1218 		else
1219 			dev_err(base->dev,
1220 				"[%s] IRQ chan: %ld offset %d idx %d\n",
1221 				__func__, chan, il[row].offset, idx);
1222 
1223 		spin_unlock(&d40c->lock);
1224 	}
1225 
1226 	spin_unlock_irqrestore(&base->interrupt_lock, flags);
1227 
1228 	return IRQ_HANDLED;
1229 }
1230 
1231 static int d40_validate_conf(struct d40_chan *d40c,
1232 			     struct stedma40_chan_cfg *conf)
1233 {
1234 	int res = 0;
1235 	u32 dst_event_group = D40_TYPE_TO_GROUP(conf->dst_dev_type);
1236 	u32 src_event_group = D40_TYPE_TO_GROUP(conf->src_dev_type);
1237 	bool is_log = conf->mode == STEDMA40_MODE_LOGICAL;
1238 
1239 	if (!conf->dir) {
1240 		dev_err(&d40c->chan.dev->device, "[%s] Invalid direction.\n",
1241 			__func__);
1242 		res = -EINVAL;
1243 	}
1244 
1245 	if (conf->dst_dev_type != STEDMA40_DEV_DST_MEMORY &&
1246 	    d40c->base->plat_data->dev_tx[conf->dst_dev_type] == 0 &&
1247 	    d40c->runtime_addr == 0) {
1248 
1249 		dev_err(&d40c->chan.dev->device,
1250 			"[%s] Invalid TX channel address (%d)\n",
1251 			__func__, conf->dst_dev_type);
1252 		res = -EINVAL;
1253 	}
1254 
1255 	if (conf->src_dev_type != STEDMA40_DEV_SRC_MEMORY &&
1256 	    d40c->base->plat_data->dev_rx[conf->src_dev_type] == 0 &&
1257 	    d40c->runtime_addr == 0) {
1258 		dev_err(&d40c->chan.dev->device,
1259 			"[%s] Invalid RX channel address (%d)\n",
1260 			__func__, conf->src_dev_type);
1261 		res = -EINVAL;
1262 	}
1263 
1264 	if (conf->dir == STEDMA40_MEM_TO_PERIPH &&
1265 	    dst_event_group == STEDMA40_DEV_DST_MEMORY) {
1266 		dev_err(&d40c->chan.dev->device, "[%s] Invalid dst\n",
1267 			__func__);
1268 		res = -EINVAL;
1269 	}
1270 
1271 	if (conf->dir == STEDMA40_PERIPH_TO_MEM &&
1272 	    src_event_group == STEDMA40_DEV_SRC_MEMORY) {
1273 		dev_err(&d40c->chan.dev->device, "[%s] Invalid src\n",
1274 			__func__);
1275 		res = -EINVAL;
1276 	}
1277 
1278 	if (src_event_group == STEDMA40_DEV_SRC_MEMORY &&
1279 	    dst_event_group == STEDMA40_DEV_DST_MEMORY && is_log) {
1280 		dev_err(&d40c->chan.dev->device,
1281 			"[%s] No event line\n", __func__);
1282 		res = -EINVAL;
1283 	}
1284 
1285 	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH &&
1286 	    (src_event_group != dst_event_group)) {
1287 		dev_err(&d40c->chan.dev->device,
1288 			"[%s] Invalid event group\n", __func__);
1289 		res = -EINVAL;
1290 	}
1291 
1292 	if (conf->dir == STEDMA40_PERIPH_TO_PERIPH) {
1293 		/*
1294 		 * DMAC HW supports it. Will be added to this driver,
1295 		 * in case any dma client requires it.
1296 		 */
1297 		dev_err(&d40c->chan.dev->device,
1298 			"[%s] periph to periph not supported\n",
1299 			__func__);
1300 		res = -EINVAL;
1301 	}
1302 
1303 	if (d40_psize_2_burst_size(is_log, conf->src_info.psize) *
1304 	    (1 << conf->src_info.data_width) !=
1305 	    d40_psize_2_burst_size(is_log, conf->dst_info.psize) *
1306 	    (1 << conf->dst_info.data_width)) {
1307 		/*
1308 		 * The DMAC hardware only supports
1309 		 * src (burst x width) == dst (burst x width)
1310 		 */
1311 
1312 		dev_err(&d40c->chan.dev->device,
1313 			"[%s] src (burst x width) != dst (burst x width)\n",
1314 			__func__);
1315 		res = -EINVAL;
1316 	}
1317 
1318 	return res;
1319 }
1320 
1321 static bool d40_alloc_mask_set(struct d40_phy_res *phy, bool is_src,
1322 			       int log_event_line, bool is_log)
1323 {
1324 	unsigned long flags;
1325 	spin_lock_irqsave(&phy->lock, flags);
1326 	if (!is_log) {
1327 		/* Physical interrupts are masked per physical full channel */
1328 		if (phy->allocated_src == D40_ALLOC_FREE &&
1329 		    phy->allocated_dst == D40_ALLOC_FREE) {
1330 			phy->allocated_dst = D40_ALLOC_PHY;
1331 			phy->allocated_src = D40_ALLOC_PHY;
1332 			goto found;
1333 		} else
1334 			goto not_found;
1335 	}
1336 
1337 	/* Logical channel */
1338 	if (is_src) {
1339 		if (phy->allocated_src == D40_ALLOC_PHY)
1340 			goto not_found;
1341 
1342 		if (phy->allocated_src == D40_ALLOC_FREE)
1343 			phy->allocated_src = D40_ALLOC_LOG_FREE;
1344 
1345 		if (!(phy->allocated_src & (1 << log_event_line))) {
1346 			phy->allocated_src |= 1 << log_event_line;
1347 			goto found;
1348 		} else
1349 			goto not_found;
1350 	} else {
1351 		if (phy->allocated_dst == D40_ALLOC_PHY)
1352 			goto not_found;
1353 
1354 		if (phy->allocated_dst == D40_ALLOC_FREE)
1355 			phy->allocated_dst = D40_ALLOC_LOG_FREE;
1356 
1357 		if (!(phy->allocated_dst & (1 << log_event_line))) {
1358 			phy->allocated_dst |= 1 << log_event_line;
1359 			goto found;
1360 		} else
1361 			goto not_found;
1362 	}
1363 
1364 not_found:
1365 	spin_unlock_irqrestore(&phy->lock, flags);
1366 	return false;
1367 found:
1368 	spin_unlock_irqrestore(&phy->lock, flags);
1369 	return true;
1370 }
1371 
1372 static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src,
1373 			       int log_event_line)
1374 {
1375 	unsigned long flags;
1376 	bool is_free = false;
1377 
1378 	spin_lock_irqsave(&phy->lock, flags);
1379 	if (!log_event_line) {
1380 		phy->allocated_dst = D40_ALLOC_FREE;
1381 		phy->allocated_src = D40_ALLOC_FREE;
1382 		is_free = true;
1383 		goto out;
1384 	}
1385 
1386 	/* Logical channel */
1387 	if (is_src) {
1388 		phy->allocated_src &= ~(1 << log_event_line);
1389 		if (phy->allocated_src == D40_ALLOC_LOG_FREE)
1390 			phy->allocated_src = D40_ALLOC_FREE;
1391 	} else {
1392 		phy->allocated_dst &= ~(1 << log_event_line);
1393 		if (phy->allocated_dst == D40_ALLOC_LOG_FREE)
1394 			phy->allocated_dst = D40_ALLOC_FREE;
1395 	}
1396 
1397 	is_free = ((phy->allocated_src | phy->allocated_dst) ==
1398 		   D40_ALLOC_FREE);
1399 
1400 out:
1401 	spin_unlock_irqrestore(&phy->lock, flags);
1402 
1403 	return is_free;
1404 }
1405 
1406 static int d40_allocate_channel(struct d40_chan *d40c)
1407 {
1408 	int dev_type;
1409 	int event_group;
1410 	int event_line;
1411 	struct d40_phy_res *phys;
1412 	int i;
1413 	int j;
1414 	int log_num;
1415 	bool is_src;
1416 	bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL;
1417 
1418 	phys = d40c->base->phy_res;
1419 
1420 	if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
1421 		dev_type = d40c->dma_cfg.src_dev_type;
1422 		log_num = 2 * dev_type;
1423 		is_src = true;
1424 	} else if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
1425 		   d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1426 		/* dst event lines are used for logical memcpy */
1427 		dev_type = d40c->dma_cfg.dst_dev_type;
1428 		log_num = 2 * dev_type + 1;
1429 		is_src = false;
1430 	} else
1431 		return -EINVAL;
1432 
1433 	event_group = D40_TYPE_TO_GROUP(dev_type);
1434 	event_line = D40_TYPE_TO_EVENT(dev_type);
1435 
1436 	if (!is_log) {
1437 		if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1438 			/* Find physical half channel */
1439 			for (i = 0; i < d40c->base->num_phy_chans; i++) {
1440 
1441 				if (d40_alloc_mask_set(&phys[i], is_src,
1442 						       0, is_log))
1443 					goto found_phy;
1444 			}
1445 		} else
1446 			for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
1447 				int phy_num = j  + event_group * 2;
1448 				for (i = phy_num; i < phy_num + 2; i++) {
1449 					if (d40_alloc_mask_set(&phys[i],
1450 							       is_src,
1451 							       0,
1452 							       is_log))
1453 						goto found_phy;
1454 				}
1455 			}
1456 		return -EINVAL;
1457 found_phy:
1458 		d40c->phy_chan = &phys[i];
1459 		d40c->log_num = D40_PHY_CHAN;
1460 		goto out;
1461 	}
1462 	if (dev_type == -1)
1463 		return -EINVAL;
1464 
1465 	/* Find logical channel */
1466 	for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
1467 		int phy_num = j + event_group * 2;
1468 		/*
1469 		 * Spread logical channels across all available physical rather
1470 		 * than pack every logical channel at the first available phy
1471 		 * channels.
1472 		 */
1473 		if (is_src) {
1474 			for (i = phy_num; i < phy_num + 2; i++) {
1475 				if (d40_alloc_mask_set(&phys[i], is_src,
1476 						       event_line, is_log))
1477 					goto found_log;
1478 			}
1479 		} else {
1480 			for (i = phy_num + 1; i >= phy_num; i--) {
1481 				if (d40_alloc_mask_set(&phys[i], is_src,
1482 						       event_line, is_log))
1483 					goto found_log;
1484 			}
1485 		}
1486 	}
1487 	return -EINVAL;
1488 
1489 found_log:
1490 	d40c->phy_chan = &phys[i];
1491 	d40c->log_num = log_num;
1492 out:
1493 
1494 	if (is_log)
1495 		d40c->base->lookup_log_chans[d40c->log_num] = d40c;
1496 	else
1497 		d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c;
1498 
1499 	return 0;
1500 
1501 }
1502 
1503 static int d40_config_memcpy(struct d40_chan *d40c)
1504 {
1505 	dma_cap_mask_t cap = d40c->chan.device->cap_mask;
1506 
1507 	if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) {
1508 		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_log;
1509 		d40c->dma_cfg.src_dev_type = STEDMA40_DEV_SRC_MEMORY;
1510 		d40c->dma_cfg.dst_dev_type = d40c->base->plat_data->
1511 			memcpy[d40c->chan.chan_id];
1512 
1513 	} else if (dma_has_cap(DMA_MEMCPY, cap) &&
1514 		   dma_has_cap(DMA_SLAVE, cap)) {
1515 		d40c->dma_cfg = *d40c->base->plat_data->memcpy_conf_phy;
1516 	} else {
1517 		dev_err(&d40c->chan.dev->device, "[%s] No memcpy\n",
1518 			__func__);
1519 		return -EINVAL;
1520 	}
1521 
1522 	return 0;
1523 }
1524 
1525 
1526 static int d40_free_dma(struct d40_chan *d40c)
1527 {
1528 
1529 	int res = 0;
1530 	u32 event;
1531 	struct d40_phy_res *phy = d40c->phy_chan;
1532 	bool is_src;
1533 	struct d40_desc *d;
1534 	struct d40_desc *_d;
1535 
1536 
1537 	/* Terminate all queued and active transfers */
1538 	d40_term_all(d40c);
1539 
1540 	/* Release client owned descriptors */
1541 	if (!list_empty(&d40c->client))
1542 		list_for_each_entry_safe(d, _d, &d40c->client, node) {
1543 			d40_pool_lli_free(d);
1544 			d40_desc_remove(d);
1545 			d40_desc_free(d40c, d);
1546 		}
1547 
1548 	if (phy == NULL) {
1549 		dev_err(&d40c->chan.dev->device, "[%s] phy == null\n",
1550 			__func__);
1551 		return -EINVAL;
1552 	}
1553 
1554 	if (phy->allocated_src == D40_ALLOC_FREE &&
1555 	    phy->allocated_dst == D40_ALLOC_FREE) {
1556 		dev_err(&d40c->chan.dev->device, "[%s] channel already free\n",
1557 			__func__);
1558 		return -EINVAL;
1559 	}
1560 
1561 	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
1562 	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1563 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
1564 		is_src = false;
1565 	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
1566 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
1567 		is_src = true;
1568 	} else {
1569 		dev_err(&d40c->chan.dev->device,
1570 			"[%s] Unknown direction\n", __func__);
1571 		return -EINVAL;
1572 	}
1573 
1574 	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
1575 	if (res) {
1576 		dev_err(&d40c->chan.dev->device, "[%s] suspend failed\n",
1577 			__func__);
1578 		return res;
1579 	}
1580 
1581 	if (d40c->log_num != D40_PHY_CHAN) {
1582 		/* Release logical channel, deactivate the event line */
1583 
1584 		d40_config_set_event(d40c, false);
1585 		d40c->base->lookup_log_chans[d40c->log_num] = NULL;
1586 
1587 		/*
1588 		 * Check if there are more logical allocation
1589 		 * on this phy channel.
1590 		 */
1591 		if (!d40_alloc_mask_free(phy, is_src, event)) {
1592 			/* Resume the other logical channels if any */
1593 			if (d40_chan_has_events(d40c)) {
1594 				res = d40_channel_execute_command(d40c,
1595 								  D40_DMA_RUN);
1596 				if (res) {
1597 					dev_err(&d40c->chan.dev->device,
1598 						"[%s] Executing RUN command\n",
1599 						__func__);
1600 					return res;
1601 				}
1602 			}
1603 			return 0;
1604 		}
1605 	} else {
1606 		(void) d40_alloc_mask_free(phy, is_src, 0);
1607 	}
1608 
1609 	/* Release physical channel */
1610 	res = d40_channel_execute_command(d40c, D40_DMA_STOP);
1611 	if (res) {
1612 		dev_err(&d40c->chan.dev->device,
1613 			"[%s] Failed to stop channel\n", __func__);
1614 		return res;
1615 	}
1616 	d40c->phy_chan = NULL;
1617 	d40c->configured = false;
1618 	d40c->base->lookup_phy_chans[phy->num] = NULL;
1619 
1620 	return 0;
1621 }
1622 
1623 static bool d40_is_paused(struct d40_chan *d40c)
1624 {
1625 	bool is_paused = false;
1626 	unsigned long flags;
1627 	void __iomem *active_reg;
1628 	u32 status;
1629 	u32 event;
1630 
1631 	spin_lock_irqsave(&d40c->lock, flags);
1632 
1633 	if (d40c->log_num == D40_PHY_CHAN) {
1634 		if (d40c->phy_chan->num % 2 == 0)
1635 			active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
1636 		else
1637 			active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
1638 
1639 		status = (readl(active_reg) &
1640 			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
1641 			D40_CHAN_POS(d40c->phy_chan->num);
1642 		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
1643 			is_paused = true;
1644 
1645 		goto _exit;
1646 	}
1647 
1648 	if (d40c->dma_cfg.dir == STEDMA40_MEM_TO_PERIPH ||
1649 	    d40c->dma_cfg.dir == STEDMA40_MEM_TO_MEM) {
1650 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dst_dev_type);
1651 		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
1652 			       d40c->phy_chan->num * D40_DREG_PCDELTA +
1653 			       D40_CHAN_REG_SDLNK);
1654 	} else if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM) {
1655 		event = D40_TYPE_TO_EVENT(d40c->dma_cfg.src_dev_type);
1656 		status = readl(d40c->base->virtbase + D40_DREG_PCBASE +
1657 			       d40c->phy_chan->num * D40_DREG_PCDELTA +
1658 			       D40_CHAN_REG_SSLNK);
1659 	} else {
1660 		dev_err(&d40c->chan.dev->device,
1661 			"[%s] Unknown direction\n", __func__);
1662 		goto _exit;
1663 	}
1664 
1665 	status = (status & D40_EVENTLINE_MASK(event)) >>
1666 		D40_EVENTLINE_POS(event);
1667 
1668 	if (status != D40_DMA_RUN)
1669 		is_paused = true;
1670 _exit:
1671 	spin_unlock_irqrestore(&d40c->lock, flags);
1672 	return is_paused;
1673 
1674 }
1675 
1676 
1677 static u32 stedma40_residue(struct dma_chan *chan)
1678 {
1679 	struct d40_chan *d40c =
1680 		container_of(chan, struct d40_chan, chan);
1681 	u32 bytes_left;
1682 	unsigned long flags;
1683 
1684 	spin_lock_irqsave(&d40c->lock, flags);
1685 	bytes_left = d40_residue(d40c);
1686 	spin_unlock_irqrestore(&d40c->lock, flags);
1687 
1688 	return bytes_left;
1689 }
1690 
1691 struct dma_async_tx_descriptor *stedma40_memcpy_sg(struct dma_chan *chan,
1692 						   struct scatterlist *sgl_dst,
1693 						   struct scatterlist *sgl_src,
1694 						   unsigned int sgl_len,
1695 						   unsigned long dma_flags)
1696 {
1697 	int res;
1698 	struct d40_desc *d40d;
1699 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
1700 					     chan);
1701 	unsigned long flags;
1702 
1703 	if (d40c->phy_chan == NULL) {
1704 		dev_err(&d40c->chan.dev->device,
1705 			"[%s] Unallocated channel.\n", __func__);
1706 		return ERR_PTR(-EINVAL);
1707 	}
1708 
1709 	spin_lock_irqsave(&d40c->lock, flags);
1710 	d40d = d40_desc_get(d40c);
1711 
1712 	if (d40d == NULL)
1713 		goto err;
1714 
1715 	d40d->lli_len = d40_sg_2_dmalen(sgl_dst, sgl_len,
1716 					d40c->dma_cfg.src_info.data_width,
1717 					d40c->dma_cfg.dst_info.data_width);
1718 	if (d40d->lli_len < 0) {
1719 		dev_err(&d40c->chan.dev->device,
1720 			"[%s] Unaligned size\n", __func__);
1721 		goto err;
1722 	}
1723 
1724 	d40d->lli_current = 0;
1725 	d40d->txd.flags = dma_flags;
1726 
1727 	if (d40c->log_num != D40_PHY_CHAN) {
1728 
1729 		if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
1730 			dev_err(&d40c->chan.dev->device,
1731 				"[%s] Out of memory\n", __func__);
1732 			goto err;
1733 		}
1734 
1735 		(void) d40_log_sg_to_lli(sgl_src,
1736 					 sgl_len,
1737 					 d40d->lli_log.src,
1738 					 d40c->log_def.lcsp1,
1739 					 d40c->dma_cfg.src_info.data_width,
1740 					 d40c->dma_cfg.dst_info.data_width);
1741 
1742 		(void) d40_log_sg_to_lli(sgl_dst,
1743 					 sgl_len,
1744 					 d40d->lli_log.dst,
1745 					 d40c->log_def.lcsp3,
1746 					 d40c->dma_cfg.dst_info.data_width,
1747 					 d40c->dma_cfg.src_info.data_width);
1748 	} else {
1749 		if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
1750 			dev_err(&d40c->chan.dev->device,
1751 				"[%s] Out of memory\n", __func__);
1752 			goto err;
1753 		}
1754 
1755 		res = d40_phy_sg_to_lli(sgl_src,
1756 					sgl_len,
1757 					0,
1758 					d40d->lli_phy.src,
1759 					virt_to_phys(d40d->lli_phy.src),
1760 					d40c->src_def_cfg,
1761 					d40c->dma_cfg.src_info.data_width,
1762 					d40c->dma_cfg.dst_info.data_width,
1763 					d40c->dma_cfg.src_info.psize);
1764 
1765 		if (res < 0)
1766 			goto err;
1767 
1768 		res = d40_phy_sg_to_lli(sgl_dst,
1769 					sgl_len,
1770 					0,
1771 					d40d->lli_phy.dst,
1772 					virt_to_phys(d40d->lli_phy.dst),
1773 					d40c->dst_def_cfg,
1774 					d40c->dma_cfg.dst_info.data_width,
1775 					d40c->dma_cfg.src_info.data_width,
1776 					d40c->dma_cfg.dst_info.psize);
1777 
1778 		if (res < 0)
1779 			goto err;
1780 
1781 		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
1782 				      d40d->lli_pool.size, DMA_TO_DEVICE);
1783 	}
1784 
1785 	dma_async_tx_descriptor_init(&d40d->txd, chan);
1786 
1787 	d40d->txd.tx_submit = d40_tx_submit;
1788 
1789 	spin_unlock_irqrestore(&d40c->lock, flags);
1790 
1791 	return &d40d->txd;
1792 err:
1793 	if (d40d)
1794 		d40_desc_free(d40c, d40d);
1795 	spin_unlock_irqrestore(&d40c->lock, flags);
1796 	return NULL;
1797 }
1798 EXPORT_SYMBOL(stedma40_memcpy_sg);
1799 
1800 bool stedma40_filter(struct dma_chan *chan, void *data)
1801 {
1802 	struct stedma40_chan_cfg *info = data;
1803 	struct d40_chan *d40c =
1804 		container_of(chan, struct d40_chan, chan);
1805 	int err;
1806 
1807 	if (data) {
1808 		err = d40_validate_conf(d40c, info);
1809 		if (!err)
1810 			d40c->dma_cfg = *info;
1811 	} else
1812 		err = d40_config_memcpy(d40c);
1813 
1814 	if (!err)
1815 		d40c->configured = true;
1816 
1817 	return err == 0;
1818 }
1819 EXPORT_SYMBOL(stedma40_filter);
1820 
1821 /* DMA ENGINE functions */
1822 static int d40_alloc_chan_resources(struct dma_chan *chan)
1823 {
1824 	int err;
1825 	unsigned long flags;
1826 	struct d40_chan *d40c =
1827 		container_of(chan, struct d40_chan, chan);
1828 	bool is_free_phy;
1829 	spin_lock_irqsave(&d40c->lock, flags);
1830 
1831 	d40c->completed = chan->cookie = 1;
1832 
1833 	/* If no dma configuration is set use default configuration (memcpy) */
1834 	if (!d40c->configured) {
1835 		err = d40_config_memcpy(d40c);
1836 		if (err) {
1837 			dev_err(&d40c->chan.dev->device,
1838 				"[%s] Failed to configure memcpy channel\n",
1839 				__func__);
1840 			goto fail;
1841 		}
1842 	}
1843 	is_free_phy = (d40c->phy_chan == NULL);
1844 
1845 	err = d40_allocate_channel(d40c);
1846 	if (err) {
1847 		dev_err(&d40c->chan.dev->device,
1848 			"[%s] Failed to allocate channel\n", __func__);
1849 		goto fail;
1850 	}
1851 
1852 	/* Fill in basic CFG register values */
1853 	d40_phy_cfg(&d40c->dma_cfg, &d40c->src_def_cfg,
1854 		    &d40c->dst_def_cfg, d40c->log_num != D40_PHY_CHAN);
1855 
1856 	if (d40c->log_num != D40_PHY_CHAN) {
1857 		d40_log_cfg(&d40c->dma_cfg,
1858 			    &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
1859 
1860 		if (d40c->dma_cfg.dir == STEDMA40_PERIPH_TO_MEM)
1861 			d40c->lcpa = d40c->base->lcpa_base +
1862 			  d40c->dma_cfg.src_dev_type * D40_LCPA_CHAN_SIZE;
1863 		else
1864 			d40c->lcpa = d40c->base->lcpa_base +
1865 			  d40c->dma_cfg.dst_dev_type *
1866 			  D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA;
1867 	}
1868 
1869 	/*
1870 	 * Only write channel configuration to the DMA if the physical
1871 	 * resource is free. In case of multiple logical channels
1872 	 * on the same physical resource, only the first write is necessary.
1873 	 */
1874 	if (is_free_phy)
1875 		d40_config_write(d40c);
1876 fail:
1877 	spin_unlock_irqrestore(&d40c->lock, flags);
1878 	return err;
1879 }
1880 
1881 static void d40_free_chan_resources(struct dma_chan *chan)
1882 {
1883 	struct d40_chan *d40c =
1884 		container_of(chan, struct d40_chan, chan);
1885 	int err;
1886 	unsigned long flags;
1887 
1888 	if (d40c->phy_chan == NULL) {
1889 		dev_err(&d40c->chan.dev->device,
1890 			"[%s] Cannot free unallocated channel\n", __func__);
1891 		return;
1892 	}
1893 
1894 
1895 	spin_lock_irqsave(&d40c->lock, flags);
1896 
1897 	err = d40_free_dma(d40c);
1898 
1899 	if (err)
1900 		dev_err(&d40c->chan.dev->device,
1901 			"[%s] Failed to free channel\n", __func__);
1902 	spin_unlock_irqrestore(&d40c->lock, flags);
1903 }
1904 
1905 static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
1906 						       dma_addr_t dst,
1907 						       dma_addr_t src,
1908 						       size_t size,
1909 						       unsigned long dma_flags)
1910 {
1911 	struct d40_desc *d40d;
1912 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
1913 					     chan);
1914 	unsigned long flags;
1915 
1916 	if (d40c->phy_chan == NULL) {
1917 		dev_err(&d40c->chan.dev->device,
1918 			"[%s] Channel is not allocated.\n", __func__);
1919 		return ERR_PTR(-EINVAL);
1920 	}
1921 
1922 	spin_lock_irqsave(&d40c->lock, flags);
1923 	d40d = d40_desc_get(d40c);
1924 
1925 	if (d40d == NULL) {
1926 		dev_err(&d40c->chan.dev->device,
1927 			"[%s] Descriptor is NULL\n", __func__);
1928 		goto err;
1929 	}
1930 
1931 	d40d->txd.flags = dma_flags;
1932 	d40d->lli_len = d40_size_2_dmalen(size,
1933 					  d40c->dma_cfg.src_info.data_width,
1934 					  d40c->dma_cfg.dst_info.data_width);
1935 	if (d40d->lli_len < 0) {
1936 		dev_err(&d40c->chan.dev->device,
1937 			"[%s] Unaligned size\n", __func__);
1938 		goto err;
1939 	}
1940 
1941 
1942 	dma_async_tx_descriptor_init(&d40d->txd, chan);
1943 
1944 	d40d->txd.tx_submit = d40_tx_submit;
1945 
1946 	if (d40c->log_num != D40_PHY_CHAN) {
1947 
1948 		if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
1949 			dev_err(&d40c->chan.dev->device,
1950 				"[%s] Out of memory\n", __func__);
1951 			goto err;
1952 		}
1953 		d40d->lli_current = 0;
1954 
1955 		if (d40_log_buf_to_lli(d40d->lli_log.src,
1956 				       src,
1957 				       size,
1958 				       d40c->log_def.lcsp1,
1959 				       d40c->dma_cfg.src_info.data_width,
1960 				       d40c->dma_cfg.dst_info.data_width,
1961 				       true) == NULL)
1962 			goto err;
1963 
1964 		if (d40_log_buf_to_lli(d40d->lli_log.dst,
1965 				       dst,
1966 				       size,
1967 				       d40c->log_def.lcsp3,
1968 				       d40c->dma_cfg.dst_info.data_width,
1969 				       d40c->dma_cfg.src_info.data_width,
1970 				       true) == NULL)
1971 			goto err;
1972 
1973 	} else {
1974 
1975 		if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
1976 			dev_err(&d40c->chan.dev->device,
1977 				"[%s] Out of memory\n", __func__);
1978 			goto err;
1979 		}
1980 
1981 		if (d40_phy_buf_to_lli(d40d->lli_phy.src,
1982 				       src,
1983 				       size,
1984 				       d40c->dma_cfg.src_info.psize,
1985 				       0,
1986 				       d40c->src_def_cfg,
1987 				       true,
1988 				       d40c->dma_cfg.src_info.data_width,
1989 				       d40c->dma_cfg.dst_info.data_width,
1990 				       false) == NULL)
1991 			goto err;
1992 
1993 		if (d40_phy_buf_to_lli(d40d->lli_phy.dst,
1994 				       dst,
1995 				       size,
1996 				       d40c->dma_cfg.dst_info.psize,
1997 				       0,
1998 				       d40c->dst_def_cfg,
1999 				       true,
2000 				       d40c->dma_cfg.dst_info.data_width,
2001 				       d40c->dma_cfg.src_info.data_width,
2002 				       false) == NULL)
2003 			goto err;
2004 
2005 		(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
2006 				      d40d->lli_pool.size, DMA_TO_DEVICE);
2007 	}
2008 
2009 	spin_unlock_irqrestore(&d40c->lock, flags);
2010 	return &d40d->txd;
2011 
2012 err:
2013 	if (d40d)
2014 		d40_desc_free(d40c, d40d);
2015 	spin_unlock_irqrestore(&d40c->lock, flags);
2016 	return NULL;
2017 }
2018 
2019 static struct dma_async_tx_descriptor *
2020 d40_prep_sg(struct dma_chan *chan,
2021 	    struct scatterlist *dst_sg, unsigned int dst_nents,
2022 	    struct scatterlist *src_sg, unsigned int src_nents,
2023 	    unsigned long dma_flags)
2024 {
2025 	if (dst_nents != src_nents)
2026 		return NULL;
2027 
2028 	return stedma40_memcpy_sg(chan, dst_sg, src_sg, dst_nents, dma_flags);
2029 }
2030 
2031 static int d40_prep_slave_sg_log(struct d40_desc *d40d,
2032 				 struct d40_chan *d40c,
2033 				 struct scatterlist *sgl,
2034 				 unsigned int sg_len,
2035 				 enum dma_data_direction direction,
2036 				 unsigned long dma_flags)
2037 {
2038 	dma_addr_t dev_addr = 0;
2039 	int total_size;
2040 
2041 	d40d->lli_len = d40_sg_2_dmalen(sgl, sg_len,
2042 					d40c->dma_cfg.src_info.data_width,
2043 					d40c->dma_cfg.dst_info.data_width);
2044 	if (d40d->lli_len < 0) {
2045 		dev_err(&d40c->chan.dev->device,
2046 			"[%s] Unaligned size\n", __func__);
2047 		return -EINVAL;
2048 	}
2049 
2050 	if (d40_pool_lli_alloc(d40d, d40d->lli_len, true) < 0) {
2051 		dev_err(&d40c->chan.dev->device,
2052 			"[%s] Out of memory\n", __func__);
2053 		return -ENOMEM;
2054 	}
2055 
2056 	d40d->lli_current = 0;
2057 
2058 	if (direction == DMA_FROM_DEVICE)
2059 		if (d40c->runtime_addr)
2060 			dev_addr = d40c->runtime_addr;
2061 		else
2062 			dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type];
2063 	else if (direction == DMA_TO_DEVICE)
2064 		if (d40c->runtime_addr)
2065 			dev_addr = d40c->runtime_addr;
2066 		else
2067 			dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type];
2068 
2069 	else
2070 		return -EINVAL;
2071 
2072 	total_size = d40_log_sg_to_dev(sgl, sg_len,
2073 				       &d40d->lli_log,
2074 				       &d40c->log_def,
2075 				       d40c->dma_cfg.src_info.data_width,
2076 				       d40c->dma_cfg.dst_info.data_width,
2077 				       direction,
2078 				       dev_addr);
2079 
2080 	if (total_size < 0)
2081 		return -EINVAL;
2082 
2083 	return 0;
2084 }
2085 
2086 static int d40_prep_slave_sg_phy(struct d40_desc *d40d,
2087 				 struct d40_chan *d40c,
2088 				 struct scatterlist *sgl,
2089 				 unsigned int sgl_len,
2090 				 enum dma_data_direction direction,
2091 				 unsigned long dma_flags)
2092 {
2093 	dma_addr_t src_dev_addr;
2094 	dma_addr_t dst_dev_addr;
2095 	int res;
2096 
2097 	d40d->lli_len = d40_sg_2_dmalen(sgl, sgl_len,
2098 					d40c->dma_cfg.src_info.data_width,
2099 					d40c->dma_cfg.dst_info.data_width);
2100 	if (d40d->lli_len < 0) {
2101 		dev_err(&d40c->chan.dev->device,
2102 			"[%s] Unaligned size\n", __func__);
2103 		return -EINVAL;
2104 	}
2105 
2106 	if (d40_pool_lli_alloc(d40d, d40d->lli_len, false) < 0) {
2107 		dev_err(&d40c->chan.dev->device,
2108 			"[%s] Out of memory\n", __func__);
2109 		return -ENOMEM;
2110 	}
2111 
2112 	d40d->lli_current = 0;
2113 
2114 	if (direction == DMA_FROM_DEVICE) {
2115 		dst_dev_addr = 0;
2116 		if (d40c->runtime_addr)
2117 			src_dev_addr = d40c->runtime_addr;
2118 		else
2119 			src_dev_addr = d40c->base->plat_data->dev_rx[d40c->dma_cfg.src_dev_type];
2120 	} else if (direction == DMA_TO_DEVICE) {
2121 		if (d40c->runtime_addr)
2122 			dst_dev_addr = d40c->runtime_addr;
2123 		else
2124 			dst_dev_addr = d40c->base->plat_data->dev_tx[d40c->dma_cfg.dst_dev_type];
2125 		src_dev_addr = 0;
2126 	} else
2127 		return -EINVAL;
2128 
2129 	res = d40_phy_sg_to_lli(sgl,
2130 				sgl_len,
2131 				src_dev_addr,
2132 				d40d->lli_phy.src,
2133 				virt_to_phys(d40d->lli_phy.src),
2134 				d40c->src_def_cfg,
2135 				d40c->dma_cfg.src_info.data_width,
2136 				d40c->dma_cfg.dst_info.data_width,
2137 				d40c->dma_cfg.src_info.psize);
2138 	if (res < 0)
2139 		return res;
2140 
2141 	res = d40_phy_sg_to_lli(sgl,
2142 				sgl_len,
2143 				dst_dev_addr,
2144 				d40d->lli_phy.dst,
2145 				virt_to_phys(d40d->lli_phy.dst),
2146 				d40c->dst_def_cfg,
2147 				d40c->dma_cfg.dst_info.data_width,
2148 				d40c->dma_cfg.src_info.data_width,
2149 				d40c->dma_cfg.dst_info.psize);
2150 	if (res < 0)
2151 		return res;
2152 
2153 	(void) dma_map_single(d40c->base->dev, d40d->lli_phy.src,
2154 			      d40d->lli_pool.size, DMA_TO_DEVICE);
2155 	return 0;
2156 }
2157 
2158 static struct dma_async_tx_descriptor *d40_prep_slave_sg(struct dma_chan *chan,
2159 							 struct scatterlist *sgl,
2160 							 unsigned int sg_len,
2161 							 enum dma_data_direction direction,
2162 							 unsigned long dma_flags)
2163 {
2164 	struct d40_desc *d40d;
2165 	struct d40_chan *d40c = container_of(chan, struct d40_chan,
2166 					     chan);
2167 	unsigned long flags;
2168 	int err;
2169 
2170 	if (d40c->phy_chan == NULL) {
2171 		dev_err(&d40c->chan.dev->device,
2172 			"[%s] Cannot prepare unallocated channel\n", __func__);
2173 		return ERR_PTR(-EINVAL);
2174 	}
2175 
2176 	spin_lock_irqsave(&d40c->lock, flags);
2177 	d40d = d40_desc_get(d40c);
2178 
2179 	if (d40d == NULL)
2180 		goto err;
2181 
2182 	if (d40c->log_num != D40_PHY_CHAN)
2183 		err = d40_prep_slave_sg_log(d40d, d40c, sgl, sg_len,
2184 					    direction, dma_flags);
2185 	else
2186 		err = d40_prep_slave_sg_phy(d40d, d40c, sgl, sg_len,
2187 					    direction, dma_flags);
2188 	if (err) {
2189 		dev_err(&d40c->chan.dev->device,
2190 			"[%s] Failed to prepare %s slave sg job: %d\n",
2191 			__func__,
2192 			d40c->log_num != D40_PHY_CHAN ? "log" : "phy", err);
2193 		goto err;
2194 	}
2195 
2196 	d40d->txd.flags = dma_flags;
2197 
2198 	dma_async_tx_descriptor_init(&d40d->txd, chan);
2199 
2200 	d40d->txd.tx_submit = d40_tx_submit;
2201 
2202 	spin_unlock_irqrestore(&d40c->lock, flags);
2203 	return &d40d->txd;
2204 
2205 err:
2206 	if (d40d)
2207 		d40_desc_free(d40c, d40d);
2208 	spin_unlock_irqrestore(&d40c->lock, flags);
2209 	return NULL;
2210 }
2211 
2212 static enum dma_status d40_tx_status(struct dma_chan *chan,
2213 				     dma_cookie_t cookie,
2214 				     struct dma_tx_state *txstate)
2215 {
2216 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2217 	dma_cookie_t last_used;
2218 	dma_cookie_t last_complete;
2219 	int ret;
2220 
2221 	if (d40c->phy_chan == NULL) {
2222 		dev_err(&d40c->chan.dev->device,
2223 			"[%s] Cannot read status of unallocated channel\n",
2224 			__func__);
2225 		return -EINVAL;
2226 	}
2227 
2228 	last_complete = d40c->completed;
2229 	last_used = chan->cookie;
2230 
2231 	if (d40_is_paused(d40c))
2232 		ret = DMA_PAUSED;
2233 	else
2234 		ret = dma_async_is_complete(cookie, last_complete, last_used);
2235 
2236 	dma_set_tx_state(txstate, last_complete, last_used,
2237 			 stedma40_residue(chan));
2238 
2239 	return ret;
2240 }
2241 
2242 static void d40_issue_pending(struct dma_chan *chan)
2243 {
2244 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2245 	unsigned long flags;
2246 
2247 	if (d40c->phy_chan == NULL) {
2248 		dev_err(&d40c->chan.dev->device,
2249 			"[%s] Channel is not allocated!\n", __func__);
2250 		return;
2251 	}
2252 
2253 	spin_lock_irqsave(&d40c->lock, flags);
2254 
2255 	/* Busy means that pending jobs are already being processed */
2256 	if (!d40c->busy)
2257 		(void) d40_queue_start(d40c);
2258 
2259 	spin_unlock_irqrestore(&d40c->lock, flags);
2260 }
2261 
2262 /* Runtime reconfiguration extension */
2263 static void d40_set_runtime_config(struct dma_chan *chan,
2264 			       struct dma_slave_config *config)
2265 {
2266 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2267 	struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
2268 	enum dma_slave_buswidth config_addr_width;
2269 	dma_addr_t config_addr;
2270 	u32 config_maxburst;
2271 	enum stedma40_periph_data_width addr_width;
2272 	int psize;
2273 
2274 	if (config->direction == DMA_FROM_DEVICE) {
2275 		dma_addr_t dev_addr_rx =
2276 			d40c->base->plat_data->dev_rx[cfg->src_dev_type];
2277 
2278 		config_addr = config->src_addr;
2279 		if (dev_addr_rx)
2280 			dev_dbg(d40c->base->dev,
2281 				"channel has a pre-wired RX address %08x "
2282 				"overriding with %08x\n",
2283 				dev_addr_rx, config_addr);
2284 		if (cfg->dir != STEDMA40_PERIPH_TO_MEM)
2285 			dev_dbg(d40c->base->dev,
2286 				"channel was not configured for peripheral "
2287 				"to memory transfer (%d) overriding\n",
2288 				cfg->dir);
2289 		cfg->dir = STEDMA40_PERIPH_TO_MEM;
2290 
2291 		config_addr_width = config->src_addr_width;
2292 		config_maxburst = config->src_maxburst;
2293 
2294 	} else if (config->direction == DMA_TO_DEVICE) {
2295 		dma_addr_t dev_addr_tx =
2296 			d40c->base->plat_data->dev_tx[cfg->dst_dev_type];
2297 
2298 		config_addr = config->dst_addr;
2299 		if (dev_addr_tx)
2300 			dev_dbg(d40c->base->dev,
2301 				"channel has a pre-wired TX address %08x "
2302 				"overriding with %08x\n",
2303 				dev_addr_tx, config_addr);
2304 		if (cfg->dir != STEDMA40_MEM_TO_PERIPH)
2305 			dev_dbg(d40c->base->dev,
2306 				"channel was not configured for memory "
2307 				"to peripheral transfer (%d) overriding\n",
2308 				cfg->dir);
2309 		cfg->dir = STEDMA40_MEM_TO_PERIPH;
2310 
2311 		config_addr_width = config->dst_addr_width;
2312 		config_maxburst = config->dst_maxburst;
2313 
2314 	} else {
2315 		dev_err(d40c->base->dev,
2316 			"unrecognized channel direction %d\n",
2317 			config->direction);
2318 		return;
2319 	}
2320 
2321 	switch (config_addr_width) {
2322 	case DMA_SLAVE_BUSWIDTH_1_BYTE:
2323 		addr_width = STEDMA40_BYTE_WIDTH;
2324 		break;
2325 	case DMA_SLAVE_BUSWIDTH_2_BYTES:
2326 		addr_width = STEDMA40_HALFWORD_WIDTH;
2327 		break;
2328 	case DMA_SLAVE_BUSWIDTH_4_BYTES:
2329 		addr_width = STEDMA40_WORD_WIDTH;
2330 		break;
2331 	case DMA_SLAVE_BUSWIDTH_8_BYTES:
2332 		addr_width = STEDMA40_DOUBLEWORD_WIDTH;
2333 		break;
2334 	default:
2335 		dev_err(d40c->base->dev,
2336 			"illegal peripheral address width "
2337 			"requested (%d)\n",
2338 			config->src_addr_width);
2339 		return;
2340 	}
2341 
2342 	if (d40c->log_num != D40_PHY_CHAN) {
2343 		if (config_maxburst >= 16)
2344 			psize = STEDMA40_PSIZE_LOG_16;
2345 		else if (config_maxburst >= 8)
2346 			psize = STEDMA40_PSIZE_LOG_8;
2347 		else if (config_maxburst >= 4)
2348 			psize = STEDMA40_PSIZE_LOG_4;
2349 		else
2350 			psize = STEDMA40_PSIZE_LOG_1;
2351 	} else {
2352 		if (config_maxburst >= 16)
2353 			psize = STEDMA40_PSIZE_PHY_16;
2354 		else if (config_maxburst >= 8)
2355 			psize = STEDMA40_PSIZE_PHY_8;
2356 		else if (config_maxburst >= 4)
2357 			psize = STEDMA40_PSIZE_PHY_4;
2358 		else if (config_maxburst >= 2)
2359 			psize = STEDMA40_PSIZE_PHY_2;
2360 		else
2361 			psize = STEDMA40_PSIZE_PHY_1;
2362 	}
2363 
2364 	/* Set up all the endpoint configs */
2365 	cfg->src_info.data_width = addr_width;
2366 	cfg->src_info.psize = psize;
2367 	cfg->src_info.big_endian = false;
2368 	cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
2369 	cfg->dst_info.data_width = addr_width;
2370 	cfg->dst_info.psize = psize;
2371 	cfg->dst_info.big_endian = false;
2372 	cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
2373 
2374 	/* Fill in register values */
2375 	if (d40c->log_num != D40_PHY_CHAN)
2376 		d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
2377 	else
2378 		d40_phy_cfg(cfg, &d40c->src_def_cfg,
2379 			    &d40c->dst_def_cfg, false);
2380 
2381 	/* These settings will take precedence later */
2382 	d40c->runtime_addr = config_addr;
2383 	d40c->runtime_direction = config->direction;
2384 	dev_dbg(d40c->base->dev,
2385 		"configured channel %s for %s, data width %d, "
2386 		"maxburst %d bytes, LE, no flow control\n",
2387 		dma_chan_name(chan),
2388 		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
2389 		config_addr_width,
2390 		config_maxburst);
2391 }
2392 
2393 static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
2394 		       unsigned long arg)
2395 {
2396 	unsigned long flags;
2397 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
2398 
2399 	if (d40c->phy_chan == NULL) {
2400 		dev_err(&d40c->chan.dev->device,
2401 			"[%s] Channel is not allocated!\n", __func__);
2402 		return -EINVAL;
2403 	}
2404 
2405 	switch (cmd) {
2406 	case DMA_TERMINATE_ALL:
2407 		spin_lock_irqsave(&d40c->lock, flags);
2408 		d40_term_all(d40c);
2409 		spin_unlock_irqrestore(&d40c->lock, flags);
2410 		return 0;
2411 	case DMA_PAUSE:
2412 		return d40_pause(chan);
2413 	case DMA_RESUME:
2414 		return d40_resume(chan);
2415 	case DMA_SLAVE_CONFIG:
2416 		d40_set_runtime_config(chan,
2417 			(struct dma_slave_config *) arg);
2418 		return 0;
2419 	default:
2420 		break;
2421 	}
2422 
2423 	/* Other commands are unimplemented */
2424 	return -ENXIO;
2425 }
2426 
2427 /* Initialization functions */
2428 
2429 static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
2430 				 struct d40_chan *chans, int offset,
2431 				 int num_chans)
2432 {
2433 	int i = 0;
2434 	struct d40_chan *d40c;
2435 
2436 	INIT_LIST_HEAD(&dma->channels);
2437 
2438 	for (i = offset; i < offset + num_chans; i++) {
2439 		d40c = &chans[i];
2440 		d40c->base = base;
2441 		d40c->chan.device = dma;
2442 
2443 		spin_lock_init(&d40c->lock);
2444 
2445 		d40c->log_num = D40_PHY_CHAN;
2446 
2447 		INIT_LIST_HEAD(&d40c->active);
2448 		INIT_LIST_HEAD(&d40c->queue);
2449 		INIT_LIST_HEAD(&d40c->client);
2450 
2451 		tasklet_init(&d40c->tasklet, dma_tasklet,
2452 			     (unsigned long) d40c);
2453 
2454 		list_add_tail(&d40c->chan.device_node,
2455 			      &dma->channels);
2456 	}
2457 }
2458 
2459 static int __init d40_dmaengine_init(struct d40_base *base,
2460 				     int num_reserved_chans)
2461 {
2462 	int err ;
2463 
2464 	d40_chan_init(base, &base->dma_slave, base->log_chans,
2465 		      0, base->num_log_chans);
2466 
2467 	dma_cap_zero(base->dma_slave.cap_mask);
2468 	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
2469 
2470 	base->dma_slave.device_alloc_chan_resources = d40_alloc_chan_resources;
2471 	base->dma_slave.device_free_chan_resources = d40_free_chan_resources;
2472 	base->dma_slave.device_prep_dma_memcpy = d40_prep_memcpy;
2473 	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
2474 	base->dma_slave.device_prep_slave_sg = d40_prep_slave_sg;
2475 	base->dma_slave.device_tx_status = d40_tx_status;
2476 	base->dma_slave.device_issue_pending = d40_issue_pending;
2477 	base->dma_slave.device_control = d40_control;
2478 	base->dma_slave.dev = base->dev;
2479 
2480 	err = dma_async_device_register(&base->dma_slave);
2481 
2482 	if (err) {
2483 		dev_err(base->dev,
2484 			"[%s] Failed to register slave channels\n",
2485 			__func__);
2486 		goto failure1;
2487 	}
2488 
2489 	d40_chan_init(base, &base->dma_memcpy, base->log_chans,
2490 		      base->num_log_chans, base->plat_data->memcpy_len);
2491 
2492 	dma_cap_zero(base->dma_memcpy.cap_mask);
2493 	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
2494 	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
2495 
2496 	base->dma_memcpy.device_alloc_chan_resources = d40_alloc_chan_resources;
2497 	base->dma_memcpy.device_free_chan_resources = d40_free_chan_resources;
2498 	base->dma_memcpy.device_prep_dma_memcpy = d40_prep_memcpy;
2499 	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
2500 	base->dma_memcpy.device_prep_slave_sg = d40_prep_slave_sg;
2501 	base->dma_memcpy.device_tx_status = d40_tx_status;
2502 	base->dma_memcpy.device_issue_pending = d40_issue_pending;
2503 	base->dma_memcpy.device_control = d40_control;
2504 	base->dma_memcpy.dev = base->dev;
2505 	/*
2506 	 * This controller can only access address at even
2507 	 * 32bit boundaries, i.e. 2^2
2508 	 */
2509 	base->dma_memcpy.copy_align = 2;
2510 
2511 	err = dma_async_device_register(&base->dma_memcpy);
2512 
2513 	if (err) {
2514 		dev_err(base->dev,
2515 			"[%s] Failed to regsiter memcpy only channels\n",
2516 			__func__);
2517 		goto failure2;
2518 	}
2519 
2520 	d40_chan_init(base, &base->dma_both, base->phy_chans,
2521 		      0, num_reserved_chans);
2522 
2523 	dma_cap_zero(base->dma_both.cap_mask);
2524 	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
2525 	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
2526 	dma_cap_set(DMA_SG, base->dma_slave.cap_mask);
2527 
2528 	base->dma_both.device_alloc_chan_resources = d40_alloc_chan_resources;
2529 	base->dma_both.device_free_chan_resources = d40_free_chan_resources;
2530 	base->dma_both.device_prep_dma_memcpy = d40_prep_memcpy;
2531 	base->dma_slave.device_prep_dma_sg = d40_prep_sg;
2532 	base->dma_both.device_prep_slave_sg = d40_prep_slave_sg;
2533 	base->dma_both.device_tx_status = d40_tx_status;
2534 	base->dma_both.device_issue_pending = d40_issue_pending;
2535 	base->dma_both.device_control = d40_control;
2536 	base->dma_both.dev = base->dev;
2537 	base->dma_both.copy_align = 2;
2538 	err = dma_async_device_register(&base->dma_both);
2539 
2540 	if (err) {
2541 		dev_err(base->dev,
2542 			"[%s] Failed to register logical and physical capable channels\n",
2543 			__func__);
2544 		goto failure3;
2545 	}
2546 	return 0;
2547 failure3:
2548 	dma_async_device_unregister(&base->dma_memcpy);
2549 failure2:
2550 	dma_async_device_unregister(&base->dma_slave);
2551 failure1:
2552 	return err;
2553 }
2554 
2555 /* Initialization functions. */
2556 
2557 static int __init d40_phy_res_init(struct d40_base *base)
2558 {
2559 	int i;
2560 	int num_phy_chans_avail = 0;
2561 	u32 val[2];
2562 	int odd_even_bit = -2;
2563 
2564 	val[0] = readl(base->virtbase + D40_DREG_PRSME);
2565 	val[1] = readl(base->virtbase + D40_DREG_PRSMO);
2566 
2567 	for (i = 0; i < base->num_phy_chans; i++) {
2568 		base->phy_res[i].num = i;
2569 		odd_even_bit += 2 * ((i % 2) == 0);
2570 		if (((val[i % 2] >> odd_even_bit) & 3) == 1) {
2571 			/* Mark security only channels as occupied */
2572 			base->phy_res[i].allocated_src = D40_ALLOC_PHY;
2573 			base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
2574 		} else {
2575 			base->phy_res[i].allocated_src = D40_ALLOC_FREE;
2576 			base->phy_res[i].allocated_dst = D40_ALLOC_FREE;
2577 			num_phy_chans_avail++;
2578 		}
2579 		spin_lock_init(&base->phy_res[i].lock);
2580 	}
2581 
2582 	/* Mark disabled channels as occupied */
2583 	for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) {
2584 		int chan = base->plat_data->disabled_channels[i];
2585 
2586 		base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
2587 		base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
2588 		num_phy_chans_avail--;
2589 	}
2590 
2591 	dev_info(base->dev, "%d of %d physical DMA channels available\n",
2592 		 num_phy_chans_avail, base->num_phy_chans);
2593 
2594 	/* Verify settings extended vs standard */
2595 	val[0] = readl(base->virtbase + D40_DREG_PRTYP);
2596 
2597 	for (i = 0; i < base->num_phy_chans; i++) {
2598 
2599 		if (base->phy_res[i].allocated_src == D40_ALLOC_FREE &&
2600 		    (val[0] & 0x3) != 1)
2601 			dev_info(base->dev,
2602 				 "[%s] INFO: channel %d is misconfigured (%d)\n",
2603 				 __func__, i, val[0] & 0x3);
2604 
2605 		val[0] = val[0] >> 2;
2606 	}
2607 
2608 	return num_phy_chans_avail;
2609 }
2610 
2611 static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
2612 {
2613 	static const struct d40_reg_val dma_id_regs[] = {
2614 		/* Peripheral Id */
2615 		{ .reg = D40_DREG_PERIPHID0, .val = 0x0040},
2616 		{ .reg = D40_DREG_PERIPHID1, .val = 0x0000},
2617 		/*
2618 		 * D40_DREG_PERIPHID2 Depends on HW revision:
2619 		 *  MOP500/HREF ED has 0x0008,
2620 		 *  ? has 0x0018,
2621 		 *  HREF V1 has 0x0028
2622 		 */
2623 		{ .reg = D40_DREG_PERIPHID3, .val = 0x0000},
2624 
2625 		/* PCell Id */
2626 		{ .reg = D40_DREG_CELLID0, .val = 0x000d},
2627 		{ .reg = D40_DREG_CELLID1, .val = 0x00f0},
2628 		{ .reg = D40_DREG_CELLID2, .val = 0x0005},
2629 		{ .reg = D40_DREG_CELLID3, .val = 0x00b1}
2630 	};
2631 	struct stedma40_platform_data *plat_data;
2632 	struct clk *clk = NULL;
2633 	void __iomem *virtbase = NULL;
2634 	struct resource *res = NULL;
2635 	struct d40_base *base = NULL;
2636 	int num_log_chans = 0;
2637 	int num_phy_chans;
2638 	int i;
2639 	u32 val;
2640 	u32 rev;
2641 
2642 	clk = clk_get(&pdev->dev, NULL);
2643 
2644 	if (IS_ERR(clk)) {
2645 		dev_err(&pdev->dev, "[%s] No matching clock found\n",
2646 			__func__);
2647 		goto failure;
2648 	}
2649 
2650 	clk_enable(clk);
2651 
2652 	/* Get IO for DMAC base address */
2653 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
2654 	if (!res)
2655 		goto failure;
2656 
2657 	if (request_mem_region(res->start, resource_size(res),
2658 			       D40_NAME " I/O base") == NULL)
2659 		goto failure;
2660 
2661 	virtbase = ioremap(res->start, resource_size(res));
2662 	if (!virtbase)
2663 		goto failure;
2664 
2665 	/* HW version check */
2666 	for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) {
2667 		if (dma_id_regs[i].val !=
2668 		    readl(virtbase + dma_id_regs[i].reg)) {
2669 			dev_err(&pdev->dev,
2670 				"[%s] Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n",
2671 				__func__,
2672 				dma_id_regs[i].val,
2673 				dma_id_regs[i].reg,
2674 				readl(virtbase + dma_id_regs[i].reg));
2675 			goto failure;
2676 		}
2677 	}
2678 
2679 	/* Get silicon revision and designer */
2680 	val = readl(virtbase + D40_DREG_PERIPHID2);
2681 
2682 	if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) !=
2683 	    D40_HW_DESIGNER) {
2684 		dev_err(&pdev->dev,
2685 			"[%s] Unknown designer! Got %x wanted %x\n",
2686 			__func__, val & D40_DREG_PERIPHID2_DESIGNER_MASK,
2687 			D40_HW_DESIGNER);
2688 		goto failure;
2689 	}
2690 
2691 	rev = (val & D40_DREG_PERIPHID2_REV_MASK) >>
2692 		D40_DREG_PERIPHID2_REV_POS;
2693 
2694 	/* The number of physical channels on this HW */
2695 	num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
2696 
2697 	dev_info(&pdev->dev, "hardware revision: %d @ 0x%x\n",
2698 		 rev, res->start);
2699 
2700 	plat_data = pdev->dev.platform_data;
2701 
2702 	/* Count the number of logical channels in use */
2703 	for (i = 0; i < plat_data->dev_len; i++)
2704 		if (plat_data->dev_rx[i] != 0)
2705 			num_log_chans++;
2706 
2707 	for (i = 0; i < plat_data->dev_len; i++)
2708 		if (plat_data->dev_tx[i] != 0)
2709 			num_log_chans++;
2710 
2711 	base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
2712 		       (num_phy_chans + num_log_chans + plat_data->memcpy_len) *
2713 		       sizeof(struct d40_chan), GFP_KERNEL);
2714 
2715 	if (base == NULL) {
2716 		dev_err(&pdev->dev, "[%s] Out of memory\n", __func__);
2717 		goto failure;
2718 	}
2719 
2720 	base->rev = rev;
2721 	base->clk = clk;
2722 	base->num_phy_chans = num_phy_chans;
2723 	base->num_log_chans = num_log_chans;
2724 	base->phy_start = res->start;
2725 	base->phy_size = resource_size(res);
2726 	base->virtbase = virtbase;
2727 	base->plat_data = plat_data;
2728 	base->dev = &pdev->dev;
2729 	base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4);
2730 	base->log_chans = &base->phy_chans[num_phy_chans];
2731 
2732 	base->phy_res = kzalloc(num_phy_chans * sizeof(struct d40_phy_res),
2733 				GFP_KERNEL);
2734 	if (!base->phy_res)
2735 		goto failure;
2736 
2737 	base->lookup_phy_chans = kzalloc(num_phy_chans *
2738 					 sizeof(struct d40_chan *),
2739 					 GFP_KERNEL);
2740 	if (!base->lookup_phy_chans)
2741 		goto failure;
2742 
2743 	if (num_log_chans + plat_data->memcpy_len) {
2744 		/*
2745 		 * The max number of logical channels are event lines for all
2746 		 * src devices and dst devices
2747 		 */
2748 		base->lookup_log_chans = kzalloc(plat_data->dev_len * 2 *
2749 						 sizeof(struct d40_chan *),
2750 						 GFP_KERNEL);
2751 		if (!base->lookup_log_chans)
2752 			goto failure;
2753 	}
2754 
2755 	base->lcla_pool.alloc_map = kzalloc(num_phy_chans *
2756 					    sizeof(struct d40_desc *) *
2757 					    D40_LCLA_LINK_PER_EVENT_GRP,
2758 					    GFP_KERNEL);
2759 	if (!base->lcla_pool.alloc_map)
2760 		goto failure;
2761 
2762 	base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc),
2763 					    0, SLAB_HWCACHE_ALIGN,
2764 					    NULL);
2765 	if (base->desc_slab == NULL)
2766 		goto failure;
2767 
2768 	return base;
2769 
2770 failure:
2771 	if (!IS_ERR(clk)) {
2772 		clk_disable(clk);
2773 		clk_put(clk);
2774 	}
2775 	if (virtbase)
2776 		iounmap(virtbase);
2777 	if (res)
2778 		release_mem_region(res->start,
2779 				   resource_size(res));
2780 	if (virtbase)
2781 		iounmap(virtbase);
2782 
2783 	if (base) {
2784 		kfree(base->lcla_pool.alloc_map);
2785 		kfree(base->lookup_log_chans);
2786 		kfree(base->lookup_phy_chans);
2787 		kfree(base->phy_res);
2788 		kfree(base);
2789 	}
2790 
2791 	return NULL;
2792 }
2793 
2794 static void __init d40_hw_init(struct d40_base *base)
2795 {
2796 
2797 	static const struct d40_reg_val dma_init_reg[] = {
2798 		/* Clock every part of the DMA block from start */
2799 		{ .reg = D40_DREG_GCC,    .val = 0x0000ff01},
2800 
2801 		/* Interrupts on all logical channels */
2802 		{ .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF},
2803 		{ .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF},
2804 		{ .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF},
2805 		{ .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF},
2806 		{ .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF},
2807 		{ .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF},
2808 		{ .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF},
2809 		{ .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF},
2810 		{ .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF},
2811 		{ .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF},
2812 		{ .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF},
2813 		{ .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF}
2814 	};
2815 	int i;
2816 	u32 prmseo[2] = {0, 0};
2817 	u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF};
2818 	u32 pcmis = 0;
2819 	u32 pcicr = 0;
2820 
2821 	for (i = 0; i < ARRAY_SIZE(dma_init_reg); i++)
2822 		writel(dma_init_reg[i].val,
2823 		       base->virtbase + dma_init_reg[i].reg);
2824 
2825 	/* Configure all our dma channels to default settings */
2826 	for (i = 0; i < base->num_phy_chans; i++) {
2827 
2828 		activeo[i % 2] = activeo[i % 2] << 2;
2829 
2830 		if (base->phy_res[base->num_phy_chans - i - 1].allocated_src
2831 		    == D40_ALLOC_PHY) {
2832 			activeo[i % 2] |= 3;
2833 			continue;
2834 		}
2835 
2836 		/* Enable interrupt # */
2837 		pcmis = (pcmis << 1) | 1;
2838 
2839 		/* Clear interrupt # */
2840 		pcicr = (pcicr << 1) | 1;
2841 
2842 		/* Set channel to physical mode */
2843 		prmseo[i % 2] = prmseo[i % 2] << 2;
2844 		prmseo[i % 2] |= 1;
2845 
2846 	}
2847 
2848 	writel(prmseo[1], base->virtbase + D40_DREG_PRMSE);
2849 	writel(prmseo[0], base->virtbase + D40_DREG_PRMSO);
2850 	writel(activeo[1], base->virtbase + D40_DREG_ACTIVE);
2851 	writel(activeo[0], base->virtbase + D40_DREG_ACTIVO);
2852 
2853 	/* Write which interrupt to enable */
2854 	writel(pcmis, base->virtbase + D40_DREG_PCMIS);
2855 
2856 	/* Write which interrupt to clear */
2857 	writel(pcicr, base->virtbase + D40_DREG_PCICR);
2858 
2859 }
2860 
2861 static int __init d40_lcla_allocate(struct d40_base *base)
2862 {
2863 	unsigned long *page_list;
2864 	int i, j;
2865 	int ret = 0;
2866 
2867 	/*
2868 	 * This is somewhat ugly. We need 8192 bytes that are 18 bit aligned,
2869 	 * To full fill this hardware requirement without wasting 256 kb
2870 	 * we allocate pages until we get an aligned one.
2871 	 */
2872 	page_list = kmalloc(sizeof(unsigned long) * MAX_LCLA_ALLOC_ATTEMPTS,
2873 			    GFP_KERNEL);
2874 
2875 	if (!page_list) {
2876 		ret = -ENOMEM;
2877 		goto failure;
2878 	}
2879 
2880 	/* Calculating how many pages that are required */
2881 	base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE;
2882 
2883 	for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
2884 		page_list[i] = __get_free_pages(GFP_KERNEL,
2885 						base->lcla_pool.pages);
2886 		if (!page_list[i]) {
2887 
2888 			dev_err(base->dev,
2889 				"[%s] Failed to allocate %d pages.\n",
2890 				__func__, base->lcla_pool.pages);
2891 
2892 			for (j = 0; j < i; j++)
2893 				free_pages(page_list[j], base->lcla_pool.pages);
2894 			goto failure;
2895 		}
2896 
2897 		if ((virt_to_phys((void *)page_list[i]) &
2898 		     (LCLA_ALIGNMENT - 1)) == 0)
2899 			break;
2900 	}
2901 
2902 	for (j = 0; j < i; j++)
2903 		free_pages(page_list[j], base->lcla_pool.pages);
2904 
2905 	if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
2906 		base->lcla_pool.base = (void *)page_list[i];
2907 	} else {
2908 		/*
2909 		 * After many attempts and no succees with finding the correct
2910 		 * alignment, try with allocating a big buffer.
2911 		 */
2912 		dev_warn(base->dev,
2913 			 "[%s] Failed to get %d pages @ 18 bit align.\n",
2914 			 __func__, base->lcla_pool.pages);
2915 		base->lcla_pool.base_unaligned = kmalloc(SZ_1K *
2916 							 base->num_phy_chans +
2917 							 LCLA_ALIGNMENT,
2918 							 GFP_KERNEL);
2919 		if (!base->lcla_pool.base_unaligned) {
2920 			ret = -ENOMEM;
2921 			goto failure;
2922 		}
2923 
2924 		base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned,
2925 						 LCLA_ALIGNMENT);
2926 	}
2927 
2928 	writel(virt_to_phys(base->lcla_pool.base),
2929 	       base->virtbase + D40_DREG_LCLA);
2930 failure:
2931 	kfree(page_list);
2932 	return ret;
2933 }
2934 
2935 static int __init d40_probe(struct platform_device *pdev)
2936 {
2937 	int err;
2938 	int ret = -ENOENT;
2939 	struct d40_base *base;
2940 	struct resource *res = NULL;
2941 	int num_reserved_chans;
2942 	u32 val;
2943 
2944 	base = d40_hw_detect_init(pdev);
2945 
2946 	if (!base)
2947 		goto failure;
2948 
2949 	num_reserved_chans = d40_phy_res_init(base);
2950 
2951 	platform_set_drvdata(pdev, base);
2952 
2953 	spin_lock_init(&base->interrupt_lock);
2954 	spin_lock_init(&base->execmd_lock);
2955 
2956 	/* Get IO for logical channel parameter address */
2957 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa");
2958 	if (!res) {
2959 		ret = -ENOENT;
2960 		dev_err(&pdev->dev,
2961 			"[%s] No \"lcpa\" memory resource\n",
2962 			__func__);
2963 		goto failure;
2964 	}
2965 	base->lcpa_size = resource_size(res);
2966 	base->phy_lcpa = res->start;
2967 
2968 	if (request_mem_region(res->start, resource_size(res),
2969 			       D40_NAME " I/O lcpa") == NULL) {
2970 		ret = -EBUSY;
2971 		dev_err(&pdev->dev,
2972 			"[%s] Failed to request LCPA region 0x%x-0x%x\n",
2973 			__func__, res->start, res->end);
2974 		goto failure;
2975 	}
2976 
2977 	/* We make use of ESRAM memory for this. */
2978 	val = readl(base->virtbase + D40_DREG_LCPA);
2979 	if (res->start != val && val != 0) {
2980 		dev_warn(&pdev->dev,
2981 			 "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n",
2982 			 __func__, val, res->start);
2983 	} else
2984 		writel(res->start, base->virtbase + D40_DREG_LCPA);
2985 
2986 	base->lcpa_base = ioremap(res->start, resource_size(res));
2987 	if (!base->lcpa_base) {
2988 		ret = -ENOMEM;
2989 		dev_err(&pdev->dev,
2990 			"[%s] Failed to ioremap LCPA region\n",
2991 			__func__);
2992 		goto failure;
2993 	}
2994 
2995 	ret = d40_lcla_allocate(base);
2996 	if (ret) {
2997 		dev_err(&pdev->dev, "[%s] Failed to allocate LCLA area\n",
2998 			__func__);
2999 		goto failure;
3000 	}
3001 
3002 	spin_lock_init(&base->lcla_pool.lock);
3003 
3004 	base->irq = platform_get_irq(pdev, 0);
3005 
3006 	ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
3007 
3008 	if (ret) {
3009 		dev_err(&pdev->dev, "[%s] No IRQ defined\n", __func__);
3010 		goto failure;
3011 	}
3012 
3013 	err = d40_dmaengine_init(base, num_reserved_chans);
3014 	if (err)
3015 		goto failure;
3016 
3017 	d40_hw_init(base);
3018 
3019 	dev_info(base->dev, "initialized\n");
3020 	return 0;
3021 
3022 failure:
3023 	if (base) {
3024 		if (base->desc_slab)
3025 			kmem_cache_destroy(base->desc_slab);
3026 		if (base->virtbase)
3027 			iounmap(base->virtbase);
3028 		if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
3029 			free_pages((unsigned long)base->lcla_pool.base,
3030 				   base->lcla_pool.pages);
3031 
3032 		kfree(base->lcla_pool.base_unaligned);
3033 
3034 		if (base->phy_lcpa)
3035 			release_mem_region(base->phy_lcpa,
3036 					   base->lcpa_size);
3037 		if (base->phy_start)
3038 			release_mem_region(base->phy_start,
3039 					   base->phy_size);
3040 		if (base->clk) {
3041 			clk_disable(base->clk);
3042 			clk_put(base->clk);
3043 		}
3044 
3045 		kfree(base->lcla_pool.alloc_map);
3046 		kfree(base->lookup_log_chans);
3047 		kfree(base->lookup_phy_chans);
3048 		kfree(base->phy_res);
3049 		kfree(base);
3050 	}
3051 
3052 	dev_err(&pdev->dev, "[%s] probe failed\n", __func__);
3053 	return ret;
3054 }
3055 
3056 static struct platform_driver d40_driver = {
3057 	.driver = {
3058 		.owner = THIS_MODULE,
3059 		.name  = D40_NAME,
3060 	},
3061 };
3062 
3063 int __init stedma40_init(void)
3064 {
3065 	return platform_driver_probe(&d40_driver, d40_probe);
3066 }
3067 arch_initcall(stedma40_init);
3068