1dfed0098SZhou Wang // SPDX-License-Identifier: GPL-2.0
2dfed0098SZhou Wang /* Copyright (c) 2019 HiSilicon Limited. */
3948e35f1SYang Shen #include <linux/align.h>
4dfed0098SZhou Wang #include <linux/dma-mapping.h>
5ff5812e0SShameer Kolothum #include <linux/hisi_acc_qm.h>
6dfed0098SZhou Wang #include <linux/module.h>
748c1cd40SZhou Wang #include <linux/slab.h>
8dfed0098SZhou Wang
/* Lower bound on SGEs per hw SGL; not referenced in this file's visible code. */
#define HISI_ACC_SGL_SGE_NR_MIN		1
/* Upper bound on hw SGLs; not referenced in this file's visible code. */
#define HISI_ACC_SGL_NR_MAX		256
/* Each hw SGL (header + SGE array) is rounded up to this many bytes. */
#define HISI_ACC_SGL_ALIGN_SIZE		64
/* Number of DMA memory block slots a single pool can hold. */
#define HISI_ACC_MEM_BLOCK_NR		5
13dfed0098SZhou Wang
14dfed0098SZhou Wang struct acc_hw_sge {
15dfed0098SZhou Wang dma_addr_t buf;
16dfed0098SZhou Wang void *page_ctrl;
17dfed0098SZhou Wang __le32 len;
18dfed0098SZhou Wang __le32 pad;
19dfed0098SZhou Wang __le32 pad0;
20dfed0098SZhou Wang __le32 pad1;
21dfed0098SZhou Wang };
22dfed0098SZhou Wang
23dfed0098SZhou Wang /* use default sgl head size 64B */
24dfed0098SZhou Wang struct hisi_acc_hw_sgl {
25dfed0098SZhou Wang dma_addr_t next_dma;
26dfed0098SZhou Wang __le16 entry_sum_in_chain;
27dfed0098SZhou Wang __le16 entry_sum_in_sgl;
28dfed0098SZhou Wang __le16 entry_length_in_sgl;
29dfed0098SZhou Wang __le16 pad0;
30dfed0098SZhou Wang __le64 pad1[5];
31dfed0098SZhou Wang struct hisi_acc_hw_sgl *next;
32dfed0098SZhou Wang struct acc_hw_sge sge_entries[];
33dfed0098SZhou Wang } __aligned(1);
34dfed0098SZhou Wang
3548c1cd40SZhou Wang struct hisi_acc_sgl_pool {
36d8ac7b85SZhou Wang struct mem_block {
3748c1cd40SZhou Wang struct hisi_acc_hw_sgl *sgl;
3848c1cd40SZhou Wang dma_addr_t sgl_dma;
3948c1cd40SZhou Wang size_t size;
40d8ac7b85SZhou Wang } mem_block[HISI_ACC_MEM_BLOCK_NR];
41d8ac7b85SZhou Wang u32 sgl_num_per_block;
42d8ac7b85SZhou Wang u32 block_num;
4348c1cd40SZhou Wang u32 count;
4448c1cd40SZhou Wang u32 sge_nr;
4548c1cd40SZhou Wang size_t sgl_size;
4648c1cd40SZhou Wang };
4748c1cd40SZhou Wang
48dfed0098SZhou Wang /**
49dfed0098SZhou Wang * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
50dfed0098SZhou Wang * @dev: The device which hw sgl pool belongs to.
51dfed0098SZhou Wang * @count: Count of hisi_acc_hw_sgl in pool.
5248c1cd40SZhou Wang * @sge_nr: The count of sge in hw_sgl
53dfed0098SZhou Wang *
54dfed0098SZhou Wang * This function creates a hw sgl pool, after this user can get hw sgl memory
55dfed0098SZhou Wang * from it.
56dfed0098SZhou Wang */
hisi_acc_create_sgl_pool(struct device * dev,u32 count,u32 sge_nr)5748c1cd40SZhou Wang struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev,
5848c1cd40SZhou Wang u32 count, u32 sge_nr)
59dfed0098SZhou Wang {
60c5f735bbSKai Ye u32 sgl_size, block_size, sgl_num_per_block, block_num, remain_sgl;
6148c1cd40SZhou Wang struct hisi_acc_sgl_pool *pool;
62d8ac7b85SZhou Wang struct mem_block *block;
63d8ac7b85SZhou Wang u32 i, j;
64dfed0098SZhou Wang
6548c1cd40SZhou Wang if (!dev || !count || !sge_nr || sge_nr > HISI_ACC_SGL_SGE_NR_MAX)
6648c1cd40SZhou Wang return ERR_PTR(-EINVAL);
67dfed0098SZhou Wang
68948e35f1SYang Shen sgl_size = ALIGN(sizeof(struct acc_hw_sge) * sge_nr +
69948e35f1SYang Shen sizeof(struct hisi_acc_hw_sgl),
70948e35f1SYang Shen HISI_ACC_SGL_ALIGN_SIZE);
7151028c6eSKai Ye
7251028c6eSKai Ye /*
735e0a760bSKirill A. Shutemov * the pool may allocate a block of memory of size PAGE_SIZE * 2^MAX_PAGE_ORDER,
7451028c6eSKai Ye * block size may exceed 2^31 on ia64, so the max of block size is 2^31
7551028c6eSKai Ye */
765e0a760bSKirill A. Shutemov block_size = 1 << (PAGE_SHIFT + MAX_PAGE_ORDER < 32 ?
775e0a760bSKirill A. Shutemov PAGE_SHIFT + MAX_PAGE_ORDER : 31);
78d8ac7b85SZhou Wang sgl_num_per_block = block_size / sgl_size;
79d8ac7b85SZhou Wang block_num = count / sgl_num_per_block;
80d8ac7b85SZhou Wang remain_sgl = count % sgl_num_per_block;
81d8ac7b85SZhou Wang
82d8ac7b85SZhou Wang if ((!remain_sgl && block_num > HISI_ACC_MEM_BLOCK_NR) ||
83d8ac7b85SZhou Wang (remain_sgl > 0 && block_num > HISI_ACC_MEM_BLOCK_NR - 1))
84d8ac7b85SZhou Wang return ERR_PTR(-EINVAL);
85dfed0098SZhou Wang
8648c1cd40SZhou Wang pool = kzalloc(sizeof(*pool), GFP_KERNEL);
8748c1cd40SZhou Wang if (!pool)
8848c1cd40SZhou Wang return ERR_PTR(-ENOMEM);
89d8ac7b85SZhou Wang block = pool->mem_block;
9048c1cd40SZhou Wang
91d8ac7b85SZhou Wang for (i = 0; i < block_num; i++) {
92d8ac7b85SZhou Wang block[i].sgl = dma_alloc_coherent(dev, block_size,
93d8ac7b85SZhou Wang &block[i].sgl_dma,
94d8ac7b85SZhou Wang GFP_KERNEL);
95197272b8SKai Ye if (!block[i].sgl) {
96197272b8SKai Ye dev_err(dev, "Fail to allocate hw SG buffer!\n");
97d8ac7b85SZhou Wang goto err_free_mem;
98197272b8SKai Ye }
99d8ac7b85SZhou Wang
100d8ac7b85SZhou Wang block[i].size = block_size;
10148c1cd40SZhou Wang }
102dfed0098SZhou Wang
103d8ac7b85SZhou Wang if (remain_sgl > 0) {
104d8ac7b85SZhou Wang block[i].sgl = dma_alloc_coherent(dev, remain_sgl * sgl_size,
105d8ac7b85SZhou Wang &block[i].sgl_dma,
106d8ac7b85SZhou Wang GFP_KERNEL);
107197272b8SKai Ye if (!block[i].sgl) {
108197272b8SKai Ye dev_err(dev, "Fail to allocate remained hw SG buffer!\n");
109d8ac7b85SZhou Wang goto err_free_mem;
110197272b8SKai Ye }
111d8ac7b85SZhou Wang
112d8ac7b85SZhou Wang block[i].size = remain_sgl * sgl_size;
113d8ac7b85SZhou Wang }
114d8ac7b85SZhou Wang
115d8ac7b85SZhou Wang pool->sgl_num_per_block = sgl_num_per_block;
116d8ac7b85SZhou Wang pool->block_num = remain_sgl ? block_num + 1 : block_num;
117dfed0098SZhou Wang pool->count = count;
118dfed0098SZhou Wang pool->sgl_size = sgl_size;
11948c1cd40SZhou Wang pool->sge_nr = sge_nr;
120dfed0098SZhou Wang
12148c1cd40SZhou Wang return pool;
122d8ac7b85SZhou Wang
123d8ac7b85SZhou Wang err_free_mem:
124fb4ac519SWeili Qian for (j = 0; j < i; j++)
125d8ac7b85SZhou Wang dma_free_coherent(dev, block_size, block[j].sgl,
126d8ac7b85SZhou Wang block[j].sgl_dma);
127fb4ac519SWeili Qian
128aa85923aSKees Cook kfree_sensitive(pool);
129d8ac7b85SZhou Wang return ERR_PTR(-ENOMEM);
130dfed0098SZhou Wang }
131dfed0098SZhou Wang EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
132dfed0098SZhou Wang
133dfed0098SZhou Wang /**
134dfed0098SZhou Wang * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
135dfed0098SZhou Wang * @dev: The device which hw sgl pool belongs to.
136dfed0098SZhou Wang * @pool: Pointer of pool.
137dfed0098SZhou Wang *
138dfed0098SZhou Wang * This function frees memory of a hw sgl pool.
139dfed0098SZhou Wang */
hisi_acc_free_sgl_pool(struct device * dev,struct hisi_acc_sgl_pool * pool)140dfed0098SZhou Wang void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
141dfed0098SZhou Wang {
142d8ac7b85SZhou Wang struct mem_block *block;
143fb4ac519SWeili Qian u32 i;
144d8ac7b85SZhou Wang
14548c1cd40SZhou Wang if (!dev || !pool)
14648c1cd40SZhou Wang return;
14748c1cd40SZhou Wang
148d8ac7b85SZhou Wang block = pool->mem_block;
149d8ac7b85SZhou Wang
150d8ac7b85SZhou Wang for (i = 0; i < pool->block_num; i++)
151d8ac7b85SZhou Wang dma_free_coherent(dev, block[i].size, block[i].sgl,
152d8ac7b85SZhou Wang block[i].sgl_dma);
153d8ac7b85SZhou Wang
15448c1cd40SZhou Wang kfree(pool);
155dfed0098SZhou Wang }
156dfed0098SZhou Wang EXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
157dfed0098SZhou Wang
/*
 * Look up hw SGL number @index in @pool.
 *
 * Stores the SGL's DMA address in @hw_sgl_dma and returns its CPU address.
 * @index is not range-checked here; the caller must pass a valid index.
 */
static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
					   u32 index, dma_addr_t *hw_sgl_dma)
{
	u32 blk_idx = index / pool->sgl_num_per_block;
	u32 slot = index % pool->sgl_num_per_block;
	struct mem_block *blk = &pool->mem_block[blk_idx];

	*hw_sgl_dma = blk->sgl_dma + pool->sgl_size * slot;
	return (void *)blk->sgl + pool->sgl_size * slot;
}
171dfed0098SZhou Wang
sg_map_to_hw_sg(struct scatterlist * sgl,struct acc_hw_sge * hw_sge)172dfed0098SZhou Wang static void sg_map_to_hw_sg(struct scatterlist *sgl,
173dfed0098SZhou Wang struct acc_hw_sge *hw_sge)
174dfed0098SZhou Wang {
175f0c8b6a1SZhou Wang hw_sge->buf = sg_dma_address(sgl);
176f0c8b6a1SZhou Wang hw_sge->len = cpu_to_le32(sg_dma_len(sgl));
1774b95e17bSKai Ye hw_sge->page_ctrl = sg_virt(sgl);
178dfed0098SZhou Wang }
179dfed0098SZhou Wang
inc_hw_sgl_sge(struct hisi_acc_hw_sgl * hw_sgl)180dfed0098SZhou Wang static void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
181dfed0098SZhou Wang {
182a92a00f8SZhou Wang u16 var = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
183a92a00f8SZhou Wang
184a92a00f8SZhou Wang var++;
185a92a00f8SZhou Wang hw_sgl->entry_sum_in_sgl = cpu_to_le16(var);
186dfed0098SZhou Wang }
187dfed0098SZhou Wang
/* Store @sum, converted to little endian, as the chain-wide entry count. */
static void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
{
	__le16 le_sum = cpu_to_le16(sum);

	hw_sgl->entry_sum_in_chain = le_sum;
}
192dfed0098SZhou Wang
clear_hw_sgl_sge(struct hisi_acc_hw_sgl * hw_sgl)1937e958d30SKai Ye static void clear_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
1947e958d30SKai Ye {
1957e958d30SKai Ye struct acc_hw_sge *hw_sge = hw_sgl->sge_entries;
196fb4ac519SWeili Qian u16 entry_sum = le16_to_cpu(hw_sgl->entry_sum_in_sgl);
1977e958d30SKai Ye int i;
1987e958d30SKai Ye
199fb4ac519SWeili Qian for (i = 0; i < entry_sum; i++) {
2007e958d30SKai Ye hw_sge[i].page_ctrl = NULL;
2017e958d30SKai Ye hw_sge[i].buf = 0;
2027e958d30SKai Ye hw_sge[i].len = 0;
2037e958d30SKai Ye }
2047e958d30SKai Ye }
2057e958d30SKai Ye
206dfed0098SZhou Wang /**
207dfed0098SZhou Wang * hisi_acc_sg_buf_map_to_hw_sgl - Map a scatterlist to a hw sgl.
208dfed0098SZhou Wang * @dev: The device which hw sgl belongs to.
209dfed0098SZhou Wang * @sgl: Scatterlist which will be mapped to hw sgl.
210dfed0098SZhou Wang * @pool: Pool which hw sgl memory will be allocated in.
211dfed0098SZhou Wang * @index: Index of hisi_acc_hw_sgl in pool.
212dfed0098SZhou Wang * @hw_sgl_dma: The dma address of allocated hw sgl.
213dfed0098SZhou Wang *
214dfed0098SZhou Wang * This function builds hw sgl according input sgl, user can use hw_sgl_dma
215dfed0098SZhou Wang * as src/dst in its BD. Only support single hw sgl currently.
216dfed0098SZhou Wang */
217dfed0098SZhou Wang struct hisi_acc_hw_sgl *
hisi_acc_sg_buf_map_to_hw_sgl(struct device * dev,struct scatterlist * sgl,struct hisi_acc_sgl_pool * pool,u32 index,dma_addr_t * hw_sgl_dma)218dfed0098SZhou Wang hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
219dfed0098SZhou Wang struct scatterlist *sgl,
220dfed0098SZhou Wang struct hisi_acc_sgl_pool *pool,
221dfed0098SZhou Wang u32 index, dma_addr_t *hw_sgl_dma)
222dfed0098SZhou Wang {
223dfed0098SZhou Wang struct hisi_acc_hw_sgl *curr_hw_sgl;
224fb4ac519SWeili Qian unsigned int i, sg_n_mapped;
2255c086198SZhou Wang dma_addr_t curr_sgl_dma = 0;
226dfed0098SZhou Wang struct acc_hw_sge *curr_hw_sge;
227dfed0098SZhou Wang struct scatterlist *sg;
228*f386dc64SYang Shen int sg_n, ret;
229dfed0098SZhou Wang
230040279e8SChenghai Huang if (!dev || !sgl || !pool || !hw_sgl_dma || index >= pool->count)
231a92a00f8SZhou Wang return ERR_PTR(-EINVAL);
232a92a00f8SZhou Wang
233a92a00f8SZhou Wang sg_n = sg_nents(sgl);
2348debacd6SJonathan Cameron
2358debacd6SJonathan Cameron sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
236197272b8SKai Ye if (!sg_n_mapped) {
237197272b8SKai Ye dev_err(dev, "DMA mapping for SG error!\n");
238dfed0098SZhou Wang return ERR_PTR(-EINVAL);
239197272b8SKai Ye }
240dfed0098SZhou Wang
2418debacd6SJonathan Cameron if (sg_n_mapped > pool->sge_nr) {
242197272b8SKai Ye dev_err(dev, "the number of entries in input scatterlist is bigger than SGL pool setting.\n");
243*f386dc64SYang Shen ret = -EINVAL;
244*f386dc64SYang Shen goto err_unmap;
2458debacd6SJonathan Cameron }
246dfed0098SZhou Wang
247dfed0098SZhou Wang curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
248a92a00f8SZhou Wang if (IS_ERR(curr_hw_sgl)) {
249197272b8SKai Ye dev_err(dev, "Get SGL error!\n");
250*f386dc64SYang Shen ret = -ENOMEM;
251*f386dc64SYang Shen goto err_unmap;
252dfed0098SZhou Wang }
253a92a00f8SZhou Wang curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr);
254dfed0098SZhou Wang curr_hw_sge = curr_hw_sgl->sge_entries;
255dfed0098SZhou Wang
2568debacd6SJonathan Cameron for_each_sg(sgl, sg, sg_n_mapped, i) {
257dfed0098SZhou Wang sg_map_to_hw_sg(sg, curr_hw_sge);
258dfed0098SZhou Wang inc_hw_sgl_sge(curr_hw_sgl);
259dfed0098SZhou Wang curr_hw_sge++;
260dfed0098SZhou Wang }
261dfed0098SZhou Wang
26248c1cd40SZhou Wang update_hw_sgl_sum_sge(curr_hw_sgl, pool->sge_nr);
263dfed0098SZhou Wang *hw_sgl_dma = curr_sgl_dma;
264dfed0098SZhou Wang
265dfed0098SZhou Wang return curr_hw_sgl;
266*f386dc64SYang Shen
267*f386dc64SYang Shen err_unmap:
268*f386dc64SYang Shen dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
269*f386dc64SYang Shen
270*f386dc64SYang Shen return ERR_PTR(ret);
271dfed0098SZhou Wang }
272dfed0098SZhou Wang EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
273dfed0098SZhou Wang
274dfed0098SZhou Wang /**
275dfed0098SZhou Wang * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
276dfed0098SZhou Wang * @dev: The device which hw sgl belongs to.
277dfed0098SZhou Wang * @sgl: Related scatterlist.
278dfed0098SZhou Wang * @hw_sgl: Virtual address of hw sgl.
279dfed0098SZhou Wang *
280dfed0098SZhou Wang * This function unmaps allocated hw sgl.
281dfed0098SZhou Wang */
hisi_acc_sg_buf_unmap(struct device * dev,struct scatterlist * sgl,struct hisi_acc_hw_sgl * hw_sgl)282dfed0098SZhou Wang void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
283dfed0098SZhou Wang struct hisi_acc_hw_sgl *hw_sgl)
284dfed0098SZhou Wang {
285a92a00f8SZhou Wang if (!dev || !sgl || !hw_sgl)
286a92a00f8SZhou Wang return;
287a92a00f8SZhou Wang
288dfed0098SZhou Wang dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
2897e958d30SKai Ye clear_hw_sgl_sge(hw_sgl);
290dfed0098SZhou Wang hw_sgl->entry_sum_in_chain = 0;
291dfed0098SZhou Wang hw_sgl->entry_sum_in_sgl = 0;
292dfed0098SZhou Wang hw_sgl->entry_length_in_sgl = 0;
293dfed0098SZhou Wang }
294dfed0098SZhou Wang EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
295