// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Devmem TCP
 *
 *	Authors:	Mina Almasry <almasrymina@google.com>
 *			Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 *			Kaiyuan Zhang <kaiyuanz@google.com>
 */

#include <linux/dma-buf.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
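
/* gen_pool_for_each_chunk() callback: free one chunk's owner struct and its
 * array of net_iovs.
 */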
static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->niovs);
	kfree(owner);
}
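
/* DMA address of @niov: the owning chunk's base DMA address plus the niov's
 * page-sized offset within that chunk.
 */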
static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);

	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}
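
/* Final teardown of a binding: free every chunk owner and its net_iovs,
 * destroy the genpool (warning if allocations are still outstanding), then
 * unmap and detach the dma-buf and free the binding itself.
 */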
void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kfree(binding);
}
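
/* Allocate one PAGE_SIZE chunk from the binding's genpool and return the
 * net_iov tracking it, with its page_pool fields reset so a provider can
 * take ownership. Returns NULL if the pool is exhausted.
 */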
struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->niovs[index];

	niov->pp_magic = 0;
	niov->pp = NULL;
	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}
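
/* Return the PAGE_SIZE chunk backing @niov to its binding's genpool. Bails
 * out with a warning if the address is not part of the pool.
 */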
void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}
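
/* Undo a bind: detach the binding from every rx queue it is installed on and
 * restart those queues, remove the binding from the global xarray, and drop
 * the reference taken when the binding was created.
 */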
void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		WARN_ON(rxq->mp_params.mp_priv != binding);

		rxq->mp_params.mp_priv = NULL;

		rxq_idx = get_netdev_rx_queue_index(rxq);

		WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
	}

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	net_devmem_dmabuf_binding_put(binding);
}
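
/* Install @binding as the memory provider for rx queue @rxq_idx of @dev and
 * restart the queue so its page_pool picks the provider up. Fails if the
 * index is out of range or the queue is already claimed by a memory provider
 * or by AF_XDP.
 */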
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	if (rxq->mp_params.mp_priv) {
		NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
		return -EEXIST;
	}

#ifdef CONFIG_XDP_SOCKETS
	if (rxq->pool) {
		NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
		return -EBUSY;
	}
#endif

	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		return err;

	rxq->mp_params.mp_priv = binding;

	err = netdev_rx_queue_restart(dev, rxq_idx);
	if (err)
		goto err_xa_erase;

	return 0;

err_xa_erase:
	rxq->mp_params.mp_priv = NULL;
	xa_erase(&binding->bound_rxqs, xa_idx);

	return err;
}
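
/* Create a binding for the dma-buf referred to by @dmabuf_fd: attach it to
 * @dev's parent device, DMA-map it, and carve the resulting scatterlist into
 * a genpool of PAGE_SIZE chunks, with one dmabuf_genpool_chunk_owner per
 * scatterlist entry and one net_iov per page. Returns the new binding or an
 * ERR_PTR() on failure.
 */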
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_binding;

	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	binding->dmabuf = dmabuf;

	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_id;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       DMA_FROM_DEVICE);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool =
		gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_unmap;
	}

	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->niovs = kvmalloc_array(owner->num_niovs,
					      sizeof(*owner->niovs),
					      GFP_KERNEL);
		if (!owner->niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->num_niovs; i++) {
			niov = &owner->niovs[i];
			niov->owner = owner;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
		}

		virtual += len;
	}

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}
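
/* Drop every rx queue of @dev from the bound_rxqs xarray of whatever dma-buf
 * binding is installed on it. The queues' mp_params pointers themselves are
 * left untouched here.
 */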
void dev_dmabuf_uninstall(struct net_device *dev)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		binding = dev->_rx[i].mp_params.mp_priv;
		if (!binding)
			continue;

		xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
			if (rxq == &dev->_rx[i]) {
				xa_erase(&binding->bound_rxqs, xa_idx);
				break;
			}
	}
}
323