// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Devmem TCP
 *
 * Authors:	Mina Almasry <almasrymina@google.com>
 *		Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 *		Kaiyuan Zhang <kaiyuanz@google.com>
 */

#include <linux/dma-buf.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);

static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->niovs);
	kfree(owner);
}

static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);

	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}

void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kfree(binding);
}

struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->niovs[index];

	niov->pp_magic = 0;
	niov->pp = NULL;
	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}

void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}

void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		WARN_ON(rxq->mp_params.mp_priv != binding);

		rxq->mp_params.mp_priv = NULL;

		rxq_idx = get_netdev_rx_queue_index(rxq);

		WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
	}

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	net_devmem_dmabuf_binding_put(binding);
}
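
/*
 * Usage sketch (comment only, not compiled): a memory provider built on a
 * binding is expected to pair the two helpers above, pulling one PAGE_SIZE
 * chunk at a time out of the genpool and returning it once the net_iov is
 * no longer referenced. The helpers named below are the ones defined in
 * this file; the surrounding control flow is a hypothetical caller, not
 * the actual provider implementation.
 *
 *	struct net_iov *niov;
 *
 *	niov = net_devmem_alloc_dmabuf(binding);
 *	if (!niov)
 *		return NULL;			(chunk pool exhausted)
 *
 *	... hand net_iov_to_netmem(niov) to the RX path ...
 *
 *	net_devmem_free_dmabuf(niov);		(chunk returns to the genpool)
 */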

int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	if (rxq->mp_params.mp_priv) {
		NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
		return -EEXIST;
	}

#ifdef CONFIG_XDP_SOCKETS
	if (rxq->pool) {
		NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
		return -EBUSY;
	}
#endif

	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		return err;

	rxq->mp_params.mp_priv = binding;

	err = netdev_rx_queue_restart(dev, rxq_idx);
	if (err)
		goto err_xa_erase;

	return 0;

err_xa_erase:
	rxq->mp_params.mp_priv = NULL;
	xa_erase(&binding->bound_rxqs, xa_idx);

	return err;
}
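
/*
 * Usage sketch (comment only, not compiled): net_devmem_bind_dmabuf() and
 * net_devmem_bind_dmabuf_to_queue() are meant to be chained by a
 * control-path caller such as a netlink request handler, with
 * net_devmem_unbind_dmabuf() as the unwind step. Only the helpers are
 * defined in this file; the caller below is hypothetical, and rtnl_lock()
 * is assumed to be held since net_devmem_dmabuf_bindings is documented
 * above as protected by it.
 *
 *	binding = net_devmem_bind_dmabuf(dev, dmabuf_fd, extack);
 *	if (IS_ERR(binding))
 *		return PTR_ERR(binding);
 *
 *	err = net_devmem_bind_dmabuf_to_queue(dev, rxq_idx, binding, extack);
 *	if (err) {
 *		net_devmem_unbind_dmabuf(binding);
 *		return err;
 *	}
 */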

struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_binding;

	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	binding->dmabuf = dmabuf;

	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_id;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       DMA_FROM_DEVICE);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool =
		gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_unmap;
	}

	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->niovs = kvmalloc_array(owner->num_niovs,
					      sizeof(*owner->niovs),
					      GFP_KERNEL);
		if (!owner->niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->num_niovs; i++) {
			niov = &owner->niovs[i];
			niov->owner = owner;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
		}

		virtual += len;
	}

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}

void dev_dmabuf_uninstall(struct net_device *dev)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		binding = dev->_rx[i].mp_params.mp_priv;
		if (!binding)
			continue;

		xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
			if (rxq == &dev->_rx[i]) {
				xa_erase(&binding->bound_rxqs, xa_idx);
				break;
			}
	}
}
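
/*
 * Teardown sketch (comment only, not compiled): the call chain below
 * summarizes how a binding created above is torn down. It assumes, as the
 * naming suggests, that the _put() helper declared in devmem.h drops the
 * reference taken in net_devmem_bind_dmabuf() and invokes
 * __net_devmem_dmabuf_binding_free() once the last reference is gone.
 *
 *	net_devmem_unbind_dmabuf(binding)
 *	  -> netdev_rx_queue_restart() for each still-bound RX queue
 *	  -> xa_erase(&net_devmem_dmabuf_bindings, binding->id)
 *	  -> net_devmem_dmabuf_binding_put(binding)
 *	       -> __net_devmem_dmabuf_binding_free(binding) on the final put,
 *	          which destroys the genpool, unmaps and detaches the dma-buf
 *	          and drops the dma_buf_get() reference.
 */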