// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Devmem TCP
 *
 *	Authors:	Mina Almasry <almasrymina@google.com>
 *			Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 *			Kaiyuan Zhang <kaiyuanz@google.com>
 */

#include <linux/dma-buf.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "mp_dmabuf_devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);

static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->niovs);
	kfree(owner);
}

static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);

	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}

void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kfree(binding);
}

struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->niovs[index];

	niov->pp_magic = 0;
	niov->pp = NULL;
	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}

void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}

void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		WARN_ON(rxq->mp_params.mp_priv != binding);

		rxq->mp_params.mp_priv = NULL;

		rxq_idx = get_netdev_rx_queue_index(rxq);

		WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
	}

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	net_devmem_dmabuf_binding_put(binding);
}
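/* Bind @binding to RX queue @rxq_idx of @dev and restart the queue so that
 * its page_pool is recreated with the dmabuf memory provider. Fails if the
 * queue index is out of range, or if the queue is already claimed by another
 * memory provider or by an AF_XDP pool. Returns 0 on success or a negative
 * errno, with details reported via @extack.
 */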
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	if (rxq->mp_params.mp_priv) {
		NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
		return -EEXIST;
	}

#ifdef CONFIG_XDP_SOCKETS
	if (rxq->pool) {
		NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
		return -EBUSY;
	}
#endif

	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		return err;

	rxq->mp_params.mp_priv = binding;

	err = netdev_rx_queue_restart(dev, rxq_idx);
	if (err)
		goto err_xa_erase;

	return 0;

err_xa_erase:
	rxq->mp_params.mp_priv = NULL;
	xa_erase(&binding->bound_rxqs, xa_idx);

	return err;
}
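/* Take a reference on the dma-buf identified by @dmabuf_fd, attach and map it
 * for @dev, and carve the resulting DMA scatterlist into PAGE_SIZE-granular
 * net_iovs tracked by a genpool. Returns the new binding on success or an
 * ERR_PTR() on failure.
 */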
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_binding;

	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	binding->dmabuf = dmabuf;

	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_id;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       DMA_FROM_DEVICE);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool =
		gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_unmap;
	}

	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->niovs = kvmalloc_array(owner->num_niovs,
					      sizeof(*owner->niovs),
					      GFP_KERNEL);
		if (!owner->niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->num_niovs; i++) {
			niov = &owner->niovs[i];
			niov->owner = owner;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
		}

		virtual += len;
	}

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}

void dev_dmabuf_uninstall(struct net_device *dev)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		binding = dev->_rx[i].mp_params.mp_priv;
		if (!binding)
			continue;

		xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
			if (rxq == &dev->_rx[i]) {
				xa_erase(&binding->bound_rxqs, xa_idx);
				break;
			}
	}
}

/*** "Dmabuf devmem memory provider" ***/

int mp_dmabuf_devmem_init(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	if (!binding)
		return -EINVAL;

	if (!pool->dma_map)
		return -EOPNOTSUPP;

	if (pool->dma_sync)
		return -EOPNOTSUPP;

	if (pool->p.order != 0)
		return -E2BIG;

	net_devmem_dmabuf_binding_get(binding);
	return 0;
}

netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
	struct net_iov *niov;
	netmem_ref netmem;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return 0;

	netmem = net_iov_to_netmem(niov);

	page_pool_set_pp_info(pool, netmem);

	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
	return netmem;
}

void mp_dmabuf_devmem_destroy(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	net_devmem_dmabuf_binding_put(binding);
}
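/* Called when the page_pool releases a netmem. Only fully-released net_iovs
 * (pp_ref_count of exactly 1) are expected here; the net_iov is returned to
 * the genpool, and false is returned so the page_pool never attempts to
 * put_page() memory it does not own.
 */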
bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
{
	long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));

	if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
		return false;

	if (WARN_ON_ONCE(refcount != 1))
		return false;

	page_pool_clear_pp_info(netmem);

	net_devmem_free_dmabuf(netmem_to_net_iov(netmem));

	/* We don't want the page pool put_page()ing our net_iovs. */
	return false;
}
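/* Illustrative sketch only, not part of this file's API: a page_pool whose
 * RX queue carries a dmabuf binding in pool->mp_priv is expected to drive
 * the mp_dmabuf_devmem_* hooks roughly as follows. The call sites shown are
 * assumptions; the actual dispatch lives in the page_pool core.
 *
 *	mp_dmabuf_devmem_init(pool);                        // at pool creation
 *	netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp); // slow-path alloc
 *	mp_dmabuf_devmem_release_page(pool, netmem);        // on netmem release
 *	mp_dmabuf_devmem_destroy(pool);                     // at pool destruction
 */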