// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Devmem TCP
 *
 * Authors: Mina Almasry <almasrymina@google.com>
 *          Willem de Bruijn <willemdebruijn.kernel@gmail.com>
 *          Kaiyuan Zhang <kaiyuanz@google.com>
 */

#include <linux/dma-buf.h>
#include <linux/ethtool_netlink.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "mp_dmabuf_devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);

static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
                                               struct gen_pool_chunk *chunk,
                                               void *not_used)
{
        struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

        kvfree(owner->niovs);
        kfree(owner);
}

static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
        struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov);

        return owner->base_dma_addr +
               ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}

void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
        size_t size, avail;

        gen_pool_for_each_chunk(binding->chunk_pool,
                                net_devmem_dmabuf_free_chunk_owner, NULL);

        size = gen_pool_size(binding->chunk_pool);
        avail = gen_pool_avail(binding->chunk_pool);

        if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
                  size, avail))
                gen_pool_destroy(binding->chunk_pool);

        dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
                                          DMA_FROM_DEVICE);
        dma_buf_detach(binding->dmabuf, binding->attachment);
        dma_buf_put(binding->dmabuf);
        xa_destroy(&binding->bound_rxqs);
        kfree(binding);
}

struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
        struct dmabuf_genpool_chunk_owner *owner;
        unsigned long dma_addr;
        struct net_iov *niov;
        ssize_t offset;
        ssize_t index;

        dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
                                        (void **)&owner);
        if (!dma_addr)
                return NULL;

        offset = dma_addr - owner->base_dma_addr;
        index = offset / PAGE_SIZE;
        niov = &owner->niovs[index];

        niov->pp_magic = 0;
        niov->pp = NULL;
        atomic_long_set(&niov->pp_ref_count, 0);

        return niov;
}

void net_devmem_free_dmabuf(struct net_iov *niov)
{
        struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov);
        unsigned long dma_addr = net_devmem_get_dma_addr(niov);

        if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
                                       PAGE_SIZE)))
                return;

        gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}

void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
        struct netdev_rx_queue *rxq;
        unsigned long xa_idx;
        unsigned int rxq_idx;

        if (binding->list.next)
                list_del(&binding->list);

        xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
                WARN_ON(rxq->mp_params.mp_priv != binding);

                rxq->mp_params.mp_priv = NULL;

                rxq_idx = get_netdev_rx_queue_index(rxq);

                WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
        }

        xa_erase(&net_devmem_dmabuf_bindings, binding->id);

        net_devmem_dmabuf_binding_put(binding);
}

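/* Install @binding as the memory provider for RX queue @rxq_idx of @dev and
 * restart the queue so it refills from the dma-buf. Requires header/data
 * split (tcp-data-split) to be enabled with a zero hds-thresh, and rejects
 * queues already claimed by another memory provider or by AF_XDP.
 */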
int
net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
                                struct net_devmem_dmabuf_binding *binding,
                                struct netlink_ext_ack *extack)
{
        struct netdev_rx_queue *rxq;
        u32 xa_idx;
        int err;

        if (rxq_idx >= dev->real_num_rx_queues) {
                NL_SET_ERR_MSG(extack, "rx queue index out of range");
                return -ERANGE;
        }

        if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
                NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
                return -EINVAL;
        }

        if (dev->cfg->hds_thresh) {
                NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
                return -EINVAL;
        }

        rxq = __netif_get_rx_queue(dev, rxq_idx);
        if (rxq->mp_params.mp_priv) {
                NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
                return -EEXIST;
        }

#ifdef CONFIG_XDP_SOCKETS
        if (rxq->pool) {
                NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
                return -EBUSY;
        }
#endif

        err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
                       GFP_KERNEL);
        if (err)
                return err;

        rxq->mp_params.mp_priv = binding;

        err = netdev_rx_queue_restart(dev, rxq_idx);
        if (err)
                goto err_xa_erase;

        return 0;

err_xa_erase:
        rxq->mp_params.mp_priv = NULL;
        xa_erase(&binding->bound_rxqs, xa_idx);

        return err;
}

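/* Create a binding for @dmabuf_fd on @dev: attach and DMA-map the dma-buf,
 * then carve the mapped scatterlist into a genpool of PAGE_SIZE chunks,
 * with one chunk owner per scatterlist entry and one struct net_iov backing
 * each chunk.
 */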
struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
                       struct netlink_ext_ack *extack)
{
        struct net_devmem_dmabuf_binding *binding;
        static u32 id_alloc_next;
        struct scatterlist *sg;
        struct dma_buf *dmabuf;
        unsigned int sg_idx, i;
        unsigned long virtual;
        int err;

        dmabuf = dma_buf_get(dmabuf_fd);
        if (IS_ERR(dmabuf))
                return ERR_CAST(dmabuf);

        binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
                               dev_to_node(&dev->dev));
        if (!binding) {
                err = -ENOMEM;
                goto err_put_dmabuf;
        }

        binding->dev = dev;

        err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
                              binding, xa_limit_32b, &id_alloc_next,
                              GFP_KERNEL);
        if (err < 0)
                goto err_free_binding;

        xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

        refcount_set(&binding->ref, 1);

        binding->dmabuf = dmabuf;

        binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
        if (IS_ERR(binding->attachment)) {
                err = PTR_ERR(binding->attachment);
                NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
                goto err_free_id;
        }

        binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
                                                       DMA_FROM_DEVICE);
        if (IS_ERR(binding->sgt)) {
                err = PTR_ERR(binding->sgt);
                NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
                goto err_detach;
        }

        /* For simplicity we expect to make PAGE_SIZE allocations, but the
         * binding can be much more flexible than that. We may be able to
         * allocate MTU sized chunks here. Leave that for future work...
         */
        binding->chunk_pool =
                gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
        if (!binding->chunk_pool) {
                err = -ENOMEM;
                goto err_unmap;
        }

        virtual = 0;
        for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
                dma_addr_t dma_addr = sg_dma_address(sg);
                struct dmabuf_genpool_chunk_owner *owner;
                size_t len = sg_dma_len(sg);
                struct net_iov *niov;

                owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
                                     dev_to_node(&dev->dev));
                if (!owner) {
                        err = -ENOMEM;
                        goto err_free_chunks;
                }

                owner->base_virtual = virtual;
                owner->base_dma_addr = dma_addr;
                owner->num_niovs = len / PAGE_SIZE;
                owner->binding = binding;

                err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
                                         dma_addr, len, dev_to_node(&dev->dev),
                                         owner);
                if (err) {
                        kfree(owner);
                        err = -EINVAL;
                        goto err_free_chunks;
                }

                owner->niovs = kvmalloc_array(owner->num_niovs,
                                              sizeof(*owner->niovs),
                                              GFP_KERNEL);
                if (!owner->niovs) {
                        err = -ENOMEM;
                        goto err_free_chunks;
                }

                for (i = 0; i < owner->num_niovs; i++) {
                        niov = &owner->niovs[i];
                        niov->owner = owner;
                        page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
                                                      net_devmem_get_dma_addr(niov));
                }

                virtual += len;
        }

        return binding;

err_free_chunks:
        gen_pool_for_each_chunk(binding->chunk_pool,
                                net_devmem_dmabuf_free_chunk_owner, NULL);
        gen_pool_destroy(binding->chunk_pool);
err_unmap:
        dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
                                          DMA_FROM_DEVICE);
err_detach:
        dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
        xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
        kfree(binding);
err_put_dmabuf:
        dma_buf_put(dmabuf);
        return ERR_PTR(err);
}

void dev_dmabuf_uninstall(struct net_device *dev)
{
        struct net_devmem_dmabuf_binding *binding;
        struct netdev_rx_queue *rxq;
        unsigned long xa_idx;
        unsigned int i;

        for (i = 0; i < dev->real_num_rx_queues; i++) {
                binding = dev->_rx[i].mp_params.mp_priv;
                if (!binding)
                        continue;

                xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
                        if (rxq == &dev->_rx[i]) {
                                xa_erase(&binding->bound_rxqs, xa_idx);
                                break;
                        }
        }
}

/*** "Dmabuf devmem memory provider" ***/

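/* Page pool init hook: the pool must carry a dma-buf binding in mp_priv and
 * use order-0 allocations. Takes a reference on the binding for the lifetime
 * of the pool.
 */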
int mp_dmabuf_devmem_init(struct page_pool *pool)
{
        struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

        if (!binding)
                return -EINVAL;

        /* dma-buf dma addresses do not need and should not be used with
         * dma_sync_for_cpu/device. Force disable dma_sync.
         */
        pool->dma_sync = false;
        pool->dma_sync_for_cpu = false;

        if (pool->p.order != 0)
                return -E2BIG;

        net_devmem_dmabuf_binding_get(binding);
        return 0;
}

netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
        struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
        struct net_iov *niov;
        netmem_ref netmem;

        niov = net_devmem_alloc_dmabuf(binding);
        if (!niov)
                return 0;

        netmem = net_iov_to_netmem(niov);

        page_pool_set_pp_info(pool, netmem);

        pool->pages_state_hold_cnt++;
        trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
        return netmem;
}

void mp_dmabuf_devmem_destroy(struct page_pool *pool)
{
        struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

        net_devmem_dmabuf_binding_put(binding);
}

bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
{
        long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));

        if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
                return false;

        if (WARN_ON_ONCE(refcount != 1))
                return false;

        page_pool_clear_pp_info(netmem);

        net_devmem_free_dmabuf(netmem_to_net_iov(netmem));

        /* We don't want the page pool put_page()ing our net_iovs. */
        return false;
}