/*-
 * Copyright (c) 2021 The FreeBSD Foundation
 * Copyright (c) 2022 Bjoern A. Zeeb
 *
 * This software was developed by Björn Zeeb under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/netdevice.h>

MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat");

#define	NAPI_LOCK_INIT(_ndev)		\
	mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF)
#define	NAPI_LOCK_DESTROY(_ndev)	mtx_destroy(&(_ndev)->napi_mtx)
#define	NAPI_LOCK_ASSERT(_ndev)		mtx_assert(&(_ndev)->napi_mtx, MA_OWNED)
#define	NAPI_LOCK(_ndev)		mtx_lock(&(_ndev)->napi_mtx)
#define	NAPI_UNLOCK(_ndev)		mtx_unlock(&(_ndev)->napi_mtx)

/* -------------------------------------------------------------------------- */

#define	LKPI_NAPI_FLAGS \
    "\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN"

/* #define NAPI_DEBUG */
#ifdef NAPI_DEBUG
static int debug_napi;
SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN,
    &debug_napi, 0, "NAPI debug level");

#define	DNAPI_TODO		0x01
#define	DNAPI_IMPROVE		0x02
#define	DNAPI_TRACE		0x10
#define	DNAPI_TRACE_TASK	0x20
#define	DNAPI_DIRECT_DISPATCH	0x1000

#define	NAPI_TRACE(_n)		if (debug_napi & DNAPI_TRACE)		\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__,	\
	(unsigned int)ticks, _n, (uintmax_t)(_n)->state,		\
	(int)(_n)->state, LKPI_NAPI_FLAGS)
#define	NAPI_TRACE2D(_n, _d)	if (debug_napi & DNAPI_TRACE)		\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \
	(unsigned int)ticks, _n, (uintmax_t)(_n)->state,		\
	(int)(_n)->state, LKPI_NAPI_FLAGS, _d)
#define	NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK)	\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d "	\
	"rx_count %d\n", __func__, __LINE__,				\
	(unsigned int)ticks, _n, (uintmax_t)(_n)->state,		\
	(int)(_n)->state, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count)
#define	NAPI_TODO()		if (debug_napi & DNAPI_TODO)		\
    printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks)
#define	NAPI_IMPROVE()		if (debug_napi & DNAPI_IMPROVE)		\
    printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks)

#define	NAPI_DIRECT_DISPATCH()	((debug_napi & DNAPI_DIRECT_DISPATCH) != 0)
#else
#define	NAPI_TRACE(_n)			do { } while (0)
#define	NAPI_TRACE2D(_n, _d)		do { } while (0)
#define	NAPI_TRACE_TASK(_n, _p, _c)	do { } while (0)
#define	NAPI_TODO()			do { } while (0)
#define	NAPI_IMPROVE()			do { } while (0)

#define	NAPI_DIRECT_DISPATCH()	(0)
#endif
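/*
 * Example (derived from the definitions above): with NAPI_DEBUG compiled
 * in, the DNAPI_* flags combine into the debug level, which is settable
 * at runtime or as a loader tunable (CTLFLAG_RWTUN), e.g. to enable both
 * trace classes (DNAPI_TRACE | DNAPI_TRACE_TASK):
 *
 *	sysctl compat.linuxkpi.debug_napi=0x30
 */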
/* -------------------------------------------------------------------------- */

/*
 * Check if a poll is running or can run, and if the latter,
 * mark us as running.  That way we ensure that only one poll
 * can ever run at the same time.  Returns true if no poll
 * was scheduled yet.
 */
bool
linuxkpi_napi_schedule_prep(struct napi_struct *napi)
{
	unsigned long old, new;

	NAPI_TRACE(napi);

	/* We can only update/return if all flags agree. */
	do {
		old = READ_ONCE(napi->state);

		/* If we are stopping, cannot run again. */
		if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) {
			NAPI_TRACE(napi);
			return (false);
		}

		new = old;
		/* We were already scheduled. Need to try again? */
		if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0)
			new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN);
		new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED);

	} while (atomic_cmpset_acq_long(&napi->state, old, new) == 0);

	NAPI_TRACE(napi);
	return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0);
}

static void
lkpi___napi_schedule_dd(struct napi_struct *napi)
{
	unsigned long old, new;
	int rc;

	rc = 0;
again:
	NAPI_TRACE2D(napi, rc);
	if (napi->poll != NULL)
		rc = napi->poll(napi, napi->budget);
	napi->rx_count += rc;

	/* Check if interrupts are still disabled; more work to do. */
	/* Bandaid for now. */
	if (rc >= napi->budget)
		goto again;

	/* Bandaid for now. */
	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state))
		goto again;

	do {
		new = old = READ_ONCE(napi->state);
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
	} while (atomic_cmpset_acq_long(&napi->state, old, new) == 0);

	NAPI_TRACE2D(napi, rc);
}

void
linuxkpi___napi_schedule(struct napi_struct *napi)
{
	int rc;

	NAPI_TRACE(napi);
	if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state)) {
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state);
		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state);
		NAPI_TRACE(napi);
		return;
	}

	if (NAPI_DIRECT_DISPATCH()) {
		lkpi___napi_schedule_dd(napi);
	} else {
		rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task);
		NAPI_TRACE2D(napi, rc);
		if (rc != 0) {
			/* Should we assert EPIPE? */
			return;
		}
	}
}

void
linuxkpi_napi_schedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/*
	 * iwlwifi calls this sequence instead of napi_schedule()
	 * to be able to test the prep result.
	 */
	if (napi_schedule_prep(napi))
		__napi_schedule(napi);
}

void
linuxkpi_napi_reschedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/* Not sure yet how this differs from napi_schedule(). */
	if (napi_schedule_prep(napi))
		__napi_schedule(napi);
}

bool
linuxkpi_napi_complete_done(struct napi_struct *napi, int ret)
{
	unsigned long old, new;

	NAPI_TRACE(napi);
	if (NAPI_DIRECT_DISPATCH())
		return (true);

	do {
		new = old = READ_ONCE(napi->state);

		/*
		 * If we lost a race before, we need to re-schedule.
		 * Leave IS_SCHEDULED set, essentially doing "_prep".
		 */
		if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old))
			clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
	} while (atomic_cmpset_acq_long(&napi->state, old, new) == 0);

	NAPI_TRACE(napi);

	/* Someone tried to schedule while poll was running. Re-sched. */
	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) {
		__napi_schedule(napi);
		return (false);
	}

	return (true);
}
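/*
 * A descriptive summary of the flag transitions implemented above
 * (derived from the code; not itself part of the Linux KPI):
 *
 *	napi_schedule_prep():
 *	    DISABLE_PENDING set        -> no change, return false
 *	    IS_SCHEDULED clear         -> set IS_SCHEDULED, return true
 *	    IS_SCHEDULED already set   -> also set LOST_RACE_TRY_AGAIN,
 *	                                  return false
 *	napi_complete_done():
 *	    LOST_RACE_TRY_AGAIN clear  -> clear IS_SCHEDULED, return true
 *	    LOST_RACE_TRY_AGAIN set    -> keep IS_SCHEDULED, clear the race
 *	                                  flag, __napi_schedule(), return false
 */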
bool
linuxkpi_napi_complete(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
	return (napi_complete_done(napi, 0));
}

void
linuxkpi_napi_disable(struct napi_struct *napi)
{
	NAPI_TRACE(napi);
	set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state);
	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state))
		pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK);
	clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state);
}

void
linuxkpi_napi_enable(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
	KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state),
	    ("%s: enabling napi %p already scheduled\n", __func__, napi));
	mb();
	/* Let us be scheduled. */
	clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state);
}

void
linuxkpi_napi_synchronize(struct napi_struct *napi)
{
	NAPI_TRACE(napi);
#if defined(SMP)
	/* Check & sleep while a napi is scheduled. */
	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state))
		pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK);
#else
	mb();
#endif
}

/* -------------------------------------------------------------------------- */

static void
lkpi_napi_task(void *ctx, int pending)
{
	struct napi_struct *napi;
	int count;

	KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n",
	    __func__, ctx, pending));
	napi = ctx;
	KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n",
	    __func__, napi));

	NAPI_TRACE_TASK(napi, pending, napi->budget);
	count = napi->poll(napi, napi->budget);
	napi->rx_count += count;
	NAPI_TRACE_TASK(napi, pending, count);

	/*
	 * We must not check against count < pending here.  There are
	 * situations in which a driver may "poll" without any work to do;
	 * that check would make us re-schedule ourselves forever.
	 */
	if (count >= napi->budget) {
		/*
		 * Have to re-schedule ourselves.  napi_complete() was not run
		 * in this case, which means we are still SCHEDULED.
		 * In order to queue another task we have to directly call
		 * __napi_schedule() without _prep() in the way.
		 */
		__napi_schedule(napi);
	}
}

/* -------------------------------------------------------------------------- */

void
linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi,
    int (*napi_poll)(struct napi_struct *, int))
{

	napi->dev = ndev;
	napi->poll = napi_poll;
	napi->budget = NAPI_POLL_WEIGHT;

	INIT_LIST_HEAD(&napi->rx_list);
	napi->rx_count = 0;

	TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi);

	NAPI_LOCK(ndev);
	TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry);
	NAPI_UNLOCK(ndev);

	/* Anything else to do on the ndev? */
	clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state);
}
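/*
 * Illustrative driver-side usage of the KPI above (a sketch only; the
 * "mydrv" and "sc" names are made up for the example):
 *
 *	static int
 *	mydrv_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work;
 *
 *		work = 0;
 *		... process up to "budget" frames, counting them in work ...
 *		if (work < budget)
 *			napi_complete_done(napi, work);
 *		return (work);
 *	}
 *
 *	netif_napi_add(ndev, &sc->napi, mydrv_poll);
 *	napi_enable(&sc->napi);
 */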
320 */ 321 __napi_schedule(napi); 322 } 323 } 324 325 /* -------------------------------------------------------------------------- */ 326 327 void 328 linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi, 329 int(*napi_poll)(struct napi_struct *, int)) 330 { 331 332 napi->dev = ndev; 333 napi->poll = napi_poll; 334 napi->budget = NAPI_POLL_WEIGHT; 335 336 INIT_LIST_HEAD(&napi->rx_list); 337 napi->rx_count = 0; 338 339 TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi); 340 341 NAPI_LOCK(ndev); 342 TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry); 343 NAPI_UNLOCK(ndev); 344 345 /* Anything else to do on the ndev? */ 346 clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 347 } 348 349 static void 350 lkpi_netif_napi_del_locked(struct napi_struct *napi) 351 { 352 struct net_device *ndev; 353 354 ndev = napi->dev; 355 NAPI_LOCK_ASSERT(ndev); 356 357 set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 358 TAILQ_REMOVE(&ndev->napi_head, napi, entry); 359 while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0) 360 taskqueue_drain(ndev->napi_tq, &napi->napi_task); 361 } 362 363 void 364 linuxkpi_netif_napi_del(struct napi_struct *napi) 365 { 366 struct net_device *ndev; 367 368 ndev = napi->dev; 369 NAPI_LOCK(ndev); 370 lkpi_netif_napi_del_locked(napi); 371 NAPI_UNLOCK(ndev); 372 } 373 374 /* -------------------------------------------------------------------------- */ 375 376 void 377 linuxkpi_init_dummy_netdev(struct net_device *ndev) 378 { 379 380 memset(ndev, 0, sizeof(*ndev)); 381 382 ndev->reg_state = NETREG_DUMMY; 383 NAPI_LOCK_INIT(ndev); 384 TAILQ_INIT(&ndev->napi_head); 385 /* Anything else? */ 386 387 ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK, 388 taskqueue_thread_enqueue, &ndev->napi_tq); 389 /* One thread for now. */ 390 (void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT, 391 "ndev napi taskq"); 392 } 393 394 struct net_device * 395 linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags, 396 void(*setup_func)(struct net_device *)) 397 { 398 struct net_device *ndev; 399 400 ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT); 401 if (ndev == NULL) 402 return (ndev); 403 404 /* Always first as it zeros! */ 405 linuxkpi_init_dummy_netdev(ndev); 406 407 strlcpy(ndev->name, name, sizeof(*ndev->name)); 408 409 /* This needs extending as we support more. */ 410 411 setup_func(ndev); 412 413 return (ndev); 414 } 415 416 void 417 linuxkpi_free_netdev(struct net_device *ndev) 418 { 419 struct napi_struct *napi, *temp; 420 421 NAPI_LOCK(ndev); 422 TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) { 423 lkpi_netif_napi_del_locked(napi); 424 } 425 NAPI_UNLOCK(ndev); 426 427 taskqueue_free(ndev->napi_tq); 428 ndev->napi_tq = NULL; 429 NAPI_LOCK_DESTROY(ndev); 430 431 /* This needs extending as we support more. */ 432 433 free(ndev, M_NETDEV); 434 } 435