1 /*- 2 * Copyright (c) 2021 The FreeBSD Foundation 3 * Copyright (c) 2022 Bjoern A. Zeeb 4 * 5 * This software was developed by Björn Zeeb under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/kernel.h> 34 #include <sys/sysctl.h> 35 36 #include <linux/bitops.h> 37 #include <linux/list.h> 38 #include <linux/netdevice.h> 39 40 MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat"); 41 42 #define NAPI_LOCK_INIT(_ndev) \ 43 mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF) 44 #define NAPI_LOCK_DESTROY(_ndev) mtx_destroy(&(_ndev)->napi_mtx) 45 #define NAPI_LOCK_ASSERT(_ndev) mtx_assert(&(_ndev)->napi_mtx, MA_OWNED) 46 #define NAPI_LOCK(_ndev) mtx_lock(&(_ndev)->napi_mtx) 47 #define NAPI_UNLOCK(_ndev) mtx_unlock(&(_ndev)->napi_mtx) 48 49 /* -------------------------------------------------------------------------- */ 50 51 #define LKPI_NAPI_FLAGS \ 52 "\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN" 53 54 /* #define NAPI_DEBUG */ 55 #ifdef NAPI_DEBUG 56 static int debug_napi; 57 SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN, 58 &debug_napi, 0, "NAPI debug level"); 59 60 #define DNAPI_TODO 0x01 61 #define DNAPI_IMPROVE 0x02 62 #define DNAPI_TRACE 0x10 63 #define DNAPI_TRACE_TASK 0x20 64 #define DNAPI_DIRECT_DISPATCH 0x1000 65 66 #define NAPI_TRACE(_n) if (debug_napi & DNAPI_TRACE) \ 67 printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__, \ 68 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 69 (int)(_n)->state, LKPI_NAPI_FLAGS) 70 #define NAPI_TRACE2D(_n, _d) if (debug_napi & DNAPI_TRACE) \ 71 printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \ 72 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 73 (int)(_n)->state, LKPI_NAPI_FLAGS, _d) 74 #define NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK) \ 75 printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d " \ 76 "rx_count %d\n", __func__, __LINE__, \ 77 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 78 (int)(_n)->state, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count) 79 #define NAPI_TODO() if (debug_napi & DNAPI_TODO) \ 80 printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks) 81 #define NAPI_IMPROVE() if (debug_napi & DNAPI_IMPROVE) \ 82 printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks) 83 84 #define NAPI_DIRECT_DISPATCH() ((debug_napi & DNAPI_DIRECT_DISPATCH) != 0) 85 #else 86 #define NAPI_TRACE(_n) do { } while(0) 87 #define NAPI_TRACE2D(_n, _d) do { } while(0) 88 #define NAPI_TRACE_TASK(_n, _p, _c) do { } while(0) 89 #define NAPI_TODO() do { } while(0) 90 #define NAPI_IMPROVE() do { } while(0) 91 92 #define NAPI_DIRECT_DISPATCH() (0) 93 #endif 94 95 /* -------------------------------------------------------------------------- */ 96 97 /* 98 * Check if a poll is running or can run and and if the latter 99 * make us as running. That way we ensure that only one poll 100 * can only ever run at the same time. Returns true if no poll 101 * was scheduled yet. 102 */ 103 bool 104 linuxkpi_napi_schedule_prep(struct napi_struct *napi) 105 { 106 unsigned long old, new; 107 108 NAPI_TRACE(napi); 109 110 /* Can can only update/return if all flags agree. */ 111 do { 112 old = READ_ONCE(napi->state); 113 114 /* If we are stopping, cannot run again. */ 115 if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) { 116 NAPI_TRACE(napi); 117 return (false); 118 } 119 120 new = old; 121 /* We were already scheduled. Need to try again? */ 122 if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0) 123 new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN); 124 new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED); 125 126 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 127 128 NAPI_TRACE(napi); 129 return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0); 130 } 131 132 static void 133 lkpi___napi_schedule_dd(struct napi_struct *napi) 134 { 135 unsigned long old, new; 136 int rc; 137 138 rc = 0; 139 again: 140 NAPI_TRACE2D(napi, rc); 141 if (napi->poll != NULL) 142 rc = napi->poll(napi, napi->budget); 143 napi->rx_count += rc; 144 145 /* Check if interrupts are still disabled, more work to do. */ 146 /* Bandaid for now. */ 147 if (rc >= napi->budget) 148 goto again; 149 150 /* Bandaid for now. */ 151 if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state)) 152 goto again; 153 154 do { 155 new = old = READ_ONCE(napi->state); 156 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new); 157 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new); 158 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 159 160 NAPI_TRACE2D(napi, rc); 161 } 162 163 void 164 linuxkpi___napi_schedule(struct napi_struct *napi) 165 { 166 int rc; 167 168 NAPI_TRACE(napi); 169 if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state)) { 170 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state); 171 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state); 172 NAPI_TRACE(napi); 173 return; 174 } 175 176 if (NAPI_DIRECT_DISPATCH()) { 177 lkpi___napi_schedule_dd(napi); 178 } else { 179 rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task); 180 NAPI_TRACE2D(napi, rc); 181 if (rc != 0) { 182 /* Should we assert EPIPE? */ 183 return; 184 } 185 } 186 } 187 188 void 189 linuxkpi_napi_schedule(struct napi_struct *napi) 190 { 191 192 NAPI_TRACE(napi); 193 194 /* 195 * iwlwifi calls this sequence instead of napi_schedule() 196 * to be able to test the prep result. 197 */ 198 if (napi_schedule_prep(napi)) 199 __napi_schedule(napi); 200 } 201 202 void 203 linuxkpi_napi_reschedule(struct napi_struct *napi) 204 { 205 206 NAPI_TRACE(napi); 207 208 /* Not sure what is different to napi_schedule yet. */ 209 if (napi_schedule_prep(napi)) 210 __napi_schedule(napi); 211 } 212 213 bool 214 linuxkpi_napi_complete_done(struct napi_struct *napi, int ret) 215 { 216 unsigned long old, new; 217 218 NAPI_TRACE(napi); 219 if (NAPI_DIRECT_DISPATCH()) 220 return (true); 221 222 do { 223 new = old = READ_ONCE(napi->state); 224 225 /* 226 * If we lost a race before, we need to re-schedule. 227 * Leave IS_SCHEDULED set essentially doing "_prep". 228 */ 229 if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) 230 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new); 231 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new); 232 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 233 234 NAPI_TRACE(napi); 235 236 /* Someone tried to schedule while poll was running. Re-sched. */ 237 if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) { 238 __napi_schedule(napi); 239 return (false); 240 } 241 242 return (true); 243 } 244 245 bool 246 linuxkpi_napi_complete(struct napi_struct *napi) 247 { 248 249 NAPI_TRACE(napi); 250 return (napi_complete_done(napi, 0)); 251 } 252 253 void 254 linuxkpi_napi_disable(struct napi_struct *napi) 255 { 256 NAPI_TRACE(napi); 257 set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state); 258 while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state)) 259 pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK); 260 clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state); 261 } 262 263 void 264 linuxkpi_napi_enable(struct napi_struct *napi) 265 { 266 267 NAPI_TRACE(napi); 268 KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state), 269 ("%s: enabling napi %p already scheduled\n", __func__, napi)); 270 mb(); 271 /* Let us be scheduled. */ 272 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state); 273 } 274 275 void 276 linuxkpi_napi_synchronize(struct napi_struct *napi) 277 { 278 NAPI_TRACE(napi); 279 #if defined(SMP) 280 /* Check & sleep while a napi is scheduled. */ 281 while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state)) 282 pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK); 283 #else 284 mb(); 285 #endif 286 } 287 288 /* -------------------------------------------------------------------------- */ 289 290 static void 291 lkpi_napi_task(void *ctx, int pending) 292 { 293 struct napi_struct *napi; 294 int count; 295 296 KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n", 297 __func__, ctx, pending)); 298 napi = ctx; 299 KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n", 300 __func__, napi)); 301 302 NAPI_TRACE_TASK(napi, pending, napi->budget); 303 count = napi->poll(napi, napi->budget); 304 napi->rx_count += count; 305 NAPI_TRACE_TASK(napi, pending, count); 306 307 /* 308 * We must not check against count < pending here. There are situations 309 * when a driver may "poll" and we may not have any work to do and that 310 * would make us re-schedule ourseless for ever. 311 */ 312 if (count >= napi->budget) { 313 /* 314 * Have to re-schedule ourselves. napi_complete() was not run 315 * in this case which means we are still SCHEDULED. 316 * In order to queue another task we have to directly call 317 * __napi_schedule() without _prep() in the way. 318 */ 319 __napi_schedule(napi); 320 } 321 } 322 323 /* -------------------------------------------------------------------------- */ 324 325 void 326 linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi, 327 int(*napi_poll)(struct napi_struct *, int)) 328 { 329 330 napi->dev = ndev; 331 napi->poll = napi_poll; 332 napi->budget = NAPI_POLL_WEIGHT; 333 334 INIT_LIST_HEAD(&napi->rx_list); 335 napi->rx_count = 0; 336 337 TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi); 338 339 NAPI_LOCK(ndev); 340 TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry); 341 NAPI_UNLOCK(ndev); 342 343 /* Anything else to do on the ndev? */ 344 clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 345 } 346 347 static void 348 lkpi_netif_napi_del_locked(struct napi_struct *napi) 349 { 350 struct net_device *ndev; 351 352 ndev = napi->dev; 353 NAPI_LOCK_ASSERT(ndev); 354 355 set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 356 TAILQ_REMOVE(&ndev->napi_head, napi, entry); 357 while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0) 358 taskqueue_drain(ndev->napi_tq, &napi->napi_task); 359 } 360 361 void 362 linuxkpi_netif_napi_del(struct napi_struct *napi) 363 { 364 struct net_device *ndev; 365 366 ndev = napi->dev; 367 NAPI_LOCK(ndev); 368 lkpi_netif_napi_del_locked(napi); 369 NAPI_UNLOCK(ndev); 370 } 371 372 /* -------------------------------------------------------------------------- */ 373 374 void 375 linuxkpi_init_dummy_netdev(struct net_device *ndev) 376 { 377 378 memset(ndev, 0, sizeof(*ndev)); 379 380 ndev->reg_state = NETREG_DUMMY; 381 NAPI_LOCK_INIT(ndev); 382 TAILQ_INIT(&ndev->napi_head); 383 /* Anything else? */ 384 385 ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK, 386 taskqueue_thread_enqueue, &ndev->napi_tq); 387 /* One thread for now. */ 388 (void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT, 389 "ndev napi taskq"); 390 } 391 392 struct net_device * 393 linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags, 394 void(*setup_func)(struct net_device *)) 395 { 396 struct net_device *ndev; 397 398 ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT); 399 if (ndev == NULL) 400 return (ndev); 401 402 /* Always first as it zeros! */ 403 linuxkpi_init_dummy_netdev(ndev); 404 405 strlcpy(ndev->name, name, sizeof(*ndev->name)); 406 407 /* This needs extending as we support more. */ 408 409 setup_func(ndev); 410 411 return (ndev); 412 } 413 414 void 415 linuxkpi_free_netdev(struct net_device *ndev) 416 { 417 struct napi_struct *napi, *temp; 418 419 NAPI_LOCK(ndev); 420 TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) { 421 lkpi_netif_napi_del_locked(napi); 422 } 423 NAPI_UNLOCK(ndev); 424 425 taskqueue_free(ndev->napi_tq); 426 ndev->napi_tq = NULL; 427 NAPI_LOCK_DESTROY(ndev); 428 429 /* This needs extending as we support more. */ 430 431 free(ndev, M_NETDEV); 432 } 433