/*-
 * Copyright (c) 2021 The FreeBSD Foundation
 * Copyright (c) 2022 Bjoern A. Zeeb
 *
 * This software was developed by Björn Zeeb under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/netdevice.h>

MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat");

#define	NAPI_LOCK_INIT(_ndev)		\
	mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF)
#define	NAPI_LOCK_DESTROY(_ndev)	mtx_destroy(&(_ndev)->napi_mtx)
#define	NAPI_LOCK_ASSERT(_ndev)		mtx_assert(&(_ndev)->napi_mtx, MA_OWNED)
#define	NAPI_LOCK(_ndev)		mtx_lock(&(_ndev)->napi_mtx)
#define	NAPI_UNLOCK(_ndev)		mtx_unlock(&(_ndev)->napi_mtx)

/* -------------------------------------------------------------------------- */

#define	LKPI_NAPI_FLAGS \
	"\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN"

/* #define	NAPI_DEBUG */
#ifdef NAPI_DEBUG
static int debug_napi;
SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN,
    &debug_napi, 0, "NAPI debug level");

#define	DNAPI_TODO		0x01
#define	DNAPI_IMPROVE		0x02
#define	DNAPI_TRACE		0x10
#define	DNAPI_TRACE_TASK	0x20
#define	DNAPI_DIRECT_DISPATCH	0x1000

#define	NAPI_TRACE(_n)		if (debug_napi & DNAPI_TRACE)		\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__,	\
	(unsigned int)ticks, _n, (uintmax_t)(_n)->state,		\
	(int)(_n)->state, LKPI_NAPI_FLAGS)
#define	NAPI_TRACE2D(_n, _d)	if (debug_napi & DNAPI_TRACE)		\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \
	(unsigned int)ticks, _n, (uintmax_t)(_n)->state,		\
	(int)(_n)->state, LKPI_NAPI_FLAGS, _d)
#define	NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK)	\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d "	\
	"rx_count %d\n", __func__, __LINE__,				\
	(unsigned int)ticks, _n, (uintmax_t)(_n)->state,		\
	(int)(_n)->state, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count)
#define	NAPI_TODO()		if (debug_napi & DNAPI_TODO)		\
    printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks)
#define	NAPI_IMPROVE()		if (debug_napi & DNAPI_IMPROVE)		\
    printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks)

#define	NAPI_DIRECT_DISPATCH()	((debug_napi & DNAPI_DIRECT_DISPATCH) != 0)
#else
#define	NAPI_TRACE(_n)			do { } while (0)
#define	NAPI_TRACE2D(_n, _d)		do { } while (0)
#define	NAPI_TRACE_TASK(_n, _p, _c)	do { } while (0)
#define	NAPI_TODO()			do { } while (0)
#define	NAPI_IMPROVE()			do { } while (0)

#define	NAPI_DIRECT_DISPATCH()		(0)
#endif

/* -------------------------------------------------------------------------- */
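
/*
 * Overview of the napi->state bits (see LKPI_NAPI_FLAGS above):
 * DISABLE_PENDING means napi_disable() is waiting and no new poll may be
 * scheduled; IS_SCHEDULED means a poll is scheduled or running, and doubles
 * as the "busy" marker that napi_disable()/napi_synchronize() wait on;
 * LOST_RACE_TRY_AGAIN records a schedule attempt that arrived while a poll
 * was already scheduled, so napi_complete_done() re-schedules instead of
 * clearing IS_SCHEDULED.  SHUTDOWN (set by netif_napi_del()) turns
 * __napi_schedule() into a no-op.  All transitions are done lockless via
 * compare-and-set loops.
 */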

/*
 * Check if a poll is running or can run and, if the latter,
 * mark us as running.  That way we ensure that only one poll
 * can ever run at the same time.  Returns true if no poll
 * was scheduled yet.
 */
bool
linuxkpi_napi_schedule_prep(struct napi_struct *napi)
{
	unsigned long old, new;

	NAPI_TRACE(napi);

	/* We can only update/return if all flags agree. */
	do {
		old = READ_ONCE(napi->state);

		/* If we are stopping, cannot run again. */
		if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) {
			NAPI_TRACE(napi);
			return (false);
		}

		new = old;
		/* We were already scheduled.  Need to try again? */
		if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0)
			new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN);
		new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED);

	} while (atomic_cmpset_acq_long(&napi->state, old, new) == 0);

	NAPI_TRACE(napi);
	return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0);
}

static void
lkpi___napi_schedule_dd(struct napi_struct *napi)
{
	unsigned long old, new;
	int rc;

	rc = 0;
again:
	NAPI_TRACE2D(napi, rc);
	if (napi->poll != NULL)
		rc = napi->poll(napi, napi->budget);
	napi->rx_count += rc;

	/* Check if interrupts are still disabled, more work to do. */
	/* Bandaid for now. */
	if (rc >= napi->budget)
		goto again;

	/* Bandaid for now. */
	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state))
		goto again;

	do {
		new = old = READ_ONCE(napi->state);
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
	} while (atomic_cmpset_acq_long(&napi->state, old, new) == 0);

	NAPI_TRACE2D(napi, rc);
}

void
linuxkpi___napi_schedule(struct napi_struct *napi)
{
	int rc;

	NAPI_TRACE(napi);
	if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state)) {
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state);
		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state);
		NAPI_TRACE(napi);
		return;
	}

	if (NAPI_DIRECT_DISPATCH()) {
		lkpi___napi_schedule_dd(napi);
	} else {
		rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task);
		NAPI_TRACE2D(napi, rc);
		if (rc != 0) {
			/* Should we assert EPIPE? */
			return;
		}
	}
}

bool
linuxkpi_napi_schedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/*
	 * iwlwifi calls this sequence instead of napi_schedule()
	 * to be able to test the prep result.
	 */
	if (napi_schedule_prep(napi)) {
		__napi_schedule(napi);
		return (true);
	}

	return (false);
}

void
linuxkpi_napi_reschedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/* Not sure yet how this differs from napi_schedule(). */
	if (napi_schedule_prep(napi))
		__napi_schedule(napi);
}
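
/*
 * Illustrative sketch only (hypothetical driver code, not part of this
 * file's KPI): a typical consumer schedules a poll from its interrupt
 * handler and completes it from the poll callback once it has done less
 * work than its budget:
 *
 *	static int
 *	drv_poll(struct napi_struct *napi, int budget)
 *	{
 *		int done;
 *
 *		done = drv_rx_process(napi, budget);	(hypothetical helper)
 *		if (done < budget)
 *			napi_complete_done(napi, done);
 *		return (done);
 *	}
 *
 * and in the interrupt handler:
 *
 *	if (napi_schedule_prep(&sc->napi))
 *		__napi_schedule(&sc->napi);
 *
 * Returning the full budget without calling napi_complete_done() leaves
 * IS_SCHEDULED set, and lkpi_napi_task() below re-queues the poll.
 */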

bool
linuxkpi_napi_complete_done(struct napi_struct *napi, int ret)
{
	unsigned long old, new;

	NAPI_TRACE(napi);
	if (NAPI_DIRECT_DISPATCH())
		return (true);

	do {
		new = old = READ_ONCE(napi->state);

		/*
		 * If we lost a race before, we need to re-schedule.
		 * Leave IS_SCHEDULED set, essentially doing "_prep".
		 */
		if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old))
			clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
	} while (atomic_cmpset_acq_long(&napi->state, old, new) == 0);

	NAPI_TRACE(napi);

	/* Someone tried to schedule while poll was running.  Re-schedule. */
	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) {
		__napi_schedule(napi);
		return (false);
	}

	return (true);
}

bool
linuxkpi_napi_complete(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
	return (napi_complete_done(napi, 0));
}

void
linuxkpi_napi_disable(struct napi_struct *napi)
{
	NAPI_TRACE(napi);
	set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state);
	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state))
		pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK);
	clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state);
}

void
linuxkpi_napi_enable(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
	KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state),
	    ("%s: enabling napi %p already scheduled\n", __func__, napi));
	mb();
	/* Let us be scheduled. */
	clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state);
}

void
linuxkpi_napi_synchronize(struct napi_struct *napi)
{
	NAPI_TRACE(napi);
#if defined(SMP)
	/* Check & sleep while a napi is scheduled. */
	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state))
		pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK);
#else
	mb();
#endif
}

/* -------------------------------------------------------------------------- */

static void
lkpi_napi_task(void *ctx, int pending)
{
	struct napi_struct *napi;
	int count;

	KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n",
	    __func__, ctx, pending));
	napi = ctx;
	KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n",
	    __func__, napi));

	NAPI_TRACE_TASK(napi, pending, napi->budget);
	count = napi->poll(napi, napi->budget);
	napi->rx_count += count;
	NAPI_TRACE_TASK(napi, pending, count);

	/*
	 * We must not check against count < pending here.  There are
	 * situations when a driver may "poll" while we have no work to do,
	 * and that would make us re-schedule ourselves forever.
	 */
	if (count >= napi->budget) {
		/*
		 * Have to re-schedule ourselves.  napi_complete() was not
		 * run in this case, which means we are still SCHEDULED.
		 * In order to queue another task we have to directly call
		 * __napi_schedule() without _prep() in the way.
		 */
		__napi_schedule(napi);
	}
}

/* -------------------------------------------------------------------------- */
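
/*
 * Note on dispatch: unlike Linux, where NAPI polls run from softirq
 * context, this compat code runs napi->poll() from the per-net_device
 * taskqueue (napi_tq, created with a single thread in
 * linuxkpi_init_dummy_netdev() below), unless the NAPI_DEBUG direct
 * dispatch mode runs it synchronously from __napi_schedule().
 */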

void
linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi,
    int (*napi_poll)(struct napi_struct *, int))
{

	napi->dev = ndev;
	napi->poll = napi_poll;
	napi->budget = NAPI_POLL_WEIGHT;

	INIT_LIST_HEAD(&napi->rx_list);
	napi->rx_count = 0;

	TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi);

	NAPI_LOCK(ndev);
	TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry);
	NAPI_UNLOCK(ndev);

	/* Anything else to do on the ndev? */
	clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state);
}

static void
lkpi_netif_napi_del_locked(struct napi_struct *napi)
{
	struct net_device *ndev;

	ndev = napi->dev;
	NAPI_LOCK_ASSERT(ndev);

	set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state);
	TAILQ_REMOVE(&ndev->napi_head, napi, entry);
	while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0)
		taskqueue_drain(ndev->napi_tq, &napi->napi_task);
}

void
linuxkpi_netif_napi_del(struct napi_struct *napi)
{
	struct net_device *ndev;

	ndev = napi->dev;
	NAPI_LOCK(ndev);
	lkpi_netif_napi_del_locked(napi);
	NAPI_UNLOCK(ndev);
}

/* -------------------------------------------------------------------------- */

void
linuxkpi_init_dummy_netdev(struct net_device *ndev)
{

	memset(ndev, 0, sizeof(*ndev));

	ndev->reg_state = NETREG_DUMMY;
	NAPI_LOCK_INIT(ndev);
	TAILQ_INIT(&ndev->napi_head);
	/* Anything else? */

	ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK,
	    taskqueue_thread_enqueue, &ndev->napi_tq);
	/* One thread for now. */
	(void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT,
	    "ndev napi taskq");
}

struct net_device *
linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags,
    void (*setup_func)(struct net_device *))
{
	struct net_device *ndev;

	ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT);
	if (ndev == NULL)
		return (ndev);

	/* Always do this first as it zeroes the entire ndev! */
	linuxkpi_init_dummy_netdev(ndev);

	strlcpy(ndev->name, name, sizeof(ndev->name));

	/* This needs extending as we support more. */

	setup_func(ndev);

	return (ndev);
}

void
linuxkpi_free_netdev(struct net_device *ndev)
{
	struct napi_struct *napi, *temp;

	NAPI_LOCK(ndev);
	TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) {
		lkpi_netif_napi_del_locked(napi);
	}
	NAPI_UNLOCK(ndev);

	taskqueue_free(ndev->napi_tq);
	ndev->napi_tq = NULL;
	NAPI_LOCK_DESTROY(ndev);

	/* This needs extending as we support more. */

	free(ndev, M_NETDEV);
}
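
/*
 * Illustrative sketch only (hypothetical driver code, not part of this
 * file's KPI): allocation and teardown are expected to pair up roughly
 * as follows, drv_setup and drv_poll being hypothetical driver callbacks:
 *
 *	ndev = alloc_netdev(sizeof(struct drv_priv), "drv", 0, drv_setup);
 *	if (ndev == NULL)
 *		return (-ENOMEM);
 *	netif_napi_add(ndev, &priv->napi, drv_poll);
 *	...
 *	netif_napi_del(&priv->napi);
 *	free_netdev(ndev);
 */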