1 /*- 2 * Copyright (c) 2021 The FreeBSD Foundation 3 * Copyright (c) 2022 Bjoern A. Zeeb 4 * 5 * This software was developed by Björn Zeeb under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/kernel.h> 33 #include <sys/sysctl.h> 34 35 #include <linux/bitops.h> 36 #include <linux/list.h> 37 #include <linux/netdevice.h> 38 39 MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat"); 40 41 #define NAPI_LOCK_INIT(_ndev) \ 42 mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF) 43 #define NAPI_LOCK_DESTROY(_ndev) mtx_destroy(&(_ndev)->napi_mtx) 44 #define NAPI_LOCK_ASSERT(_ndev) mtx_assert(&(_ndev)->napi_mtx, MA_OWNED) 45 #define NAPI_LOCK(_ndev) mtx_lock(&(_ndev)->napi_mtx) 46 #define NAPI_UNLOCK(_ndev) mtx_unlock(&(_ndev)->napi_mtx) 47 48 /* -------------------------------------------------------------------------- */ 49 50 #define LKPI_NAPI_FLAGS \ 51 "\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN" 52 53 /* #define NAPI_DEBUG */ 54 #ifdef NAPI_DEBUG 55 static int debug_napi; 56 SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN, 57 &debug_napi, 0, "NAPI debug level"); 58 59 #define DNAPI_TODO 0x01 60 #define DNAPI_IMPROVE 0x02 61 #define DNAPI_TRACE 0x10 62 #define DNAPI_TRACE_TASK 0x20 63 #define DNAPI_DIRECT_DISPATCH 0x1000 64 65 #define NAPI_TRACE(_n) if (debug_napi & DNAPI_TRACE) \ 66 printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__, \ 67 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 68 (int)(_n)->state, LKPI_NAPI_FLAGS) 69 #define NAPI_TRACE2D(_n, _d) if (debug_napi & DNAPI_TRACE) \ 70 printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \ 71 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 72 (int)(_n)->state, LKPI_NAPI_FLAGS, _d) 73 #define NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK) \ 74 printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d " \ 75 "rx_count %d\n", __func__, __LINE__, \ 76 (unsigned int)ticks, _n, (uintmax_t)(_n)->state, \ 77 (int)(_n)->state, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count) 78 #define NAPI_TODO() if (debug_napi & DNAPI_TODO) \ 79 printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks) 80 #define NAPI_IMPROVE() if (debug_napi & DNAPI_IMPROVE) \ 81 printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks) 82 83 #define NAPI_DIRECT_DISPATCH() ((debug_napi & DNAPI_DIRECT_DISPATCH) != 0) 84 #else 85 #define NAPI_TRACE(_n) do { } while(0) 86 #define NAPI_TRACE2D(_n, _d) do { } while(0) 87 #define NAPI_TRACE_TASK(_n, _p, _c) do { } while(0) 88 #define NAPI_TODO() do { } while(0) 89 #define NAPI_IMPROVE() do { } while(0) 90 91 #define NAPI_DIRECT_DISPATCH() (0) 92 #endif 93 94 /* -------------------------------------------------------------------------- */ 95 96 /* 97 * Check if a poll is running or can run and and if the latter 98 * make us as running. That way we ensure that only one poll 99 * can only ever run at the same time. Returns true if no poll 100 * was scheduled yet. 101 */ 102 bool 103 linuxkpi_napi_schedule_prep(struct napi_struct *napi) 104 { 105 unsigned long old, new; 106 107 NAPI_TRACE(napi); 108 109 /* Can can only update/return if all flags agree. */ 110 do { 111 old = READ_ONCE(napi->state); 112 113 /* If we are stopping, cannot run again. */ 114 if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) { 115 NAPI_TRACE(napi); 116 return (false); 117 } 118 119 new = old; 120 /* We were already scheduled. Need to try again? */ 121 if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0) 122 new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN); 123 new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED); 124 125 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 126 127 NAPI_TRACE(napi); 128 return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0); 129 } 130 131 static void 132 lkpi___napi_schedule_dd(struct napi_struct *napi) 133 { 134 unsigned long old, new; 135 int rc; 136 137 rc = 0; 138 again: 139 NAPI_TRACE2D(napi, rc); 140 if (napi->poll != NULL) 141 rc = napi->poll(napi, napi->budget); 142 napi->rx_count += rc; 143 144 /* Check if interrupts are still disabled, more work to do. */ 145 /* Bandaid for now. */ 146 if (rc >= napi->budget) 147 goto again; 148 149 /* Bandaid for now. */ 150 if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state)) 151 goto again; 152 153 do { 154 new = old = READ_ONCE(napi->state); 155 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new); 156 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new); 157 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 158 159 NAPI_TRACE2D(napi, rc); 160 } 161 162 void 163 linuxkpi___napi_schedule(struct napi_struct *napi) 164 { 165 int rc; 166 167 NAPI_TRACE(napi); 168 if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state)) { 169 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->state); 170 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state); 171 NAPI_TRACE(napi); 172 return; 173 } 174 175 if (NAPI_DIRECT_DISPATCH()) { 176 lkpi___napi_schedule_dd(napi); 177 } else { 178 rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task); 179 NAPI_TRACE2D(napi, rc); 180 if (rc != 0) { 181 /* Should we assert EPIPE? */ 182 return; 183 } 184 } 185 } 186 187 void 188 linuxkpi_napi_schedule(struct napi_struct *napi) 189 { 190 191 NAPI_TRACE(napi); 192 193 /* 194 * iwlwifi calls this sequence instead of napi_schedule() 195 * to be able to test the prep result. 196 */ 197 if (napi_schedule_prep(napi)) 198 __napi_schedule(napi); 199 } 200 201 void 202 linuxkpi_napi_reschedule(struct napi_struct *napi) 203 { 204 205 NAPI_TRACE(napi); 206 207 /* Not sure what is different to napi_schedule yet. */ 208 if (napi_schedule_prep(napi)) 209 __napi_schedule(napi); 210 } 211 212 bool 213 linuxkpi_napi_complete_done(struct napi_struct *napi, int ret) 214 { 215 unsigned long old, new; 216 217 NAPI_TRACE(napi); 218 if (NAPI_DIRECT_DISPATCH()) 219 return (true); 220 221 do { 222 new = old = READ_ONCE(napi->state); 223 224 /* 225 * If we lost a race before, we need to re-schedule. 226 * Leave IS_SCHEDULED set essentially doing "_prep". 227 */ 228 if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) 229 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new); 230 clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new); 231 } while (atomic_cmpset_acq_long(&napi->state, old, new) == 0); 232 233 NAPI_TRACE(napi); 234 235 /* Someone tried to schedule while poll was running. Re-sched. */ 236 if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) { 237 __napi_schedule(napi); 238 return (false); 239 } 240 241 return (true); 242 } 243 244 bool 245 linuxkpi_napi_complete(struct napi_struct *napi) 246 { 247 248 NAPI_TRACE(napi); 249 return (napi_complete_done(napi, 0)); 250 } 251 252 void 253 linuxkpi_napi_disable(struct napi_struct *napi) 254 { 255 NAPI_TRACE(napi); 256 set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state); 257 while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state)) 258 pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK); 259 clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->state); 260 } 261 262 void 263 linuxkpi_napi_enable(struct napi_struct *napi) 264 { 265 266 NAPI_TRACE(napi); 267 KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state), 268 ("%s: enabling napi %p already scheduled\n", __func__, napi)); 269 mb(); 270 /* Let us be scheduled. */ 271 clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state); 272 } 273 274 void 275 linuxkpi_napi_synchronize(struct napi_struct *napi) 276 { 277 NAPI_TRACE(napi); 278 #if defined(SMP) 279 /* Check & sleep while a napi is scheduled. */ 280 while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->state)) 281 pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK); 282 #else 283 mb(); 284 #endif 285 } 286 287 /* -------------------------------------------------------------------------- */ 288 289 static void 290 lkpi_napi_task(void *ctx, int pending) 291 { 292 struct napi_struct *napi; 293 int count; 294 295 KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n", 296 __func__, ctx, pending)); 297 napi = ctx; 298 KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n", 299 __func__, napi)); 300 301 NAPI_TRACE_TASK(napi, pending, napi->budget); 302 count = napi->poll(napi, napi->budget); 303 napi->rx_count += count; 304 NAPI_TRACE_TASK(napi, pending, count); 305 306 /* 307 * We must not check against count < pending here. There are situations 308 * when a driver may "poll" and we may not have any work to do and that 309 * would make us re-schedule ourseless for ever. 310 */ 311 if (count >= napi->budget) { 312 /* 313 * Have to re-schedule ourselves. napi_complete() was not run 314 * in this case which means we are still SCHEDULED. 315 * In order to queue another task we have to directly call 316 * __napi_schedule() without _prep() in the way. 317 */ 318 __napi_schedule(napi); 319 } 320 } 321 322 /* -------------------------------------------------------------------------- */ 323 324 void 325 linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi, 326 int(*napi_poll)(struct napi_struct *, int)) 327 { 328 329 napi->dev = ndev; 330 napi->poll = napi_poll; 331 napi->budget = NAPI_POLL_WEIGHT; 332 333 INIT_LIST_HEAD(&napi->rx_list); 334 napi->rx_count = 0; 335 336 TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi); 337 338 NAPI_LOCK(ndev); 339 TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry); 340 NAPI_UNLOCK(ndev); 341 342 /* Anything else to do on the ndev? */ 343 clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 344 } 345 346 static void 347 lkpi_netif_napi_del_locked(struct napi_struct *napi) 348 { 349 struct net_device *ndev; 350 351 ndev = napi->dev; 352 NAPI_LOCK_ASSERT(ndev); 353 354 set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->state); 355 TAILQ_REMOVE(&ndev->napi_head, napi, entry); 356 while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0) 357 taskqueue_drain(ndev->napi_tq, &napi->napi_task); 358 } 359 360 void 361 linuxkpi_netif_napi_del(struct napi_struct *napi) 362 { 363 struct net_device *ndev; 364 365 ndev = napi->dev; 366 NAPI_LOCK(ndev); 367 lkpi_netif_napi_del_locked(napi); 368 NAPI_UNLOCK(ndev); 369 } 370 371 /* -------------------------------------------------------------------------- */ 372 373 void 374 linuxkpi_init_dummy_netdev(struct net_device *ndev) 375 { 376 377 memset(ndev, 0, sizeof(*ndev)); 378 379 ndev->reg_state = NETREG_DUMMY; 380 NAPI_LOCK_INIT(ndev); 381 TAILQ_INIT(&ndev->napi_head); 382 /* Anything else? */ 383 384 ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK, 385 taskqueue_thread_enqueue, &ndev->napi_tq); 386 /* One thread for now. */ 387 (void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT, 388 "ndev napi taskq"); 389 } 390 391 struct net_device * 392 linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags, 393 void(*setup_func)(struct net_device *)) 394 { 395 struct net_device *ndev; 396 397 ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT); 398 if (ndev == NULL) 399 return (ndev); 400 401 /* Always first as it zeros! */ 402 linuxkpi_init_dummy_netdev(ndev); 403 404 strlcpy(ndev->name, name, sizeof(*ndev->name)); 405 406 /* This needs extending as we support more. */ 407 408 setup_func(ndev); 409 410 return (ndev); 411 } 412 413 void 414 linuxkpi_free_netdev(struct net_device *ndev) 415 { 416 struct napi_struct *napi, *temp; 417 418 NAPI_LOCK(ndev); 419 TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) { 420 lkpi_netif_napi_del_locked(napi); 421 } 422 NAPI_UNLOCK(ndev); 423 424 taskqueue_free(ndev->napi_tq); 425 ndev->napi_tq = NULL; 426 NAPI_LOCK_DESTROY(ndev); 427 428 /* This needs extending as we support more. */ 429 430 free(ndev, M_NETDEV); 431 } 432