/*-
 * Copyright (c) 2021 The FreeBSD Foundation
 *
 * This software was developed by Björn Zeeb under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <linux/bitops.h>
#include <linux/list.h>
#include <linux/netdevice.h>

MALLOC_DEFINE(M_NETDEV, "lkpindev", "Linux KPI netdevice compat");

#define	NAPI_LOCK_INIT(_ndev)		\
	mtx_init(&(_ndev)->napi_mtx, "napi_mtx", NULL, MTX_DEF)
#define	NAPI_LOCK_DESTROY(_ndev)	mtx_destroy(&(_ndev)->napi_mtx)
#define	NAPI_LOCK_ASSERT(_ndev)		mtx_assert(&(_ndev)->napi_mtx, MA_OWNED)
#define	NAPI_LOCK(_ndev)		mtx_lock(&(_ndev)->napi_mtx)
#define	NAPI_UNLOCK(_ndev)		mtx_unlock(&(_ndev)->napi_mtx)

/* -------------------------------------------------------------------------- */
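/*
 * NAPI state is tracked in napi->_flags with the bits defined below.
 * A sketch of the lifecycle as implemented in this file:
 * napi_schedule_prep() sets IS_SCHEDULED (or LOST_RACE_TRY_AGAIN if a
 * poll is already running), napi_complete_done() clears both again and
 * re-schedules if a race was lost, napi_disable() sets DISABLE_PENDING
 * to refuse new scheduling, and netif_napi_del() sets SHUTDOWN to keep
 * the task/poll from running at all.
 */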
/* Do not schedule new things while we are waiting to clear things. */
#define	LKPI_NAPI_FLAG_DISABLE_PENDING		0
/* To synchronise that only one poll is ever running. */
#define	LKPI_NAPI_FLAG_IS_SCHEDULED		1
/* If trying to schedule while poll is running.  Need to re-schedule. */
#define	LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN	2
/* When shutting down forcefully prevent anything from running task/poll. */
#define	LKPI_NAPI_FLAG_SHUTDOWN			3

#define	LKPI_NAPI_FLAGS \
	"\20\1DISABLE_PENDING\2IS_SCHEDULED\3LOST_RACE_TRY_AGAIN\4SHUTDOWN"

/* #define NAPI_DEBUG */
#ifdef NAPI_DEBUG
static int debug_napi;
SYSCTL_INT(_compat_linuxkpi, OID_AUTO, debug_napi, CTLFLAG_RWTUN,
    &debug_napi, 0, "NAPI debug level");

#define	DNAPI_TODO		0x01
#define	DNAPI_IMPROVE		0x02
#define	DNAPI_TRACE		0x10
#define	DNAPI_TRACE_TASK	0x20
#define	DNAPI_DIRECT_DISPATCH	0x1000

#define	NAPI_TRACE(_n)		if (debug_napi & DNAPI_TRACE)		\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b)\n", __func__, __LINE__,	\
	(unsigned int)ticks, _n, (uintmax_t)(_n)->_flags,		\
	(int)(_n)->_flags, LKPI_NAPI_FLAGS)
#define	NAPI_TRACE2D(_n, _d)	if (debug_napi & DNAPI_TRACE)		\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) %d\n", __func__, __LINE__, \
	(unsigned int)ticks, _n, (uintmax_t)(_n)->_flags,		\
	(int)(_n)->_flags, LKPI_NAPI_FLAGS, _d)
#define	NAPI_TRACE_TASK(_n, _p, _c) if (debug_napi & DNAPI_TRACE_TASK)	\
    printf("NAPI_TRACE %s:%d %u %p (%#jx %b) pending %d count %d "	\
	"rx_count %d\n", __func__, __LINE__,				\
	(unsigned int)ticks, _n, (uintmax_t)(_n)->_flags,		\
	(int)(_n)->_flags, LKPI_NAPI_FLAGS, _p, _c, (_n)->rx_count)
#define	NAPI_TODO()		if (debug_napi & DNAPI_TODO)		\
    printf("NAPI_TODO %s:%d %d\n", __func__, __LINE__, ticks)
#define	NAPI_IMPROVE()		if (debug_napi & DNAPI_IMPROVE)		\
    printf("NAPI_IMPROVE %s:%d %d\n", __func__, __LINE__, ticks)

#define	NAPI_DIRECT_DISPATCH()	((debug_napi & DNAPI_DIRECT_DISPATCH) != 0)
#else
#define	NAPI_TRACE(_n)			do { } while(0)
#define	NAPI_TRACE2D(_n, _d)		do { } while(0)
#define	NAPI_TRACE_TASK(_n, _p, _c)	do { } while(0)
#define	NAPI_TODO()			do { } while(0)
#define	NAPI_IMPROVE()			do { } while(0)

#define	NAPI_DIRECT_DISPATCH()	(0)
#endif

/* -------------------------------------------------------------------------- */

/*
 * Check if a poll is running or can run, and if the latter mark us as
 * running.  That way we ensure that only one poll can ever run at the
 * same time.  Returns true if no poll was scheduled yet.
 */
bool
linuxkpi_napi_schedule_prep(struct napi_struct *napi)
{
	unsigned long old, new;

	NAPI_TRACE(napi);

	/* We can only update/return if all flags agree. */
	do {
		old = READ_ONCE(napi->_flags);

		/* If we are stopping, cannot run again. */
		if ((old & BIT(LKPI_NAPI_FLAG_DISABLE_PENDING)) != 0) {
			NAPI_TRACE(napi);
			return (false);
		}

		new = old;
		/* We were already scheduled.  Need to try again? */
		if ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) != 0)
			new |= BIT(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN);
		new |= BIT(LKPI_NAPI_FLAG_IS_SCHEDULED);

	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);

	NAPI_TRACE(napi);
	return ((old & BIT(LKPI_NAPI_FLAG_IS_SCHEDULED)) == 0);
}

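/*
 * Direct dispatch (debug aid, see DNAPI_DIRECT_DISPATCH): run the poll
 * callback inline rather than deferring to the taskqueue, re-polling
 * while the budget was exhausted or a schedule attempt raced with us.
 */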
static void
lkpi___napi_schedule_dd(struct napi_struct *napi)
{
	unsigned long old, new;
	int rc;

	rc = 0;
again:
	NAPI_TRACE2D(napi, rc);
	if (napi->poll != NULL)
		rc = napi->poll(napi, napi->budget);
	napi->rx_count += rc;

	/* Check if interrupts are still disabled, more work to do. */
	/* Bandaid for now. */
	if (rc >= napi->budget)
		goto again;

	/* Bandaid for now. */
	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->_flags))
		goto again;

	do {
		new = old = READ_ONCE(napi->_flags);
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);

	NAPI_TRACE2D(napi, rc);
}

void
linuxkpi___napi_schedule(struct napi_struct *napi)
{
	int rc;

	NAPI_TRACE(napi);
	if (test_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags)) {
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &napi->_flags);
		clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags);
		NAPI_TRACE(napi);
		return;
	}

	if (NAPI_DIRECT_DISPATCH()) {
		lkpi___napi_schedule_dd(napi);
	} else {
		rc = taskqueue_enqueue(napi->dev->napi_tq, &napi->napi_task);
		NAPI_TRACE2D(napi, rc);
		if (rc != 0) {
			/* Should we assert EPIPE? */
			return;
		}
	}
}

void
linuxkpi_napi_schedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/*
	 * iwlwifi calls this sequence instead of napi_schedule()
	 * to be able to test the prep result.
	 */
	if (napi_schedule_prep(napi))
		__napi_schedule(napi);
}

void
linuxkpi_napi_reschedule(struct napi_struct *napi)
{

	NAPI_TRACE(napi);

	/* Not sure what is different to napi_schedule yet. */
	if (napi_schedule_prep(napi))
		__napi_schedule(napi);
}

bool
linuxkpi_napi_complete_done(struct napi_struct *napi, int ret)
{
	unsigned long old, new;

	NAPI_TRACE(napi);
	if (NAPI_DIRECT_DISPATCH())
		return (true);

	do {
		new = old = READ_ONCE(napi->_flags);

		/*
		 * If we lost a race before, we need to re-schedule.
		 * Leave IS_SCHEDULED set, essentially doing "_prep".
		 */
		if (!test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old))
			clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &new);
		clear_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &new);
	} while (atomic_cmpset_acq_long(&napi->_flags, old, new) == 0);

	NAPI_TRACE(napi);

	/* Someone tried to schedule while poll was running.  Re-schedule. */
	if (test_bit(LKPI_NAPI_FLAG_LOST_RACE_TRY_AGAIN, &old)) {
		__napi_schedule(napi);
		return (false);
	}

	return (true);
}

bool
linuxkpi_napi_complete(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
	return (napi_complete_done(napi, 0));
}

void
linuxkpi_napi_disable(struct napi_struct *napi)
{
	NAPI_TRACE(napi);
	set_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->_flags);
	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags))
		pause_sbt("napidslp", SBT_1MS, 0, C_HARDCLOCK);
	clear_bit(LKPI_NAPI_FLAG_DISABLE_PENDING, &napi->_flags);
}

void
linuxkpi_napi_enable(struct napi_struct *napi)
{

	NAPI_TRACE(napi);
	KASSERT(!test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags),
	    ("%s: enabling napi %p already scheduled\n", __func__, napi));
	mb();
	/* Let us be scheduled. */
	clear_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags);
}

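/*
 * Wait for a currently scheduled/running poll to finish; unlike
 * napi_disable() this does not keep new polls from being scheduled.
 */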
void
linuxkpi_napi_synchronize(struct napi_struct *napi)
{
	NAPI_TRACE(napi);
#if defined(SMP)
	/* Check & sleep while a napi is scheduled. */
	while (test_bit(LKPI_NAPI_FLAG_IS_SCHEDULED, &napi->_flags))
		pause_sbt("napisslp", SBT_1MS, 0, C_HARDCLOCK);
#else
	mb();
#endif
}

/* -------------------------------------------------------------------------- */

static void
lkpi_napi_task(void *ctx, int pending)
{
	struct napi_struct *napi;
	int count;

	KASSERT(ctx != NULL, ("%s: napi %p, pending %d\n",
	    __func__, ctx, pending));
	napi = ctx;
	KASSERT(napi->poll != NULL, ("%s: napi %p poll is NULL\n",
	    __func__, napi));

	NAPI_TRACE_TASK(napi, pending, napi->budget);
	count = napi->poll(napi, napi->budget);
	napi->rx_count += count;
	NAPI_TRACE_TASK(napi, pending, count);

	/*
	 * We must not check against count < pending here.  There are
	 * situations when a driver may "poll" and we may not have any work
	 * to do, and that would make us re-schedule ourselves forever.
	 */
	if (count >= napi->budget) {
		/*
		 * Have to re-schedule ourselves.  napi_complete() was not
		 * run in this case, which means we are still SCHEDULED.
		 * In order to queue another task we have to directly call
		 * __napi_schedule() without _prep() in the way.
		 */
		__napi_schedule(napi);
	}
}

/* -------------------------------------------------------------------------- */

void
linuxkpi_netif_napi_add(struct net_device *ndev, struct napi_struct *napi,
    int (*napi_poll)(struct napi_struct *, int), int budget)
{

	napi->dev = ndev;
	napi->poll = napi_poll;
	napi->budget = budget;

	INIT_LIST_HEAD(&napi->rx_list);
	napi->rx_count = 0;

	TASK_INIT(&napi->napi_task, 0, lkpi_napi_task, napi);

	NAPI_LOCK(ndev);
	TAILQ_INSERT_TAIL(&ndev->napi_head, napi, entry);
	NAPI_UNLOCK(ndev);

	/* Anything else to do on the ndev? */
	clear_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags);
}

static void
lkpi_netif_napi_del_locked(struct napi_struct *napi)
{
	struct net_device *ndev;

	ndev = napi->dev;
	NAPI_LOCK_ASSERT(ndev);

	set_bit(LKPI_NAPI_FLAG_SHUTDOWN, &napi->_flags);
	TAILQ_REMOVE(&ndev->napi_head, napi, entry);
	while (taskqueue_cancel(ndev->napi_tq, &napi->napi_task, NULL) != 0)
		taskqueue_drain(ndev->napi_tq, &napi->napi_task);
}

void
linuxkpi_netif_napi_del(struct napi_struct *napi)
{
	struct net_device *ndev;

	ndev = napi->dev;
	NAPI_LOCK(ndev);
	lkpi_netif_napi_del_locked(napi);
	NAPI_UNLOCK(ndev);
}

/* -------------------------------------------------------------------------- */

void
linuxkpi_init_dummy_netdev(struct net_device *ndev)
{

	memset(ndev, 0, sizeof(*ndev));

	ndev->reg_state = NETREG_DUMMY;
	NAPI_LOCK_INIT(ndev);
	TAILQ_INIT(&ndev->napi_head);
	/* Anything else? */

	ndev->napi_tq = taskqueue_create("tq_ndev_napi", M_WAITOK,
	    taskqueue_thread_enqueue, &ndev->napi_tq);
	/* One thread for now. */
	(void) taskqueue_start_threads(&ndev->napi_tq, 1, PWAIT,
	    "ndev napi taskq");
}

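/*
 * Allocate a net_device with `len' bytes of driver-private space behind
 * it.  A hypothetical caller (names purely illustrative):
 *
 *	ndev = linuxkpi_alloc_netdev(sizeof(struct drv_priv), "drv", 0,
 *	    drv_setup);
 *	if (ndev == NULL)
 *		return (-ENOMEM);
 */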
struct net_device *
linuxkpi_alloc_netdev(size_t len, const char *name, uint32_t flags,
    void (*setup_func)(struct net_device *))
{
	struct net_device *ndev;

	ndev = malloc(sizeof(*ndev) + len, M_NETDEV, M_NOWAIT);
	if (ndev == NULL)
		return (ndev);

	/* Always first as it zeros! */
	linuxkpi_init_dummy_netdev(ndev);

	strlcpy(ndev->name, name, sizeof(ndev->name));

	/* This needs extending as we support more. */

	setup_func(ndev);

	return (ndev);
}

void
linuxkpi_free_netdev(struct net_device *ndev)
{
	struct napi_struct *napi, *temp;

	NAPI_LOCK(ndev);
	TAILQ_FOREACH_SAFE(napi, &ndev->napi_head, entry, temp) {
		lkpi_netif_napi_del_locked(napi);
	}
	NAPI_UNLOCK(ndev);

	taskqueue_free(ndev->napi_tq);
	ndev->napi_tq = NULL;
	NAPI_LOCK_DESTROY(ndev);

	/* This needs extending as we support more. */

	free(ndev, M_NETDEV);
}