/*-
 * Copyright (c) 2014 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <machine/cpu.h>

#include "t4_mp_ring.h"

#if defined(__i386__)
#define atomic_cmpset_acq_64 atomic_cmpset_64
#define atomic_cmpset_rel_64 atomic_cmpset_64
#endif

union ring_state {
	struct {
		uint16_t pidx_head;
		uint16_t pidx_tail;
		uint16_t cidx;
		uint16_t flags;
	};
	uint64_t state;
};
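
/*
 * The entire ring state fits in a single 64-bit word so that it can be read
 * and updated atomically.  pidx_head marks the end of the region reserved by
 * producers, pidx_tail the end of the items that have actually been written,
 * and cidx the next item the consumer will pick up.  State transitions in
 * this file all follow the same compare-and-set pattern (sketch; the plain,
 * acquire, or release variant of the cmpset differs per call site):
 *
 *	union ring_state os, ns;
 *
 *	os.state = ns.state = r->state;
 *	ns.flags = BUSY;
 *	atomic_cmpset_64(&r->state, os.state, ns.state);
 */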

enum {
	IDLE = 0,	/* consumer ran to completion, nothing more to do. */
	BUSY,		/* consumer is running already, or will be shortly. */
	STALLED,	/* consumer stopped due to lack of resources. */
	ABDICATED,	/* consumer stopped even though there was work to be
			   done because it wants another thread to take over. */
};

static inline uint16_t
space_available(struct mp_ring *r, union ring_state s)
{
	uint16_t x = r->size - 1;

	if (s.cidx == s.pidx_head)
		return (x);
	else if (s.cidx > s.pidx_head)
		return (s.cidx - s.pidx_head - 1);
	else
		return (x - s.pidx_head + s.cidx);
}

static inline uint16_t
increment_idx(struct mp_ring *r, uint16_t idx, uint16_t n)
{
	int x = r->size - idx;

	MPASS(x > 0);
	return (x > n ? idx + n : n - x);
}
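
/*
 * Worked example for the two helpers above, assuming a ring of size 1024.
 * One slot is always left empty so that a full ring can be told apart from
 * an empty one, hence at most 1023 items fit:
 *
 *	space_available:	cidx == pidx_head		-> 1023
 *				cidx = 100, pidx_head = 90	-> 9
 *				cidx = 90, pidx_head = 100	-> 1013
 *
 *	increment_idx:		idx = 1020, n = 10		-> 6 (wrapped)
 */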

/* Consumer is about to update the ring's state to s */
static inline uint16_t
state_to_flags(union ring_state s, int abdicate)
{

	if (s.cidx == s.pidx_tail)
		return (IDLE);
	else if (abdicate && s.pidx_tail != s.pidx_head)
		return (ABDICATED);

	return (BUSY);
}

/*
 * Caller passes in a state, with a guarantee that there is work to do and that
 * all items up to the pidx_tail in the state are visible.
 */
static void
drain_ring(struct mp_ring *r, union ring_state os, uint16_t prev, int budget)
{
	union ring_state ns;
	int n, pending, total;
	uint16_t cidx = os.cidx;
	uint16_t pidx = os.pidx_tail;

	MPASS(os.flags == BUSY);
	MPASS(cidx != pidx);

	if (prev == IDLE)
		counter_u64_add(r->starts, 1);
	pending = 0;
	total = 0;

	while (cidx != pidx) {

		/* Items from cidx to pidx are available for consumption. */
		n = r->drain(r, cidx, pidx);
		if (n == 0) {
			critical_enter();
			do {
				os.state = ns.state = r->state;
				ns.cidx = cidx;
				ns.flags = STALLED;
			} while (atomic_cmpset_64(&r->state, os.state,
			    ns.state) == 0);
			critical_exit();
			if (prev != STALLED)
				counter_u64_add(r->stalls, 1);
			else if (total > 0) {
				counter_u64_add(r->restarts, 1);
				counter_u64_add(r->stalls, 1);
			}
			break;
		}
		cidx = increment_idx(r, cidx, n);
		pending += n;
		total += n;

		/*
		 * We update the cidx only if we've caught up with the pidx, the
		 * real cidx is getting too far ahead of the one visible to
		 * everyone else, or we have exceeded our budget.
		 */
		if (cidx != pidx && pending < 64 && total < budget)
			continue;
		critical_enter();
		do {
			os.state = ns.state = r->state;
			ns.cidx = cidx;
			ns.flags = state_to_flags(ns, total >= budget);
		} while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0);
		critical_exit();

		if (ns.flags == ABDICATED)
			counter_u64_add(r->abdications, 1);
		if (ns.flags != BUSY) {
			/* Wrong loop exit if we're going to stall. */
			MPASS(ns.flags != STALLED);
			if (prev == STALLED) {
				MPASS(total > 0);
				counter_u64_add(r->restarts, 1);
			}
			break;
		}

		/*
		 * The acquire style atomic above guarantees visibility of items
		 * associated with any pidx change that we notice here.
		 */
		pidx = ns.pidx_tail;
		pending = 0;
	}
}
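
/*
 * Contract for the driver-supplied callbacks used above: r->drain(r, cidx,
 * pidx) is expected to consume items starting at r->items[cidx] up to (but
 * not including) r->items[pidx], wrapping at r->size, and return the number
 * it consumed.  Returning 0 means no progress could be made; the ring is
 * then marked STALLED and can be restarted via mp_ring_check_drainage once
 * r->can_drain(r) reports that resources are available again.  A minimal,
 * purely hypothetical callback (names invented here; see t4_mp_ring.h for
 * the exact prototypes) might look roughly like:
 *
 *	static u_int
 *	example_drain(struct mp_ring *r, u_int cidx, u_int pidx)
 *	{
 *		struct example_txq *q = r->cookie;
 *		u_int n = 0;
 *
 *		while (cidx != pidx && example_txq_has_room(q)) {
 *			example_txq_send(q, r->items[cidx]);
 *			if (++cidx == r->size)
 *				cidx = 0;
 *			n++;
 *		}
 *		return (n);
 *	}
 */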

int
mp_ring_alloc(struct mp_ring **pr, int size, void *cookie, ring_drain_t drain,
    ring_can_drain_t can_drain, struct malloc_type *mt, int flags)
{
	struct mp_ring *r;

	/* All idx are 16b so size can be 65536 at most */
	if (pr == NULL || size < 2 || size > 65536 || drain == NULL ||
	    can_drain == NULL)
		return (EINVAL);
	*pr = NULL;
	flags &= M_NOWAIT | M_WAITOK;
	MPASS(flags != 0);

	r = malloc(__offsetof(struct mp_ring, items[size]), mt, flags | M_ZERO);
	if (r == NULL)
		return (ENOMEM);
	r->size = size;
	r->cookie = cookie;
	r->mt = mt;
	r->drain = drain;
	r->can_drain = can_drain;
	r->enqueues = counter_u64_alloc(flags);
	r->drops = counter_u64_alloc(flags);
	r->starts = counter_u64_alloc(flags);
	r->stalls = counter_u64_alloc(flags);
	r->restarts = counter_u64_alloc(flags);
	r->abdications = counter_u64_alloc(flags);
	if (r->enqueues == NULL || r->drops == NULL || r->starts == NULL ||
	    r->stalls == NULL || r->restarts == NULL ||
	    r->abdications == NULL) {
		mp_ring_free(r);
		return (ENOMEM);
	}

	*pr = r;
	return (0);
}

void
mp_ring_free(struct mp_ring *r)
{

	if (r == NULL)
		return;

	if (r->enqueues != NULL)
		counter_u64_free(r->enqueues);
	if (r->drops != NULL)
		counter_u64_free(r->drops);
	if (r->starts != NULL)
		counter_u64_free(r->starts);
	if (r->stalls != NULL)
		counter_u64_free(r->stalls);
	if (r->restarts != NULL)
		counter_u64_free(r->restarts);
	if (r->abdications != NULL)
		counter_u64_free(r->abdications);

	free(r, r->mt);
}
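
/*
 * Typical life cycle of a ring, as a sketch.  The callback names, malloc
 * type, ring size, and budget below are made-up example values:
 *
 *	struct mp_ring *r;
 *	int rc;
 *
 *	rc = mp_ring_alloc(&r, 1024, cookie, example_drain, example_can_drain,
 *	    M_DEVBUF, M_WAITOK);
 *	if (rc != 0)
 *		return (rc);
 *
 *	rc = mp_ring_enqueue(r, items, nitems, 64);	(producers)
 *	mp_ring_check_drainage(r, 64);			(periodically, to kick
 *							 a STALLED ring)
 *	mp_ring_free(r);
 */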

/*
 * Enqueue n items and maybe drain the ring for some time.
 *
 * Returns an errno.
 */
int
mp_ring_enqueue(struct mp_ring *r, void **items, int n, int budget)
{
	union ring_state os, ns;
	uint16_t pidx_start, pidx_stop;
	int i;

	MPASS(items != NULL);
	MPASS(n > 0);

	/*
	 * Reserve room for the new items.  Our reservation, if successful, is
	 * from 'pidx_start' to 'pidx_stop'.
	 */
	for (;;) {
		os.state = r->state;
		if (n >= space_available(r, os)) {
			counter_u64_add(r->drops, n);
			MPASS(os.flags != IDLE);
			if (os.flags == STALLED)
				mp_ring_check_drainage(r, 0);
			return (ENOBUFS);
		}
		ns.state = os.state;
		ns.pidx_head = increment_idx(r, os.pidx_head, n);
		critical_enter();
		if (atomic_cmpset_64(&r->state, os.state, ns.state))
			break;
		critical_exit();
		cpu_spinwait();
	}
	pidx_start = os.pidx_head;
	pidx_stop = ns.pidx_head;

	/*
	 * Wait for other producers who got in ahead of us to enqueue their
	 * items, one producer at a time.  It is our turn when the ring's
	 * pidx_tail reaches the beginning of our reservation (pidx_start).
	 */
	while (ns.pidx_tail != pidx_start) {
		cpu_spinwait();
		ns.state = r->state;
	}

	/* Now it is our turn to fill up the area we reserved earlier. */
	i = pidx_start;
	do {
		r->items[i] = *items++;
		if (__predict_false(++i == r->size))
			i = 0;
	} while (i != pidx_stop);

	/*
	 * Update the ring's pidx_tail.  The release style atomic guarantees
	 * that the items are visible to any thread that sees the updated pidx.
	 */
	do {
		os.state = ns.state = r->state;
		ns.pidx_tail = pidx_stop;
		ns.flags = BUSY;
	} while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
	critical_exit();
	counter_u64_add(r->enqueues, n);

	/*
	 * Turn into a consumer if some other thread isn't active as a consumer
	 * already.
	 */
	if (os.flags != BUSY)
		drain_ring(r, ns, os.flags, budget);

	return (0);
}
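
/*
 * A note on ordering in mp_ring_enqueue above: the release-style update of
 * pidx_tail pairs with the acquire-style reads of the state in drain_ring
 * and mp_ring_check_drainage, so a consumer that observes the new pidx_tail
 * is guaranteed to also see the items written into the ring before it.  The
 * producer stays inside a critical section from the moment it wins the
 * reservation until it publishes pidx_tail; being preempted in that window
 * would leave later producers spinning in the wait loop above.
 */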

void
mp_ring_check_drainage(struct mp_ring *r, int budget)
{
	union ring_state os, ns;

	os.state = r->state;
	if (os.flags != STALLED || os.pidx_head != os.pidx_tail ||
	    r->can_drain(r) == 0)
		return;

	MPASS(os.cidx != os.pidx_tail);	/* implied by STALLED */
	ns.state = os.state;
	ns.flags = BUSY;

	/*
	 * The acquire style atomic guarantees visibility of items associated
	 * with the pidx that we read here.
	 */
	if (!atomic_cmpset_acq_64(&r->state, os.state, ns.state))
		return;

	drain_ring(r, ns, os.flags, budget);
}

void
mp_ring_reset_stats(struct mp_ring *r)
{

	counter_u64_zero(r->enqueues);
	counter_u64_zero(r->drops);
	counter_u64_zero(r->starts);
	counter_u64_zero(r->stalls);
	counter_u64_zero(r->restarts);
	counter_u64_zero(r->abdications);
}

int
mp_ring_is_idle(struct mp_ring *r)
{
	union ring_state s;

	s.state = r->state;
	if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx &&
	    s.flags == IDLE)
		return (1);

	return (0);
}