// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanup of stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)

struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};

static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		e->timeout = jiffies + NAPI_TIMEOUT;
		return e;
	}

	return NULL;
}

void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		/* io_napi_hash_find() already refreshed the timeout */
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	/*
	 * Recheck under the lock: another task may have added the same
	 * napi id since the lockless lookup above.
	 */
	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
					     unsigned long bp_usec)
{
	if (bp_usec) {
		unsigned long end_time = start_time + bp_usec;
		unsigned long now = busy_loop_current_time();

		return time_after(now, end_time);
	}

	return true;
}

static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
		return true;

	return false;
}

static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) &&
		 !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}
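/*
 * Call flow sketch for a blocking cqring wait (for orientation only; the
 * callers named here live in io_uring.c, not in this file):
 *
 *	io_cqring_wait()
 *	    io_napi_adjust_timeout()	- cap busy poll time by the wait timeout
 *	    io_napi_busy_loop()		- busy poll the tracked napi ids
 *	    (then sleep for whatever wait time remains)
 */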
/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context. The previous settings are copied
 * back to the user-supplied structure.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to		= ctx->napi_busy_poll_to,
		.prefer_busy_poll	= ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	/* return the old settings to the caller */
	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}

/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified, copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to		= ctx->napi_busy_poll_to,
		.prefer_busy_poll	= ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}

/*
 * __io_napi_adjust_timeout() - adjust busy loop timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @ts: pointer to timespec or NULL
 *
 * Adjust the busy loop timeout according to the timespec and the busy poll
 * timeout. The busy poll time never exceeds the requested wait time.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      struct timespec64 *ts)
{
	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);

	if (ts) {
		struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);

		if (timespec64_compare(ts, &poll_to_ts) > 0) {
			*ts = timespec64_sub(*ts, poll_to_ts);
		} else {
			u64 to = timespec64_to_ns(ts);

			do_div(to, 1000);
			/*
			 * The requested wait time is shorter than the busy
			 * poll timeout: cap the busy poll time to it and
			 * leave no residual sleep time.
			 */
			poll_to = to;
			ts->tv_sec = 0;
			ts->tv_nsec = 0;
		}
	}

	iowq->napi_busy_poll_to = poll_to;
}
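/*
 * Worked example for the adjustment above (illustrative values): with a
 * busy poll timeout of 100 usec and a requested wait of 1 msec, the task
 * busy polls for up to 100 usec and can then sleep for the remaining
 * 900 usec. With a requested wait of only 50 usec, the busy poll time is
 * capped at 50 usec and no sleep time remains.
 */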
/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop on the napi ids tracked in the context.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);

	if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled)
		io_napi_blocking_busy_loop(ctx, iowq);
}

/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Execute the napi busy poll loop for the sqpoll thread.
 *
 * Return: 1 if napi busy polling was done, 0 otherwise.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_to))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	rcu_read_lock();
	is_stale = __io_napi_do_busy_loop(ctx, NULL);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif
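/*
 * Userspace usage sketch (assumes liburing 2.6+, which wraps the
 * IORING_REGISTER_NAPI/IORING_UNREGISTER_NAPI opcodes handled above):
 *
 *	struct io_uring_napi napi = {
 *		.busy_poll_to = 100,		// busy poll for up to 100 usec
 *		.prefer_busy_poll = 1,
 *	};
 *
 *	io_uring_register_napi(&ring, &napi);	// napi now holds the old settings
 *	// ... submit and wait; waits now busy poll before sleeping ...
 *	io_uring_unregister_napi(&ring, &napi);
 */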