// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT	(60 * SEC_CONVERSION)

struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};

static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		e->timeout = jiffies + NAPI_TIMEOUT;
		return e;
	}

	return NULL;
}

static inline ktime_t net_to_ktime(unsigned long t)
{
	/* napi approximating usecs, reverse busy_loop_current_time */
	return ns_to_ktime(t << 10);
}

void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		e->timeout = jiffies + NAPI_TIMEOUT;
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	if (bp) {
		ktime_t end_time = ktime_add(start_time, bp);
		ktime_t now = net_to_ktime(busy_loop_current_time());

		return ktime_after(now, end_time);
	}

	return true;
}

static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
				      iowq->napi_busy_poll_dt))
		return true;

	return false;
}

static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

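		/*
		 * e->timeout is only refreshed when __io_napi_add() sees the
		 * matching socket again; if it has lapsed by now, flag the
		 * entry stale so the caller can prune it.
		 */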
		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}

/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;

	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (ctx->flags & IORING_SETUP_IOPOLL)
		return -EINVAL;
	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}

/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
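 *
 * Return: 0 on success, -EFAULT if the current settings cannot be copied to
 * user space.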
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}

/*
 * __io_napi_adjust_timeout() - adjust busy loop timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @to_wait: requested wait timeout in ktime, or zero if none
 *
 * Adjust the busy loop timeout according to the wait timeout and the busy
 * poll timeout. If the NAPI busy poll timeout is bigger than the wait
 * timeout, clamp it to the wait timeout.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      ktime_t to_wait)
{
	ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);

	if (to_wait)
		poll_dt = min(poll_dt, to_wait);

	iowq->napi_busy_poll_dt = poll_dt;
}

/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop over the registered napi entries.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);

	if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled)
		io_napi_blocking_busy_loop(ctx, iowq);
}

/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Execute the napi busy poll loop on behalf of the sqpoll thread.
 *
 * Return: 1 if the busy loop was run, 0 otherwise.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_dt))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	rcu_read_lock();
	is_stale = __io_napi_do_busy_loop(ctx, NULL);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif /* CONFIG_NET_RX_BUSY_POLL */
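
/*
 * Usage sketch (userspace, not part of this file): how an application would
 * exercise io_register_napi()/io_unregister_napi() above through the
 * IORING_REGISTER_NAPI opcode. This assumes a liburing build that provides
 * io_uring_register_napi()/io_uring_unregister_napi(); the field values are
 * illustrative only.
 *
 *	#include <liburing.h>
 *
 *	static int enable_napi_busy_poll(struct io_uring *ring)
 *	{
 *		struct io_uring_napi napi = {
 *			.busy_poll_to = 100,	// busy poll up to 100 usec
 *			.prefer_busy_poll = 1,	// prefer polling over irqs
 *		};
 *
 *		// Fails with -EINVAL on IORING_SETUP_IOPOLL rings, matching
 *		// the check in io_register_napi().
 *		return io_uring_register_napi(ring, &napi);
 *	}
 *
 *	static int disable_napi_busy_poll(struct io_uring *ring)
 *	{
 *		struct io_uring_napi curr;
 *
 *		// The settings that were in effect are copied back into curr.
 *		return io_uring_unregister_napi(ring, &curr);
 *	}
 */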