// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)

struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};

static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		return e;
	}

	return NULL;
}

static inline ktime_t net_to_ktime(unsigned long t)
{
	/* napi approximating usecs, reverse busy_loop_current_time */
	return ns_to_ktime(t << 10);
}

void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		e->timeout = jiffies + NAPI_TIMEOUT;
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	/* Recheck under the lock in case another task added the same id. */
	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	/* napi_list is walked under RCU, so insertion needs the RCU list helper. */
	list_add_tail_rcu(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	if (bp) {
		ktime_t end_time = ktime_add(start_time, bp);
		ktime_t now = net_to_ktime(busy_loop_current_time());

		return ktime_after(now, end_time);
	}

	return true;
}

static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
				      iowq->napi_busy_poll_dt))
		return true;

	return false;
}

static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}

/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;

	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (ctx->flags & IORING_SETUP_IOPOLL)
		return -EINVAL;
	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}

/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}

/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop and merge the spliced off list.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	if (ctx->flags & IORING_SETUP_SQPOLL)
		return;

	iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
	if (iowq->timeout != KTIME_MAX) {
		ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));

		iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
	}

	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
	io_napi_blocking_busy_loop(ctx, iowq);
}

/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Splice off the napi list and execute the napi busy poll loop.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_dt))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	rcu_read_lock();
	is_stale = __io_napi_do_busy_loop(ctx, NULL);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif
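
/*
 * Usage sketch (illustrative only, not part of this file): from userspace,
 * NAPI busy polling is enabled per ring through the registration interface
 * handled by io_register_napi() above. The helper name io_uring_register_napi()
 * is an assumption about the userspace library; the struct io_uring_napi
 * fields and the pad/resv checks match what io_register_napi() validates.
 *
 *	struct io_uring_napi napi = {
 *		.busy_poll_to = 100,		// busy poll timeout in usecs
 *		.prefer_busy_poll = 1,		// prefer polling over interrupts
 *	};
 *
 *	// pad[] and resv must stay zero, otherwise -EINVAL is returned
 *	io_uring_register_napi(&ring, &napi);
 */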