// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>

struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_target_info *tinfo;
};

struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u64 session_id;
	u64 seq_num;
	bool done_stop;
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);

/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);

/* incremented on every opened seq_file */
static atomic64_t session_id;

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);

static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num++;
}

static void bpf_iter_dec_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num--;
}

static void bpf_iter_done_stop(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->done_stop = true;
}

/* bpf_seq_read, a customized and simpler version of seq_read() for the
 * bpf iterator. no_llseek is assumed for this file.
 * The differences from seq_read() are:
 *  . fixed buffer size (PAGE_SIZE)
 *  . assuming no_llseek
 *  . stop() may call the bpf program, so potential overflow is handled there
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0;
	void *p;

	mutex_lock(&seq->lock);

	if (!seq->buf) {
		seq->size = PAGE_SIZE;
		seq->buf = kmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* the object is skipped, decrease seq_num so the next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	while (1) {
		loff_t pos = seq->index;

		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
					    "did not update position index\n",
					    seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		err = seq->op->show(seq, p);
		if (err > 0) {
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}
	}
stop:
	offs = seq->count;
	/* the bpf program is called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}
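/* Sketch of one bpf_seq_read() pass over the target's seq_ops, assuming
 * objects are available and no buffer overflow occurs:
 *
 *	start() -> show()	first object; on skip (show() > 0) the
 *				seq_num is decremented so the next valid
 *				object reuses it
 *	next()  -> show()	seq_num is incremented per valid object
 *	...
 *	stop(NULL)		may invoke the bpf program one last time;
 *				done_stop records that the program has seen
 *				the final stop(), so it is not invoked
 *				again on subsequent stop() calls
 */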
static int iter_open(struct inode *inode, struct file *file)
{
	struct bpf_iter_link *link = inode->i_private;

	return prepare_seq_file(file, link);
}

static int iter_release(struct inode *inode, struct file *file)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;

	seq = file->private_data;
	if (!seq)
		return 0;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->tinfo->reg_info->fini_seq_private)
		iter_priv->tinfo->reg_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	/* seq_release_private() kfree()s seq->private, so point it back
	 * at the enclosing allocation.
	 */
	seq->private = iter_priv;

	return seq_release_private(inode, file);
}

const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare the target reg_info as
 * a const static variable and pass it as an argument to
 * bpf_iter_reg_target().
 */
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
	list_add(&tinfo->list, &targets);
	mutex_unlock(&targets_mutex);

	return 0;
}
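/* For illustration, a target would typically register itself like this
 * (a sketch; the "foo" names below are assumptions, not part of this file):
 *
 *	static const struct seq_operations foo_seq_ops = { ... };
 *
 *	static const struct bpf_iter_reg foo_reg_info = {
 *		.target			= "foo",
 *		.seq_ops		= &foo_seq_ops,
 *		.init_seq_private	= foo_init_seq_private,
 *		.fini_seq_private	= foo_fini_seq_private,
 *		.seq_priv_size		= sizeof(struct foo_iter_seq_info),
 *	};
 *
 *	static int __init foo_iter_init(void)
 *	{
 *		return bpf_iter_reg_target(&foo_reg_info);
 *	}
 */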
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	WARN_ON(!found);
}

static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}

bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	struct bpf_iter_target_info *tinfo;
	int prefix_len = strlen(prefix);
	bool supported = false;

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
			supported = true;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
			cache_btf_id(tinfo, prog);
			supported = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (supported) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return supported;
}
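/* A note on the matching above: the program's attach function name is
 * expected to be BPF_ITER_FUNC_PREFIX followed by the registered target
 * name. For example (assuming a target registered as "foo" and a prefix
 * of "bpf_iter_"), a program attaching to "bpf_iter_foo" matches the
 * "foo" target, and its btf_id is cached to short-circuit later lookups.
 */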
static void bpf_iter_link_release(struct bpf_link *link)
{
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	kfree(iter_link);
}

static int bpf_iter_link_replace(struct bpf_link *link,
				 struct bpf_prog *new_prog,
				 struct bpf_prog *old_prog)
{
	int ret = 0;

	mutex_lock(&link_mutex);
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (link->prog->type != new_prog->type ||
	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&link_mutex);
	return ret;
}

static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
};

bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}

int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_iter_target_info *tinfo;
	struct bpf_iter_link *link;
	bool existed = false;
	u32 prog_btf_id;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog_btf_id) {
			existed = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);
	if (!existed)
		return -ENOENT;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
	link->tinfo = tinfo;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	return bpf_link_settle(&link_primer);
}

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  struct bpf_prog *prog)
{
	priv_data->tinfo = tinfo;
	priv_data->prog = prog;
	priv_data->session_id = atomic64_inc_return(&session_id);
	priv_data->seq_num = 0;
	priv_data->done_stop = false;
}

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   tinfo->reg_info->seq_priv_size;
	priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (tinfo->reg_info->init_seq_private) {
		err = tinfo->reg_info->init_seq_private(priv_data->target_private);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, prog);
	seq = file->private_data;
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}

int bpf_iter_new_fd(struct bpf_link *link)
{
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	err = prepare_seq_file(file,
			       container_of(link, struct bpf_iter_link, link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}
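/* The expected userspace flow, sketched (syscall wrappers and attr fields
 * are abbreviated, and the read loop is an assumption about typical usage,
 * not part of this file):
 *
 *	link_fd = bpf(BPF_LINK_CREATE, &attr, ...); // attach the iter prog
 *	iter_fd = bpf(BPF_ITER_CREATE, &attr, ...); // ends up in bpf_iter_new_fd()
 *	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
 *		;	// consume seq_file output produced by the bpf program
 *	close(iter_fd);
 *	close(link_fd);
 */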
struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;
	void *seq_priv;

	seq = meta->seq;
	if (seq->file->f_op != &bpf_iter_fops)
		return NULL;

	seq_priv = seq->private;
	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
				 target_private);

	if (in_stop && iter_priv->done_stop)
		return NULL;

	meta->session_id = iter_priv->session_id;
	meta->seq_num = iter_priv->seq_num;

	return iter_priv->prog;
}

int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
	int ret;

	rcu_read_lock();
	migrate_disable();
	ret = BPF_PROG_RUN(prog, ctx);
	migrate_enable();
	rcu_read_unlock();

	/* The bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The return value of bpf_iter_run_prog() becomes the
	 * seq_ops->show() return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}
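/* A target's seq_ops callbacks are expected to pair the two helpers above.
 * A minimal sketch (the "foo" context struct and field names are
 * assumptions for illustration):
 *
 *	static int __foo_seq_show(struct seq_file *seq, void *v, bool in_stop)
 *	{
 *		struct bpf_iter_meta meta;
 *		struct bpf_prog *prog;
 *		struct bpf_iter__foo ctx;
 *
 *		meta.seq = seq;
 *		prog = bpf_iter_get_info(&meta, in_stop);
 *		if (!prog)
 *			return 0;
 *
 *		ctx.meta = &meta;
 *		ctx.foo = v;
 *		return bpf_iter_run_prog(prog, &ctx);
 *	}
 *
 * show() calls it with in_stop == false; stop() calls it once more with
 * v == NULL and in_stop == true so the program can emit a footer, which
 * done_stop then suppresses on subsequent stop() calls.
 */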