1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2020 Facebook */ 3 4 #include <linux/fs.h> 5 #include <linux/anon_inodes.h> 6 #include <linux/filter.h> 7 #include <linux/bpf.h> 8 9 struct bpf_iter_target_info { 10 struct list_head list; 11 const char *target; 12 const struct seq_operations *seq_ops; 13 bpf_iter_init_seq_priv_t init_seq_private; 14 bpf_iter_fini_seq_priv_t fini_seq_private; 15 u32 seq_priv_size; 16 u32 btf_id; /* cached value */ 17 }; 18 19 struct bpf_iter_link { 20 struct bpf_link link; 21 struct bpf_iter_target_info *tinfo; 22 }; 23 24 struct bpf_iter_priv_data { 25 struct bpf_iter_target_info *tinfo; 26 struct bpf_prog *prog; 27 u64 session_id; 28 u64 seq_num; 29 bool done_stop; 30 u8 target_private[] __aligned(8); 31 }; 32 33 static struct list_head targets = LIST_HEAD_INIT(targets); 34 static DEFINE_MUTEX(targets_mutex); 35 36 /* protect bpf_iter_link changes */ 37 static DEFINE_MUTEX(link_mutex); 38 39 /* incremented on every opened seq_file */ 40 static atomic64_t session_id; 41 42 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link); 43 44 static void bpf_iter_inc_seq_num(struct seq_file *seq) 45 { 46 struct bpf_iter_priv_data *iter_priv; 47 48 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 49 target_private); 50 iter_priv->seq_num++; 51 } 52 53 static void bpf_iter_dec_seq_num(struct seq_file *seq) 54 { 55 struct bpf_iter_priv_data *iter_priv; 56 57 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 58 target_private); 59 iter_priv->seq_num--; 60 } 61 62 static void bpf_iter_done_stop(struct seq_file *seq) 63 { 64 struct bpf_iter_priv_data *iter_priv; 65 66 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 67 target_private); 68 iter_priv->done_stop = true; 69 } 70 71 /* bpf_seq_read, a customized and simpler version for bpf iterator. 72 * no_llseek is assumed for this file. 73 * The following are differences from seq_read(): 74 * . fixed buffer size (PAGE_SIZE) 75 * . assuming no_llseek 76 * . stop() may call bpf program, handling potential overflow there 77 */ 78 static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, 79 loff_t *ppos) 80 { 81 struct seq_file *seq = file->private_data; 82 size_t n, offs, copied = 0; 83 int err = 0; 84 void *p; 85 86 mutex_lock(&seq->lock); 87 88 if (!seq->buf) { 89 seq->size = PAGE_SIZE; 90 seq->buf = kmalloc(seq->size, GFP_KERNEL); 91 if (!seq->buf) { 92 err = -ENOMEM; 93 goto done; 94 } 95 } 96 97 if (seq->count) { 98 n = min(seq->count, size); 99 err = copy_to_user(buf, seq->buf + seq->from, n); 100 if (err) { 101 err = -EFAULT; 102 goto done; 103 } 104 seq->count -= n; 105 seq->from += n; 106 copied = n; 107 goto done; 108 } 109 110 seq->from = 0; 111 p = seq->op->start(seq, &seq->index); 112 if (!p) 113 goto stop; 114 if (IS_ERR(p)) { 115 err = PTR_ERR(p); 116 seq->op->stop(seq, p); 117 seq->count = 0; 118 goto done; 119 } 120 121 err = seq->op->show(seq, p); 122 if (err > 0) { 123 /* object is skipped, decrease seq_num, so next 124 * valid object can reuse the same seq_num. 125 */ 126 bpf_iter_dec_seq_num(seq); 127 seq->count = 0; 128 } else if (err < 0 || seq_has_overflowed(seq)) { 129 if (!err) 130 err = -E2BIG; 131 seq->op->stop(seq, p); 132 seq->count = 0; 133 goto done; 134 } 135 136 while (1) { 137 loff_t pos = seq->index; 138 139 offs = seq->count; 140 p = seq->op->next(seq, p, &seq->index); 141 if (pos == seq->index) { 142 pr_info_ratelimited("buggy seq_file .next function %ps " 143 "did not updated position index\n", 144 seq->op->next); 145 seq->index++; 146 } 147 148 if (IS_ERR_OR_NULL(p)) 149 break; 150 151 /* got a valid next object, increase seq_num */ 152 bpf_iter_inc_seq_num(seq); 153 154 if (seq->count >= size) 155 break; 156 157 err = seq->op->show(seq, p); 158 if (err > 0) { 159 bpf_iter_dec_seq_num(seq); 160 seq->count = offs; 161 } else if (err < 0 || seq_has_overflowed(seq)) { 162 seq->count = offs; 163 if (offs == 0) { 164 if (!err) 165 err = -E2BIG; 166 seq->op->stop(seq, p); 167 goto done; 168 } 169 break; 170 } 171 } 172 stop: 173 offs = seq->count; 174 /* bpf program called if !p */ 175 seq->op->stop(seq, p); 176 if (!p) { 177 if (!seq_has_overflowed(seq)) { 178 bpf_iter_done_stop(seq); 179 } else { 180 seq->count = offs; 181 if (offs == 0) { 182 err = -E2BIG; 183 goto done; 184 } 185 } 186 } 187 188 n = min(seq->count, size); 189 err = copy_to_user(buf, seq->buf, n); 190 if (err) { 191 err = -EFAULT; 192 goto done; 193 } 194 copied = n; 195 seq->count -= n; 196 seq->from = n; 197 done: 198 if (!copied) 199 copied = err; 200 else 201 *ppos += copied; 202 mutex_unlock(&seq->lock); 203 return copied; 204 } 205 206 static int iter_open(struct inode *inode, struct file *file) 207 { 208 struct bpf_iter_link *link = inode->i_private; 209 210 return prepare_seq_file(file, link); 211 } 212 213 static int iter_release(struct inode *inode, struct file *file) 214 { 215 struct bpf_iter_priv_data *iter_priv; 216 struct seq_file *seq; 217 218 seq = file->private_data; 219 if (!seq) 220 return 0; 221 222 iter_priv = container_of(seq->private, struct bpf_iter_priv_data, 223 target_private); 224 225 if (iter_priv->tinfo->fini_seq_private) 226 iter_priv->tinfo->fini_seq_private(seq->private); 227 228 bpf_prog_put(iter_priv->prog); 229 seq->private = iter_priv; 230 231 return seq_release_private(inode, file); 232 } 233 234 const struct file_operations bpf_iter_fops = { 235 .open = iter_open, 236 .llseek = no_llseek, 237 .read = bpf_seq_read, 238 .release = iter_release, 239 }; 240 241 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) 242 { 243 struct bpf_iter_target_info *tinfo; 244 245 tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); 246 if (!tinfo) 247 return -ENOMEM; 248 249 tinfo->target = reg_info->target; 250 tinfo->seq_ops = reg_info->seq_ops; 251 tinfo->init_seq_private = reg_info->init_seq_private; 252 tinfo->fini_seq_private = reg_info->fini_seq_private; 253 tinfo->seq_priv_size = reg_info->seq_priv_size; 254 INIT_LIST_HEAD(&tinfo->list); 255 256 mutex_lock(&targets_mutex); 257 list_add(&tinfo->list, &targets); 258 mutex_unlock(&targets_mutex); 259 260 return 0; 261 } 262 263 void bpf_iter_unreg_target(const char *target) 264 { 265 struct bpf_iter_target_info *tinfo; 266 bool found = false; 267 268 mutex_lock(&targets_mutex); 269 list_for_each_entry(tinfo, &targets, list) { 270 if (!strcmp(target, tinfo->target)) { 271 list_del(&tinfo->list); 272 kfree(tinfo); 273 found = true; 274 break; 275 } 276 } 277 mutex_unlock(&targets_mutex); 278 279 WARN_ON(found == false); 280 } 281 282 static void cache_btf_id(struct bpf_iter_target_info *tinfo, 283 struct bpf_prog *prog) 284 { 285 tinfo->btf_id = prog->aux->attach_btf_id; 286 } 287 288 bool bpf_iter_prog_supported(struct bpf_prog *prog) 289 { 290 const char *attach_fname = prog->aux->attach_func_name; 291 u32 prog_btf_id = prog->aux->attach_btf_id; 292 const char *prefix = BPF_ITER_FUNC_PREFIX; 293 struct bpf_iter_target_info *tinfo; 294 int prefix_len = strlen(prefix); 295 bool supported = false; 296 297 if (strncmp(attach_fname, prefix, prefix_len)) 298 return false; 299 300 mutex_lock(&targets_mutex); 301 list_for_each_entry(tinfo, &targets, list) { 302 if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { 303 supported = true; 304 break; 305 } 306 if (!strcmp(attach_fname + prefix_len, tinfo->target)) { 307 cache_btf_id(tinfo, prog); 308 supported = true; 309 break; 310 } 311 } 312 mutex_unlock(&targets_mutex); 313 314 return supported; 315 } 316 317 static void bpf_iter_link_release(struct bpf_link *link) 318 { 319 } 320 321 static void bpf_iter_link_dealloc(struct bpf_link *link) 322 { 323 struct bpf_iter_link *iter_link = 324 container_of(link, struct bpf_iter_link, link); 325 326 kfree(iter_link); 327 } 328 329 static int bpf_iter_link_replace(struct bpf_link *link, 330 struct bpf_prog *new_prog, 331 struct bpf_prog *old_prog) 332 { 333 int ret = 0; 334 335 mutex_lock(&link_mutex); 336 if (old_prog && link->prog != old_prog) { 337 ret = -EPERM; 338 goto out_unlock; 339 } 340 341 if (link->prog->type != new_prog->type || 342 link->prog->expected_attach_type != new_prog->expected_attach_type || 343 link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { 344 ret = -EINVAL; 345 goto out_unlock; 346 } 347 348 old_prog = xchg(&link->prog, new_prog); 349 bpf_prog_put(old_prog); 350 351 out_unlock: 352 mutex_unlock(&link_mutex); 353 return ret; 354 } 355 356 static const struct bpf_link_ops bpf_iter_link_lops = { 357 .release = bpf_iter_link_release, 358 .dealloc = bpf_iter_link_dealloc, 359 .update_prog = bpf_iter_link_replace, 360 }; 361 362 bool bpf_link_is_iter(struct bpf_link *link) 363 { 364 return link->ops == &bpf_iter_link_lops; 365 } 366 367 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 368 { 369 struct bpf_link_primer link_primer; 370 struct bpf_iter_target_info *tinfo; 371 struct bpf_iter_link *link; 372 bool existed = false; 373 u32 prog_btf_id; 374 int err; 375 376 if (attr->link_create.target_fd || attr->link_create.flags) 377 return -EINVAL; 378 379 prog_btf_id = prog->aux->attach_btf_id; 380 mutex_lock(&targets_mutex); 381 list_for_each_entry(tinfo, &targets, list) { 382 if (tinfo->btf_id == prog_btf_id) { 383 existed = true; 384 break; 385 } 386 } 387 mutex_unlock(&targets_mutex); 388 if (!existed) 389 return -ENOENT; 390 391 link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); 392 if (!link) 393 return -ENOMEM; 394 395 bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); 396 link->tinfo = tinfo; 397 398 err = bpf_link_prime(&link->link, &link_primer); 399 if (err) { 400 kfree(link); 401 return err; 402 } 403 404 return bpf_link_settle(&link_primer); 405 } 406 407 static void init_seq_meta(struct bpf_iter_priv_data *priv_data, 408 struct bpf_iter_target_info *tinfo, 409 struct bpf_prog *prog) 410 { 411 priv_data->tinfo = tinfo; 412 priv_data->prog = prog; 413 priv_data->session_id = atomic64_inc_return(&session_id); 414 priv_data->seq_num = 0; 415 priv_data->done_stop = false; 416 } 417 418 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) 419 { 420 struct bpf_iter_priv_data *priv_data; 421 struct bpf_iter_target_info *tinfo; 422 struct bpf_prog *prog; 423 u32 total_priv_dsize; 424 struct seq_file *seq; 425 int err = 0; 426 427 mutex_lock(&link_mutex); 428 prog = link->link.prog; 429 bpf_prog_inc(prog); 430 mutex_unlock(&link_mutex); 431 432 tinfo = link->tinfo; 433 total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + 434 tinfo->seq_priv_size; 435 priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize); 436 if (!priv_data) { 437 err = -ENOMEM; 438 goto release_prog; 439 } 440 441 if (tinfo->init_seq_private) { 442 err = tinfo->init_seq_private(priv_data->target_private); 443 if (err) 444 goto release_seq_file; 445 } 446 447 init_seq_meta(priv_data, tinfo, prog); 448 seq = file->private_data; 449 seq->private = priv_data->target_private; 450 451 return 0; 452 453 release_seq_file: 454 seq_release_private(file->f_inode, file); 455 file->private_data = NULL; 456 release_prog: 457 bpf_prog_put(prog); 458 return err; 459 } 460 461 int bpf_iter_new_fd(struct bpf_link *link) 462 { 463 struct file *file; 464 unsigned int flags; 465 int err, fd; 466 467 if (link->ops != &bpf_iter_link_lops) 468 return -EINVAL; 469 470 flags = O_RDONLY | O_CLOEXEC; 471 fd = get_unused_fd_flags(flags); 472 if (fd < 0) 473 return fd; 474 475 file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); 476 if (IS_ERR(file)) { 477 err = PTR_ERR(file); 478 goto free_fd; 479 } 480 481 err = prepare_seq_file(file, 482 container_of(link, struct bpf_iter_link, link)); 483 if (err) 484 goto free_file; 485 486 fd_install(fd, file); 487 return fd; 488 489 free_file: 490 fput(file); 491 free_fd: 492 put_unused_fd(fd); 493 return err; 494 } 495 496 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) 497 { 498 struct bpf_iter_priv_data *iter_priv; 499 struct seq_file *seq; 500 void *seq_priv; 501 502 seq = meta->seq; 503 if (seq->file->f_op != &bpf_iter_fops) 504 return NULL; 505 506 seq_priv = seq->private; 507 iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, 508 target_private); 509 510 if (in_stop && iter_priv->done_stop) 511 return NULL; 512 513 meta->session_id = iter_priv->session_id; 514 meta->seq_num = iter_priv->seq_num; 515 516 return iter_priv->prog; 517 } 518 519 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) 520 { 521 int ret; 522 523 rcu_read_lock(); 524 migrate_disable(); 525 ret = BPF_PROG_RUN(prog, ctx); 526 migrate_enable(); 527 rcu_read_unlock(); 528 529 return ret == 0 ? 0 : -EAGAIN; 530 } 531