1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <linux/proc_fs.h> 12 #include <linux/file.h> 13 #include <linux/export.h> 14 #include <net/net_namespace.h> 15 #include <net/netns/generic.h> 16 17 /* 18 * Our network namespace constructor/destructor lists 19 */ 20 21 static LIST_HEAD(pernet_list); 22 static struct list_head *first_device = &pernet_list; 23 static DEFINE_MUTEX(net_mutex); 24 25 LIST_HEAD(net_namespace_list); 26 EXPORT_SYMBOL_GPL(net_namespace_list); 27 28 struct net init_net; 29 EXPORT_SYMBOL(init_net); 30 31 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 32 33 static int net_assign_generic(struct net *net, int id, void *data) 34 { 35 struct net_generic *ng, *old_ng; 36 37 BUG_ON(!mutex_is_locked(&net_mutex)); 38 BUG_ON(id == 0); 39 40 old_ng = rcu_dereference_protected(net->gen, 41 lockdep_is_held(&net_mutex)); 42 ng = old_ng; 43 if (old_ng->len >= id) 44 goto assign; 45 46 ng = kzalloc(sizeof(struct net_generic) + 47 id * sizeof(void *), GFP_KERNEL); 48 if (ng == NULL) 49 return -ENOMEM; 50 51 /* 52 * Some synchronisation notes: 53 * 54 * The net_generic explores the net->gen array inside rcu 55 * read section. Besides once set the net->gen->ptr[x] 56 * pointer never changes (see rules in netns/generic.h). 57 * 58 * That said, we simply duplicate this array and schedule 59 * the old copy for kfree after a grace period. 60 */ 61 62 ng->len = id; 63 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 64 65 rcu_assign_pointer(net->gen, ng); 66 kfree_rcu(old_ng, rcu); 67 assign: 68 ng->ptr[id - 1] = data; 69 return 0; 70 } 71 72 static int ops_init(const struct pernet_operations *ops, struct net *net) 73 { 74 int err; 75 if (ops->id && ops->size) { 76 void *data = kzalloc(ops->size, GFP_KERNEL); 77 if (!data) 78 return -ENOMEM; 79 80 err = net_assign_generic(net, *ops->id, data); 81 if (err) { 82 kfree(data); 83 return err; 84 } 85 } 86 if (ops->init) 87 return ops->init(net); 88 return 0; 89 } 90 91 static void ops_free(const struct pernet_operations *ops, struct net *net) 92 { 93 if (ops->id && ops->size) { 94 int id = *ops->id; 95 kfree(net_generic(net, id)); 96 } 97 } 98 99 static void ops_exit_list(const struct pernet_operations *ops, 100 struct list_head *net_exit_list) 101 { 102 struct net *net; 103 if (ops->exit) { 104 list_for_each_entry(net, net_exit_list, exit_list) 105 ops->exit(net); 106 } 107 if (ops->exit_batch) 108 ops->exit_batch(net_exit_list); 109 } 110 111 static void ops_free_list(const struct pernet_operations *ops, 112 struct list_head *net_exit_list) 113 { 114 struct net *net; 115 if (ops->size && ops->id) { 116 list_for_each_entry(net, net_exit_list, exit_list) 117 ops_free(ops, net); 118 } 119 } 120 121 /* 122 * setup_net runs the initializers for the network namespace object. 123 */ 124 static __net_init int setup_net(struct net *net) 125 { 126 /* Must be called with net_mutex held */ 127 const struct pernet_operations *ops, *saved_ops; 128 int error = 0; 129 LIST_HEAD(net_exit_list); 130 131 atomic_set(&net->count, 1); 132 atomic_set(&net->passive, 1); 133 net->dev_base_seq = 1; 134 135 #ifdef NETNS_REFCNT_DEBUG 136 atomic_set(&net->use_count, 0); 137 #endif 138 139 list_for_each_entry(ops, &pernet_list, list) { 140 error = ops_init(ops, net); 141 if (error < 0) 142 goto out_undo; 143 } 144 out: 145 return error; 146 147 out_undo: 148 /* Walk through the list backwards calling the exit functions 149 * for the pernet modules whose init functions did not fail. 150 */ 151 list_add(&net->exit_list, &net_exit_list); 152 saved_ops = ops; 153 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 154 ops_exit_list(ops, &net_exit_list); 155 156 ops = saved_ops; 157 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 158 ops_free_list(ops, &net_exit_list); 159 160 rcu_barrier(); 161 goto out; 162 } 163 164 static struct net_generic *net_alloc_generic(void) 165 { 166 struct net_generic *ng; 167 size_t generic_size = sizeof(struct net_generic) + 168 INITIAL_NET_GEN_PTRS * sizeof(void *); 169 170 ng = kzalloc(generic_size, GFP_KERNEL); 171 if (ng) 172 ng->len = INITIAL_NET_GEN_PTRS; 173 174 return ng; 175 } 176 177 #ifdef CONFIG_NET_NS 178 static struct kmem_cache *net_cachep; 179 static struct workqueue_struct *netns_wq; 180 181 static struct net *net_alloc(void) 182 { 183 struct net *net = NULL; 184 struct net_generic *ng; 185 186 ng = net_alloc_generic(); 187 if (!ng) 188 goto out; 189 190 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 191 if (!net) 192 goto out_free; 193 194 rcu_assign_pointer(net->gen, ng); 195 out: 196 return net; 197 198 out_free: 199 kfree(ng); 200 goto out; 201 } 202 203 static void net_free(struct net *net) 204 { 205 #ifdef NETNS_REFCNT_DEBUG 206 if (unlikely(atomic_read(&net->use_count) != 0)) { 207 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 208 atomic_read(&net->use_count)); 209 return; 210 } 211 #endif 212 kfree(net->gen); 213 kmem_cache_free(net_cachep, net); 214 } 215 216 void net_drop_ns(void *p) 217 { 218 struct net *ns = p; 219 if (ns && atomic_dec_and_test(&ns->passive)) 220 net_free(ns); 221 } 222 223 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 224 { 225 struct net *net; 226 int rv; 227 228 if (!(flags & CLONE_NEWNET)) 229 return get_net(old_net); 230 231 net = net_alloc(); 232 if (!net) 233 return ERR_PTR(-ENOMEM); 234 mutex_lock(&net_mutex); 235 rv = setup_net(net); 236 if (rv == 0) { 237 rtnl_lock(); 238 list_add_tail_rcu(&net->list, &net_namespace_list); 239 rtnl_unlock(); 240 } 241 mutex_unlock(&net_mutex); 242 if (rv < 0) { 243 net_drop_ns(net); 244 return ERR_PTR(rv); 245 } 246 return net; 247 } 248 249 static DEFINE_SPINLOCK(cleanup_list_lock); 250 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 251 252 static void cleanup_net(struct work_struct *work) 253 { 254 const struct pernet_operations *ops; 255 struct net *net, *tmp; 256 LIST_HEAD(net_kill_list); 257 LIST_HEAD(net_exit_list); 258 259 /* Atomically snapshot the list of namespaces to cleanup */ 260 spin_lock_irq(&cleanup_list_lock); 261 list_replace_init(&cleanup_list, &net_kill_list); 262 spin_unlock_irq(&cleanup_list_lock); 263 264 mutex_lock(&net_mutex); 265 266 /* Don't let anyone else find us. */ 267 rtnl_lock(); 268 list_for_each_entry(net, &net_kill_list, cleanup_list) { 269 list_del_rcu(&net->list); 270 list_add_tail(&net->exit_list, &net_exit_list); 271 } 272 rtnl_unlock(); 273 274 /* 275 * Another CPU might be rcu-iterating the list, wait for it. 276 * This needs to be before calling the exit() notifiers, so 277 * the rcu_barrier() below isn't sufficient alone. 278 */ 279 synchronize_rcu(); 280 281 /* Run all of the network namespace exit methods */ 282 list_for_each_entry_reverse(ops, &pernet_list, list) 283 ops_exit_list(ops, &net_exit_list); 284 285 /* Free the net generic variables */ 286 list_for_each_entry_reverse(ops, &pernet_list, list) 287 ops_free_list(ops, &net_exit_list); 288 289 mutex_unlock(&net_mutex); 290 291 /* Ensure there are no outstanding rcu callbacks using this 292 * network namespace. 293 */ 294 rcu_barrier(); 295 296 /* Finally it is safe to free my network namespace structure */ 297 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 298 list_del_init(&net->exit_list); 299 net_drop_ns(net); 300 } 301 } 302 static DECLARE_WORK(net_cleanup_work, cleanup_net); 303 304 void __put_net(struct net *net) 305 { 306 /* Cleanup the network namespace in process context */ 307 unsigned long flags; 308 309 spin_lock_irqsave(&cleanup_list_lock, flags); 310 list_add(&net->cleanup_list, &cleanup_list); 311 spin_unlock_irqrestore(&cleanup_list_lock, flags); 312 313 queue_work(netns_wq, &net_cleanup_work); 314 } 315 EXPORT_SYMBOL_GPL(__put_net); 316 317 struct net *get_net_ns_by_fd(int fd) 318 { 319 struct proc_inode *ei; 320 struct file *file; 321 struct net *net; 322 323 file = proc_ns_fget(fd); 324 if (IS_ERR(file)) 325 return ERR_CAST(file); 326 327 ei = PROC_I(file->f_dentry->d_inode); 328 if (ei->ns_ops == &netns_operations) 329 net = get_net(ei->ns); 330 else 331 net = ERR_PTR(-EINVAL); 332 333 fput(file); 334 return net; 335 } 336 337 #else 338 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 339 { 340 if (flags & CLONE_NEWNET) 341 return ERR_PTR(-EINVAL); 342 return old_net; 343 } 344 345 struct net *get_net_ns_by_fd(int fd) 346 { 347 return ERR_PTR(-EINVAL); 348 } 349 #endif 350 351 struct net *get_net_ns_by_pid(pid_t pid) 352 { 353 struct task_struct *tsk; 354 struct net *net; 355 356 /* Lookup the network namespace */ 357 net = ERR_PTR(-ESRCH); 358 rcu_read_lock(); 359 tsk = find_task_by_vpid(pid); 360 if (tsk) { 361 struct nsproxy *nsproxy; 362 nsproxy = task_nsproxy(tsk); 363 if (nsproxy) 364 net = get_net(nsproxy->net_ns); 365 } 366 rcu_read_unlock(); 367 return net; 368 } 369 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 370 371 static int __init net_ns_init(void) 372 { 373 struct net_generic *ng; 374 375 #ifdef CONFIG_NET_NS 376 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 377 SMP_CACHE_BYTES, 378 SLAB_PANIC, NULL); 379 380 /* Create workqueue for cleanup */ 381 netns_wq = create_singlethread_workqueue("netns"); 382 if (!netns_wq) 383 panic("Could not create netns workq"); 384 #endif 385 386 ng = net_alloc_generic(); 387 if (!ng) 388 panic("Could not allocate generic netns"); 389 390 rcu_assign_pointer(init_net.gen, ng); 391 392 mutex_lock(&net_mutex); 393 if (setup_net(&init_net)) 394 panic("Could not setup the initial network namespace"); 395 396 rtnl_lock(); 397 list_add_tail_rcu(&init_net.list, &net_namespace_list); 398 rtnl_unlock(); 399 400 mutex_unlock(&net_mutex); 401 402 return 0; 403 } 404 405 pure_initcall(net_ns_init); 406 407 #ifdef CONFIG_NET_NS 408 static int __register_pernet_operations(struct list_head *list, 409 struct pernet_operations *ops) 410 { 411 struct net *net; 412 int error; 413 LIST_HEAD(net_exit_list); 414 415 list_add_tail(&ops->list, list); 416 if (ops->init || (ops->id && ops->size)) { 417 for_each_net(net) { 418 error = ops_init(ops, net); 419 if (error) 420 goto out_undo; 421 list_add_tail(&net->exit_list, &net_exit_list); 422 } 423 } 424 return 0; 425 426 out_undo: 427 /* If I have an error cleanup all namespaces I initialized */ 428 list_del(&ops->list); 429 ops_exit_list(ops, &net_exit_list); 430 ops_free_list(ops, &net_exit_list); 431 return error; 432 } 433 434 static void __unregister_pernet_operations(struct pernet_operations *ops) 435 { 436 struct net *net; 437 LIST_HEAD(net_exit_list); 438 439 list_del(&ops->list); 440 for_each_net(net) 441 list_add_tail(&net->exit_list, &net_exit_list); 442 ops_exit_list(ops, &net_exit_list); 443 ops_free_list(ops, &net_exit_list); 444 } 445 446 #else 447 448 static int __register_pernet_operations(struct list_head *list, 449 struct pernet_operations *ops) 450 { 451 int err = 0; 452 err = ops_init(ops, &init_net); 453 if (err) 454 ops_free(ops, &init_net); 455 return err; 456 457 } 458 459 static void __unregister_pernet_operations(struct pernet_operations *ops) 460 { 461 LIST_HEAD(net_exit_list); 462 list_add(&init_net.exit_list, &net_exit_list); 463 ops_exit_list(ops, &net_exit_list); 464 ops_free_list(ops, &net_exit_list); 465 } 466 467 #endif /* CONFIG_NET_NS */ 468 469 static DEFINE_IDA(net_generic_ids); 470 471 static int register_pernet_operations(struct list_head *list, 472 struct pernet_operations *ops) 473 { 474 int error; 475 476 if (ops->id) { 477 again: 478 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 479 if (error < 0) { 480 if (error == -EAGAIN) { 481 ida_pre_get(&net_generic_ids, GFP_KERNEL); 482 goto again; 483 } 484 return error; 485 } 486 } 487 error = __register_pernet_operations(list, ops); 488 if (error) { 489 rcu_barrier(); 490 if (ops->id) 491 ida_remove(&net_generic_ids, *ops->id); 492 } 493 494 return error; 495 } 496 497 static void unregister_pernet_operations(struct pernet_operations *ops) 498 { 499 500 __unregister_pernet_operations(ops); 501 rcu_barrier(); 502 if (ops->id) 503 ida_remove(&net_generic_ids, *ops->id); 504 } 505 506 /** 507 * register_pernet_subsys - register a network namespace subsystem 508 * @ops: pernet operations structure for the subsystem 509 * 510 * Register a subsystem which has init and exit functions 511 * that are called when network namespaces are created and 512 * destroyed respectively. 513 * 514 * When registered all network namespace init functions are 515 * called for every existing network namespace. Allowing kernel 516 * modules to have a race free view of the set of network namespaces. 517 * 518 * When a new network namespace is created all of the init 519 * methods are called in the order in which they were registered. 520 * 521 * When a network namespace is destroyed all of the exit methods 522 * are called in the reverse of the order with which they were 523 * registered. 524 */ 525 int register_pernet_subsys(struct pernet_operations *ops) 526 { 527 int error; 528 mutex_lock(&net_mutex); 529 error = register_pernet_operations(first_device, ops); 530 mutex_unlock(&net_mutex); 531 return error; 532 } 533 EXPORT_SYMBOL_GPL(register_pernet_subsys); 534 535 /** 536 * unregister_pernet_subsys - unregister a network namespace subsystem 537 * @ops: pernet operations structure to manipulate 538 * 539 * Remove the pernet operations structure from the list to be 540 * used when network namespaces are created or destroyed. In 541 * addition run the exit method for all existing network 542 * namespaces. 543 */ 544 void unregister_pernet_subsys(struct pernet_operations *ops) 545 { 546 mutex_lock(&net_mutex); 547 unregister_pernet_operations(ops); 548 mutex_unlock(&net_mutex); 549 } 550 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 551 552 /** 553 * register_pernet_device - register a network namespace device 554 * @ops: pernet operations structure for the subsystem 555 * 556 * Register a device which has init and exit functions 557 * that are called when network namespaces are created and 558 * destroyed respectively. 559 * 560 * When registered all network namespace init functions are 561 * called for every existing network namespace. Allowing kernel 562 * modules to have a race free view of the set of network namespaces. 563 * 564 * When a new network namespace is created all of the init 565 * methods are called in the order in which they were registered. 566 * 567 * When a network namespace is destroyed all of the exit methods 568 * are called in the reverse of the order with which they were 569 * registered. 570 */ 571 int register_pernet_device(struct pernet_operations *ops) 572 { 573 int error; 574 mutex_lock(&net_mutex); 575 error = register_pernet_operations(&pernet_list, ops); 576 if (!error && (first_device == &pernet_list)) 577 first_device = &ops->list; 578 mutex_unlock(&net_mutex); 579 return error; 580 } 581 EXPORT_SYMBOL_GPL(register_pernet_device); 582 583 /** 584 * unregister_pernet_device - unregister a network namespace netdevice 585 * @ops: pernet operations structure to manipulate 586 * 587 * Remove the pernet operations structure from the list to be 588 * used when network namespaces are created or destroyed. In 589 * addition run the exit method for all existing network 590 * namespaces. 591 */ 592 void unregister_pernet_device(struct pernet_operations *ops) 593 { 594 mutex_lock(&net_mutex); 595 if (&ops->list == first_device) 596 first_device = first_device->next; 597 unregister_pernet_operations(ops); 598 mutex_unlock(&net_mutex); 599 } 600 EXPORT_SYMBOL_GPL(unregister_pernet_device); 601 602 #ifdef CONFIG_NET_NS 603 static void *netns_get(struct task_struct *task) 604 { 605 struct net *net = NULL; 606 struct nsproxy *nsproxy; 607 608 rcu_read_lock(); 609 nsproxy = task_nsproxy(task); 610 if (nsproxy) 611 net = get_net(nsproxy->net_ns); 612 rcu_read_unlock(); 613 614 return net; 615 } 616 617 static void netns_put(void *ns) 618 { 619 put_net(ns); 620 } 621 622 static int netns_install(struct nsproxy *nsproxy, void *ns) 623 { 624 put_net(nsproxy->net_ns); 625 nsproxy->net_ns = get_net(ns); 626 return 0; 627 } 628 629 const struct proc_ns_operations netns_operations = { 630 .name = "net", 631 .type = CLONE_NEWNET, 632 .get = netns_get, 633 .put = netns_put, 634 .install = netns_install, 635 }; 636 #endif 637