1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <net/net_namespace.h> 12 #include <net/netns/generic.h> 13 14 /* 15 * Our network namespace constructor/destructor lists 16 */ 17 18 static LIST_HEAD(pernet_list); 19 static struct list_head *first_device = &pernet_list; 20 static DEFINE_MUTEX(net_mutex); 21 22 LIST_HEAD(net_namespace_list); 23 EXPORT_SYMBOL_GPL(net_namespace_list); 24 25 struct net init_net; 26 EXPORT_SYMBOL(init_net); 27 28 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 29 30 static int net_assign_generic(struct net *net, int id, void *data) 31 { 32 struct net_generic *ng, *old_ng; 33 34 BUG_ON(!mutex_is_locked(&net_mutex)); 35 BUG_ON(id == 0); 36 37 old_ng = rcu_dereference_protected(net->gen, 38 lockdep_is_held(&net_mutex)); 39 ng = old_ng; 40 if (old_ng->len >= id) 41 goto assign; 42 43 ng = kzalloc(sizeof(struct net_generic) + 44 id * sizeof(void *), GFP_KERNEL); 45 if (ng == NULL) 46 return -ENOMEM; 47 48 /* 49 * Some synchronisation notes: 50 * 51 * The net_generic explores the net->gen array inside rcu 52 * read section. Besides once set the net->gen->ptr[x] 53 * pointer never changes (see rules in netns/generic.h). 54 * 55 * That said, we simply duplicate this array and schedule 56 * the old copy for kfree after a grace period. 57 */ 58 59 ng->len = id; 60 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 61 62 rcu_assign_pointer(net->gen, ng); 63 kfree_rcu(old_ng, rcu); 64 assign: 65 ng->ptr[id - 1] = data; 66 return 0; 67 } 68 69 static int ops_init(const struct pernet_operations *ops, struct net *net) 70 { 71 int err; 72 if (ops->id && ops->size) { 73 void *data = kzalloc(ops->size, GFP_KERNEL); 74 if (!data) 75 return -ENOMEM; 76 77 err = net_assign_generic(net, *ops->id, data); 78 if (err) { 79 kfree(data); 80 return err; 81 } 82 } 83 if (ops->init) 84 return ops->init(net); 85 return 0; 86 } 87 88 static void ops_free(const struct pernet_operations *ops, struct net *net) 89 { 90 if (ops->id && ops->size) { 91 int id = *ops->id; 92 kfree(net_generic(net, id)); 93 } 94 } 95 96 static void ops_exit_list(const struct pernet_operations *ops, 97 struct list_head *net_exit_list) 98 { 99 struct net *net; 100 if (ops->exit) { 101 list_for_each_entry(net, net_exit_list, exit_list) 102 ops->exit(net); 103 } 104 if (ops->exit_batch) 105 ops->exit_batch(net_exit_list); 106 } 107 108 static void ops_free_list(const struct pernet_operations *ops, 109 struct list_head *net_exit_list) 110 { 111 struct net *net; 112 if (ops->size && ops->id) { 113 list_for_each_entry(net, net_exit_list, exit_list) 114 ops_free(ops, net); 115 } 116 } 117 118 /* 119 * setup_net runs the initializers for the network namespace object. 120 */ 121 static __net_init int setup_net(struct net *net) 122 { 123 /* Must be called with net_mutex held */ 124 const struct pernet_operations *ops, *saved_ops; 125 int error = 0; 126 LIST_HEAD(net_exit_list); 127 128 atomic_set(&net->count, 1); 129 130 #ifdef NETNS_REFCNT_DEBUG 131 atomic_set(&net->use_count, 0); 132 #endif 133 134 list_for_each_entry(ops, &pernet_list, list) { 135 error = ops_init(ops, net); 136 if (error < 0) 137 goto out_undo; 138 } 139 out: 140 return error; 141 142 out_undo: 143 /* Walk through the list backwards calling the exit functions 144 * for the pernet modules whose init functions did not fail. 145 */ 146 list_add(&net->exit_list, &net_exit_list); 147 saved_ops = ops; 148 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 149 ops_exit_list(ops, &net_exit_list); 150 151 ops = saved_ops; 152 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 153 ops_free_list(ops, &net_exit_list); 154 155 rcu_barrier(); 156 goto out; 157 } 158 159 static struct net_generic *net_alloc_generic(void) 160 { 161 struct net_generic *ng; 162 size_t generic_size = sizeof(struct net_generic) + 163 INITIAL_NET_GEN_PTRS * sizeof(void *); 164 165 ng = kzalloc(generic_size, GFP_KERNEL); 166 if (ng) 167 ng->len = INITIAL_NET_GEN_PTRS; 168 169 return ng; 170 } 171 172 #ifdef CONFIG_NET_NS 173 static struct kmem_cache *net_cachep; 174 static struct workqueue_struct *netns_wq; 175 176 static struct net *net_alloc(void) 177 { 178 struct net *net = NULL; 179 struct net_generic *ng; 180 181 ng = net_alloc_generic(); 182 if (!ng) 183 goto out; 184 185 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 186 if (!net) 187 goto out_free; 188 189 rcu_assign_pointer(net->gen, ng); 190 out: 191 return net; 192 193 out_free: 194 kfree(ng); 195 goto out; 196 } 197 198 static void net_free(struct net *net) 199 { 200 #ifdef NETNS_REFCNT_DEBUG 201 if (unlikely(atomic_read(&net->use_count) != 0)) { 202 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 203 atomic_read(&net->use_count)); 204 return; 205 } 206 #endif 207 kfree(net->gen); 208 kmem_cache_free(net_cachep, net); 209 } 210 211 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 212 { 213 struct net *net; 214 int rv; 215 216 if (!(flags & CLONE_NEWNET)) 217 return get_net(old_net); 218 219 net = net_alloc(); 220 if (!net) 221 return ERR_PTR(-ENOMEM); 222 mutex_lock(&net_mutex); 223 rv = setup_net(net); 224 if (rv == 0) { 225 rtnl_lock(); 226 list_add_tail_rcu(&net->list, &net_namespace_list); 227 rtnl_unlock(); 228 } 229 mutex_unlock(&net_mutex); 230 if (rv < 0) { 231 net_free(net); 232 return ERR_PTR(rv); 233 } 234 return net; 235 } 236 237 static DEFINE_SPINLOCK(cleanup_list_lock); 238 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 239 240 static void cleanup_net(struct work_struct *work) 241 { 242 const struct pernet_operations *ops; 243 struct net *net, *tmp; 244 LIST_HEAD(net_kill_list); 245 LIST_HEAD(net_exit_list); 246 247 /* Atomically snapshot the list of namespaces to cleanup */ 248 spin_lock_irq(&cleanup_list_lock); 249 list_replace_init(&cleanup_list, &net_kill_list); 250 spin_unlock_irq(&cleanup_list_lock); 251 252 mutex_lock(&net_mutex); 253 254 /* Don't let anyone else find us. */ 255 rtnl_lock(); 256 list_for_each_entry(net, &net_kill_list, cleanup_list) { 257 list_del_rcu(&net->list); 258 list_add_tail(&net->exit_list, &net_exit_list); 259 } 260 rtnl_unlock(); 261 262 /* 263 * Another CPU might be rcu-iterating the list, wait for it. 264 * This needs to be before calling the exit() notifiers, so 265 * the rcu_barrier() below isn't sufficient alone. 266 */ 267 synchronize_rcu(); 268 269 /* Run all of the network namespace exit methods */ 270 list_for_each_entry_reverse(ops, &pernet_list, list) 271 ops_exit_list(ops, &net_exit_list); 272 273 /* Free the net generic variables */ 274 list_for_each_entry_reverse(ops, &pernet_list, list) 275 ops_free_list(ops, &net_exit_list); 276 277 mutex_unlock(&net_mutex); 278 279 /* Ensure there are no outstanding rcu callbacks using this 280 * network namespace. 281 */ 282 rcu_barrier(); 283 284 /* Finally it is safe to free my network namespace structure */ 285 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 286 list_del_init(&net->exit_list); 287 net_free(net); 288 } 289 } 290 static DECLARE_WORK(net_cleanup_work, cleanup_net); 291 292 void __put_net(struct net *net) 293 { 294 /* Cleanup the network namespace in process context */ 295 unsigned long flags; 296 297 spin_lock_irqsave(&cleanup_list_lock, flags); 298 list_add(&net->cleanup_list, &cleanup_list); 299 spin_unlock_irqrestore(&cleanup_list_lock, flags); 300 301 queue_work(netns_wq, &net_cleanup_work); 302 } 303 EXPORT_SYMBOL_GPL(__put_net); 304 305 #else 306 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 307 { 308 if (flags & CLONE_NEWNET) 309 return ERR_PTR(-EINVAL); 310 return old_net; 311 } 312 #endif 313 314 struct net *get_net_ns_by_pid(pid_t pid) 315 { 316 struct task_struct *tsk; 317 struct net *net; 318 319 /* Lookup the network namespace */ 320 net = ERR_PTR(-ESRCH); 321 rcu_read_lock(); 322 tsk = find_task_by_vpid(pid); 323 if (tsk) { 324 struct nsproxy *nsproxy; 325 nsproxy = task_nsproxy(tsk); 326 if (nsproxy) 327 net = get_net(nsproxy->net_ns); 328 } 329 rcu_read_unlock(); 330 return net; 331 } 332 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 333 334 static int __init net_ns_init(void) 335 { 336 struct net_generic *ng; 337 338 #ifdef CONFIG_NET_NS 339 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 340 SMP_CACHE_BYTES, 341 SLAB_PANIC, NULL); 342 343 /* Create workqueue for cleanup */ 344 netns_wq = create_singlethread_workqueue("netns"); 345 if (!netns_wq) 346 panic("Could not create netns workq"); 347 #endif 348 349 ng = net_alloc_generic(); 350 if (!ng) 351 panic("Could not allocate generic netns"); 352 353 rcu_assign_pointer(init_net.gen, ng); 354 355 mutex_lock(&net_mutex); 356 if (setup_net(&init_net)) 357 panic("Could not setup the initial network namespace"); 358 359 rtnl_lock(); 360 list_add_tail_rcu(&init_net.list, &net_namespace_list); 361 rtnl_unlock(); 362 363 mutex_unlock(&net_mutex); 364 365 return 0; 366 } 367 368 pure_initcall(net_ns_init); 369 370 #ifdef CONFIG_NET_NS 371 static int __register_pernet_operations(struct list_head *list, 372 struct pernet_operations *ops) 373 { 374 struct net *net; 375 int error; 376 LIST_HEAD(net_exit_list); 377 378 list_add_tail(&ops->list, list); 379 if (ops->init || (ops->id && ops->size)) { 380 for_each_net(net) { 381 error = ops_init(ops, net); 382 if (error) 383 goto out_undo; 384 list_add_tail(&net->exit_list, &net_exit_list); 385 } 386 } 387 return 0; 388 389 out_undo: 390 /* If I have an error cleanup all namespaces I initialized */ 391 list_del(&ops->list); 392 ops_exit_list(ops, &net_exit_list); 393 ops_free_list(ops, &net_exit_list); 394 return error; 395 } 396 397 static void __unregister_pernet_operations(struct pernet_operations *ops) 398 { 399 struct net *net; 400 LIST_HEAD(net_exit_list); 401 402 list_del(&ops->list); 403 for_each_net(net) 404 list_add_tail(&net->exit_list, &net_exit_list); 405 ops_exit_list(ops, &net_exit_list); 406 ops_free_list(ops, &net_exit_list); 407 } 408 409 #else 410 411 static int __register_pernet_operations(struct list_head *list, 412 struct pernet_operations *ops) 413 { 414 int err = 0; 415 err = ops_init(ops, &init_net); 416 if (err) 417 ops_free(ops, &init_net); 418 return err; 419 420 } 421 422 static void __unregister_pernet_operations(struct pernet_operations *ops) 423 { 424 LIST_HEAD(net_exit_list); 425 list_add(&init_net.exit_list, &net_exit_list); 426 ops_exit_list(ops, &net_exit_list); 427 ops_free_list(ops, &net_exit_list); 428 } 429 430 #endif /* CONFIG_NET_NS */ 431 432 static DEFINE_IDA(net_generic_ids); 433 434 static int register_pernet_operations(struct list_head *list, 435 struct pernet_operations *ops) 436 { 437 int error; 438 439 if (ops->id) { 440 again: 441 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 442 if (error < 0) { 443 if (error == -EAGAIN) { 444 ida_pre_get(&net_generic_ids, GFP_KERNEL); 445 goto again; 446 } 447 return error; 448 } 449 } 450 error = __register_pernet_operations(list, ops); 451 if (error) { 452 rcu_barrier(); 453 if (ops->id) 454 ida_remove(&net_generic_ids, *ops->id); 455 } 456 457 return error; 458 } 459 460 static void unregister_pernet_operations(struct pernet_operations *ops) 461 { 462 463 __unregister_pernet_operations(ops); 464 rcu_barrier(); 465 if (ops->id) 466 ida_remove(&net_generic_ids, *ops->id); 467 } 468 469 /** 470 * register_pernet_subsys - register a network namespace subsystem 471 * @ops: pernet operations structure for the subsystem 472 * 473 * Register a subsystem which has init and exit functions 474 * that are called when network namespaces are created and 475 * destroyed respectively. 476 * 477 * When registered all network namespace init functions are 478 * called for every existing network namespace. Allowing kernel 479 * modules to have a race free view of the set of network namespaces. 480 * 481 * When a new network namespace is created all of the init 482 * methods are called in the order in which they were registered. 483 * 484 * When a network namespace is destroyed all of the exit methods 485 * are called in the reverse of the order with which they were 486 * registered. 487 */ 488 int register_pernet_subsys(struct pernet_operations *ops) 489 { 490 int error; 491 mutex_lock(&net_mutex); 492 error = register_pernet_operations(first_device, ops); 493 mutex_unlock(&net_mutex); 494 return error; 495 } 496 EXPORT_SYMBOL_GPL(register_pernet_subsys); 497 498 /** 499 * unregister_pernet_subsys - unregister a network namespace subsystem 500 * @ops: pernet operations structure to manipulate 501 * 502 * Remove the pernet operations structure from the list to be 503 * used when network namespaces are created or destroyed. In 504 * addition run the exit method for all existing network 505 * namespaces. 506 */ 507 void unregister_pernet_subsys(struct pernet_operations *ops) 508 { 509 mutex_lock(&net_mutex); 510 unregister_pernet_operations(ops); 511 mutex_unlock(&net_mutex); 512 } 513 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 514 515 /** 516 * register_pernet_device - register a network namespace device 517 * @ops: pernet operations structure for the subsystem 518 * 519 * Register a device which has init and exit functions 520 * that are called when network namespaces are created and 521 * destroyed respectively. 522 * 523 * When registered all network namespace init functions are 524 * called for every existing network namespace. Allowing kernel 525 * modules to have a race free view of the set of network namespaces. 526 * 527 * When a new network namespace is created all of the init 528 * methods are called in the order in which they were registered. 529 * 530 * When a network namespace is destroyed all of the exit methods 531 * are called in the reverse of the order with which they were 532 * registered. 533 */ 534 int register_pernet_device(struct pernet_operations *ops) 535 { 536 int error; 537 mutex_lock(&net_mutex); 538 error = register_pernet_operations(&pernet_list, ops); 539 if (!error && (first_device == &pernet_list)) 540 first_device = &ops->list; 541 mutex_unlock(&net_mutex); 542 return error; 543 } 544 EXPORT_SYMBOL_GPL(register_pernet_device); 545 546 /** 547 * unregister_pernet_device - unregister a network namespace netdevice 548 * @ops: pernet operations structure to manipulate 549 * 550 * Remove the pernet operations structure from the list to be 551 * used when network namespaces are created or destroyed. In 552 * addition run the exit method for all existing network 553 * namespaces. 554 */ 555 void unregister_pernet_device(struct pernet_operations *ops) 556 { 557 mutex_lock(&net_mutex); 558 if (&ops->list == first_device) 559 first_device = first_device->next; 560 unregister_pernet_operations(ops); 561 mutex_unlock(&net_mutex); 562 } 563 EXPORT_SYMBOL_GPL(unregister_pernet_device); 564