xref: /linux/net/netlink/af_netlink.c (revision e5c5d22e8dcf7c2d430336cbf8e180bd38e8daf1)
1 /*
2  * NETLINK      Kernel-user communication protocol.
3  *
4  * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>
5  * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6  *
7  *		This program is free software; you can redistribute it and/or
8  *		modify it under the terms of the GNU General Public License
9  *		as published by the Free Software Foundation; either version
10  *		2 of the License, or (at your option) any later version.
11  *
12  * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
13  *                               added netlink_proto_exit
14  * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
15  * 				 use nlk_sk, as sk->protinfo is on a diet 8)
16  * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
17  * 				 - inc module use count of module that owns
18  * 				   the kernel socket in case userspace opens
19  * 				   socket of same protocol
20  * 				 - remove all module support, since netlink is
21  * 				   mandatory if CONFIG_NET=y these days
22  */
23 
24 #include <linux/module.h>
25 
26 #include <linux/capability.h>
27 #include <linux/kernel.h>
28 #include <linux/init.h>
29 #include <linux/signal.h>
30 #include <linux/sched.h>
31 #include <linux/errno.h>
32 #include <linux/string.h>
33 #include <linux/stat.h>
34 #include <linux/socket.h>
35 #include <linux/un.h>
36 #include <linux/fcntl.h>
37 #include <linux/termios.h>
38 #include <linux/sockios.h>
39 #include <linux/net.h>
40 #include <linux/fs.h>
41 #include <linux/slab.h>
42 #include <asm/uaccess.h>
43 #include <linux/skbuff.h>
44 #include <linux/netdevice.h>
45 #include <linux/rtnetlink.h>
46 #include <linux/proc_fs.h>
47 #include <linux/seq_file.h>
48 #include <linux/notifier.h>
49 #include <linux/security.h>
50 #include <linux/jhash.h>
51 #include <linux/jiffies.h>
52 #include <linux/random.h>
53 #include <linux/bitops.h>
54 #include <linux/mm.h>
55 #include <linux/types.h>
56 #include <linux/audit.h>
57 #include <linux/mutex.h>
58 
59 #include <net/net_namespace.h>
60 #include <net/sock.h>
61 #include <net/scm.h>
62 #include <net/netlink.h>
63 
64 #include "af_netlink.h"
65 
66 struct listeners {
67 	struct rcu_head		rcu;
68 	unsigned long		masks[0];
69 };
70 
/* Bits stored in netlink_sock->flags. */
#define NETLINK_KERNEL_SOCKET		(1 << 0)	/* tested by netlink_is_kernel() */
#define NETLINK_RECV_PKTINFO		(1 << 1)	/* toggled by the NETLINK_PKTINFO sockopt */
#define NETLINK_BROADCAST_SEND_ERROR	(1 << 2)	/* toggled by the NETLINK_BROADCAST_ERROR sockopt */
#define NETLINK_RECV_NO_ENOBUFS		(1 << 3)	/* toggled by the NETLINK_NO_ENOBUFS sockopt */
75 
76 static inline int netlink_is_kernel(struct sock *sk)
77 {
78 	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
79 }
80 
81 struct netlink_table *nl_table;
82 EXPORT_SYMBOL_GPL(nl_table);
83 
84 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
85 
86 static int netlink_dump(struct sock *sk);
87 
88 DEFINE_RWLOCK(nl_table_lock);
89 EXPORT_SYMBOL_GPL(nl_table_lock);
90 static atomic_t nl_table_users = ATOMIC_INIT(0);
91 
/*
 * Fetch an RCU-protected pointer from a context that holds nl_table_lock
 * (checked through lockdep).
 *
 * The expansion deliberately carries no trailing semicolon so the macro
 * behaves like an ordinary expression and can be used in initializers and
 * conditions; callers supply their own ';'.
 */
#define nl_deref_protected(X) \
	rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))

/* Notifier chain invoked with NETLINK_URELEASE from netlink_release(). */
static ATOMIC_NOTIFIER_HEAD(netlink_chain);
95 
/*
 * netlink_group_mask - convert a 1-based group number to its 32-bit mask
 * @group: group number; 0 means "no group"
 *
 * Returns the mask with bit (@group - 1) set, or 0 when @group is 0 or does
 * not fit in a 32-bit mask.  The shift is performed on an unsigned operand:
 * shifting a signed 1 into bit 31, or shifting by 32 or more, is undefined
 * behaviour in C.
 */
static inline u32 netlink_group_mask(u32 group)
{
	if (group == 0 || group > 32)
		return 0;

	return (u32)1 << (group - 1);
}
100 
101 static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
102 {
103 	return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
104 }
105 
106 static void netlink_destroy_callback(struct netlink_callback *cb)
107 {
108 	kfree_skb(cb->skb);
109 	kfree(cb);
110 }
111 
112 static void netlink_consume_callback(struct netlink_callback *cb)
113 {
114 	consume_skb(cb->skb);
115 	kfree(cb);
116 }
117 
118 static void netlink_sock_destruct(struct sock *sk)
119 {
120 	struct netlink_sock *nlk = nlk_sk(sk);
121 
122 	if (nlk->cb) {
123 		if (nlk->cb->done)
124 			nlk->cb->done(nlk->cb);
125 
126 		module_put(nlk->cb->module);
127 		netlink_destroy_callback(nlk->cb);
128 	}
129 
130 	skb_queue_purge(&sk->sk_receive_queue);
131 
132 	if (!sock_flag(sk, SOCK_DEAD)) {
133 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
134 		return;
135 	}
136 
137 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
138 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
139 	WARN_ON(nlk_sk(sk)->groups);
140 }
141 
142 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
143  * SMP. Look, when several writers sleep and reader wakes them up, all but one
144  * immediately hit write lock and grab all the cpus. Exclusive sleep solves
145  * this, _but_ remember, it adds useless work on UP machines.
146  */
147 
148 void netlink_table_grab(void)
149 	__acquires(nl_table_lock)
150 {
151 	might_sleep();
152 
153 	write_lock_irq(&nl_table_lock);
154 
155 	if (atomic_read(&nl_table_users)) {
156 		DECLARE_WAITQUEUE(wait, current);
157 
158 		add_wait_queue_exclusive(&nl_table_wait, &wait);
159 		for (;;) {
160 			set_current_state(TASK_UNINTERRUPTIBLE);
161 			if (atomic_read(&nl_table_users) == 0)
162 				break;
163 			write_unlock_irq(&nl_table_lock);
164 			schedule();
165 			write_lock_irq(&nl_table_lock);
166 		}
167 
168 		__set_current_state(TASK_RUNNING);
169 		remove_wait_queue(&nl_table_wait, &wait);
170 	}
171 }
172 
173 void netlink_table_ungrab(void)
174 	__releases(nl_table_lock)
175 {
176 	write_unlock_irq(&nl_table_lock);
177 	wake_up(&nl_table_wait);
178 }
179 
180 static inline void
181 netlink_lock_table(void)
182 {
183 	/* read_lock() synchronizes us to netlink_table_grab */
184 
185 	read_lock(&nl_table_lock);
186 	atomic_inc(&nl_table_users);
187 	read_unlock(&nl_table_lock);
188 }
189 
190 static inline void
191 netlink_unlock_table(void)
192 {
193 	if (atomic_dec_and_test(&nl_table_users))
194 		wake_up(&nl_table_wait);
195 }
196 
197 static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
198 {
199 	struct nl_portid_hash *hash = &nl_table[protocol].hash;
200 	struct hlist_head *head;
201 	struct sock *sk;
202 
203 	read_lock(&nl_table_lock);
204 	head = nl_portid_hashfn(hash, portid);
205 	sk_for_each(sk, head) {
206 		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
207 			sock_hold(sk);
208 			goto found;
209 		}
210 	}
211 	sk = NULL;
212 found:
213 	read_unlock(&nl_table_lock);
214 	return sk;
215 }
216 
217 static struct hlist_head *nl_portid_hash_zalloc(size_t size)
218 {
219 	if (size <= PAGE_SIZE)
220 		return kzalloc(size, GFP_ATOMIC);
221 	else
222 		return (struct hlist_head *)
223 			__get_free_pages(GFP_ATOMIC | __GFP_ZERO,
224 					 get_order(size));
225 }
226 
227 static void nl_portid_hash_free(struct hlist_head *table, size_t size)
228 {
229 	if (size <= PAGE_SIZE)
230 		kfree(table);
231 	else
232 		free_pages((unsigned long)table, get_order(size));
233 }
234 
235 static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
236 {
237 	unsigned int omask, mask, shift;
238 	size_t osize, size;
239 	struct hlist_head *otable, *table;
240 	int i;
241 
242 	omask = mask = hash->mask;
243 	osize = size = (mask + 1) * sizeof(*table);
244 	shift = hash->shift;
245 
246 	if (grow) {
247 		if (++shift > hash->max_shift)
248 			return 0;
249 		mask = mask * 2 + 1;
250 		size *= 2;
251 	}
252 
253 	table = nl_portid_hash_zalloc(size);
254 	if (!table)
255 		return 0;
256 
257 	otable = hash->table;
258 	hash->table = table;
259 	hash->mask = mask;
260 	hash->shift = shift;
261 	get_random_bytes(&hash->rnd, sizeof(hash->rnd));
262 
263 	for (i = 0; i <= omask; i++) {
264 		struct sock *sk;
265 		struct hlist_node *tmp;
266 
267 		sk_for_each_safe(sk, tmp, &otable[i])
268 			__sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
269 	}
270 
271 	nl_portid_hash_free(otable, osize);
272 	hash->rehash_time = jiffies + 10 * 60 * HZ;
273 	return 1;
274 }
275 
276 static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
277 {
278 	int avg = hash->entries >> hash->shift;
279 
280 	if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
281 		return 1;
282 
283 	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
284 		nl_portid_hash_rehash(hash, 0);
285 		return 1;
286 	}
287 
288 	return 0;
289 }
290 
291 static const struct proto_ops netlink_ops;
292 
293 static void
294 netlink_update_listeners(struct sock *sk)
295 {
296 	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
297 	unsigned long mask;
298 	unsigned int i;
299 	struct listeners *listeners;
300 
301 	listeners = nl_deref_protected(tbl->listeners);
302 	if (!listeners)
303 		return;
304 
305 	for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
306 		mask = 0;
307 		sk_for_each_bound(sk, &tbl->mc_list) {
308 			if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
309 				mask |= nlk_sk(sk)->groups[i];
310 		}
311 		listeners->masks[i] = mask;
312 	}
313 	/* this function is only called with the netlink table "grabbed", which
314 	 * makes sure updates are visible before bind or setsockopt return. */
315 }
316 
317 static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
318 {
319 	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
320 	struct hlist_head *head;
321 	int err = -EADDRINUSE;
322 	struct sock *osk;
323 	int len;
324 
325 	netlink_table_grab();
326 	head = nl_portid_hashfn(hash, portid);
327 	len = 0;
328 	sk_for_each(osk, head) {
329 		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
330 			break;
331 		len++;
332 	}
333 	if (osk)
334 		goto err;
335 
336 	err = -EBUSY;
337 	if (nlk_sk(sk)->portid)
338 		goto err;
339 
340 	err = -ENOMEM;
341 	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
342 		goto err;
343 
344 	if (len && nl_portid_hash_dilute(hash, len))
345 		head = nl_portid_hashfn(hash, portid);
346 	hash->entries++;
347 	nlk_sk(sk)->portid = portid;
348 	sk_add_node(sk, head);
349 	err = 0;
350 
351 err:
352 	netlink_table_ungrab();
353 	return err;
354 }
355 
356 static void netlink_remove(struct sock *sk)
357 {
358 	netlink_table_grab();
359 	if (sk_del_node_init(sk))
360 		nl_table[sk->sk_protocol].hash.entries--;
361 	if (nlk_sk(sk)->subscriptions)
362 		__sk_del_bind_node(sk);
363 	netlink_table_ungrab();
364 }
365 
366 static struct proto netlink_proto = {
367 	.name	  = "NETLINK",
368 	.owner	  = THIS_MODULE,
369 	.obj_size = sizeof(struct netlink_sock),
370 };
371 
372 static int __netlink_create(struct net *net, struct socket *sock,
373 			    struct mutex *cb_mutex, int protocol)
374 {
375 	struct sock *sk;
376 	struct netlink_sock *nlk;
377 
378 	sock->ops = &netlink_ops;
379 
380 	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
381 	if (!sk)
382 		return -ENOMEM;
383 
384 	sock_init_data(sock, sk);
385 
386 	nlk = nlk_sk(sk);
387 	if (cb_mutex) {
388 		nlk->cb_mutex = cb_mutex;
389 	} else {
390 		nlk->cb_mutex = &nlk->cb_def_mutex;
391 		mutex_init(nlk->cb_mutex);
392 	}
393 	init_waitqueue_head(&nlk->wait);
394 
395 	sk->sk_destruct = netlink_sock_destruct;
396 	sk->sk_protocol = protocol;
397 	return 0;
398 }
399 
400 static int netlink_create(struct net *net, struct socket *sock, int protocol,
401 			  int kern)
402 {
403 	struct module *module = NULL;
404 	struct mutex *cb_mutex;
405 	struct netlink_sock *nlk;
406 	void (*bind)(int group);
407 	int err = 0;
408 
409 	sock->state = SS_UNCONNECTED;
410 
411 	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
412 		return -ESOCKTNOSUPPORT;
413 
414 	if (protocol < 0 || protocol >= MAX_LINKS)
415 		return -EPROTONOSUPPORT;
416 
417 	netlink_lock_table();
418 #ifdef CONFIG_MODULES
419 	if (!nl_table[protocol].registered) {
420 		netlink_unlock_table();
421 		request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
422 		netlink_lock_table();
423 	}
424 #endif
425 	if (nl_table[protocol].registered &&
426 	    try_module_get(nl_table[protocol].module))
427 		module = nl_table[protocol].module;
428 	else
429 		err = -EPROTONOSUPPORT;
430 	cb_mutex = nl_table[protocol].cb_mutex;
431 	bind = nl_table[protocol].bind;
432 	netlink_unlock_table();
433 
434 	if (err < 0)
435 		goto out;
436 
437 	err = __netlink_create(net, sock, cb_mutex, protocol);
438 	if (err < 0)
439 		goto out_module;
440 
441 	local_bh_disable();
442 	sock_prot_inuse_add(net, &netlink_proto, 1);
443 	local_bh_enable();
444 
445 	nlk = nlk_sk(sock->sk);
446 	nlk->module = module;
447 	nlk->netlink_bind = bind;
448 out:
449 	return err;
450 
451 out_module:
452 	module_put(module);
453 	goto out;
454 }
455 
456 static int netlink_release(struct socket *sock)
457 {
458 	struct sock *sk = sock->sk;
459 	struct netlink_sock *nlk;
460 
461 	if (!sk)
462 		return 0;
463 
464 	netlink_remove(sk);
465 	sock_orphan(sk);
466 	nlk = nlk_sk(sk);
467 
468 	/*
469 	 * OK. Socket is unlinked, any packets that arrive now
470 	 * will be purged.
471 	 */
472 
473 	sock->sk = NULL;
474 	wake_up_interruptible_all(&nlk->wait);
475 
476 	skb_queue_purge(&sk->sk_write_queue);
477 
478 	if (nlk->portid) {
479 		struct netlink_notify n = {
480 						.net = sock_net(sk),
481 						.protocol = sk->sk_protocol,
482 						.portid = nlk->portid,
483 					  };
484 		atomic_notifier_call_chain(&netlink_chain,
485 				NETLINK_URELEASE, &n);
486 	}
487 
488 	module_put(nlk->module);
489 
490 	netlink_table_grab();
491 	if (netlink_is_kernel(sk)) {
492 		BUG_ON(nl_table[sk->sk_protocol].registered == 0);
493 		if (--nl_table[sk->sk_protocol].registered == 0) {
494 			struct listeners *old;
495 
496 			old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
497 			RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
498 			kfree_rcu(old, rcu);
499 			nl_table[sk->sk_protocol].module = NULL;
500 			nl_table[sk->sk_protocol].bind = NULL;
501 			nl_table[sk->sk_protocol].flags = 0;
502 			nl_table[sk->sk_protocol].registered = 0;
503 		}
504 	} else if (nlk->subscriptions) {
505 		netlink_update_listeners(sk);
506 	}
507 	netlink_table_ungrab();
508 
509 	kfree(nlk->groups);
510 	nlk->groups = NULL;
511 
512 	local_bh_disable();
513 	sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
514 	local_bh_enable();
515 	sock_put(sk);
516 	return 0;
517 }
518 
519 static int netlink_autobind(struct socket *sock)
520 {
521 	struct sock *sk = sock->sk;
522 	struct net *net = sock_net(sk);
523 	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
524 	struct hlist_head *head;
525 	struct sock *osk;
526 	s32 portid = task_tgid_vnr(current);
527 	int err;
528 	static s32 rover = -4097;
529 
530 retry:
531 	cond_resched();
532 	netlink_table_grab();
533 	head = nl_portid_hashfn(hash, portid);
534 	sk_for_each(osk, head) {
535 		if (!net_eq(sock_net(osk), net))
536 			continue;
537 		if (nlk_sk(osk)->portid == portid) {
538 			/* Bind collision, search negative portid values. */
539 			portid = rover--;
540 			if (rover > -4097)
541 				rover = -4097;
542 			netlink_table_ungrab();
543 			goto retry;
544 		}
545 	}
546 	netlink_table_ungrab();
547 
548 	err = netlink_insert(sk, net, portid);
549 	if (err == -EADDRINUSE)
550 		goto retry;
551 
552 	/* If 2 threads race to autobind, that is fine.  */
553 	if (err == -EBUSY)
554 		err = 0;
555 
556 	return err;
557 }
558 
559 static inline int netlink_capable(const struct socket *sock, unsigned int flag)
560 {
561 	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
562 		ns_capable(sock_net(sock->sk)->user_ns, CAP_NET_ADMIN);
563 }
564 
565 static void
566 netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
567 {
568 	struct netlink_sock *nlk = nlk_sk(sk);
569 
570 	if (nlk->subscriptions && !subscriptions)
571 		__sk_del_bind_node(sk);
572 	else if (!nlk->subscriptions && subscriptions)
573 		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
574 	nlk->subscriptions = subscriptions;
575 }
576 
577 static int netlink_realloc_groups(struct sock *sk)
578 {
579 	struct netlink_sock *nlk = nlk_sk(sk);
580 	unsigned int groups;
581 	unsigned long *new_groups;
582 	int err = 0;
583 
584 	netlink_table_grab();
585 
586 	groups = nl_table[sk->sk_protocol].groups;
587 	if (!nl_table[sk->sk_protocol].registered) {
588 		err = -ENOENT;
589 		goto out_unlock;
590 	}
591 
592 	if (nlk->ngroups >= groups)
593 		goto out_unlock;
594 
595 	new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
596 	if (new_groups == NULL) {
597 		err = -ENOMEM;
598 		goto out_unlock;
599 	}
600 	memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
601 	       NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));
602 
603 	nlk->groups = new_groups;
604 	nlk->ngroups = groups;
605  out_unlock:
606 	netlink_table_ungrab();
607 	return err;
608 }
609 
610 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
611 			int addr_len)
612 {
613 	struct sock *sk = sock->sk;
614 	struct net *net = sock_net(sk);
615 	struct netlink_sock *nlk = nlk_sk(sk);
616 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
617 	int err;
618 
619 	if (addr_len < sizeof(struct sockaddr_nl))
620 		return -EINVAL;
621 
622 	if (nladdr->nl_family != AF_NETLINK)
623 		return -EINVAL;
624 
625 	/* Only superuser is allowed to listen multicasts */
626 	if (nladdr->nl_groups) {
627 		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
628 			return -EPERM;
629 		err = netlink_realloc_groups(sk);
630 		if (err)
631 			return err;
632 	}
633 
634 	if (nlk->portid) {
635 		if (nladdr->nl_pid != nlk->portid)
636 			return -EINVAL;
637 	} else {
638 		err = nladdr->nl_pid ?
639 			netlink_insert(sk, net, nladdr->nl_pid) :
640 			netlink_autobind(sock);
641 		if (err)
642 			return err;
643 	}
644 
645 	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
646 		return 0;
647 
648 	netlink_table_grab();
649 	netlink_update_subscriptions(sk, nlk->subscriptions +
650 					 hweight32(nladdr->nl_groups) -
651 					 hweight32(nlk->groups[0]));
652 	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
653 	netlink_update_listeners(sk);
654 	netlink_table_ungrab();
655 
656 	if (nlk->netlink_bind && nlk->groups[0]) {
657 		int i;
658 
659 		for (i=0; i<nlk->ngroups; i++) {
660 			if (test_bit(i, nlk->groups))
661 				nlk->netlink_bind(i);
662 		}
663 	}
664 
665 	return 0;
666 }
667 
668 static int netlink_connect(struct socket *sock, struct sockaddr *addr,
669 			   int alen, int flags)
670 {
671 	int err = 0;
672 	struct sock *sk = sock->sk;
673 	struct netlink_sock *nlk = nlk_sk(sk);
674 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
675 
676 	if (alen < sizeof(addr->sa_family))
677 		return -EINVAL;
678 
679 	if (addr->sa_family == AF_UNSPEC) {
680 		sk->sk_state	= NETLINK_UNCONNECTED;
681 		nlk->dst_portid	= 0;
682 		nlk->dst_group  = 0;
683 		return 0;
684 	}
685 	if (addr->sa_family != AF_NETLINK)
686 		return -EINVAL;
687 
688 	/* Only superuser is allowed to send multicasts */
689 	if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
690 		return -EPERM;
691 
692 	if (!nlk->portid)
693 		err = netlink_autobind(sock);
694 
695 	if (err == 0) {
696 		sk->sk_state	= NETLINK_CONNECTED;
697 		nlk->dst_portid = nladdr->nl_pid;
698 		nlk->dst_group  = ffs(nladdr->nl_groups);
699 	}
700 
701 	return err;
702 }
703 
704 static int netlink_getname(struct socket *sock, struct sockaddr *addr,
705 			   int *addr_len, int peer)
706 {
707 	struct sock *sk = sock->sk;
708 	struct netlink_sock *nlk = nlk_sk(sk);
709 	DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);
710 
711 	nladdr->nl_family = AF_NETLINK;
712 	nladdr->nl_pad = 0;
713 	*addr_len = sizeof(*nladdr);
714 
715 	if (peer) {
716 		nladdr->nl_pid = nlk->dst_portid;
717 		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
718 	} else {
719 		nladdr->nl_pid = nlk->portid;
720 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
721 	}
722 	return 0;
723 }
724 
725 static void netlink_overrun(struct sock *sk)
726 {
727 	struct netlink_sock *nlk = nlk_sk(sk);
728 
729 	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
730 		if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
731 			sk->sk_err = ENOBUFS;
732 			sk->sk_error_report(sk);
733 		}
734 	}
735 	atomic_inc(&sk->sk_drops);
736 }
737 
738 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
739 {
740 	struct sock *sock;
741 	struct netlink_sock *nlk;
742 
743 	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
744 	if (!sock)
745 		return ERR_PTR(-ECONNREFUSED);
746 
747 	/* Don't bother queuing skb if kernel socket has no input function */
748 	nlk = nlk_sk(sock);
749 	if (sock->sk_state == NETLINK_CONNECTED &&
750 	    nlk->dst_portid != nlk_sk(ssk)->portid) {
751 		sock_put(sock);
752 		return ERR_PTR(-ECONNREFUSED);
753 	}
754 	return sock;
755 }
756 
757 struct sock *netlink_getsockbyfilp(struct file *filp)
758 {
759 	struct inode *inode = file_inode(filp);
760 	struct sock *sock;
761 
762 	if (!S_ISSOCK(inode->i_mode))
763 		return ERR_PTR(-ENOTSOCK);
764 
765 	sock = SOCKET_I(inode)->sk;
766 	if (sock->sk_family != AF_NETLINK)
767 		return ERR_PTR(-EINVAL);
768 
769 	sock_hold(sock);
770 	return sock;
771 }
772 
773 /*
774  * Attach a skb to a netlink socket.
775  * The caller must hold a reference to the destination socket. On error, the
776  * reference is dropped. The skb is not send to the destination, just all
777  * all error checks are performed and memory in the queue is reserved.
778  * Return values:
779  * < 0: error. skb freed, reference to sock dropped.
780  * 0: continue
781  * 1: repeat lookup - reference dropped while waiting for socket memory.
782  */
783 int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
784 		      long *timeo, struct sock *ssk)
785 {
786 	struct netlink_sock *nlk;
787 
788 	nlk = nlk_sk(sk);
789 
790 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
791 	    test_bit(0, &nlk->state)) {
792 		DECLARE_WAITQUEUE(wait, current);
793 		if (!*timeo) {
794 			if (!ssk || netlink_is_kernel(ssk))
795 				netlink_overrun(sk);
796 			sock_put(sk);
797 			kfree_skb(skb);
798 			return -EAGAIN;
799 		}
800 
801 		__set_current_state(TASK_INTERRUPTIBLE);
802 		add_wait_queue(&nlk->wait, &wait);
803 
804 		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
805 		     test_bit(0, &nlk->state)) &&
806 		    !sock_flag(sk, SOCK_DEAD))
807 			*timeo = schedule_timeout(*timeo);
808 
809 		__set_current_state(TASK_RUNNING);
810 		remove_wait_queue(&nlk->wait, &wait);
811 		sock_put(sk);
812 
813 		if (signal_pending(current)) {
814 			kfree_skb(skb);
815 			return sock_intr_errno(*timeo);
816 		}
817 		return 1;
818 	}
819 	skb_set_owner_r(skb, sk);
820 	return 0;
821 }
822 
823 static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
824 {
825 	int len = skb->len;
826 
827 	skb_queue_tail(&sk->sk_receive_queue, skb);
828 	sk->sk_data_ready(sk, len);
829 	return len;
830 }
831 
/*
 * Deliver @skb to @sk and drop one reference on @sk.
 * Returns the length of the delivered skb.
 */
int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
	int delivered;

	delivered = __netlink_sendskb(sk, skb);
	sock_put(sk);

	return delivered;
}
839 
/* Abort a delivery: free @skb and drop one reference on @sk. */
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);

	sock_put(sk);
}
845 
846 static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
847 {
848 	int delta;
849 
850 	skb_orphan(skb);
851 
852 	delta = skb->end - skb->tail;
853 	if (delta * 2 < skb->truesize)
854 		return skb;
855 
856 	if (skb_shared(skb)) {
857 		struct sk_buff *nskb = skb_clone(skb, allocation);
858 		if (!nskb)
859 			return skb;
860 		consume_skb(skb);
861 		skb = nskb;
862 	}
863 
864 	if (!pskb_expand_head(skb, 0, -delta, allocation))
865 		skb->truesize -= delta;
866 
867 	return skb;
868 }
869 
870 static void netlink_rcv_wake(struct sock *sk)
871 {
872 	struct netlink_sock *nlk = nlk_sk(sk);
873 
874 	if (skb_queue_empty(&sk->sk_receive_queue))
875 		clear_bit(0, &nlk->state);
876 	if (!test_bit(0, &nlk->state))
877 		wake_up_interruptible(&nlk->wait);
878 }
879 
/*
 * Hand @skb straight to the input function of kernel socket @sk.
 *
 * Returns the skb length when the socket has a netlink_rcv handler, or
 * -ECONNREFUSED when it has none.  The skb and the reference on @sk are
 * released in both cases.
 */
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
				  struct sock *ssk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int ret;

	if (nlk->netlink_rcv == NULL) {
		kfree_skb(skb);
		sock_put(sk);
		return -ECONNREFUSED;
	}

	ret = skb->len;
	skb_set_owner_r(skb, sk);
	NETLINK_CB(skb).ssk = ssk;
	nlk->netlink_rcv(skb);
	consume_skb(skb);

	sock_put(sk);
	return ret;
}
899 
900 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
901 		    u32 portid, int nonblock)
902 {
903 	struct sock *sk;
904 	int err;
905 	long timeo;
906 
907 	skb = netlink_trim(skb, gfp_any());
908 
909 	timeo = sock_sndtimeo(ssk, nonblock);
910 retry:
911 	sk = netlink_getsockbyportid(ssk, portid);
912 	if (IS_ERR(sk)) {
913 		kfree_skb(skb);
914 		return PTR_ERR(sk);
915 	}
916 	if (netlink_is_kernel(sk))
917 		return netlink_unicast_kernel(sk, skb, ssk);
918 
919 	if (sk_filter(sk, skb)) {
920 		err = skb->len;
921 		kfree_skb(skb);
922 		sock_put(sk);
923 		return err;
924 	}
925 
926 	err = netlink_attachskb(sk, skb, &timeo, ssk);
927 	if (err == 1)
928 		goto retry;
929 	if (err)
930 		return err;
931 
932 	return netlink_sendskb(sk, skb);
933 }
934 EXPORT_SYMBOL(netlink_unicast);
935 
936 int netlink_has_listeners(struct sock *sk, unsigned int group)
937 {
938 	int res = 0;
939 	struct listeners *listeners;
940 
941 	BUG_ON(!netlink_is_kernel(sk));
942 
943 	rcu_read_lock();
944 	listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);
945 
946 	if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
947 		res = test_bit(group - 1, listeners->masks);
948 
949 	rcu_read_unlock();
950 
951 	return res;
952 }
953 EXPORT_SYMBOL_GPL(netlink_has_listeners);
954 
955 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
956 {
957 	struct netlink_sock *nlk = nlk_sk(sk);
958 
959 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
960 	    !test_bit(0, &nlk->state)) {
961 		skb_set_owner_r(skb, sk);
962 		__netlink_sendskb(sk, skb);
963 		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
964 	}
965 	return -1;
966 }
967 
968 struct netlink_broadcast_data {
969 	struct sock *exclude_sk;
970 	struct net *net;
971 	u32 portid;
972 	u32 group;
973 	int failure;
974 	int delivery_failure;
975 	int congested;
976 	int delivered;
977 	gfp_t allocation;
978 	struct sk_buff *skb, *skb2;
979 	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
980 	void *tx_data;
981 };
982 
983 static int do_one_broadcast(struct sock *sk,
984 				   struct netlink_broadcast_data *p)
985 {
986 	struct netlink_sock *nlk = nlk_sk(sk);
987 	int val;
988 
989 	if (p->exclude_sk == sk)
990 		goto out;
991 
992 	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
993 	    !test_bit(p->group - 1, nlk->groups))
994 		goto out;
995 
996 	if (!net_eq(sock_net(sk), p->net))
997 		goto out;
998 
999 	if (p->failure) {
1000 		netlink_overrun(sk);
1001 		goto out;
1002 	}
1003 
1004 	sock_hold(sk);
1005 	if (p->skb2 == NULL) {
1006 		if (skb_shared(p->skb)) {
1007 			p->skb2 = skb_clone(p->skb, p->allocation);
1008 		} else {
1009 			p->skb2 = skb_get(p->skb);
1010 			/*
1011 			 * skb ownership may have been set when
1012 			 * delivered to a previous socket.
1013 			 */
1014 			skb_orphan(p->skb2);
1015 		}
1016 	}
1017 	if (p->skb2 == NULL) {
1018 		netlink_overrun(sk);
1019 		/* Clone failed. Notify ALL listeners. */
1020 		p->failure = 1;
1021 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1022 			p->delivery_failure = 1;
1023 	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
1024 		kfree_skb(p->skb2);
1025 		p->skb2 = NULL;
1026 	} else if (sk_filter(sk, p->skb2)) {
1027 		kfree_skb(p->skb2);
1028 		p->skb2 = NULL;
1029 	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1030 		netlink_overrun(sk);
1031 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1032 			p->delivery_failure = 1;
1033 	} else {
1034 		p->congested |= val;
1035 		p->delivered = 1;
1036 		p->skb2 = NULL;
1037 	}
1038 	sock_put(sk);
1039 
1040 out:
1041 	return 0;
1042 }
1043 
1044 int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
1045 	u32 group, gfp_t allocation,
1046 	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
1047 	void *filter_data)
1048 {
1049 	struct net *net = sock_net(ssk);
1050 	struct netlink_broadcast_data info;
1051 	struct sock *sk;
1052 
1053 	skb = netlink_trim(skb, allocation);
1054 
1055 	info.exclude_sk = ssk;
1056 	info.net = net;
1057 	info.portid = portid;
1058 	info.group = group;
1059 	info.failure = 0;
1060 	info.delivery_failure = 0;
1061 	info.congested = 0;
1062 	info.delivered = 0;
1063 	info.allocation = allocation;
1064 	info.skb = skb;
1065 	info.skb2 = NULL;
1066 	info.tx_filter = filter;
1067 	info.tx_data = filter_data;
1068 
1069 	/* While we sleep in clone, do not allow to change socket list */
1070 
1071 	netlink_lock_table();
1072 
1073 	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1074 		do_one_broadcast(sk, &info);
1075 
1076 	consume_skb(skb);
1077 
1078 	netlink_unlock_table();
1079 
1080 	if (info.delivery_failure) {
1081 		kfree_skb(info.skb2);
1082 		return -ENOBUFS;
1083 	}
1084 	consume_skb(info.skb2);
1085 
1086 	if (info.delivered) {
1087 		if (info.congested && (allocation & __GFP_WAIT))
1088 			yield();
1089 		return 0;
1090 	}
1091 	return -ESRCH;
1092 }
1093 EXPORT_SYMBOL(netlink_broadcast_filtered);
1094 
1095 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
1096 		      u32 group, gfp_t allocation)
1097 {
1098 	return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
1099 		NULL, NULL);
1100 }
1101 EXPORT_SYMBOL(netlink_broadcast);
1102 
1103 struct netlink_set_err_data {
1104 	struct sock *exclude_sk;
1105 	u32 portid;
1106 	u32 group;
1107 	int code;
1108 };
1109 
1110 static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
1111 {
1112 	struct netlink_sock *nlk = nlk_sk(sk);
1113 	int ret = 0;
1114 
1115 	if (sk == p->exclude_sk)
1116 		goto out;
1117 
1118 	if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
1119 		goto out;
1120 
1121 	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
1122 	    !test_bit(p->group - 1, nlk->groups))
1123 		goto out;
1124 
1125 	if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
1126 		ret = 1;
1127 		goto out;
1128 	}
1129 
1130 	sk->sk_err = p->code;
1131 	sk->sk_error_report(sk);
1132 out:
1133 	return ret;
1134 }
1135 
1136 /**
1137  * netlink_set_err - report error to broadcast listeners
1138  * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
1139  * @portid: the PORTID of a process that we want to skip (if any)
1140  * @groups: the broadcast group that will notice the error
1141  * @code: error code, must be negative (as usual in kernelspace)
1142  *
1143  * This function returns the number of broadcast listeners that have set the
1144  * NETLINK_RECV_NO_ENOBUFS socket option.
1145  */
1146 int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
1147 {
1148 	struct netlink_set_err_data info;
1149 	struct sock *sk;
1150 	int ret = 0;
1151 
1152 	info.exclude_sk = ssk;
1153 	info.portid = portid;
1154 	info.group = group;
1155 	/* sk->sk_err wants a positive error value */
1156 	info.code = -code;
1157 
1158 	read_lock(&nl_table_lock);
1159 
1160 	sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
1161 		ret += do_one_set_err(sk, &info);
1162 
1163 	read_unlock(&nl_table_lock);
1164 	return ret;
1165 }
1166 EXPORT_SYMBOL(netlink_set_err);
1167 
1168 /* must be called with netlink table grabbed */
1169 static void netlink_update_socket_mc(struct netlink_sock *nlk,
1170 				     unsigned int group,
1171 				     int is_new)
1172 {
1173 	int old, new = !!is_new, subscriptions;
1174 
1175 	old = test_bit(group - 1, nlk->groups);
1176 	subscriptions = nlk->subscriptions - old + new;
1177 	if (new)
1178 		__set_bit(group - 1, nlk->groups);
1179 	else
1180 		__clear_bit(group - 1, nlk->groups);
1181 	netlink_update_subscriptions(&nlk->sk, subscriptions);
1182 	netlink_update_listeners(&nlk->sk);
1183 }
1184 
1185 static int netlink_setsockopt(struct socket *sock, int level, int optname,
1186 			      char __user *optval, unsigned int optlen)
1187 {
1188 	struct sock *sk = sock->sk;
1189 	struct netlink_sock *nlk = nlk_sk(sk);
1190 	unsigned int val = 0;
1191 	int err;
1192 
1193 	if (level != SOL_NETLINK)
1194 		return -ENOPROTOOPT;
1195 
1196 	if (optlen >= sizeof(int) &&
1197 	    get_user(val, (unsigned int __user *)optval))
1198 		return -EFAULT;
1199 
1200 	switch (optname) {
1201 	case NETLINK_PKTINFO:
1202 		if (val)
1203 			nlk->flags |= NETLINK_RECV_PKTINFO;
1204 		else
1205 			nlk->flags &= ~NETLINK_RECV_PKTINFO;
1206 		err = 0;
1207 		break;
1208 	case NETLINK_ADD_MEMBERSHIP:
1209 	case NETLINK_DROP_MEMBERSHIP: {
1210 		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
1211 			return -EPERM;
1212 		err = netlink_realloc_groups(sk);
1213 		if (err)
1214 			return err;
1215 		if (!val || val - 1 >= nlk->ngroups)
1216 			return -EINVAL;
1217 		netlink_table_grab();
1218 		netlink_update_socket_mc(nlk, val,
1219 					 optname == NETLINK_ADD_MEMBERSHIP);
1220 		netlink_table_ungrab();
1221 
1222 		if (nlk->netlink_bind)
1223 			nlk->netlink_bind(val);
1224 
1225 		err = 0;
1226 		break;
1227 	}
1228 	case NETLINK_BROADCAST_ERROR:
1229 		if (val)
1230 			nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1231 		else
1232 			nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1233 		err = 0;
1234 		break;
1235 	case NETLINK_NO_ENOBUFS:
1236 		if (val) {
1237 			nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1238 			clear_bit(0, &nlk->state);
1239 			wake_up_interruptible(&nlk->wait);
1240 		} else {
1241 			nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1242 		}
1243 		err = 0;
1244 		break;
1245 	default:
1246 		err = -ENOPROTOOPT;
1247 	}
1248 	return err;
1249 }
1250 
1251 static int netlink_getsockopt(struct socket *sock, int level, int optname,
1252 			      char __user *optval, int __user *optlen)
1253 {
1254 	struct sock *sk = sock->sk;
1255 	struct netlink_sock *nlk = nlk_sk(sk);
1256 	int len, val, err;
1257 
1258 	if (level != SOL_NETLINK)
1259 		return -ENOPROTOOPT;
1260 
1261 	if (get_user(len, optlen))
1262 		return -EFAULT;
1263 	if (len < 0)
1264 		return -EINVAL;
1265 
1266 	switch (optname) {
1267 	case NETLINK_PKTINFO:
1268 		if (len < sizeof(int))
1269 			return -EINVAL;
1270 		len = sizeof(int);
1271 		val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1272 		if (put_user(len, optlen) ||
1273 		    put_user(val, optval))
1274 			return -EFAULT;
1275 		err = 0;
1276 		break;
1277 	case NETLINK_BROADCAST_ERROR:
1278 		if (len < sizeof(int))
1279 			return -EINVAL;
1280 		len = sizeof(int);
1281 		val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1282 		if (put_user(len, optlen) ||
1283 		    put_user(val, optval))
1284 			return -EFAULT;
1285 		err = 0;
1286 		break;
1287 	case NETLINK_NO_ENOBUFS:
1288 		if (len < sizeof(int))
1289 			return -EINVAL;
1290 		len = sizeof(int);
1291 		val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1292 		if (put_user(len, optlen) ||
1293 		    put_user(val, optval))
1294 			return -EFAULT;
1295 		err = 0;
1296 		break;
1297 	default:
1298 		err = -ENOPROTOOPT;
1299 	}
1300 	return err;
1301 }
1302 
1303 static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1304 {
1305 	struct nl_pktinfo info;
1306 
1307 	info.group = NETLINK_CB(skb).dst_group;
1308 	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1309 }
1310 
1311 static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1312 			   struct msghdr *msg, size_t len)
1313 {
1314 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1315 	struct sock *sk = sock->sk;
1316 	struct netlink_sock *nlk = nlk_sk(sk);
1317 	struct sockaddr_nl *addr = msg->msg_name;
1318 	u32 dst_portid;
1319 	u32 dst_group;
1320 	struct sk_buff *skb;
1321 	int err;
1322 	struct scm_cookie scm;
1323 
1324 	if (msg->msg_flags&MSG_OOB)
1325 		return -EOPNOTSUPP;
1326 
1327 	if (NULL == siocb->scm)
1328 		siocb->scm = &scm;
1329 
1330 	err = scm_send(sock, msg, siocb->scm, true);
1331 	if (err < 0)
1332 		return err;
1333 
1334 	if (msg->msg_namelen) {
1335 		err = -EINVAL;
1336 		if (addr->nl_family != AF_NETLINK)
1337 			goto out;
1338 		dst_portid = addr->nl_pid;
1339 		dst_group = ffs(addr->nl_groups);
1340 		err =  -EPERM;
1341 		if ((dst_group || dst_portid) &&
1342 		    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
1343 			goto out;
1344 	} else {
1345 		dst_portid = nlk->dst_portid;
1346 		dst_group = nlk->dst_group;
1347 	}
1348 
1349 	if (!nlk->portid) {
1350 		err = netlink_autobind(sock);
1351 		if (err)
1352 			goto out;
1353 	}
1354 
1355 	err = -EMSGSIZE;
1356 	if (len > sk->sk_sndbuf - 32)
1357 		goto out;
1358 	err = -ENOBUFS;
1359 	skb = alloc_skb(len, GFP_KERNEL);
1360 	if (skb == NULL)
1361 		goto out;
1362 
1363 	NETLINK_CB(skb).portid	= nlk->portid;
1364 	NETLINK_CB(skb).dst_group = dst_group;
1365 	NETLINK_CB(skb).creds	= siocb->scm->creds;
1366 
1367 	err = -EFAULT;
1368 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
1369 		kfree_skb(skb);
1370 		goto out;
1371 	}
1372 
1373 	err = security_netlink_send(sk, skb);
1374 	if (err) {
1375 		kfree_skb(skb);
1376 		goto out;
1377 	}
1378 
1379 	if (dst_group) {
1380 		atomic_inc(&skb->users);
1381 		netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
1382 	}
1383 	err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
1384 
1385 out:
1386 	scm_destroy(siocb->scm);
1387 	return err;
1388 }
1389 
1390 static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
1391 			   struct msghdr *msg, size_t len,
1392 			   int flags)
1393 {
1394 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1395 	struct scm_cookie scm;
1396 	struct sock *sk = sock->sk;
1397 	struct netlink_sock *nlk = nlk_sk(sk);
1398 	int noblock = flags&MSG_DONTWAIT;
1399 	size_t copied;
1400 	struct sk_buff *skb, *data_skb;
1401 	int err, ret;
1402 
1403 	if (flags&MSG_OOB)
1404 		return -EOPNOTSUPP;
1405 
1406 	copied = 0;
1407 
1408 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1409 	if (skb == NULL)
1410 		goto out;
1411 
1412 	data_skb = skb;
1413 
1414 #ifdef CONFIG_COMPAT_NETLINK_MESSAGES
1415 	if (unlikely(skb_shinfo(skb)->frag_list)) {
1416 		/*
1417 		 * If this skb has a frag_list, then here that means that we
1418 		 * will have to use the frag_list skb's data for compat tasks
1419 		 * and the regular skb's data for normal (non-compat) tasks.
1420 		 *
1421 		 * If we need to send the compat skb, assign it to the
1422 		 * 'data_skb' variable so that it will be used below for data
1423 		 * copying. We keep 'skb' for everything else, including
1424 		 * freeing both later.
1425 		 */
1426 		if (flags & MSG_CMSG_COMPAT)
1427 			data_skb = skb_shinfo(skb)->frag_list;
1428 	}
1429 #endif
1430 
1431 	msg->msg_namelen = 0;
1432 
1433 	copied = data_skb->len;
1434 	if (len < copied) {
1435 		msg->msg_flags |= MSG_TRUNC;
1436 		copied = len;
1437 	}
1438 
1439 	skb_reset_transport_header(data_skb);
1440 	err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
1441 
1442 	if (msg->msg_name) {
1443 		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
1444 		addr->nl_family = AF_NETLINK;
1445 		addr->nl_pad    = 0;
1446 		addr->nl_pid	= NETLINK_CB(skb).portid;
1447 		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
1448 		msg->msg_namelen = sizeof(*addr);
1449 	}
1450 
1451 	if (nlk->flags & NETLINK_RECV_PKTINFO)
1452 		netlink_cmsg_recv_pktinfo(msg, skb);
1453 
1454 	if (NULL == siocb->scm) {
1455 		memset(&scm, 0, sizeof(scm));
1456 		siocb->scm = &scm;
1457 	}
1458 	siocb->scm->creds = *NETLINK_CREDS(skb);
1459 	if (flags & MSG_TRUNC)
1460 		copied = data_skb->len;
1461 
1462 	skb_free_datagram(sk, skb);
1463 
1464 	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
1465 		ret = netlink_dump(sk);
1466 		if (ret) {
1467 			sk->sk_err = ret;
1468 			sk->sk_error_report(sk);
1469 		}
1470 	}
1471 
1472 	scm_recv(sock, msg, siocb->scm, flags);
1473 out:
1474 	netlink_rcv_wake(sk);
1475 	return err ? : copied;
1476 }
1477 
/*
 * sk_data_ready callback installed on kernel-side netlink sockets by
 * __netlink_kernel_create().  It unconditionally triggers BUG().
 */
static void netlink_data_ready(struct sock *sk, int len)
{
	BUG();
}
1482 
1483 /*
1484  *	We export these functions to other modules. They provide a
1485  *	complete set of kernel non-blocking support for message
1486  *	queueing.
1487  */
1488 
1489 struct sock *
1490 __netlink_kernel_create(struct net *net, int unit, struct module *module,
1491 			struct netlink_kernel_cfg *cfg)
1492 {
1493 	struct socket *sock;
1494 	struct sock *sk;
1495 	struct netlink_sock *nlk;
1496 	struct listeners *listeners = NULL;
1497 	struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
1498 	unsigned int groups;
1499 
1500 	BUG_ON(!nl_table);
1501 
1502 	if (unit < 0 || unit >= MAX_LINKS)
1503 		return NULL;
1504 
1505 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
1506 		return NULL;
1507 
1508 	/*
1509 	 * We have to just have a reference on the net from sk, but don't
1510 	 * get_net it. Besides, we cannot get and then put the net here.
1511 	 * So we create one inside init_net and the move it to net.
1512 	 */
1513 
1514 	if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1515 		goto out_sock_release_nosk;
1516 
1517 	sk = sock->sk;
1518 	sk_change_net(sk, net);
1519 
1520 	if (!cfg || cfg->groups < 32)
1521 		groups = 32;
1522 	else
1523 		groups = cfg->groups;
1524 
1525 	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1526 	if (!listeners)
1527 		goto out_sock_release;
1528 
1529 	sk->sk_data_ready = netlink_data_ready;
1530 	if (cfg && cfg->input)
1531 		nlk_sk(sk)->netlink_rcv = cfg->input;
1532 
1533 	if (netlink_insert(sk, net, 0))
1534 		goto out_sock_release;
1535 
1536 	nlk = nlk_sk(sk);
1537 	nlk->flags |= NETLINK_KERNEL_SOCKET;
1538 
1539 	netlink_table_grab();
1540 	if (!nl_table[unit].registered) {
1541 		nl_table[unit].groups = groups;
1542 		rcu_assign_pointer(nl_table[unit].listeners, listeners);
1543 		nl_table[unit].cb_mutex = cb_mutex;
1544 		nl_table[unit].module = module;
1545 		if (cfg) {
1546 			nl_table[unit].bind = cfg->bind;
1547 			nl_table[unit].flags = cfg->flags;
1548 		}
1549 		nl_table[unit].registered = 1;
1550 	} else {
1551 		kfree(listeners);
1552 		nl_table[unit].registered++;
1553 	}
1554 	netlink_table_ungrab();
1555 	return sk;
1556 
1557 out_sock_release:
1558 	kfree(listeners);
1559 	netlink_kernel_release(sk);
1560 	return NULL;
1561 
1562 out_sock_release_nosk:
1563 	sock_release(sock);
1564 	return NULL;
1565 }
1566 EXPORT_SYMBOL(__netlink_kernel_create);
1567 
/*
 * netlink_kernel_release - release a kernel-side netlink socket
 * @sk: the socket to release
 *
 * Thin exported wrapper around sk_release_kernel().
 */
void
netlink_kernel_release(struct sock *sk)
{
	sk_release_kernel(sk);
}
EXPORT_SYMBOL(netlink_kernel_release);
1574 
1575 int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1576 {
1577 	struct listeners *new, *old;
1578 	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1579 
1580 	if (groups < 32)
1581 		groups = 32;
1582 
1583 	if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1584 		new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1585 		if (!new)
1586 			return -ENOMEM;
1587 		old = nl_deref_protected(tbl->listeners);
1588 		memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1589 		rcu_assign_pointer(tbl->listeners, new);
1590 
1591 		kfree_rcu(old, rcu);
1592 	}
1593 	tbl->groups = groups;
1594 
1595 	return 0;
1596 }
1597 
/**
 * netlink_change_ngroups - change number of multicast groups
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * Takes the netlink table lock around __netlink_change_ngroups() and
 * returns its result.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
	int ret;

	netlink_table_grab();
	ret = __netlink_change_ngroups(sk, groups);
	netlink_table_ungrab();

	return ret;
}
1620 
1621 void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1622 {
1623 	struct sock *sk;
1624 	struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1625 
1626 	sk_for_each_bound(sk, &tbl->mc_list)
1627 		netlink_update_socket_mc(nlk_sk(sk), group, 0);
1628 }
1629 
/**
 * netlink_clear_multicast_users - kick off multicast listeners
 * @ksk: The kernel netlink socket, as returned by
 *	netlink_kernel_create().
 * @group: The multicast group to clear.
 *
 * This function removes all listeners from the given group. It grabs the
 * netlink table around __netlink_clear_multicast_users().
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
	netlink_table_grab();
	__netlink_clear_multicast_users(ksk, group);
	netlink_table_ungrab();
}
1644 
1645 struct nlmsghdr *
1646 __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
1647 {
1648 	struct nlmsghdr *nlh;
1649 	int size = NLMSG_LENGTH(len);
1650 
1651 	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1652 	nlh->nlmsg_type = type;
1653 	nlh->nlmsg_len = size;
1654 	nlh->nlmsg_flags = flags;
1655 	nlh->nlmsg_pid = portid;
1656 	nlh->nlmsg_seq = seq;
1657 	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1658 		memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1659 	return nlh;
1660 }
1661 EXPORT_SYMBOL(__nlmsg_put);
1662 
1663 /*
1664  * It looks a bit ugly.
1665  * It would be better to create kernel thread.
1666  */
1667 
1668 static int netlink_dump(struct sock *sk)
1669 {
1670 	struct netlink_sock *nlk = nlk_sk(sk);
1671 	struct netlink_callback *cb;
1672 	struct sk_buff *skb = NULL;
1673 	struct nlmsghdr *nlh;
1674 	int len, err = -ENOBUFS;
1675 	int alloc_size;
1676 
1677 	mutex_lock(nlk->cb_mutex);
1678 
1679 	cb = nlk->cb;
1680 	if (cb == NULL) {
1681 		err = -EINVAL;
1682 		goto errout_skb;
1683 	}
1684 
1685 	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1686 
1687 	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1688 	if (!skb)
1689 		goto errout_skb;
1690 
1691 	len = cb->dump(skb, cb);
1692 
1693 	if (len > 0) {
1694 		mutex_unlock(nlk->cb_mutex);
1695 
1696 		if (sk_filter(sk, skb))
1697 			kfree_skb(skb);
1698 		else
1699 			__netlink_sendskb(sk, skb);
1700 		return 0;
1701 	}
1702 
1703 	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1704 	if (!nlh)
1705 		goto errout_skb;
1706 
1707 	nl_dump_check_consistent(cb, nlh);
1708 
1709 	memcpy(nlmsg_data(nlh), &len, sizeof(len));
1710 
1711 	if (sk_filter(sk, skb))
1712 		kfree_skb(skb);
1713 	else
1714 		__netlink_sendskb(sk, skb);
1715 
1716 	if (cb->done)
1717 		cb->done(cb);
1718 	nlk->cb = NULL;
1719 	mutex_unlock(nlk->cb_mutex);
1720 
1721 	module_put(cb->module);
1722 	netlink_consume_callback(cb);
1723 	return 0;
1724 
1725 errout_skb:
1726 	mutex_unlock(nlk->cb_mutex);
1727 	kfree_skb(skb);
1728 	return err;
1729 }
1730 
/*
 * __netlink_dump_start - install a dump callback and emit its first part
 * @ssk: kernel socket; supplies the namespace and protocol for the lookup
 * @skb: request skb; an extra reference is held by the callback
 * @nlh: request header, stored in the callback
 * @control: dump/done handlers, private data, owning module and minimum
 *	allocation size
 *
 * Returns -EINTR once the dump has been started, which signals not to send
 * an ACK even if one was requested.  Otherwise returns -ENOBUFS (callback
 * allocation failed), -ECONNREFUSED (requesting socket not found), -EBUSY
 * (a dump is already in progress), -EPROTONOSUPPORT (module reference could
 * not be taken) or the error from netlink_dump().
 */
int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
			 const struct nlmsghdr *nlh,
			 struct netlink_dump_control *control)
{
	struct netlink_callback *cb;
	struct netlink_sock *nlk;
	struct sock *sk;
	int ret = 0;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (!cb)
		return -ENOBUFS;

	cb->dump = control->dump;
	cb->done = control->done;
	cb->nlh = nlh;
	cb->data = control->data;
	cb->module = control->module;
	cb->min_dump_alloc = control->min_dump_alloc;
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
	if (!sk) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);

	mutex_lock(nlk->cb_mutex);
	if (nlk->cb) {
		/* A dump is in progress... */
		ret = -EBUSY;
	} else if (!try_module_get(cb->module)) {
		/* could not reference the module which cb->dump belongs to */
		ret = -EPROTONOSUPPORT;
	} else {
		nlk->cb = cb;
	}
	mutex_unlock(nlk->cb_mutex);

	if (ret)
		netlink_destroy_callback(cb);
	else
		ret = netlink_dump(sk);

	sock_put(sk);

	/* We successfully started a dump, by returning -EINTR we
	 * signal not to send ACK even if it was requested.
	 */
	return ret ? ret : -EINTR;
}
EXPORT_SYMBOL(__netlink_dump_start);
1792 
1793 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1794 {
1795 	struct sk_buff *skb;
1796 	struct nlmsghdr *rep;
1797 	struct nlmsgerr *errmsg;
1798 	size_t payload = sizeof(*errmsg);
1799 
1800 	/* error messages get the original request appened */
1801 	if (err)
1802 		payload += nlmsg_len(nlh);
1803 
1804 	skb = nlmsg_new(payload, GFP_KERNEL);
1805 	if (!skb) {
1806 		struct sock *sk;
1807 
1808 		sk = netlink_lookup(sock_net(in_skb->sk),
1809 				    in_skb->sk->sk_protocol,
1810 				    NETLINK_CB(in_skb).portid);
1811 		if (sk) {
1812 			sk->sk_err = ENOBUFS;
1813 			sk->sk_error_report(sk);
1814 			sock_put(sk);
1815 		}
1816 		return;
1817 	}
1818 
1819 	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
1820 			  NLMSG_ERROR, payload, 0);
1821 	errmsg = nlmsg_data(rep);
1822 	errmsg->error = err;
1823 	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1824 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
1825 }
1826 EXPORT_SYMBOL(netlink_ack);
1827 
1828 int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1829 						     struct nlmsghdr *))
1830 {
1831 	struct nlmsghdr *nlh;
1832 	int err;
1833 
1834 	while (skb->len >= nlmsg_total_size(0)) {
1835 		int msglen;
1836 
1837 		nlh = nlmsg_hdr(skb);
1838 		err = 0;
1839 
1840 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1841 			return 0;
1842 
1843 		/* Only requests are handled by the kernel */
1844 		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1845 			goto ack;
1846 
1847 		/* Skip control messages */
1848 		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1849 			goto ack;
1850 
1851 		err = cb(skb, nlh);
1852 		if (err == -EINTR)
1853 			goto skip;
1854 
1855 ack:
1856 		if (nlh->nlmsg_flags & NLM_F_ACK || err)
1857 			netlink_ack(skb, nlh, err);
1858 
1859 skip:
1860 		msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1861 		if (msglen > skb->len)
1862 			msglen = skb->len;
1863 		skb_pull(skb, msglen);
1864 	}
1865 
1866 	return 0;
1867 }
1868 EXPORT_SYMBOL(netlink_rcv_skb);
1869 
1870 /**
1871  * nlmsg_notify - send a notification netlink message
1872  * @sk: netlink socket to use
1873  * @skb: notification message
1874  * @portid: destination netlink portid for reports or 0
1875  * @group: destination multicast group or 0
1876  * @report: 1 to report back, 0 to disable
1877  * @flags: allocation flags
1878  */
1879 int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
1880 		 unsigned int group, int report, gfp_t flags)
1881 {
1882 	int err = 0;
1883 
1884 	if (group) {
1885 		int exclude_portid = 0;
1886 
1887 		if (report) {
1888 			atomic_inc(&skb->users);
1889 			exclude_portid = portid;
1890 		}
1891 
1892 		/* errors reported via destination sk->sk_err, but propagate
1893 		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
1894 		err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
1895 	}
1896 
1897 	if (report) {
1898 		int err2;
1899 
1900 		err2 = nlmsg_unicast(sk, skb, portid);
1901 		if (!err || err == -ESRCH)
1902 			err = err2;
1903 	}
1904 
1905 	return err;
1906 }
1907 EXPORT_SYMBOL(nlmsg_notify);
1908 
1909 #ifdef CONFIG_PROC_FS
/* Per-open iterator state for the "netlink" proc file. */
struct nl_seq_iter {
	struct seq_net_private p;
	int link;	/* nl_table index of the socket last returned */
	int hash_idx;	/* hash bucket of the socket last returned */
};
1915 
1916 static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1917 {
1918 	struct nl_seq_iter *iter = seq->private;
1919 	int i, j;
1920 	struct sock *s;
1921 	loff_t off = 0;
1922 
1923 	for (i = 0; i < MAX_LINKS; i++) {
1924 		struct nl_portid_hash *hash = &nl_table[i].hash;
1925 
1926 		for (j = 0; j <= hash->mask; j++) {
1927 			sk_for_each(s, &hash->table[j]) {
1928 				if (sock_net(s) != seq_file_net(seq))
1929 					continue;
1930 				if (off == pos) {
1931 					iter->link = i;
1932 					iter->hash_idx = j;
1933 					return s;
1934 				}
1935 				++off;
1936 			}
1937 		}
1938 	}
1939 	return NULL;
1940 }
1941 
1942 static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1943 	__acquires(nl_table_lock)
1944 {
1945 	read_lock(&nl_table_lock);
1946 	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1947 }
1948 
1949 static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1950 {
1951 	struct sock *s;
1952 	struct nl_seq_iter *iter;
1953 	int i, j;
1954 
1955 	++*pos;
1956 
1957 	if (v == SEQ_START_TOKEN)
1958 		return netlink_seq_socket_idx(seq, 0);
1959 
1960 	iter = seq->private;
1961 	s = v;
1962 	do {
1963 		s = sk_next(s);
1964 	} while (s && sock_net(s) != seq_file_net(seq));
1965 	if (s)
1966 		return s;
1967 
1968 	i = iter->link;
1969 	j = iter->hash_idx + 1;
1970 
1971 	do {
1972 		struct nl_portid_hash *hash = &nl_table[i].hash;
1973 
1974 		for (; j <= hash->mask; j++) {
1975 			s = sk_head(&hash->table[j]);
1976 			while (s && sock_net(s) != seq_file_net(seq))
1977 				s = sk_next(s);
1978 			if (s) {
1979 				iter->link = i;
1980 				iter->hash_idx = j;
1981 				return s;
1982 			}
1983 		}
1984 
1985 		j = 0;
1986 	} while (++i < MAX_LINKS);
1987 
1988 	return NULL;
1989 }
1990 
/* seq_file ->stop: drop the read lock taken in netlink_seq_start(). */
static void netlink_seq_stop(struct seq_file *seq, void *v)
	__releases(nl_table_lock)
{
	read_unlock(&nl_table_lock);
}
1996 
1997 
1998 static int netlink_seq_show(struct seq_file *seq, void *v)
1999 {
2000 	if (v == SEQ_START_TOKEN) {
2001 		seq_puts(seq,
2002 			 "sk       Eth Pid    Groups   "
2003 			 "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2004 	} else {
2005 		struct sock *s = v;
2006 		struct netlink_sock *nlk = nlk_sk(s);
2007 
2008 		seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2009 			   s,
2010 			   s->sk_protocol,
2011 			   nlk->portid,
2012 			   nlk->groups ? (u32)nlk->groups[0] : 0,
2013 			   sk_rmem_alloc_get(s),
2014 			   sk_wmem_alloc_get(s),
2015 			   nlk->cb,
2016 			   atomic_read(&s->sk_refcnt),
2017 			   atomic_read(&s->sk_drops),
2018 			   sock_i_ino(s)
2019 			);
2020 
2021 	}
2022 	return 0;
2023 }
2024 
/* seq_file iterator callbacks for the "netlink" proc file. */
static const struct seq_operations netlink_seq_ops = {
	.start  = netlink_seq_start,
	.next   = netlink_seq_next,
	.stop   = netlink_seq_stop,
	.show   = netlink_seq_show,
};
2031 
2032 
/*
 * ->open for the "netlink" proc file: set up a namespace-aware seq_file
 * with a struct nl_seq_iter as private data.
 */
static int netlink_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &netlink_seq_ops,
				sizeof(struct nl_seq_iter));
}
2038 
/* File operations of the "netlink" proc file created in netlink_net_init(). */
static const struct file_operations netlink_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= netlink_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2046 
2047 #endif
2048 
/*
 * netlink_register_notifier - add @nb to the netlink notifier chain
 *
 * Returns the result of atomic_notifier_chain_register().
 */
int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);
2054 
/*
 * netlink_unregister_notifier - remove @nb from the netlink notifier chain
 *
 * Returns the result of atomic_notifier_chain_unregister().
 */
int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);
2060 
/*
 * Socket operations for PF_NETLINK sockets.  Operations netlink does not
 * implement are wired to the generic sock_no_*() stubs.
 */
static const struct proto_ops netlink_ops = {
	.family =	PF_NETLINK,
	.owner =	THIS_MODULE,
	.release =	netlink_release,
	.bind =		netlink_bind,
	.connect =	netlink_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	netlink_getname,
	.poll =		datagram_poll,
	.ioctl =	sock_no_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	netlink_setsockopt,
	.getsockopt =	netlink_getsockopt,
	.sendmsg =	netlink_sendmsg,
	.recvmsg =	netlink_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
2081 
/* Address family descriptor passed to sock_register() in netlink_proto_init(). */
static const struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};
2087 
2088 static int __net_init netlink_net_init(struct net *net)
2089 {
2090 #ifdef CONFIG_PROC_FS
2091 	if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
2092 		return -ENOMEM;
2093 #endif
2094 	return 0;
2095 }
2096 
/* Per-namespace exit: remove the "netlink" proc entry when procfs is configured. */
static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("netlink", net->proc_net);
#endif
}
2103 
2104 static void __init netlink_add_usersock_entry(void)
2105 {
2106 	struct listeners *listeners;
2107 	int groups = 32;
2108 
2109 	listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2110 	if (!listeners)
2111 		panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2112 
2113 	netlink_table_grab();
2114 
2115 	nl_table[NETLINK_USERSOCK].groups = groups;
2116 	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2117 	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2118 	nl_table[NETLINK_USERSOCK].registered = 1;
2119 	nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2120 
2121 	netlink_table_ungrab();
2122 }
2123 
/* Per-namespace hooks registered from netlink_proto_init(). */
static struct pernet_operations __net_initdata netlink_net_ops = {
	.init = netlink_net_init,
	.exit = netlink_net_exit,
};
2128 
2129 static int __init netlink_proto_init(void)
2130 {
2131 	int i;
2132 	unsigned long limit;
2133 	unsigned int order;
2134 	int err = proto_register(&netlink_proto, 0);
2135 
2136 	if (err != 0)
2137 		goto out;
2138 
2139 	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2140 
2141 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2142 	if (!nl_table)
2143 		goto panic;
2144 
2145 	if (totalram_pages >= (128 * 1024))
2146 		limit = totalram_pages >> (21 - PAGE_SHIFT);
2147 	else
2148 		limit = totalram_pages >> (23 - PAGE_SHIFT);
2149 
2150 	order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2151 	limit = (1UL << order) / sizeof(struct hlist_head);
2152 	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2153 
2154 	for (i = 0; i < MAX_LINKS; i++) {
2155 		struct nl_portid_hash *hash = &nl_table[i].hash;
2156 
2157 		hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
2158 		if (!hash->table) {
2159 			while (i-- > 0)
2160 				nl_portid_hash_free(nl_table[i].hash.table,
2161 						 1 * sizeof(*hash->table));
2162 			kfree(nl_table);
2163 			goto panic;
2164 		}
2165 		hash->max_shift = order;
2166 		hash->shift = 0;
2167 		hash->mask = 0;
2168 		hash->rehash_time = jiffies;
2169 	}
2170 
2171 	netlink_add_usersock_entry();
2172 
2173 	sock_register(&netlink_family_ops);
2174 	register_pernet_subsys(&netlink_net_ops);
2175 	/* The netlink device handler may be needed early. */
2176 	rtnetlink_init();
2177 out:
2178 	return err;
2179 panic:
2180 	panic("netlink_init: Cannot allocate nl_table\n");
2181 }
2182 
2183 core_initcall(netlink_proto_init);
2184