xref: /linux/net/netfilter/ipvs/ip_vs_ctl.c (revision aa6065206987278291c09d0c6aebed687114c925)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * IPVS         An implementation of the IP virtual server support for the
4  *              LINUX operating system.  IPVS is now implemented as a module
5  *              over the NetFilter framework. IPVS can be used to build a
6  *              high-performance and highly available server based on a
7  *              cluster of servers.
8  *
9  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
10  *              Peter Kese <peter.kese@ijs.si>
11  *              Julian Anastasov <ja@ssi.bg>
12  *
13  * Changes:
14  */
15 
16 #define pr_fmt(fmt) "IPVS: " fmt
17 
18 #include <linux/module.h>
19 #include <linux/init.h>
20 #include <linux/types.h>
21 #include <linux/capability.h>
22 #include <linux/fs.h>
23 #include <linux/sysctl.h>
24 #include <linux/proc_fs.h>
25 #include <linux/workqueue.h>
26 #include <linux/seq_file.h>
27 #include <linux/slab.h>
28 
29 #include <linux/netfilter.h>
30 #include <linux/netfilter_ipv4.h>
31 #include <linux/mutex.h>
32 #include <linux/rcupdate_wait.h>
33 
34 #include <net/net_namespace.h>
35 #include <linux/nsproxy.h>
36 #include <net/ip.h>
37 #ifdef CONFIG_IP_VS_IPV6
38 #include <net/ipv6.h>
39 #include <net/ip6_route.h>
40 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
41 #endif
42 #include <net/route.h>
43 #include <net/sock.h>
44 #include <net/genetlink.h>
45 
46 #include <linux/uaccess.h>
47 
48 #include <net/ip_vs.h>
49 
50 MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME);
51 
52 static struct lock_class_key __ipvs_service_key;
53 
54 /* sysctl variables */
55 
56 #ifdef CONFIG_IP_VS_DEBUG
57 static int sysctl_ip_vs_debug_level = 0;
58 
59 int ip_vs_get_debug_level(void)
60 {
61 	return sysctl_ip_vs_debug_level;
62 }
63 #endif
64 
65 
66 /*  Protos */
67 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
68 
69 
70 #ifdef CONFIG_IP_VS_IPV6
71 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
72 static bool __ip_vs_addr_is_local_v6(struct net *net,
73 				     const struct in6_addr *addr)
74 {
75 	struct flowi6 fl6 = {
76 		.daddr = *addr,
77 	};
78 	struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
79 	bool is_local;
80 
81 	is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
82 
83 	dst_release(dst);
84 	return is_local;
85 }
86 #endif
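/* For instance (illustrative), ::1 routes via a loopback device, so the
 * helper above reports it as local, while an address routed through a
 * physical interface yields a non-loopback dst->dev and reports false.
 */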
87 
88 #ifdef CONFIG_SYSCTL
89 /*
90  *	update_defense_level is called from the defense_work handler and
91  *	from sysctl, so it needs to protect itself from softirqs
92  */
93 static void update_defense_level(struct netns_ipvs *ipvs)
94 {
95 	struct sysinfo i;
96 	int availmem;
97 	int amemthresh;
98 	int nomem;
99 	int to_change = -1;
100 
101 	/* we only count free and buffered memory (in pages) */
102 	si_meminfo(&i);
103 	availmem = i.freeram + i.bufferram;
104 	/* however, since Linux 2.5 i.bufferram is the total page cache
105 	   size, which we would need to adjust for */
106 	/* si_swapinfo(&i); */
107 	/* availmem = availmem - (i.totalswap - i.freeswap); */
108 
109 	amemthresh = max(READ_ONCE(ipvs->sysctl_amemthresh), 0);
110 	nomem = (availmem < amemthresh);
111 
112 	local_bh_disable();
113 
114 	/* drop_entry */
115 	spin_lock(&ipvs->dropentry_lock);
116 	switch (ipvs->sysctl_drop_entry) {
117 	case 0:
118 		atomic_set(&ipvs->dropentry, 0);
119 		break;
120 	case 1:
121 		if (nomem) {
122 			atomic_set(&ipvs->dropentry, 1);
123 			ipvs->sysctl_drop_entry = 2;
124 		} else {
125 			atomic_set(&ipvs->dropentry, 0);
126 		}
127 		break;
128 	case 2:
129 		if (nomem) {
130 			atomic_set(&ipvs->dropentry, 1);
131 		} else {
132 			atomic_set(&ipvs->dropentry, 0);
133 			ipvs->sysctl_drop_entry = 1;
134 		}
135 		break;
136 	case 3:
137 		atomic_set(&ipvs->dropentry, 1);
138 		break;
139 	}
140 	spin_unlock(&ipvs->dropentry_lock);
141 
142 	/* drop_packet */
143 	spin_lock(&ipvs->droppacket_lock);
144 	switch (ipvs->sysctl_drop_packet) {
145 	case 0:
146 		ipvs->drop_rate = 0;
147 		break;
148 	case 1:
149 		if (nomem) {
150 			ipvs->drop_counter = amemthresh / (amemthresh - availmem);
151 			ipvs->drop_rate = ipvs->drop_counter;
152 			ipvs->sysctl_drop_packet = 2;
153 		} else {
154 			ipvs->drop_rate = 0;
155 		}
156 		break;
157 	case 2:
158 		if (nomem) {
159 			ipvs->drop_counter = amemthresh / (amemthresh - availmem);
160 			ipvs->drop_rate = ipvs->drop_counter;
161 		} else {
162 			ipvs->drop_rate = 0;
163 			ipvs->sysctl_drop_packet = 1;
164 		}
165 		break;
166 	case 3:
167 		ipvs->drop_rate = ipvs->sysctl_am_droprate;
168 		break;
169 	}
170 	spin_unlock(&ipvs->droppacket_lock);
171 
172 	/* secure_tcp */
173 	spin_lock(&ipvs->securetcp_lock);
174 	switch (ipvs->sysctl_secure_tcp) {
175 	case 0:
176 		if (ipvs->old_secure_tcp >= 2)
177 			to_change = 0;
178 		break;
179 	case 1:
180 		if (nomem) {
181 			if (ipvs->old_secure_tcp < 2)
182 				to_change = 1;
183 			ipvs->sysctl_secure_tcp = 2;
184 		} else {
185 			if (ipvs->old_secure_tcp >= 2)
186 				to_change = 0;
187 		}
188 		break;
189 	case 2:
190 		if (nomem) {
191 			if (ipvs->old_secure_tcp < 2)
192 				to_change = 1;
193 		} else {
194 			if (ipvs->old_secure_tcp >= 2)
195 				to_change = 0;
196 			ipvs->sysctl_secure_tcp = 1;
197 		}
198 		break;
199 	case 3:
200 		if (ipvs->old_secure_tcp < 2)
201 			to_change = 1;
202 		break;
203 	}
204 	ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
205 	if (to_change >= 0)
206 		ip_vs_protocol_timeout_change(ipvs,
207 					      ipvs->sysctl_secure_tcp > 1);
208 	spin_unlock(&ipvs->securetcp_lock);
209 
210 	local_bh_enable();
211 }
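/* Worked example for the drop_packet modes above (illustrative numbers):
 * with amemthresh = 1024 pages and availmem = 768 pages, nomem is true and
 *
 *	drop_counter = 1024 / (1024 - 768) = 4
 *
 * so roughly every 4th new-connection packet is dropped; as availmem
 * approaches zero the ratio approaches 1 and nearly everything is dropped
 * until memory recovers.
 */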
212 
213 /* Handler for the delayed work that expires connections
214  * with no destination
215  */
216 static void expire_nodest_conn_handler(struct work_struct *work)
217 {
218 	struct netns_ipvs *ipvs;
219 
220 	ipvs = container_of(work, struct netns_ipvs,
221 			    expire_nodest_conn_work.work);
222 	ip_vs_expire_nodest_conn_flush(ipvs);
223 }
224 
225 /*
226  *	Timer for checking the defense
227  */
228 #define DEFENSE_TIMER_PERIOD	1*HZ
229 
230 static void defense_work_handler(struct work_struct *work)
231 {
232 	struct netns_ipvs *ipvs =
233 		container_of(work, struct netns_ipvs, defense_work.work);
234 
235 	update_defense_level(ipvs);
236 	if (atomic_read(&ipvs->dropentry))
237 		ip_vs_random_dropentry(ipvs);
238 	queue_delayed_work(system_long_wq, &ipvs->defense_work,
239 			   DEFENSE_TIMER_PERIOD);
240 }
241 #endif
242 
243 static void est_reload_work_handler(struct work_struct *work)
244 {
245 	struct netns_ipvs *ipvs =
246 		container_of(work, struct netns_ipvs, est_reload_work.work);
247 	int genid_done = atomic_read(&ipvs->est_genid_done);
248 	unsigned long delay = HZ / 10;	/* repeat startups after failure */
249 	bool repeat = false;
250 	int genid;
251 	int id;
252 
253 	mutex_lock(&ipvs->est_mutex);
254 	genid = atomic_read(&ipvs->est_genid);
255 	for (id = 0; id < ipvs->est_kt_count; id++) {
256 		struct ip_vs_est_kt_data *kd = ipvs->est_kt_arr[id];
257 
258 		/* netns clean up started, abort delayed work */
259 		if (!READ_ONCE(ipvs->enable))
260 			goto unlock;
261 		if (!kd)
262 			continue;
263 		/* New config? Stop kthread tasks */
264 		if (genid != genid_done) {
265 			if (!id) {
266 				/* Only we can stop kt 0 but not under mutex */
267 				mutex_unlock(&ipvs->est_mutex);
268 				ip_vs_est_kthread_stop(kd);
269 				mutex_lock(&ipvs->est_mutex);
270 				if (!READ_ONCE(ipvs->enable))
271 					goto unlock;
272 				/* kd for kt 0 is never destroyed */
273 			} else {
274 				ip_vs_est_kthread_stop(kd);
275 			}
276 		}
277 		if (!kd->task && !ip_vs_est_stopped(ipvs)) {
278 			bool start;
279 
280 			/* Do not start kthreads above 0 in calc phase */
281 			if (id)
282 				start = !ipvs->est_calc_phase;
283 			else
284 				start = kd->needed;
285 			if (start && ip_vs_est_kthread_start(ipvs, kd) < 0)
286 				repeat = true;
287 		}
288 	}
289 
290 	atomic_set(&ipvs->est_genid_done, genid);
291 
292 	if (repeat)
293 		queue_delayed_work(system_long_wq, &ipvs->est_reload_work,
294 				   delay);
295 
296 unlock:
297 	mutex_unlock(&ipvs->est_mutex);
298 }
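/* A sketch of the reload protocol assumed above: a reconfiguration bumps
 * est_genid (see ip_vs_est_reload_start()) and queues this work; the
 * handler then observes est_genid != est_genid_done, stops the estimator
 * kthreads, restarts the ones still needed and finally records the
 * completed generation in est_genid_done.
 */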
299 
300 static int get_conn_tab_size(struct netns_ipvs *ipvs)
301 {
302 	const struct ip_vs_rht *t;
303 	int size = 0;
304 
305 	rcu_read_lock();
306 	t = rcu_dereference(ipvs->conn_tab);
307 	if (t)
308 		size = t->size;
309 	rcu_read_unlock();
310 
311 	return size;
312 }
313 
314 int
315 ip_vs_use_count_inc(void)
316 {
317 	return try_module_get(THIS_MODULE);
318 }
319 
320 void
321 ip_vs_use_count_dec(void)
322 {
323 	module_put(THIS_MODULE);
324 }
325 
326 
327 /* Service hashing:
328  * Operation			Locking order
329  * ---------------------------------------------------------------------------
330  * add table			service_mutex, svc_resize_sem(W)
331  * del table			service_mutex
332  * move between tables		svc_resize_sem(W), seqcount_t(W), bit lock
333  * add/del service		service_mutex, bit lock
334  * find service			RCU, seqcount_t(R)
335  * walk services(blocking)	service_mutex, svc_resize_sem(R)
336  * walk services(non-blocking)	RCU, seqcount_t(R)
337  *
338  * - new tables are linked/unlinked under service_mutex and svc_resize_sem
339  * - new table is linked on resizing and all operations can run in parallel
340  * in 2 tables until the new table is registered as current one
341  * - two contexts can modify buckets: config and table resize, both in
342  * process context
343  * - only table resizer can move entries, so we do not protect t->seqc[]
344  * items with t->lock[]
345  * - lookups occur under RCU lock and seqcount reader lock to detect if
346  * services are moved to new table
347  * - move operations may disturb readers: a find operation will not miss
348  * entries but walkers may see the same entry twice if forced to retry chains
349  * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to hold
350  * service_mutex to disallow new tables from being installed, or to check
351  * svc_table_changes and repeat the RCU read section if a new table is installed
352  */
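/* A minimal walker sketch for the last rule above (illustrative, kernel
 * context assumed): sample the change counter around the RCU section and
 * retry if a new table was installed mid-walk:
 *
 *	int gen;
 *
 *	do {
 *		gen = atomic_read(&ipvs->svc_table_changes);
 *		smp_rmb();
 *		rcu_read_lock();
 *		... walk the buckets of ipvs->svc_table ...
 *		rcu_read_unlock();
 *	} while (gen != atomic_read(&ipvs->svc_table_changes));
 */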
353 
354 /*
355  *	Returns hash value for virtual service
356  */
357 static inline u32
358 ip_vs_svc_hashval(struct ip_vs_rht *t, int af, unsigned int proto,
359 		  const union nf_inet_addr *addr, __be16 port)
360 {
361 	return ip_vs_rht_hash_linfo(t, af, addr, ntohs(port), proto);
362 }
363 
364 /*
365  *	Returns hash value of fwmark for virtual service lookup
366  */
367 static inline u32 ip_vs_svc_fwm_hashval(struct ip_vs_rht *t, int af,
368 					__u32 fwmark)
369 {
370 	return jhash_2words(fwmark, af, (u32)t->hash_key.key[0]);
371 }
372 
373 /* Hashes a service in the svc_table by <proto,addr,port> or by fwmark */
374 static int ip_vs_svc_hash(struct ip_vs_service *svc)
375 {
376 	struct netns_ipvs *ipvs = svc->ipvs;
377 	struct hlist_bl_head *head;
378 	struct ip_vs_rht *t;
379 	u32 hash;
380 
381 	if (svc->flags & IP_VS_SVC_F_HASHED) {
382 		pr_err("%s(): request for already hashed, called from %pS\n",
383 		       __func__, __builtin_return_address(0));
384 		return 0;
385 	}
386 
387 	/* increase its refcnt because it is referenced by the svc table */
388 	atomic_inc(&svc->refcnt);
389 
390 	/* New entries go into recent table */
391 	t = rcu_dereference_protected(ipvs->svc_table, 1);
392 	t = rcu_dereference_protected(t->new_tbl, 1);
393 
394 	if (svc->fwmark == 0) {
395 		/*
396 		 *  Hash it by <protocol,addr,port>
397 		 */
398 		hash = ip_vs_svc_hashval(t, svc->af, svc->protocol,
399 					 &svc->addr, svc->port);
400 	} else {
401 		/*
402 		 *  Hash it by fwmark
403 		 */
404 		hash = ip_vs_svc_fwm_hashval(t, svc->af, svc->fwmark);
405 	}
406 	head = t->buckets + (hash & t->mask);
407 	hlist_bl_lock(head);
408 	WRITE_ONCE(svc->hash_key, ip_vs_rht_build_hash_key(t, hash));
409 	svc->flags |= IP_VS_SVC_F_HASHED;
410 	hlist_bl_add_head_rcu(&svc->s_list, head);
411 	hlist_bl_unlock(head);
412 
413 	return 1;
414 }
415 
416 
417 /*
418  *	Unhashes a service from svc_table.
419  *	Should be called with locked tables.
420  */
421 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
422 {
423 	struct netns_ipvs *ipvs = svc->ipvs;
424 	struct hlist_bl_head *head;
425 	struct ip_vs_rht *t;
426 	u32 hash_key2;
427 	u32 hash_key;
428 
429 	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
430 		pr_err("%s(): request for unhash flagged, called from %pS\n",
431 		       __func__, __builtin_return_address(0));
432 		return 0;
433 	}
434 
435 	t = rcu_dereference_protected(ipvs->svc_table, 1);
436 	hash_key = READ_ONCE(svc->hash_key);
437 	/* We need to lock the bucket in the right table */
438 	if (ip_vs_rht_same_table(t, hash_key)) {
439 		head = t->buckets + (hash_key & t->mask);
440 		hlist_bl_lock(head);
441 		/* Ensure hash_key is read under lock */
442 		hash_key2 = READ_ONCE(svc->hash_key);
443 		/* Moved to new table? */
444 		if (hash_key != hash_key2) {
445 			hlist_bl_unlock(head);
446 			t = rcu_dereference_protected(t->new_tbl, 1);
447 			head = t->buckets + (hash_key2 & t->mask);
448 			hlist_bl_lock(head);
449 		}
450 	} else {
451 		/* It is already moved to new table */
452 		t = rcu_dereference_protected(t->new_tbl, 1);
453 		head = t->buckets + (hash_key & t->mask);
454 		hlist_bl_lock(head);
455 	}
456 	/* Remove it from svc_table */
457 	hlist_bl_del_rcu(&svc->s_list);
458 
459 	svc->flags &= ~IP_VS_SVC_F_HASHED;
460 	atomic_dec(&svc->refcnt);
461 	hlist_bl_unlock(head);
462 	return 1;
463 }
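/* Note on the relock dance above: the resizer can rehash svc into the new
 * table between our first hash_key read and taking the bucket lock, which
 * is why hash_key is re-read under the lock; a changed value means the
 * entry now lives in t->new_tbl and that bucket is the one to lock.
 */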
464 
465 
466 /*
467  *	Get service by {netns, proto,addr,port} in the service table.
468  */
469 static inline struct ip_vs_service *
470 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
471 		     const union nf_inet_addr *vaddr, __be16 vport)
472 {
473 	DECLARE_IP_VS_RHT_WALK_BUCKET_RCU();
474 	struct hlist_bl_head *head;
475 	struct ip_vs_service *svc;
476 	struct ip_vs_rht *t, *p;
477 	struct hlist_bl_node *e;
478 	u32 hash, hash_key;
479 
480 	ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, p) {
481 		/* Check for "full" addressed entries */
482 		hash = ip_vs_svc_hashval(t, af, protocol, vaddr, vport);
483 
484 		hash_key = ip_vs_rht_build_hash_key(t, hash);
485 		ip_vs_rht_walk_bucket_rcu(t, hash_key, head) {
486 			hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
487 				if (READ_ONCE(svc->hash_key) == hash_key &&
488 				    svc->af == af &&
489 				    ip_vs_addr_equal(af, &svc->addr, vaddr) &&
490 				    svc->port == vport &&
491 				    svc->protocol == protocol && !svc->fwmark) {
492 					/* HIT */
493 					return svc;
494 				}
495 			}
496 		}
497 	}
498 
499 	return NULL;
500 }
501 
502 
503 /*
504  *	Get service by {fwmark} in the service table.
505  */
506 static inline struct ip_vs_service *
507 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
508 {
509 	DECLARE_IP_VS_RHT_WALK_BUCKET_RCU();
510 	struct hlist_bl_head *head;
511 	struct ip_vs_service *svc;
512 	struct ip_vs_rht *t, *p;
513 	struct hlist_bl_node *e;
514 	u32 hash, hash_key;
515 
516 	ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, p) {
517 		/* Check for fwmark addressed entries */
518 		hash = ip_vs_svc_fwm_hashval(t, af, fwmark);
519 
520 		hash_key = ip_vs_rht_build_hash_key(t, hash);
521 		ip_vs_rht_walk_bucket_rcu(t, hash_key, head) {
522 			hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
523 				if (READ_ONCE(svc->hash_key) == hash_key &&
524 				    svc->fwmark == fwmark && svc->af == af) {
525 					/* HIT */
526 					return svc;
527 				}
528 			}
529 		}
530 	}
531 
532 	return NULL;
533 }
534 
535 /* Find service, called under RCU lock */
536 struct ip_vs_service *
537 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
538 		   const union nf_inet_addr *vaddr, __be16 vport)
539 {
540 	struct ip_vs_service *svc = NULL;
541 	int af_id = ip_vs_af_index(af);
542 
543 	/*
544 	 *	Check the table hashed by fwmark first
545 	 */
546 	if (fwmark && atomic_read(&ipvs->fwm_services[af_id])) {
547 		svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
548 		if (svc)
549 			goto out;
550 	}
551 
552 	if (!atomic_read(&ipvs->nonfwm_services[af_id]))
553 		goto out;
554 
555 	/*
556 	 *	Check the table hashed by <protocol,addr,port>
557 	 *	for "full" addressed entries
558 	 */
559 	svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
560 	if (svc)
561 		goto out;
562 
563 	if (protocol == IPPROTO_TCP &&
564 	    atomic_read(&ipvs->ftpsvc_counter[af_id]) &&
565 	    (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) {
566 		/*
567 		 * Check if ftp service entry exists, the packet
568 		 * might belong to FTP data connections.
569 		 */
570 		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
571 		if (svc)
572 			goto out;
573 	}
574 
575 	if (atomic_read(&ipvs->nullsvc_counter[af_id])) {
576 		/*
577 		 * Check if the catch-all port (port zero) exists
578 		 */
579 		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
580 	}
581 
582   out:
583 	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
584 		      fwmark, ip_vs_proto_name(protocol),
585 		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
586 		      svc ? "hit" : "not hit");
587 
588 	return svc;
589 }
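/* Example caller (illustrative): the lookup and any use of the returned
 * service must stay inside one RCU read-side critical section:
 *
 *	rcu_read_lock();
 *	svc = ip_vs_service_find(ipvs, AF_INET, 0, IPPROTO_TCP, &vaddr, vport);
 *	if (svc)
 *		... use svc, e.g. pick a destination ...
 *	rcu_read_unlock();
 */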
590 
591 /* Return the number of registered services */
592 static int ip_vs_get_num_services(struct netns_ipvs *ipvs)
593 {
594 	int ns = 0, ni = IP_VS_AF_MAX;
595 
596 	while (--ni >= 0)
597 		ns += atomic_read(&ipvs->num_services[ni]);
598 	return ns;
599 }
600 
601 /* Get default load factor to map num_services/u_thresh to t->size */
602 static int ip_vs_svc_default_load_factor(struct netns_ipvs *ipvs)
603 {
604 	int factor;
605 
606 	if (net_eq(ipvs->net, &init_net))
607 		factor = -3;	/* grow if load is above 12.5% */
608 	else
609 		factor = -2;	/* grow if load is above 25% */
610 	return factor;
611 }
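/* Illustrative reading of the negative factors (assuming the semantics of
 * ip_vs_rht_desired_size()): -3 requests growth once the entry count
 * exceeds size >> 3, so a 256-bucket table grows past 32 services (12.5%
 * load), while -2 grows past 64 (25%).
 */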
612 
613 /* Get the desired svc_table size */
614 static int ip_vs_svc_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t,
615 				  int lfactor)
616 {
617 	return ip_vs_rht_desired_size(ipvs, t, ip_vs_get_num_services(ipvs),
618 				      lfactor, IP_VS_SVC_TAB_MIN_BITS,
619 				      IP_VS_SVC_TAB_MAX_BITS);
620 }
621 
622 /* Allocate svc_table */
623 static struct ip_vs_rht *ip_vs_svc_table_alloc(struct netns_ipvs *ipvs,
624 					       int buckets, int lfactor)
625 {
626 	struct ip_vs_rht *t;
627 	int scounts, locks;
628 
629 	/* No frequent lookups to race with resizing, so use at most 64
630 	 * seqcounts. Only the resizer moves entries, so use 0 locks.
631 	 */
632 	scounts = clamp(buckets >> 4, 1, 64);
633 	locks = 0;
634 
635 	t = ip_vs_rht_alloc(buckets, scounts, locks);
636 	if (!t)
637 		return NULL;
638 	t->lfactor = lfactor;
639 	ip_vs_rht_set_thresholds(t, t->size, lfactor, IP_VS_SVC_TAB_MIN_BITS,
640 				 IP_VS_SVC_TAB_MAX_BITS);
641 	return t;
642 }
643 
644 /* svc_table resizer work */
645 static void svc_resize_work_handler(struct work_struct *work)
646 {
647 	struct hlist_bl_head *head, *head2;
648 	struct ip_vs_rht *t_free = NULL;
649 	unsigned int resched_score = 0;
650 	struct hlist_bl_node *cn, *nn;
651 	struct ip_vs_rht *t, *t_new;
652 	struct ip_vs_service *svc;
653 	struct netns_ipvs *ipvs;
654 	bool more_work = true;
655 	seqcount_t *sc;
656 	int limit = 0;
657 	int new_size;
658 	int lfactor;
659 	u32 bucket;
660 
661 	ipvs = container_of(work, struct netns_ipvs, svc_resize_work.work);
662 
663 	if (!down_write_trylock(&ipvs->svc_resize_sem))
664 		goto out;
665 	if (!mutex_trylock(&ipvs->service_mutex))
666 		goto unlock_sem;
667 	more_work = false;
668 	clear_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags);
669 	if (!READ_ONCE(ipvs->enable) ||
670 	    test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
671 		goto unlock_m;
672 	t = rcu_dereference_protected(ipvs->svc_table, 1);
673 	/* Do nothing if table is removed */
674 	if (!t)
675 		goto unlock_m;
676 	/* New table needs to be registered? BUG! */
677 	if (t != rcu_dereference_protected(t->new_tbl, 1))
678 		goto unlock_m;
679 
680 	lfactor = sysctl_svc_lfactor(ipvs);
681 	/* Should we resize ? */
682 	new_size = ip_vs_svc_desired_size(ipvs, t, lfactor);
683 	if (new_size == t->size && lfactor == t->lfactor)
684 		goto unlock_m;
685 
686 	t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor);
687 	if (!t_new) {
688 		more_work = true;
689 		goto unlock_m;
690 	}
691 	/* Flip the table_id */
692 	t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK;
693 
694 	rcu_assign_pointer(t->new_tbl, t_new);
695 	/* Allow add/del to new_tbl while moving from old table */
696 	mutex_unlock(&ipvs->service_mutex);
697 
698 	ip_vs_rht_for_each_bucket(t, bucket, head) {
699 same_bucket:
700 		if (++limit >= 16) {
701 			if (!READ_ONCE(ipvs->enable) ||
702 			    test_bit(IP_VS_WORK_SVC_NORESIZE,
703 				     &ipvs->work_flags))
704 				goto unlock_sem;
705 			if (resched_score >= 100) {
706 				resched_score = 0;
707 				cond_resched();
708 			}
709 			limit = 0;
710 		}
711 		if (hlist_bl_empty(head)) {
712 			resched_score++;
713 			continue;
714 		}
715 		/* Preemption calls ahead... */
716 		resched_score = 0;
717 
718 		sc = &t->seqc[bucket & t->seqc_mask];
719 		/* seqcount_t usage considering PREEMPT_RT rules:
720 		 * - we are the only writer => preemption can be allowed
721 		 * - readers (SoftIRQ) => disable BHs
722 		 * - readers (processes) => preemption should be disabled
723 		 */
724 		local_bh_disable();
725 		preempt_disable_nested();
726 		write_seqcount_begin(sc);
727 		hlist_bl_lock(head);
728 
729 		hlist_bl_for_each_entry_safe(svc, cn, nn, head, s_list) {
730 			u32 hash;
731 
732 			/* New hash for the new table */
733 			if (svc->fwmark == 0) {
734 				/*  Hash it by <protocol,addr,port> */
735 				hash = ip_vs_svc_hashval(t_new, svc->af,
736 							 svc->protocol,
737 							 &svc->addr, svc->port);
738 			} else {
739 				/* Hash it by fwmark */
740 				hash = ip_vs_svc_fwm_hashval(t_new, svc->af,
741 							     svc->fwmark);
742 			}
743 			hlist_bl_del_rcu(&svc->s_list);
744 			head2 = t_new->buckets + (hash & t_new->mask);
745 
746 			hlist_bl_lock(head2);
747 			WRITE_ONCE(svc->hash_key,
748 				   ip_vs_rht_build_hash_key(t_new, hash));
749 			/* t_new->seqc are not used at this stage, we race
750 			 * only with add/del, so only lock the bucket.
751 			 */
752 			hlist_bl_add_head_rcu(&svc->s_list, head2);
753 			hlist_bl_unlock(head2);
754 			/* Too long chain? Do it in steps */
755 			if (++limit >= 64)
756 				break;
757 		}
758 
759 		hlist_bl_unlock(head);
760 		write_seqcount_end(sc);
761 		preempt_enable_nested();
762 		local_bh_enable();
763 		if (limit >= 64)
764 			goto same_bucket;
765 	}
766 
767 	/* Tables can be switched only under service_mutex */
768 	while (!mutex_trylock(&ipvs->service_mutex)) {
769 		cond_resched();
770 		if (!READ_ONCE(ipvs->enable) ||
771 		    test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
772 			goto unlock_sem;
773 	}
774 	if (!READ_ONCE(ipvs->enable) ||
775 	    test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
776 		goto unlock_m;
777 
778 	rcu_assign_pointer(ipvs->svc_table, t_new);
779 	/* Inform readers that new table is installed */
780 	smp_mb__before_atomic();
781 	atomic_inc(&ipvs->svc_table_changes);
782 	t_free = t;
783 
784 unlock_m:
785 	mutex_unlock(&ipvs->service_mutex);
786 
787 unlock_sem:
788 	up_write(&ipvs->svc_resize_sem);
789 
790 	if (t_free) {
791 		/* RCU readers should not see more than two tables in the chain.
792 		 * To prevent a new table from being attached, wait here instead
793 		 * of freeing the old table in an RCU callback.
794 		 */
795 		synchronize_rcu();
796 		ip_vs_rht_free(t_free);
797 	}
798 
799 out:
800 	if (!READ_ONCE(ipvs->enable) || !more_work ||
801 	    test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
802 		return;
803 	queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1);
804 }
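/* The resize protocol above, condensed:
 *
 *	1. allocate t_new and link it: rcu_assign_pointer(t->new_tbl, t_new)
 *	2. drop service_mutex so add/del can proceed against t_new
 *	3. per bucket: write_seqcount_begin() + bit lock, then rehash the
 *	   chain into t_new in steps of at most 64 entries
 *	4. retake service_mutex, switch ipvs->svc_table to t_new and bump
 *	   svc_table_changes for the readers
 *	5. synchronize_rcu(), then free the old table
 */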
805 
806 static inline void
807 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
808 {
809 	atomic_inc(&svc->refcnt);
810 	rcu_assign_pointer(dest->svc, svc);
811 }
812 
813 static void ip_vs_service_free(struct ip_vs_service *svc)
814 {
815 	ip_vs_stats_release(&svc->stats);
816 	kfree(svc);
817 }
818 
819 static void ip_vs_service_rcu_free(struct rcu_head *head)
820 {
821 	struct ip_vs_service *svc;
822 
823 	svc = container_of(head, struct ip_vs_service, rcu_head);
824 	ip_vs_service_free(svc);
825 }
826 
827 static void __ip_vs_svc_put(struct ip_vs_service *svc)
828 {
829 	if (atomic_dec_and_test(&svc->refcnt)) {
830 		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
831 			      svc->fwmark,
832 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
833 			      ntohs(svc->port));
834 		call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
835 	}
836 }
837 
838 
839 /*
840  *	Returns hash value for real service
841  */
842 static inline unsigned int ip_vs_rs_hashkey(int af,
843 					    const union nf_inet_addr *addr,
844 					    __be16 port)
845 {
846 	unsigned int porth = ntohs(port);
847 	__be32 addr_fold = addr->ip;
848 
849 #ifdef CONFIG_IP_VS_IPV6
850 	if (af == AF_INET6)
851 		addr_fold = addr->ip6[0]^addr->ip6[1]^
852 			    addr->ip6[2]^addr->ip6[3];
853 #endif
854 
855 	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
856 		& IP_VS_RTAB_MASK;
857 }
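/* Worked example (illustrative; IP_VS_RTAB_BITS is 4, so the mask is 0xf):
 * for the IPv4 dest 192.168.1.10:80, addr_fold is 0xc0a8010a and porth is
 * 0x50, so
 *
 *	hash = (0xc0a8010a ^ (0x50 >> 4) ^ 0x50) & 0xf
 *	     = (0xc0a8010a ^ 0x55) & 0xf = 0xf
 *
 * and the dest lands in rs_table bucket 15.
 */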
858 
859 /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
860 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
861 {
862 	unsigned int hash;
863 	__be16 port;
864 
865 	if (dest->in_rs_table)
866 		return;
867 
868 	switch (IP_VS_DFWD_METHOD(dest)) {
869 	case IP_VS_CONN_F_MASQ:
870 		port = dest->port;
871 		break;
872 	case IP_VS_CONN_F_TUNNEL:
873 		switch (dest->tun_type) {
874 		case IP_VS_CONN_F_TUNNEL_TYPE_GUE:
875 			port = dest->tun_port;
876 			break;
877 		case IP_VS_CONN_F_TUNNEL_TYPE_IPIP:
878 		case IP_VS_CONN_F_TUNNEL_TYPE_GRE:
879 			port = 0;
880 			break;
881 		default:
882 			return;
883 		}
884 		break;
885 	default:
886 		return;
887 	}
888 
889 	/*
890 	 *	Hash by proto,addr,port,
891 	 *	which are the parameters of the real service.
892 	 */
893 	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port);
894 
895 	hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
896 	dest->in_rs_table = 1;
897 }
898 
899 /* Unhash ip_vs_dest from rs_table. */
900 static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
901 {
902 	/*
903 	 * Remove it from the rs_table.
904 	 */
905 	if (dest->in_rs_table) {
906 		hlist_del_rcu(&dest->d_list);
907 		dest->in_rs_table = 0;
908 	}
909 }
910 
911 /* Check if real service by <proto,addr,port> is present */
912 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
913 			    const union nf_inet_addr *daddr, __be16 dport)
914 {
915 	unsigned int hash;
916 	struct ip_vs_dest *dest;
917 
918 	/* Check for "full" addressed entries */
919 	hash = ip_vs_rs_hashkey(af, daddr, dport);
920 
921 	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
922 		if (dest->port == dport &&
923 		    dest->af == af &&
924 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
925 		    (dest->protocol == protocol || dest->vfwmark) &&
926 		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
927 			/* HIT */
928 			return true;
929 		}
930 	}
931 
932 	return false;
933 }
934 
935 /* Find real service record by <proto,addr,port>.
936  * In case of multiple records with the same <proto,addr,port>, only
937  * the first found record is returned.
938  *
939  * To be called under RCU lock.
940  */
941 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af,
942 					   __u16 protocol,
943 					   const union nf_inet_addr *daddr,
944 					   __be16 dport)
945 {
946 	unsigned int hash;
947 	struct ip_vs_dest *dest;
948 
949 	/* Check for "full" addressed entries */
950 	hash = ip_vs_rs_hashkey(af, daddr, dport);
951 
952 	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
953 		if (dest->port == dport &&
954 		    dest->af == af &&
955 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
956 		    (dest->protocol == protocol || dest->vfwmark) &&
957 		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) {
958 			/* HIT */
959 			return dest;
960 		}
961 	}
962 
963 	return NULL;
964 }
965 
966 /* Find real service record by <af,addr,tun_port>.
967  * In case of multiple records with the same <af,addr,tun_port>, only
968  * the first found record is returned.
969  *
970  * To be called under RCU lock.
971  */
972 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af,
973 				     const union nf_inet_addr *daddr,
974 				     __be16 tun_port)
975 {
976 	struct ip_vs_dest *dest;
977 	unsigned int hash;
978 
979 	/* Check for "full" addressed entries */
980 	hash = ip_vs_rs_hashkey(af, daddr, tun_port);
981 
982 	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
983 		if (dest->tun_port == tun_port &&
984 		    dest->af == af &&
985 		    ip_vs_addr_equal(af, &dest->addr, daddr) &&
986 		    IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) {
987 			/* HIT */
988 			return dest;
989 		}
990 	}
991 
992 	return NULL;
993 }
994 
995 /* Lookup destination by {addr,port} in the given service
996  * Called under RCU lock.
997  */
998 static struct ip_vs_dest *
999 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
1000 		  const union nf_inet_addr *daddr, __be16 dport)
1001 {
1002 	struct ip_vs_dest *dest;
1003 
1004 	/*
1005 	 * Find the destination for the given service
1006 	 */
1007 	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
1008 		if ((dest->af == dest_af) &&
1009 		    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
1010 		    (dest->port == dport)) {
1011 			/* HIT */
1012 			return dest;
1013 		}
1014 	}
1015 
1016 	return NULL;
1017 }
1018 
1019 /*
1020  * Find destination by {daddr,dport,vaddr,protocol}
1021  * Created to be used in ip_vs_process_message() in
1022  * the backup synchronization daemon. It finds the
1023  * destination to be bound to the received connection
1024  * on the backup.
1025  * Called under RCU lock, no refcnt is returned.
1026  */
1027 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
1028 				   const union nf_inet_addr *daddr,
1029 				   __be16 dport,
1030 				   const union nf_inet_addr *vaddr,
1031 				   __be16 vport, __u16 protocol, __u32 fwmark,
1032 				   __u32 flags)
1033 {
1034 	struct ip_vs_dest *dest;
1035 	struct ip_vs_service *svc;
1036 	__be16 port = dport;
1037 
1038 	svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
1039 	if (!svc)
1040 		return NULL;
1041 	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
1042 		port = 0;
1043 	dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
1044 	if (!dest)
1045 		dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
1046 	return dest;
1047 }
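/* Note on the port ^ dport retry above: when the fwmark branch zeroed
 * port, port ^ dport == dport, so the retry uses the real port; otherwise
 * port == dport and port ^ dport == 0, retrying with the catch-all port.
 * The single expression covers both fallbacks.
 */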
1048 
1049 void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
1050 {
1051 	struct ip_vs_dest_dst *dest_dst = container_of(head,
1052 						       struct ip_vs_dest_dst,
1053 						       rcu_head);
1054 
1055 	dst_release(dest_dst->dst_cache);
1056 	kfree(dest_dst);
1057 }
1058 
1059 /* Release dest_dst and dst_cache for dest in user context */
1060 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
1061 {
1062 	struct ip_vs_dest_dst *old;
1063 
1064 	old = rcu_dereference_protected(dest->dest_dst, 1);
1065 	if (old) {
1066 		RCU_INIT_POINTER(dest->dest_dst, NULL);
1067 		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
1068 	}
1069 }
1070 
1071 /*
1072  *  Lookup dest by {svc,addr,port} in the destination trash.
1073  *  The destination trash is used to hold the destinations that are removed
1074  *  from the service table but are still referenced by some conn entries.
1075  *  The reason for the destination trash is that when a dest is temporarily
1076  *  down (either by the administrator or by a monitor program), it can be
1077  *  picked back from the trash, the remaining connections to it can
1078  *  continue, and its counters remain useful for
1079  *  scheduling.
1080  */
1081 static struct ip_vs_dest *
1082 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
1083 		     const union nf_inet_addr *daddr, __be16 dport)
1084 {
1085 	struct ip_vs_dest *dest;
1086 	struct netns_ipvs *ipvs = svc->ipvs;
1087 
1088 	/*
1089 	 * Find the destination in trash
1090 	 */
1091 	spin_lock_bh(&ipvs->dest_trash_lock);
1092 	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1093 		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
1094 			      "dest->refcnt=%d\n",
1095 			      dest->vfwmark,
1096 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
1097 			      ntohs(dest->port),
1098 			      refcount_read(&dest->refcnt));
1099 		if (dest->af == dest_af &&
1100 		    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
1101 		    dest->port == dport &&
1102 		    dest->vfwmark == svc->fwmark &&
1103 		    dest->protocol == svc->protocol &&
1104 		    (svc->fwmark ||
1105 		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1106 		      dest->vport == svc->port))) {
1107 			/* HIT */
1108 			list_del(&dest->t_list);
1109 			goto out;
1110 		}
1111 	}
1112 
1113 	dest = NULL;
1114 
1115 out:
1116 	spin_unlock_bh(&ipvs->dest_trash_lock);
1117 
1118 	return dest;
1119 }
1120 
1121 /* Put destination in trash */
1122 static void ip_vs_trash_put_dest(struct netns_ipvs *ipvs,
1123 				 struct ip_vs_dest *dest, unsigned long istart,
1124 				 bool cleanup)
1125 {
1126 	spin_lock_bh(&ipvs->dest_trash_lock);
1127 	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1128 		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1129 		      refcount_read(&dest->refcnt));
1130 	if (list_empty(&ipvs->dest_trash) && !cleanup)
1131 		mod_timer(&ipvs->dest_trash_timer,
1132 			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1133 	/* dest lives in trash with reference */
1134 	list_add(&dest->t_list, &ipvs->dest_trash);
1135 	dest->idle_start = istart;
1136 	spin_unlock_bh(&ipvs->dest_trash_lock);
1137 }
1138 
1139 static void ip_vs_dest_rcu_free(struct rcu_head *head)
1140 {
1141 	struct ip_vs_dest *dest;
1142 
1143 	dest = container_of(head, struct ip_vs_dest, rcu_head);
1144 	ip_vs_stats_release(&dest->stats);
1145 	ip_vs_dest_put_and_free(dest);
1146 }
1147 
1148 static void ip_vs_dest_free(struct ip_vs_dest *dest)
1149 {
1150 	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
1151 
1152 	__ip_vs_svc_put(svc);
1153 	call_rcu(&dest->rcu_head, ip_vs_dest_rcu_free);
1154 }
1155 
1156 /*
1157  *  Clean up all the destinations in the trash
1158  *  Called by ip_vs_control_cleanup()
1159  *
1160  *  When ip_vs_control_cleanup() is invoked on ipvs module exit,
1161  *  the service tables must have been flushed, all the connections
1162  *  have expired, and the refcnt of each destination in the trash must
1163  *  be 1, so we simply release them here.
1164  */
1165 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
1166 {
1167 	struct ip_vs_dest *dest, *nxt;
1168 
1169 	timer_delete_sync(&ipvs->dest_trash_timer);
1170 	/* No need to use dest_trash_lock */
1171 	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
1172 		list_del(&dest->t_list);
1173 		ip_vs_dest_free(dest);
1174 	}
1175 }
1176 
1177 static void ip_vs_stats_rcu_free(struct rcu_head *head)
1178 {
1179 	struct ip_vs_stats_rcu *rs = container_of(head,
1180 						  struct ip_vs_stats_rcu,
1181 						  rcu_head);
1182 
1183 	ip_vs_stats_release(&rs->s);
1184 	kfree(rs);
1185 }
1186 
1187 static void
1188 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
1189 {
1190 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
1191 
1192 	spin_lock(&src->lock);
1193 
1194 	IP_VS_SHOW_STATS_COUNTER(conns);
1195 	IP_VS_SHOW_STATS_COUNTER(inpkts);
1196 	IP_VS_SHOW_STATS_COUNTER(outpkts);
1197 	IP_VS_SHOW_STATS_COUNTER(inbytes);
1198 	IP_VS_SHOW_STATS_COUNTER(outbytes);
1199 
1200 	ip_vs_read_estimator(dst, src);
1201 
1202 	spin_unlock(&src->lock);
1203 }
1204 
1205 static void
1206 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
1207 {
1208 	dst->conns = (u32)src->conns;
1209 	dst->inpkts = (u32)src->inpkts;
1210 	dst->outpkts = (u32)src->outpkts;
1211 	dst->inbytes = src->inbytes;
1212 	dst->outbytes = src->outbytes;
1213 	dst->cps = (u32)src->cps;
1214 	dst->inpps = (u32)src->inpps;
1215 	dst->outpps = (u32)src->outpps;
1216 	dst->inbps = (u32)src->inbps;
1217 	dst->outbps = (u32)src->outbps;
1218 }
1219 
1220 static void
1221 ip_vs_zero_stats(struct ip_vs_stats *stats)
1222 {
1223 	spin_lock(&stats->lock);
1224 
1225 	/* get current counters as zero point, rates are zeroed */
1226 
1227 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
1228 
1229 	IP_VS_ZERO_STATS_COUNTER(conns);
1230 	IP_VS_ZERO_STATS_COUNTER(inpkts);
1231 	IP_VS_ZERO_STATS_COUNTER(outpkts);
1232 	IP_VS_ZERO_STATS_COUNTER(inbytes);
1233 	IP_VS_ZERO_STATS_COUNTER(outbytes);
1234 
1235 	ip_vs_zero_estimator(stats);
1236 
1237 	spin_unlock(&stats->lock);
1238 }
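/* Example of the zero-point scheme (illustrative numbers): if kstats.conns
 * is 1500 and the service was zeroed when it stood at 1000,
 * ip_vs_copy_stats() reports 500; zeroing only moves kstats0 forward and
 * never destroys the accumulated counters.
 */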
1239 
1240 /* Allocate fields after kzalloc */
1241 int ip_vs_stats_init_alloc(struct ip_vs_stats *s)
1242 {
1243 	int i;
1244 
1245 	spin_lock_init(&s->lock);
1246 	s->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1247 	if (!s->cpustats)
1248 		return -ENOMEM;
1249 
1250 	for_each_possible_cpu(i) {
1251 		struct ip_vs_cpu_stats *cs = per_cpu_ptr(s->cpustats, i);
1252 
1253 		u64_stats_init(&cs->syncp);
1254 	}
1255 	return 0;
1256 }
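/* A sketch of the intended per-CPU update in the packet path (illustrative,
 * following the layout of struct ip_vs_cpu_stats in recent kernels):
 *
 *	struct ip_vs_cpu_stats *cs = this_cpu_ptr(s->cpustats);
 *
 *	u64_stats_update_begin(&cs->syncp);
 *	u64_stats_inc(&cs->cnt.inpkts);
 *	u64_stats_add(&cs->cnt.inbytes, skb->len);
 *	u64_stats_update_end(&cs->syncp);
 */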
1257 
1258 struct ip_vs_stats *ip_vs_stats_alloc(void)
1259 {
1260 	struct ip_vs_stats *s = kzalloc_obj(*s);
1261 
1262 	if (s && ip_vs_stats_init_alloc(s) >= 0)
1263 		return s;
1264 	kfree(s);
1265 	return NULL;
1266 }
1267 
1268 void ip_vs_stats_release(struct ip_vs_stats *stats)
1269 {
1270 	free_percpu(stats->cpustats);
1271 }
1272 
1273 void ip_vs_stats_free(struct ip_vs_stats *stats)
1274 {
1275 	if (stats) {
1276 		ip_vs_stats_release(stats);
1277 		kfree(stats);
1278 	}
1279 }
1280 
1281 /*
1282  *	Update a destination in the given service
1283  */
1284 static void
1285 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
1286 		    struct ip_vs_dest_user_kern *udest, int add)
1287 {
1288 	struct netns_ipvs *ipvs = svc->ipvs;
1289 	struct ip_vs_service *old_svc;
1290 	struct ip_vs_scheduler *sched;
1291 	int conn_flags;
1292 
1293 	/* We cannot modify an address and change the address family */
1294 	BUG_ON(!add && udest->af != dest->af);
1295 
1296 	if (add && udest->af != svc->af)
1297 		ipvs->mixed_address_family_dests++;
1298 
1299 	/* keep the last_weight with latest non-0 weight */
1300 	if (add || udest->weight != 0)
1301 		atomic_set(&dest->last_weight, udest->weight);
1302 
1303 	/* set the weight and the flags */
1304 	atomic_set(&dest->weight, udest->weight);
1305 	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
1306 	conn_flags |= IP_VS_CONN_F_INACTIVE;
1307 
1308 	/* Need to rehash? */
1309 	if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
1310 	    IP_VS_DFWD_METHOD(dest) ||
1311 	    udest->tun_type != dest->tun_type ||
1312 	    udest->tun_port != dest->tun_port)
1313 		ip_vs_rs_unhash(dest);
1314 
1315 	/* set the tunnel info */
1316 	dest->tun_type = udest->tun_type;
1317 	dest->tun_port = udest->tun_port;
1318 	dest->tun_flags = udest->tun_flags;
1319 
1320 	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
1321 	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
1322 		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
1323 	} else {
1324 		/* FTP-NAT requires conntrack for mangling */
1325 		if (svc->port == FTPPORT)
1326 			ip_vs_register_conntrack(svc);
1327 	}
1328 	atomic_set(&dest->conn_flags, conn_flags);
1329 	/* Put the real service in rs_table if not present. */
1330 	ip_vs_rs_hash(ipvs, dest);
1331 
1332 	/* bind the service */
1333 	old_svc = rcu_dereference_protected(dest->svc, 1);
1334 	if (!old_svc) {
1335 		__ip_vs_bind_svc(dest, svc);
1336 	} else {
1337 		if (old_svc != svc) {
1338 			ip_vs_zero_stats(&dest->stats);
1339 			__ip_vs_bind_svc(dest, svc);
1340 			__ip_vs_svc_put(old_svc);
1341 		}
1342 	}
1343 
1344 	/* set the dest status flags */
1345 	dest->flags |= IP_VS_DEST_F_AVAILABLE;
1346 
1347 	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
1348 		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
1349 	dest->u_threshold = udest->u_threshold;
1350 	dest->l_threshold = udest->l_threshold;
1351 
1352 	dest->af = udest->af;
1353 
1354 	if (add) {
1355 		list_add_rcu(&dest->n_list, &svc->destinations);
1356 		svc->num_dests++;
1357 		sched = rcu_dereference_protected(svc->scheduler, 1);
1358 		if (sched && sched->add_dest)
1359 			sched->add_dest(svc, dest);
1360 	} else {
1361 		spin_lock_bh(&dest->dst_lock);
1362 		__ip_vs_dst_cache_reset(dest);
1363 		spin_unlock_bh(&dest->dst_lock);
1364 
1365 		sched = rcu_dereference_protected(svc->scheduler, 1);
1366 		if (sched && sched->upd_dest)
1367 			sched->upd_dest(svc, dest);
1368 	}
1369 }
1370 
1371 
1372 /*
1373  *	Create a destination for the given service
1374  */
1375 static int
1376 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1377 {
1378 	struct ip_vs_dest *dest;
1379 	unsigned int atype;
1380 	int ret;
1381 
1382 #ifdef CONFIG_IP_VS_IPV6
1383 	if (udest->af == AF_INET6) {
1384 		atype = ipv6_addr_type(&udest->addr.in6);
1385 		if ((!(atype & IPV6_ADDR_UNICAST) ||
1386 			atype & IPV6_ADDR_LINKLOCAL) &&
1387 			!__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
1388 			return -EINVAL;
1389 
1390 		ret = nf_defrag_ipv6_enable(svc->ipvs->net);
1391 		if (ret)
1392 			return ret;
1393 	} else
1394 #endif
1395 	{
1396 		atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
1397 		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
1398 			return -EINVAL;
1399 	}
1400 
1401 	dest = kzalloc_obj(struct ip_vs_dest);
1402 	if (dest == NULL)
1403 		return -ENOMEM;
1404 
1405 	ret = ip_vs_stats_init_alloc(&dest->stats);
1406 	if (ret < 0)
1407 		goto err_alloc;
1408 
1409 	ret = ip_vs_start_estimator(svc->ipvs, &dest->stats);
1410 	if (ret < 0)
1411 		goto err_stats;
1412 
1413 	dest->af = udest->af;
1414 	dest->protocol = svc->protocol;
1415 	dest->vaddr = svc->addr;
1416 	dest->vport = svc->port;
1417 	dest->vfwmark = svc->fwmark;
1418 	ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
1419 	dest->port = udest->port;
1420 
1421 	atomic_set(&dest->activeconns, 0);
1422 	atomic_set(&dest->inactconns, 0);
1423 	atomic_set(&dest->persistconns, 0);
1424 	refcount_set(&dest->refcnt, 1);
1425 
1426 	INIT_HLIST_NODE(&dest->d_list);
1427 	spin_lock_init(&dest->dst_lock);
1428 	__ip_vs_update_dest(svc, dest, udest, 1);
1429 
1430 	return 0;
1431 
1432 err_stats:
1433 	ip_vs_stats_release(&dest->stats);
1434 
1435 err_alloc:
1436 	kfree(dest);
1437 	return ret;
1438 }
1439 
1440 
1441 /*
1442  *	Add a destination into an existing service
1443  */
1444 static int
1445 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1446 {
1447 	struct ip_vs_dest *dest;
1448 	union nf_inet_addr daddr;
1449 	__be16 dport = udest->port;
1450 	int ret;
1451 
1452 	if (udest->weight < 0) {
1453 		pr_err("%s(): server weight less than zero\n", __func__);
1454 		return -ERANGE;
1455 	}
1456 
1457 	if (udest->l_threshold > udest->u_threshold) {
1458 		pr_err("%s(): lower threshold is higher than upper threshold\n",
1459 			__func__);
1460 		return -ERANGE;
1461 	}
1462 
1463 	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1464 		if (udest->tun_port == 0) {
1465 			pr_err("%s(): tunnel port is zero\n", __func__);
1466 			return -EINVAL;
1467 		}
1468 	}
1469 
1470 	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1471 
1472 	/* We use a function that requires the RCU lock */
1473 	rcu_read_lock();
1474 	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1475 	rcu_read_unlock();
1476 
1477 	if (dest != NULL) {
1478 		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
1479 		return -EEXIST;
1480 	}
1481 
1482 	/*
1483 	 * Check if the dest already exists in the trash and
1484 	 * is from the same service
1485 	 */
1486 	dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
1487 
1488 	if (dest != NULL) {
1489 		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
1490 			      "dest->refcnt=%d, service %u/%s:%u\n",
1491 			      IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
1492 			      refcount_read(&dest->refcnt),
1493 			      dest->vfwmark,
1494 			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
1495 			      ntohs(dest->vport));
1496 
1497 		ret = ip_vs_start_estimator(svc->ipvs, &dest->stats);
1498 		/* On error, put the dest back into the trash */
1499 		if (ret < 0)
1500 			ip_vs_trash_put_dest(svc->ipvs, dest, dest->idle_start,
1501 					     false);
1502 		else
1503 			__ip_vs_update_dest(svc, dest, udest, 1);
1504 	} else {
1505 		/*
1506 		 * Allocate and initialize the dest structure
1507 		 */
1508 		ret = ip_vs_new_dest(svc, udest);
1509 	}
1510 
1511 	return ret;
1512 }
1513 
1514 
1515 /*
1516  *	Edit a destination in the given service
1517  */
1518 static int
1519 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1520 {
1521 	struct ip_vs_dest *dest;
1522 	union nf_inet_addr daddr;
1523 	__be16 dport = udest->port;
1524 
1525 	if (udest->weight < 0) {
1526 		pr_err("%s(): server weight less than zero\n", __func__);
1527 		return -ERANGE;
1528 	}
1529 
1530 	if (udest->l_threshold > udest->u_threshold) {
1531 		pr_err("%s(): lower threshold is higher than upper threshold\n",
1532 			__func__);
1533 		return -ERANGE;
1534 	}
1535 
1536 	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
1537 		if (udest->tun_port == 0) {
1538 			pr_err("%s(): tunnel port is zero\n", __func__);
1539 			return -EINVAL;
1540 		}
1541 	}
1542 
1543 	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
1544 
1545 	/* We use a function that requires the RCU lock */
1546 	rcu_read_lock();
1547 	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
1548 	rcu_read_unlock();
1549 
1550 	if (dest == NULL) {
1551 		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1552 		return -ENOENT;
1553 	}
1554 
1555 	__ip_vs_update_dest(svc, dest, udest, 0);
1556 
1557 	return 0;
1558 }
1559 
1560 /*
1561  *	Delete a destination (must be already unlinked from the service)
1562  */
1563 static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
1564 			     bool cleanup)
1565 {
1566 	ip_vs_stop_estimator(ipvs, &dest->stats);
1567 
1568 	/*
1569 	 *  Remove it from the doubly-linked list of real services.
1570 	 */
1571 	ip_vs_rs_unhash(dest);
1572 
1573 	ip_vs_trash_put_dest(ipvs, dest, 0, cleanup);
1574 
1575 	/* Queue up delayed work to expire all no destination connections.
1576 	 * No-op when CONFIG_SYSCTL is disabled.
1577 	 */
1578 	if (!cleanup)
1579 		ip_vs_enqueue_expire_nodest_conns(ipvs);
1580 }
1581 
1582 
1583 /*
1584  *	Unlink a destination from the given service
1585  */
1586 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1587 				struct ip_vs_dest *dest,
1588 				int svcupd)
1589 {
1590 	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1591 
1592 	spin_lock_bh(&dest->dst_lock);
1593 	__ip_vs_dst_cache_reset(dest);
1594 	spin_unlock_bh(&dest->dst_lock);
1595 
1596 	/*
1597 	 *  Remove it from the doubly-linked destination list.
1598 	 */
1599 	list_del_rcu(&dest->n_list);
1600 	svc->num_dests--;
1601 
1602 	if (dest->af != svc->af)
1603 		svc->ipvs->mixed_address_family_dests--;
1604 
1605 	if (svcupd) {
1606 		struct ip_vs_scheduler *sched;
1607 
1608 		sched = rcu_dereference_protected(svc->scheduler, 1);
1609 		if (sched && sched->del_dest)
1610 			sched->del_dest(svc, dest);
1611 	}
1612 }
1613 
1614 
1615 /*
1616  *	Delete a destination server in the given service
1617  */
1618 static int
1619 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1620 {
1621 	struct ip_vs_dest *dest;
1622 	__be16 dport = udest->port;
1623 
1624 	/* We use a function that requires the RCU lock */
1625 	rcu_read_lock();
1626 	dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
1627 	rcu_read_unlock();
1628 
1629 	if (dest == NULL) {
1630 		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1631 		return -ENOENT;
1632 	}
1633 
1634 	/*
1635 	 *	Unlink dest from the service
1636 	 */
1637 	__ip_vs_unlink_dest(svc, dest, 1);
1638 
1639 	/*
1640 	 *	Delete the destination
1641 	 */
1642 	__ip_vs_del_dest(svc->ipvs, dest, false);
1643 
1644 	return 0;
1645 }
1646 
1647 static void ip_vs_dest_trash_expire(struct timer_list *t)
1648 {
1649 	struct netns_ipvs *ipvs = timer_container_of(ipvs, t,
1650 						     dest_trash_timer);
1651 	struct ip_vs_dest *dest, *next;
1652 	unsigned long now = jiffies;
1653 
1654 	spin_lock(&ipvs->dest_trash_lock);
1655 	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1656 		if (refcount_read(&dest->refcnt) > 1)
1657 			continue;
1658 		if (dest->idle_start) {
1659 			if (time_before(now, dest->idle_start +
1660 					     IP_VS_DEST_TRASH_PERIOD))
1661 				continue;
1662 		} else {
1663 			dest->idle_start = max(1UL, now);
1664 			continue;
1665 		}
1666 		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1667 			      dest->vfwmark,
1668 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
1669 			      ntohs(dest->port));
1670 		list_del(&dest->t_list);
1671 		ip_vs_dest_free(dest);
1672 	}
1673 	if (!list_empty(&ipvs->dest_trash))
1674 		mod_timer(&ipvs->dest_trash_timer,
1675 			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1676 	spin_unlock(&ipvs->dest_trash_lock);
1677 }
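/* Timeline example (illustrative): a trashed dest whose refcnt drops to 1
 * is only stamped with idle_start on the first timer pass and freed on a
 * later pass once IP_VS_DEST_TRASH_PERIOD has elapsed, so a dest re-added
 * in between is rescued from the trash instead of destroyed.
 */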
1678 
1679 /*
1680  *	Add a service into the service hash table
1681  */
1682 static int
1683 ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
1684 		  struct ip_vs_service **svc_p)
1685 {
1686 	struct ip_vs_scheduler *sched = NULL;
1687 	struct ip_vs_rht *tc_new = NULL;
1688 	struct ip_vs_rht *t, *t_new = NULL;
1689 	int af_id = ip_vs_af_index(u->af);
1690 	struct ip_vs_service *svc = NULL;
1691 	struct ip_vs_pe *pe = NULL;
1692 	int ret_hooks = -1;
1693 	int ret = 0;
1694 
1695 	/* increase the module use count */
1696 	if (!ip_vs_use_count_inc())
1697 		return -ENOPROTOOPT;
1698 
1699 	/* Lookup the scheduler by 'u->sched_name' */
1700 	if (strcmp(u->sched_name, "none")) {
1701 		sched = ip_vs_scheduler_get(u->sched_name);
1702 		if (!sched) {
1703 			pr_info("Scheduler module ip_vs_%s not found\n",
1704 				u->sched_name);
1705 			ret = -ENOENT;
1706 			goto out_err;
1707 		}
1708 	}
1709 
1710 	if (u->pe_name && *u->pe_name) {
1711 		pe = ip_vs_pe_getbyname(u->pe_name);
1712 		if (pe == NULL) {
1713 			pr_info("persistence engine module ip_vs_pe_%s "
1714 				"not found\n", u->pe_name);
1715 			ret = -ENOENT;
1716 			goto out_err;
1717 		}
1718 	}
1719 
1720 #ifdef CONFIG_IP_VS_IPV6
1721 	if (u->af == AF_INET6) {
1722 		__u32 plen = (__force __u32) u->netmask;
1723 
1724 		if (plen < 1 || plen > 128) {
1725 			ret = -EINVAL;
1726 			goto out_err;
1727 		}
1728 
1729 		ret = nf_defrag_ipv6_enable(ipvs->net);
1730 		if (ret)
1731 			goto out_err;
1732 	}
1733 #endif
1734 
1735 	t = rcu_dereference_protected(ipvs->svc_table, 1);
1736 	if (!t) {
1737 		int lfactor = sysctl_svc_lfactor(ipvs);
1738 		int new_size = ip_vs_svc_desired_size(ipvs, NULL, lfactor);
1739 
1740 		t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor);
1741 		if (!t_new) {
1742 			ret = -ENOMEM;
1743 			goto out_err;
1744 		}
1745 	}
1746 
1747 	if (!rcu_dereference_protected(ipvs->conn_tab, 1)) {
1748 		int lfactor = sysctl_conn_lfactor(ipvs);
1749 		int new_size = ip_vs_conn_desired_size(ipvs, NULL, lfactor);
1750 
1751 		tc_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor);
1752 		if (!tc_new) {
1753 			ret = -ENOMEM;
1754 			goto out_err;
1755 		}
1756 	}
1757 
1758 	if (!atomic_read(&ipvs->num_services[af_id])) {
1759 		ret = ip_vs_register_hooks(ipvs, u->af);
1760 		if (ret < 0)
1761 			goto out_err;
1762 		ret_hooks = ret;
1763 	}
1764 
1765 	svc = kzalloc_obj(struct ip_vs_service);
1766 	if (svc == NULL) {
1767 		IP_VS_DBG(1, "%s(): no memory\n", __func__);
1768 		ret = -ENOMEM;
1769 		goto out_err;
1770 	}
1771 	ret = ip_vs_stats_init_alloc(&svc->stats);
1772 	if (ret < 0)
1773 		goto out_err;
1774 
1775 	/* I'm the first user of the service */
1776 	atomic_set(&svc->refcnt, 0);
1777 
1778 	svc->af = u->af;
1779 	svc->protocol = u->protocol;
1780 	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1781 	svc->port = u->port;
1782 	svc->fwmark = u->fwmark;
1783 	svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
1784 	svc->timeout = u->timeout * HZ;
1785 	svc->netmask = u->netmask;
1786 	svc->ipvs = ipvs;
1787 
1788 	INIT_LIST_HEAD(&svc->destinations);
1789 	spin_lock_init(&svc->sched_lock);
1790 
1791 	/* Bind the scheduler */
1792 	if (sched) {
1793 		ret = ip_vs_bind_scheduler(svc, sched);
1794 		if (ret)
1795 			goto out_err;
1796 	}
1797 
1798 	ret = ip_vs_start_estimator(ipvs, &svc->stats);
1799 	if (ret < 0)
1800 		goto out_err;
1801 
1802 	if (t_new) {
1803 		clear_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags);
1804 		rcu_assign_pointer(ipvs->svc_table, t_new);
1805 		t_new = NULL;
1806 	}
1807 	if (tc_new) {
1808 		rcu_assign_pointer(ipvs->conn_tab, tc_new);
1809 		tc_new = NULL;
1810 	}
1811 
1812 	/* Update the virtual service counters */
1813 	if (svc->port == FTPPORT)
1814 		atomic_inc(&ipvs->ftpsvc_counter[af_id]);
1815 	else if (!svc->port && !svc->fwmark)
1816 		atomic_inc(&ipvs->nullsvc_counter[af_id]);
1817 	if (pe && pe->conn_out)
1818 		atomic_inc(&ipvs->conn_out_counter[af_id]);
1819 
1820 	/* Bind the ct retriever */
1821 	RCU_INIT_POINTER(svc->pe, pe);
1822 	pe = NULL;
1823 
1824 	if (svc->fwmark)
1825 		atomic_inc(&ipvs->fwm_services[af_id]);
1826 	else
1827 		atomic_inc(&ipvs->nonfwm_services[af_id]);
1828 	atomic_inc(&ipvs->num_services[af_id]);
1829 
1830 	/* Hash the service into the service table */
1831 	ip_vs_svc_hash(svc);
1832 
1833 	/* Schedule resize work */
1834 	if (t && ip_vs_get_num_services(ipvs) > t->u_thresh &&
1835 	    !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags))
1836 		queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work,
1837 				   1);
1838 
1839 	*svc_p = svc;
1840 
1841 	if (!READ_ONCE(ipvs->enable)) {
1842 		mutex_lock(&ipvs->est_mutex);
1843 
1844 		/* Now there is a service - full throttle */
1845 		WRITE_ONCE(ipvs->enable, 1);
1846 
1847 		ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
1848 
1849 		/* Start estimation for first time */
1850 		ip_vs_est_reload_start(ipvs, true);
1851 		mutex_unlock(&ipvs->est_mutex);
1852 	}
1853 
1854 	return 0;
1855 
1856 
1857  out_err:
1858 	if (tc_new)
1859 		ip_vs_rht_free(tc_new);
1860 	if (t_new)
1861 		ip_vs_rht_free(t_new);
1862 	if (ret_hooks >= 0)
1863 		ip_vs_unregister_hooks(ipvs, u->af);
1864 	if (svc != NULL) {
1865 		ip_vs_unbind_scheduler(svc, sched);
1866 		ip_vs_service_free(svc);
1867 	}
1868 	ip_vs_scheduler_put(sched);
1869 	ip_vs_pe_put(pe);
1870 
1871 	/* decrease the module use count */
1872 	ip_vs_use_count_dec();
1873 
1874 	return ret;
1875 }
1876 
1877 
1878 /*
1879  *	Edit a service and bind it with a new scheduler
1880  */
1881 static int
1882 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1883 {
1884 	struct ip_vs_scheduler *sched = NULL, *old_sched;
1885 	struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1886 	int ret = 0;
1887 	bool new_pe_conn_out, old_pe_conn_out;
1888 	struct netns_ipvs *ipvs = svc->ipvs;
1889 	int af_id = ip_vs_af_index(svc->af);
1890 
1891 	/*
1892 	 * Lookup the scheduler by 'u->sched_name'
1893 	 */
1894 	if (strcmp(u->sched_name, "none")) {
1895 		sched = ip_vs_scheduler_get(u->sched_name);
1896 		if (!sched) {
1897 			pr_info("Scheduler module ip_vs_%s not found\n",
1898 				u->sched_name);
1899 			return -ENOENT;
1900 		}
1901 	}
1902 	old_sched = sched;
1903 
1904 	if (u->pe_name && *u->pe_name) {
1905 		pe = ip_vs_pe_getbyname(u->pe_name);
1906 		if (pe == NULL) {
1907 			pr_info("persistence engine module ip_vs_pe_%s "
1908 				"not found\n", u->pe_name);
1909 			ret = -ENOENT;
1910 			goto out;
1911 		}
1912 		old_pe = pe;
1913 	}
1914 
1915 #ifdef CONFIG_IP_VS_IPV6
1916 	if (u->af == AF_INET6) {
1917 		__u32 plen = (__force __u32) u->netmask;
1918 
1919 		if (plen < 1 || plen > 128) {
1920 			ret = -EINVAL;
1921 			goto out;
1922 		}
1923 	}
1924 #endif
1925 
1926 	old_sched = rcu_dereference_protected(svc->scheduler, 1);
1927 	if (sched != old_sched) {
1928 		if (old_sched) {
1929 			ip_vs_unbind_scheduler(svc, old_sched);
1930 			RCU_INIT_POINTER(svc->scheduler, NULL);
1931 			/* Wait for all svc->sched_data users */
1932 			synchronize_rcu();
1933 		}
1934 		/* Bind the new scheduler */
1935 		if (sched) {
1936 			ret = ip_vs_bind_scheduler(svc, sched);
1937 			if (ret) {
1938 				ip_vs_scheduler_put(sched);
1939 				goto out;
1940 			}
1941 		}
1942 	}
1943 
1944 	/*
1945 	 * Set the flags and timeout value
1946 	 */
1947 	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1948 	svc->timeout = u->timeout * HZ;
1949 	svc->netmask = u->netmask;
1950 
1951 	old_pe = rcu_dereference_protected(svc->pe, 1);
1952 	if (pe != old_pe) {
1953 		rcu_assign_pointer(svc->pe, pe);
1954 		/* check for optional methods in new pe */
		new_pe_conn_out = pe && pe->conn_out;
		old_pe_conn_out = old_pe && old_pe->conn_out;
1957 		if (new_pe_conn_out && !old_pe_conn_out)
1958 			atomic_inc(&ipvs->conn_out_counter[af_id]);
1959 		if (old_pe_conn_out && !new_pe_conn_out)
1960 			atomic_dec(&ipvs->conn_out_counter[af_id]);
1961 	}
1962 
1963 out:
1964 	ip_vs_scheduler_put(old_sched);
1965 	ip_vs_pe_put(old_pe);
1966 	return ret;
1967 }
1968 
1969 /*
1970  *	Delete a service from the service list
1971  *	- The service must be unlinked, unlocked and not referenced!
1972  *	- We are called under _bh lock
1973  */
1974 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1975 {
1976 	struct ip_vs_dest *dest, *nxt;
1977 	struct ip_vs_scheduler *old_sched;
1978 	struct ip_vs_pe *old_pe;
1979 	struct netns_ipvs *ipvs = svc->ipvs;
1980 	int af_id = ip_vs_af_index(svc->af);
1981 
1982 	atomic_dec(&ipvs->num_services[af_id]);
1983 	if (!atomic_read(&ipvs->num_services[af_id]))
1984 		ip_vs_unregister_hooks(ipvs, svc->af);
1985 	if (svc->fwmark)
1986 		atomic_dec(&ipvs->fwm_services[af_id]);
1987 	else
1988 		atomic_dec(&ipvs->nonfwm_services[af_id]);
1989 
1990 	ip_vs_stop_estimator(svc->ipvs, &svc->stats);
1991 
1992 	/* Unbind scheduler */
1993 	old_sched = rcu_dereference_protected(svc->scheduler, 1);
1994 	ip_vs_unbind_scheduler(svc, old_sched);
1995 	ip_vs_scheduler_put(old_sched);
1996 
1997 	/* Unbind persistence engine, keep svc->pe */
1998 	old_pe = rcu_dereference_protected(svc->pe, 1);
1999 	if (old_pe && old_pe->conn_out)
2000 		atomic_dec(&ipvs->conn_out_counter[af_id]);
2001 	ip_vs_pe_put(old_pe);
2002 
2003 	/*
2004 	 *    Unlink the whole destination list
2005 	 */
2006 	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
2007 		__ip_vs_unlink_dest(svc, dest, 0);
2008 		__ip_vs_del_dest(svc->ipvs, dest, cleanup);
2009 	}
2010 
2011 	/*
2012 	 *    Update the virtual service counters
2013 	 */
2014 	if (svc->port == FTPPORT)
2015 		atomic_dec(&ipvs->ftpsvc_counter[af_id]);
2016 	else if (!svc->port && !svc->fwmark)
2017 		atomic_dec(&ipvs->nullsvc_counter[af_id]);
2018 
2019 	/*
2020 	 *    Free the service if nobody refers to it
2021 	 */
2022 	__ip_vs_svc_put(svc);
2023 
2024 	/* decrease the module use count */
2025 	ip_vs_use_count_dec();
2026 }
2027 
2028 /*
2029  * Unlink a service from list and try to delete it if its refcnt reached 0
2030  */
2031 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
2032 {
2033 	ip_vs_unregister_conntrack(svc);
2034 	/* Hold svc to avoid double release from dest_trash */
2035 	atomic_inc(&svc->refcnt);
2036 	/*
2037 	 * Unhash it from the service table
2038 	 */
2039 	ip_vs_svc_unhash(svc);
2040 
2041 	__ip_vs_del_service(svc, cleanup);
2042 }
2043 
2044 /*
2045  *	Delete a service from the service list
2046  */
2047 static int ip_vs_del_service(struct ip_vs_service *svc)
2048 {
2049 	struct netns_ipvs *ipvs;
2050 	struct ip_vs_rht *t, *p;
2051 	int ns;
2052 
2053 	if (svc == NULL)
2054 		return -EEXIST;
2055 	ipvs = svc->ipvs;
2056 	ip_vs_unlink_service(svc, false);
2057 	t = rcu_dereference_protected(ipvs->svc_table, 1);
2058 
2059 	/* Drop the table if no more services */
2060 	ns = ip_vs_get_num_services(ipvs);
2061 	if (!ns) {
2062 		/* Stop the resizer and drop the tables */
2063 		set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags);
2064 		cancel_delayed_work_sync(&ipvs->svc_resize_work);
2065 		if (t) {
2066 			rcu_assign_pointer(ipvs->svc_table, NULL);
2067 			/* Inform readers that table is removed */
2068 			smp_mb__before_atomic();
2069 			atomic_inc(&ipvs->svc_table_changes);
2070 			while (1) {
2071 				p = rcu_dereference_protected(t->new_tbl, 1);
2072 				call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
2073 				if (p == t)
2074 					break;
2075 				t = p;
2076 			}
2077 		}
2078 	} else if (ns <= t->l_thresh &&
2079 		   !test_and_set_bit(IP_VS_WORK_SVC_RESIZE,
2080 				     &ipvs->work_flags)) {
2081 		queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work,
2082 				   1);
2083 	}
2084 	return 0;
2085 }
2086 
2088 /*
2089  *	Flush all the virtual services
2090  */
2091 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
2092 {
2093 	DECLARE_IP_VS_RHT_WALK_BUCKETS();
2094 	struct hlist_bl_head *head;
2095 	struct ip_vs_service *svc;
2096 	struct hlist_bl_node *ne;
2097 	struct hlist_bl_node *e;
2098 	struct ip_vs_rht *t, *p;
2099 
2100 	/* Stop the resizer and drop the tables */
2101 	if (!test_and_set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags))
2102 		cancel_delayed_work_sync(&ipvs->svc_resize_work);
2103 	/* No resizer, so now we have exclusive write access */
2104 
2105 	if (ip_vs_get_num_services(ipvs)) {
2106 		ip_vs_rht_walk_buckets(ipvs->svc_table, head) {
2107 			hlist_bl_for_each_entry_safe(svc, e, ne, head, s_list)
2108 				ip_vs_unlink_service(svc, cleanup);
2109 		}
2110 	}
2111 
2112 	/* Unregister the hash table and release it after RCU grace period */
2113 	t = rcu_dereference_protected(ipvs->svc_table, 1);
2114 	if (t) {
2115 		rcu_assign_pointer(ipvs->svc_table, NULL);
2116 		/* Inform readers that table is removed */
2117 		smp_mb__before_atomic();
2118 		atomic_inc(&ipvs->svc_table_changes);
2119 		while (1) {
2120 			p = rcu_dereference_protected(t->new_tbl, 1);
2121 			call_rcu(&t->rcu_head, ip_vs_rht_rcu_free);
2122 			if (p == t)
2123 				break;
2124 			t = p;
2125 		}
2126 	}
2127 	/* Stop the tot_stats estimator early under service_mutex
2128 	 * to avoid locking it again later.
2129 	 */
2130 	if (cleanup)
2131 		ip_vs_stop_estimator_tot_stats(ipvs);
2132 	return 0;
2133 }
2134 
2135 /*
 *	Delete all services in the service table of each exiting netns.
2137  *	Called by __ip_vs_batch_cleanup()
2138  */
2139 void ip_vs_service_nets_cleanup(struct list_head *net_list)
2140 {
2141 	struct netns_ipvs *ipvs;
2142 	struct net *net;
2143 
	/* Flush the services of every exiting netns */
2145 	list_for_each_entry(net, net_list, exit_list) {
2146 		ipvs = net_ipvs(net);
2147 		mutex_lock(&ipvs->service_mutex);
2148 		ip_vs_flush(ipvs, true);
2149 		mutex_unlock(&ipvs->service_mutex);
2150 	}
2151 }
2152 
2153 /* Put all references for device (dst_cache) */
2154 static inline void
2155 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
2156 {
2157 	struct ip_vs_dest_dst *dest_dst;
2158 
2159 	spin_lock_bh(&dest->dst_lock);
2160 	dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
2161 	if (dest_dst && dest_dst->dst_cache->dev == dev) {
		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u, dest->refcnt=%d\n",
2163 			      dev->name,
2164 			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
2165 			      ntohs(dest->port),
2166 			      refcount_read(&dest->refcnt));
2167 		__ip_vs_dst_cache_reset(dest);
2168 	}
2169 	spin_unlock_bh(&dest->dst_lock);
}

/* Netdev event receiver
2173  * Currently only NETDEV_DOWN is handled to release refs to cached dsts
2174  */
2175 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
2176 			   void *ptr)
2177 {
2178 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2179 	struct net *net = dev_net(dev);
2180 	struct netns_ipvs *ipvs = net_ipvs(net);
2181 	DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU();
2182 	unsigned int resched_score = 0;
2183 	struct hlist_bl_head *head;
2184 	struct ip_vs_service *svc;
2185 	struct hlist_bl_node *e;
2186 	struct ip_vs_dest *dest;
2187 	int old_gen, new_gen;
2188 
2189 	if (event != NETDEV_DOWN || !ipvs)
2190 		return NOTIFY_DONE;
2191 	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
2192 
2193 	old_gen = atomic_read(&ipvs->svc_table_changes);
2194 
2195 	rcu_read_lock();
2196 
2197 repeat:
2198 	smp_rmb(); /* ipvs->svc_table and svc_table_changes */
2199 	ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) {
2200 		hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
2201 			list_for_each_entry_rcu(dest, &svc->destinations,
2202 						n_list) {
2203 				ip_vs_forget_dev(dest, dev);
2204 				resched_score += 10;
2205 			}
2206 			resched_score++;
2207 		}
2208 		resched_score++;
2209 		if (resched_score >= 100) {
2210 			resched_score = 0;
2211 			cond_resched_rcu();
2212 			new_gen = atomic_read(&ipvs->svc_table_changes);
			/* New table installed? */
2214 			if (old_gen != new_gen) {
2215 				old_gen = new_gen;
2216 				goto repeat;
2217 			}
2218 		}
2219 	}
2220 	rcu_read_unlock();
2221 
2222 	return NOTIFY_DONE;
2223 }
2224 
2225 /*
2226  *	Zero counters in a service or all services
2227  */
2228 static int ip_vs_zero_service(struct ip_vs_service *svc)
2229 {
2230 	struct ip_vs_dest *dest;
2231 
2232 	list_for_each_entry(dest, &svc->destinations, n_list) {
2233 		ip_vs_zero_stats(&dest->stats);
2234 	}
2235 	ip_vs_zero_stats(&svc->stats);
2236 	return 0;
2237 }
2238 
2239 static int ip_vs_zero_all(struct netns_ipvs *ipvs)
2240 {
2241 	DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU();
2242 	unsigned int resched_score = 0;
2243 	struct hlist_bl_head *head;
2244 	struct ip_vs_service *svc;
2245 	struct hlist_bl_node *e;
2246 
2247 	rcu_read_lock();
2248 
2249 	ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) {
2250 		hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
2251 			ip_vs_zero_service(svc);
2252 			resched_score += 10;
2253 		}
2254 		resched_score++;
2255 		if (resched_score >= 100) {
2256 			resched_score = 0;
2257 			cond_resched_rcu();
2258 		}
2259 	}
2260 
2261 	rcu_read_unlock();
2262 
2263 	ip_vs_zero_stats(&ipvs->tot_stats->s);
2264 	return 0;
2265 }
2266 
2267 #ifdef CONFIG_SYSCTL
2268 
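/*
 *	Handler for the defense mode sysctls (drop_entry, drop_packet,
 *	secure_tcp): accept only values 0..3 and recompute the defense
 *	level on change.
 */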
2269 static int
2270 proc_do_defense_mode(const struct ctl_table *table, int write,
2271 		     void *buffer, size_t *lenp, loff_t *ppos)
2272 {
2273 	struct netns_ipvs *ipvs = table->extra2;
2274 	int *valp = table->data;
2275 	int val = *valp;
2276 	int rc;
2277 
2278 	struct ctl_table tmp = {
2279 		.data = &val,
2280 		.maxlen = sizeof(int),
2281 		.mode = table->mode,
2282 	};
2283 
2284 	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2285 	if (write && (*valp != val)) {
2286 		if (val < 0 || val > 3) {
2287 			rc = -EINVAL;
2288 		} else {
2289 			*valp = val;
2290 			update_defense_level(ipvs);
2291 		}
2292 	}
2293 	return rc;
2294 }
2295 
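/*
 *	Handler for the sync_threshold sysctl: both values must be
 *	non-negative and the threshold must stay below the period when
 *	a period is set; updates are serialized with sync_mutex.
 */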
2296 static int
2297 proc_do_sync_threshold(const struct ctl_table *table, int write,
2298 		       void *buffer, size_t *lenp, loff_t *ppos)
2299 {
2300 	struct netns_ipvs *ipvs = table->extra2;
2301 	int *valp = table->data;
2302 	int val[2];
2303 	int rc;
2304 	struct ctl_table tmp = {
2305 		.data = &val,
2306 		.maxlen = table->maxlen,
2307 		.mode = table->mode,
2308 	};
2309 
2310 	mutex_lock(&ipvs->sync_mutex);
2311 	memcpy(val, valp, sizeof(val));
2312 	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2313 	if (write) {
2314 		if (val[0] < 0 || val[1] < 0 ||
2315 		    (val[0] >= val[1] && val[1]))
2316 			rc = -EINVAL;
2317 		else
2318 			memcpy(valp, val, sizeof(val));
2319 	}
2320 	mutex_unlock(&ipvs->sync_mutex);
2321 	return rc;
2322 }
2323 
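/*
 *	Handler for the sync_ports sysctl: the value must be a power
 *	of 2, at least 1.
 */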
2324 static int
2325 proc_do_sync_ports(const struct ctl_table *table, int write,
2326 		   void *buffer, size_t *lenp, loff_t *ppos)
2327 {
2328 	int *valp = table->data;
2329 	int val = *valp;
2330 	int rc;
2331 
2332 	struct ctl_table tmp = {
2333 		.data = &val,
2334 		.maxlen = sizeof(int),
2335 		.mode = table->mode,
2336 	};
2337 
2338 	rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2339 	if (write && (*valp != val)) {
2340 		if (val < 1 || !is_power_of_2(val))
2341 			rc = -EINVAL;
2342 		else
2343 			*valp = val;
2344 	}
2345 	return rc;
2346 }
2347 
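/*
 *	Parse a CPU list written to est_cpulist, restrict it to the
 *	CPUs allowed for the current task and restart the estimator
 *	kthreads with the new mask.
 */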
2348 static int ipvs_proc_est_cpumask_set(const struct ctl_table *table,
2349 				     void *buffer)
2350 {
2351 	struct netns_ipvs *ipvs = table->extra2;
2352 	cpumask_var_t *valp = table->data;
2353 	cpumask_var_t newmask;
2354 	int ret;
2355 
2356 	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL))
2357 		return -ENOMEM;
2358 
2359 	ret = cpulist_parse(buffer, newmask);
2360 	if (ret)
2361 		goto out;
2362 
2363 	mutex_lock(&ipvs->est_mutex);
2364 
2365 	if (!ipvs->est_cpulist_valid) {
2366 		if (!zalloc_cpumask_var(valp, GFP_KERNEL)) {
2367 			ret = -ENOMEM;
2368 			goto unlock;
2369 		}
2370 		ipvs->est_cpulist_valid = 1;
2371 	}
2372 	cpumask_and(newmask, newmask, &current->cpus_mask);
2373 	cpumask_copy(*valp, newmask);
2374 	/* est_max_threads may depend on cpulist size */
2375 	ipvs->est_max_threads = ip_vs_est_max_threads(ipvs);
2376 	ipvs->est_calc_phase = 1;
2377 	ip_vs_est_reload_start(ipvs, true);
2378 
2379 unlock:
2380 	mutex_unlock(&ipvs->est_mutex);
2381 
2382 out:
2383 	free_cpumask_var(newmask);
2384 	return ret;
2385 }
2386 
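/*
 *	Report the effective est_cpulist: the configured mask when
 *	valid, otherwise the housekeeping (HK_TYPE_KTHREAD) cpumask.
 */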
2387 static int ipvs_proc_est_cpumask_get(const struct ctl_table *table,
2388 				     void *buffer, size_t size)
2389 {
2390 	struct netns_ipvs *ipvs = table->extra2;
2391 	cpumask_var_t *valp = table->data;
2392 	struct cpumask *mask;
2393 	int ret;
2394 
2395 	mutex_lock(&ipvs->est_mutex);
2396 
2397 	/* HK_TYPE_KTHREAD cpumask needs RCU protection */
2398 	scoped_guard(rcu) {
2399 		if (ipvs->est_cpulist_valid)
2400 			mask = *valp;
2401 		else
2402 			mask = (struct cpumask *)housekeeping_cpumask(HK_TYPE_KTHREAD);
2403 		ret = scnprintf(buffer, size, "%*pbl\n", cpumask_pr_args(mask));
2404 	}
2405 
2406 	mutex_unlock(&ipvs->est_mutex);
2407 
2408 	return ret;
2409 }
2410 
2411 static int ipvs_proc_est_cpulist(const struct ctl_table *table, int write,
2412 				 void *buffer, size_t *lenp, loff_t *ppos)
2413 {
2414 	int ret;
2415 
	/* Ignore both read and write (append) if *ppos is not 0 */
2417 	if (*ppos || !*lenp) {
2418 		*lenp = 0;
2419 		return 0;
2420 	}
2421 	if (write) {
2422 		/* proc_sys_call_handler() appends terminator */
2423 		ret = ipvs_proc_est_cpumask_set(table, buffer);
2424 		if (ret >= 0)
2425 			*ppos += *lenp;
2426 	} else {
2427 		/* proc_sys_call_handler() allocates 1 byte for terminator */
2428 		ret = ipvs_proc_est_cpumask_get(table, buffer, *lenp + 1);
2429 		if (ret >= 0) {
2430 			*lenp = ret;
2431 			*ppos += *lenp;
2432 			ret = 0;
2433 		}
2434 	}
2435 	return ret;
2436 }
2437 
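/*
 *	Handler for the est_nice sysctl: accept values in the
 *	MIN_NICE..MAX_NICE range and restart the estimator kthreads
 *	on change.
 */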
2438 static int ipvs_proc_est_nice(const struct ctl_table *table, int write,
2439 			      void *buffer, size_t *lenp, loff_t *ppos)
2440 {
2441 	struct netns_ipvs *ipvs = table->extra2;
2442 	int *valp = table->data;
2443 	int val = *valp;
2444 	int ret;
2445 
2446 	struct ctl_table tmp_table = {
2447 		.data = &val,
2448 		.maxlen = sizeof(int),
2449 		.mode = table->mode,
2450 	};
2451 
2452 	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
2453 	if (write && ret >= 0) {
2454 		if (val < MIN_NICE || val > MAX_NICE) {
2455 			ret = -EINVAL;
2456 		} else {
2457 			mutex_lock(&ipvs->est_mutex);
2458 			if (*valp != val) {
2459 				*valp = val;
2460 				ip_vs_est_reload_start(ipvs, true);
2461 			}
2462 			mutex_unlock(&ipvs->est_mutex);
2463 		}
2464 	}
2465 	return ret;
2466 }
2467 
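/*
 *	Handler for the run_estimation sysctl: restart the estimator
 *	kthreads when the value changes.
 */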
2468 static int ipvs_proc_run_estimation(const struct ctl_table *table, int write,
2469 				    void *buffer, size_t *lenp, loff_t *ppos)
2470 {
2471 	struct netns_ipvs *ipvs = table->extra2;
2472 	int *valp = table->data;
2473 	int val = *valp;
2474 	int ret;
2475 
2476 	struct ctl_table tmp_table = {
2477 		.data = &val,
2478 		.maxlen = sizeof(int),
2479 		.mode = table->mode,
2480 	};
2481 
2482 	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
2483 	if (write && ret >= 0) {
2484 		mutex_lock(&ipvs->est_mutex);
2485 		if (*valp != val) {
2486 			*valp = val;
2487 			ip_vs_est_reload_start(ipvs, true);
2488 		}
2489 		mutex_unlock(&ipvs->est_mutex);
2490 	}
2491 	return ret;
2492 }
2493 
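/*
 *	Handler for the conn_lfactor sysctl: accept load factors in
 *	the -8..8 range and trigger a resize check for the conn table.
 */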
2494 static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write,
2495 				  void *buffer, size_t *lenp, loff_t *ppos)
2496 {
2497 	struct netns_ipvs *ipvs = table->extra2;
2498 	int *valp = table->data;
2499 	int val = *valp;
2500 	int ret;
2501 
2502 	struct ctl_table tmp_table = {
2503 		.data = &val,
2504 		.maxlen = sizeof(int),
2505 	};
2506 
2507 	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
2508 	if (write && ret >= 0) {
2509 		if (val < -8 || val > 8) {
2510 			ret = -EINVAL;
2511 		} else {
2512 			WRITE_ONCE(*valp, val);
2513 			if (rcu_access_pointer(ipvs->conn_tab))
2514 				mod_delayed_work(system_unbound_wq,
2515 						 &ipvs->conn_resize_work, 0);
2516 		}
2517 	}
2518 	return ret;
2519 }
2520 
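/*
 *	Handler for the svc_lfactor sysctl: accept load factors in
 *	the -8..8 range and trigger a resize check for the service
 *	table while holding service_mutex.
 */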
2521 static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write,
2522 				 void *buffer, size_t *lenp, loff_t *ppos)
2523 {
2524 	struct netns_ipvs *ipvs = table->extra2;
2525 	int *valp = table->data;
2526 	int val = *valp;
2527 	int ret;
2528 
2529 	struct ctl_table tmp_table = {
2530 		.data = &val,
2531 		.maxlen = sizeof(int),
2532 	};
2533 
2534 	ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
2535 	if (write && ret >= 0) {
2536 		if (val < -8 || val > 8) {
2537 			ret = -EINVAL;
2538 		} else {
2539 			mutex_lock(&ipvs->service_mutex);
2540 			WRITE_ONCE(*valp, val);
2541 			/* Make sure the services are present */
2542 			if (rcu_access_pointer(ipvs->svc_table) &&
2543 			    READ_ONCE(ipvs->enable) &&
2544 			    !test_bit(IP_VS_WORK_SVC_NORESIZE,
2545 				      &ipvs->work_flags))
2546 				mod_delayed_work(system_unbound_wq,
2547 						 &ipvs->svc_resize_work, 0);
2548 			mutex_unlock(&ipvs->service_mutex);
2549 		}
2550 	}
2551 	return ret;
2552 }
2553 
2554 /*
2555  *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *	Do not change the order or insert new entries without
 *	aligning with the netns init in ip_vs_control_net_init()
2558  */
2559 
2560 static struct ctl_table vs_vars[] = {
2561 	{
2562 		.procname	= "amemthresh",
2563 		.maxlen		= sizeof(int),
2564 		.mode		= 0644,
2565 		.proc_handler	= proc_dointvec,
2566 	},
2567 	{
2568 		.procname	= "am_droprate",
2569 		.maxlen		= sizeof(int),
2570 		.mode		= 0644,
2571 		.proc_handler	= proc_dointvec,
2572 	},
2573 	{
2574 		.procname	= "drop_entry",
2575 		.maxlen		= sizeof(int),
2576 		.mode		= 0644,
2577 		.proc_handler	= proc_do_defense_mode,
2578 	},
2579 	{
2580 		.procname	= "drop_packet",
2581 		.maxlen		= sizeof(int),
2582 		.mode		= 0644,
2583 		.proc_handler	= proc_do_defense_mode,
2584 	},
2585 #ifdef CONFIG_IP_VS_NFCT
2586 	{
2587 		.procname	= "conntrack",
2588 		.maxlen		= sizeof(int),
2589 		.mode		= 0644,
		.proc_handler	= proc_dointvec,
2591 	},
2592 #endif
2593 	{
2594 		.procname	= "secure_tcp",
2595 		.maxlen		= sizeof(int),
2596 		.mode		= 0644,
2597 		.proc_handler	= proc_do_defense_mode,
2598 	},
2599 	{
2600 		.procname	= "snat_reroute",
2601 		.maxlen		= sizeof(int),
2602 		.mode		= 0644,
		.proc_handler	= proc_dointvec,
2604 	},
2605 	{
2606 		.procname	= "sync_version",
2607 		.maxlen		= sizeof(int),
2608 		.mode		= 0644,
2609 		.proc_handler	= proc_dointvec_minmax,
2610 		.extra1		= SYSCTL_ZERO,
2611 		.extra2		= SYSCTL_ONE,
2612 	},
2613 	{
2614 		.procname	= "sync_ports",
2615 		.maxlen		= sizeof(int),
2616 		.mode		= 0644,
2617 		.proc_handler	= proc_do_sync_ports,
2618 	},
2619 	{
2620 		.procname	= "sync_persist_mode",
2621 		.maxlen		= sizeof(int),
2622 		.mode		= 0644,
2623 		.proc_handler	= proc_dointvec,
2624 	},
2625 	{
2626 		.procname	= "sync_qlen_max",
2627 		.maxlen		= sizeof(unsigned long),
2628 		.mode		= 0644,
2629 		.proc_handler	= proc_doulongvec_minmax,
2630 	},
2631 	{
2632 		.procname	= "sync_sock_size",
2633 		.maxlen		= sizeof(int),
2634 		.mode		= 0644,
2635 		.proc_handler	= proc_dointvec,
2636 	},
2637 	{
2638 		.procname	= "cache_bypass",
2639 		.maxlen		= sizeof(int),
2640 		.mode		= 0644,
2641 		.proc_handler	= proc_dointvec,
2642 	},
2643 	{
2644 		.procname	= "expire_nodest_conn",
2645 		.maxlen		= sizeof(int),
2646 		.mode		= 0644,
2647 		.proc_handler	= proc_dointvec,
2648 	},
2649 	{
2650 		.procname	= "sloppy_tcp",
2651 		.maxlen		= sizeof(int),
2652 		.mode		= 0644,
2653 		.proc_handler	= proc_dointvec,
2654 	},
2655 	{
2656 		.procname	= "sloppy_sctp",
2657 		.maxlen		= sizeof(int),
2658 		.mode		= 0644,
2659 		.proc_handler	= proc_dointvec,
2660 	},
2661 	{
2662 		.procname	= "expire_quiescent_template",
2663 		.maxlen		= sizeof(int),
2664 		.mode		= 0644,
2665 		.proc_handler	= proc_dointvec,
2666 	},
2667 	{
2668 		.procname	= "sync_threshold",
2669 		.maxlen		=
2670 			sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
2671 		.mode		= 0644,
2672 		.proc_handler	= proc_do_sync_threshold,
2673 	},
2674 	{
2675 		.procname	= "sync_refresh_period",
2676 		.maxlen		= sizeof(int),
2677 		.mode		= 0644,
2678 		.proc_handler	= proc_dointvec_jiffies,
2679 	},
2680 	{
2681 		.procname	= "sync_retries",
2682 		.maxlen		= sizeof(int),
2683 		.mode		= 0644,
2684 		.proc_handler	= proc_dointvec_minmax,
2685 		.extra1		= SYSCTL_ZERO,
2686 		.extra2		= SYSCTL_THREE,
2687 	},
2688 	{
2689 		.procname	= "nat_icmp_send",
2690 		.maxlen		= sizeof(int),
2691 		.mode		= 0644,
2692 		.proc_handler	= proc_dointvec,
2693 	},
2694 	{
2695 		.procname	= "pmtu_disc",
2696 		.maxlen		= sizeof(int),
2697 		.mode		= 0644,
2698 		.proc_handler	= proc_dointvec,
2699 	},
2700 	{
2701 		.procname	= "backup_only",
2702 		.maxlen		= sizeof(int),
2703 		.mode		= 0644,
2704 		.proc_handler	= proc_dointvec,
2705 	},
2706 	{
2707 		.procname	= "conn_reuse_mode",
2708 		.maxlen		= sizeof(int),
2709 		.mode		= 0644,
2710 		.proc_handler	= proc_dointvec,
2711 	},
2712 	{
2713 		.procname	= "schedule_icmp",
2714 		.maxlen		= sizeof(int),
2715 		.mode		= 0644,
2716 		.proc_handler	= proc_dointvec,
2717 	},
2718 	{
2719 		.procname	= "ignore_tunneled",
2720 		.maxlen		= sizeof(int),
2721 		.mode		= 0644,
2722 		.proc_handler	= proc_dointvec,
2723 	},
2724 	{
2725 		.procname	= "run_estimation",
2726 		.maxlen		= sizeof(int),
2727 		.mode		= 0644,
2728 		.proc_handler	= ipvs_proc_run_estimation,
2729 	},
2730 	{
2731 		.procname	= "est_cpulist",
2732 		.maxlen		= NR_CPUS,	/* unused */
2733 		.mode		= 0644,
2734 		.proc_handler	= ipvs_proc_est_cpulist,
2735 	},
2736 	{
2737 		.procname	= "est_nice",
2738 		.maxlen		= sizeof(int),
2739 		.mode		= 0644,
2740 		.proc_handler	= ipvs_proc_est_nice,
2741 	},
2742 	{
2743 		.procname	= "conn_lfactor",
2744 		.maxlen		= sizeof(int),
2745 		.mode		= 0644,
2746 		.proc_handler	= ipvs_proc_conn_lfactor,
2747 	},
2748 	{
2749 		.procname	= "svc_lfactor",
2750 		.maxlen		= sizeof(int),
2751 		.mode		= 0644,
2752 		.proc_handler	= ipvs_proc_svc_lfactor,
2753 	},
2754 #ifdef CONFIG_IP_VS_DEBUG
2755 	{
2756 		.procname	= "debug_level",
2757 		.data		= &sysctl_ip_vs_debug_level,
2758 		.maxlen		= sizeof(int),
2759 		.mode		= 0644,
2760 		.proc_handler	= proc_dointvec,
2761 	},
2762 #endif
2763 };
2764 
#endif /* CONFIG_SYSCTL */
2766 
2767 #ifdef CONFIG_PROC_FS
2768 
2769 struct ip_vs_iter {
	struct seq_net_private p;  /* Do not move this, netns depends upon it */
2771 	struct ip_vs_rht *t;
2772 	u32 bucket;
2773 };
2774 
2775 /*
2776  *	Write the contents of the VS rule table to a PROCfs file.
2777  *	(It is kept just for backward compatibility)
2778  */
2779 static inline const char *ip_vs_fwd_name(unsigned int flags)
2780 {
2781 	switch (flags & IP_VS_CONN_F_FWD_MASK) {
2782 	case IP_VS_CONN_F_LOCALNODE:
2783 		return "Local";
2784 	case IP_VS_CONN_F_TUNNEL:
2785 		return "Tunnel";
2786 	case IP_VS_CONN_F_DROUTE:
2787 		return "Route";
2788 	default:
2789 		return "Masq";
2790 	}
2791 }
2792 
/* Do not expect consistent view during add, del and move (table resize).
2794  * We may miss entries and even show duplicates.
2795  */
2796 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
2797 {
2798 	struct ip_vs_iter *iter = seq->private;
2799 	struct ip_vs_rht *t = iter->t;
2800 	struct ip_vs_service *svc;
2801 	struct hlist_bl_node *e;
2802 	int idx;
2803 
2804 	if (!t)
2805 		return NULL;
2806 	for (idx = 0; idx < t->size; idx++) {
2807 		hlist_bl_for_each_entry_rcu(svc, e, &t->buckets[idx], s_list) {
2808 			if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key)))
2809 				break;
2810 			if (pos-- == 0) {
2811 				iter->bucket = idx;
2812 				return svc;
2813 			}
2814 		}
2815 	}
2816 	return NULL;
2817 }
2818 
2819 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
2820 	__acquires(RCU)
2821 {
2822 	struct ip_vs_iter *iter = seq->private;
2823 	struct net *net = seq_file_net(seq);
2824 	struct netns_ipvs *ipvs = net_ipvs(net);
2825 
2826 	rcu_read_lock();
2827 	iter->t = rcu_dereference(ipvs->svc_table);
2828 	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
2829 }
2830 
2832 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2833 {
2834 	struct ip_vs_service *svc;
2835 	struct ip_vs_iter *iter;
2836 	struct hlist_bl_node *e;
2837 	struct ip_vs_rht *t;
2838 
2839 	++*pos;
2840 	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq, 0);
2842 
2843 	svc = v;
2844 	iter = seq->private;
2845 	t = iter->t;
2846 	if (!t)
2847 		return NULL;
2848 
2849 	hlist_bl_for_each_entry_continue_rcu(svc, e, s_list) {
		/* Was our cursor moved to a new table? */
2851 		if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key)))
2852 			break;
2853 		return svc;
2854 	}
2855 
2856 	while (++iter->bucket < t->size) {
2857 		hlist_bl_for_each_entry_rcu(svc, e, &t->buckets[iter->bucket],
2858 					    s_list) {
2859 			if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key)))
2860 				break;
2861 			return svc;
2862 		}
2863 	}
2864 	return NULL;
2865 }
2866 
2867 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2868 	__releases(RCU)
2869 {
2870 	rcu_read_unlock();
2871 }
2872 
2874 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2875 {
2876 	struct net *net = seq_file_net(seq);
2877 	struct netns_ipvs *ipvs = net_ipvs(net);
2878 
2879 	if (v == SEQ_START_TOKEN) {
2880 		seq_printf(seq,
2881 			"IP Virtual Server version %d.%d.%d (size=%d)\n",
2882 			NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs));
2883 		seq_puts(seq,
2884 			 "Prot LocalAddress:Port Scheduler Flags\n");
2885 		seq_puts(seq,
2886 			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2887 	} else {
2888 		const struct ip_vs_service *svc = v;
2889 		const struct ip_vs_dest *dest;
2890 		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2891 		char *sched_name = sched ? sched->name : "none";
2892 
2893 		if (!svc->fwmark) {
2894 #ifdef CONFIG_IP_VS_IPV6
2895 			if (svc->af == AF_INET6)
2896 				seq_printf(seq, "%s  [%pI6]:%04X %s ",
2897 					   ip_vs_proto_name(svc->protocol),
2898 					   &svc->addr.in6,
2899 					   ntohs(svc->port),
2900 					   sched_name);
2901 			else
2902 #endif
2903 				seq_printf(seq, "%s  %08X:%04X %s %s ",
2904 					   ip_vs_proto_name(svc->protocol),
2905 					   ntohl(svc->addr.ip),
2906 					   ntohs(svc->port),
2907 					   sched_name,
					   (svc->flags & IP_VS_SVC_F_ONEPACKET) ? "ops " : "");
2909 		} else {
2910 			seq_printf(seq, "FWM  %08X %s %s",
2911 				   svc->fwmark, sched_name,
				   (svc->flags & IP_VS_SVC_F_ONEPACKET) ? "ops " : "");
2913 		}
2914 
2915 		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2916 			seq_printf(seq, "persistent %d %08X\n",
2917 				svc->timeout,
2918 				ntohl(svc->netmask));
2919 		else
2920 			seq_putc(seq, '\n');
2921 
2922 		list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2923 #ifdef CONFIG_IP_VS_IPV6
2924 			if (dest->af == AF_INET6)
2925 				seq_printf(seq,
2926 					   "  -> [%pI6]:%04X"
2927 					   "      %-7s %-6d %-10d %-10d\n",
2928 					   &dest->addr.in6,
2929 					   ntohs(dest->port),
2930 					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2931 					   atomic_read(&dest->weight),
2932 					   atomic_read(&dest->activeconns),
2933 					   atomic_read(&dest->inactconns));
2934 			else
2935 #endif
2936 				seq_printf(seq,
2937 					   "  -> %08X:%04X      "
2938 					   "%-7s %-6d %-10d %-10d\n",
2939 					   ntohl(dest->addr.ip),
2940 					   ntohs(dest->port),
2941 					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2942 					   atomic_read(&dest->weight),
2943 					   atomic_read(&dest->activeconns),
2944 					   atomic_read(&dest->inactconns));
2945 
2946 		}
2947 	}
2948 	return 0;
2949 }
2950 
2951 static const struct seq_operations ip_vs_info_seq_ops = {
2952 	.start = ip_vs_info_seq_start,
2953 	.next  = ip_vs_info_seq_next,
2954 	.stop  = ip_vs_info_seq_stop,
2955 	.show  = ip_vs_info_seq_show,
2956 };
2957 
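/* Show the totals and the estimated rates from tot_stats */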
2958 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2959 {
2960 	struct net *net = seq_file_single_net(seq);
2961 	struct ip_vs_kstats show;
2962 
2963 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2964 	seq_puts(seq,
2965 		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
2966 	seq_puts(seq,
2967 		 "   Conns  Packets  Packets            Bytes            Bytes\n");
2968 
2969 	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats->s);
2970 	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
2971 		   (unsigned long long)show.conns,
2972 		   (unsigned long long)show.inpkts,
2973 		   (unsigned long long)show.outpkts,
2974 		   (unsigned long long)show.inbytes,
2975 		   (unsigned long long)show.outbytes);
2976 
2977 /*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
2978 	seq_puts(seq,
2979 		 " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2980 	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
2981 		   (unsigned long long)show.cps,
2982 		   (unsigned long long)show.inpps,
2983 		   (unsigned long long)show.outpps,
2984 		   (unsigned long long)show.inbps,
2985 		   (unsigned long long)show.outbps);
2986 
2987 	return 0;
2988 }
2989 
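/* Show the per-CPU counters followed by the totals and rates */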
2990 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2991 {
2992 	struct net *net = seq_file_single_net(seq);
2993 	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats->s;
2994 	struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2995 	struct ip_vs_kstats kstats;
2996 	int i;
2997 
2998 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2999 	seq_puts(seq,
3000 		 "       Total Incoming Outgoing         Incoming         Outgoing\n");
3001 	seq_puts(seq,
3002 		 "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
3003 
3004 	for_each_possible_cpu(i) {
3005 		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
3006 		unsigned int start;
3007 		u64 conns, inpkts, outpkts, inbytes, outbytes;
3008 
3009 		do {
3010 			start = u64_stats_fetch_begin(&u->syncp);
3011 			conns = u64_stats_read(&u->cnt.conns);
3012 			inpkts = u64_stats_read(&u->cnt.inpkts);
3013 			outpkts = u64_stats_read(&u->cnt.outpkts);
3014 			inbytes = u64_stats_read(&u->cnt.inbytes);
3015 			outbytes = u64_stats_read(&u->cnt.outbytes);
3016 		} while (u64_stats_fetch_retry(&u->syncp, start));
3017 
3018 		seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
3019 			   i, (u64)conns, (u64)inpkts,
3020 			   (u64)outpkts, (u64)inbytes,
3021 			   (u64)outbytes);
3022 	}
3023 
3024 	ip_vs_copy_stats(&kstats, tot_stats);
3025 
3026 	seq_printf(seq, "  ~ %8LX %8LX %8LX %16LX %16LX\n\n",
3027 		   (unsigned long long)kstats.conns,
3028 		   (unsigned long long)kstats.inpkts,
3029 		   (unsigned long long)kstats.outpkts,
3030 		   (unsigned long long)kstats.inbytes,
3031 		   (unsigned long long)kstats.outbytes);
3032 
3033 /*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
3034 	seq_puts(seq,
3035 		 "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
3036 	seq_printf(seq, "    %8LX %8LX %8LX %16LX %16LX\n",
3037 		   kstats.cps,
3038 		   kstats.inpps,
3039 		   kstats.outpps,
3040 		   kstats.inbps,
3041 		   kstats.outbps);
3042 
3043 	return 0;
3044 }
3045 
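/*
 *	Show occupancy histograms for the conn and service hash tables
 *	and the estimator kthread state. Tables are walked under RCU;
 *	the walk restarts when resizing installs a new table.
 */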
3046 static int ip_vs_status_show(struct seq_file *seq, void *v)
3047 {
3048 	struct net *net = seq_file_single_net(seq);
3049 	struct netns_ipvs *ipvs = net_ipvs(net);
3050 	unsigned int resched_score = 0;
3051 	struct ip_vs_conn_hnode *hn;
3052 	struct hlist_bl_head *head;
3053 	struct ip_vs_service *svc;
3054 	struct ip_vs_rht *t, *pt;
3055 	struct hlist_bl_node *e;
3056 	int old_gen, new_gen;
3057 	u32 counts[8];
3058 	u32 bucket;
3059 	u32 count;
3060 	int loops;
3061 	u32 sum1;
3062 	u32 sum;
3063 	int i;
3064 
3065 	rcu_read_lock();
3066 
3067 	t = rcu_dereference(ipvs->conn_tab);
3068 
3069 	seq_printf(seq, "Conns:\t%d\n", atomic_read(&ipvs->conn_count));
3070 	seq_printf(seq, "Conn buckets:\t%d (%d bits, lfactor %d)\n",
3071 		   t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);
3072 
3073 	if (!atomic_read(&ipvs->conn_count))
3074 		goto after_conns;
3075 	old_gen = atomic_read(&ipvs->conn_tab_changes);
3076 	loops = 0;
3077 
3078 repeat_conn:
3079 	smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */
3080 	memset(counts, 0, sizeof(counts));
3081 	ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) {
3082 		for (bucket = 0; bucket < t->size; bucket++) {
3083 			DECLARE_IP_VS_RHT_WALK_BUCKET_RCU();
3084 
3085 			count = 0;
3086 			resched_score++;
3087 			ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
3088 				count = 0;
3089 				hlist_bl_for_each_entry_rcu(hn, e, head, node) {
3090 					count++;
3091 					if (count >= ARRAY_SIZE(counts) - 1)
3092 						break;
3093 				}
3094 			}
3095 			resched_score += count;
3096 			if (resched_score >= 100) {
3097 				resched_score = 0;
3098 				cond_resched_rcu();
3099 				new_gen = atomic_read(&ipvs->conn_tab_changes);
				/* New table installed? */
3101 				if (old_gen != new_gen) {
3102 					/* Too many changes? */
3103 					if (++loops >= 5)
3104 						goto after_conns;
3105 					old_gen = new_gen;
3106 					goto repeat_conn;
3107 				}
3108 			}
3109 			counts[count]++;
3110 		}
3111 	}
3112 	for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
3113 		sum += counts[i];
3114 	sum1 = sum - counts[0];
3115 	seq_printf(seq, "Conn buckets empty:\t%u (%llu%%)\n",
3116 		   counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
3117 	for (i = 1; i < ARRAY_SIZE(counts); i++) {
3118 		if (!counts[i])
3119 			continue;
3120 		seq_printf(seq, "Conn buckets len-%d:\t%u (%llu%%)\n",
3121 			   i, counts[i],
3122 			   div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
3123 	}
3124 
3125 after_conns:
3126 	t = rcu_dereference(ipvs->svc_table);
3127 
3128 	count = ip_vs_get_num_services(ipvs);
3129 	seq_printf(seq, "Services:\t%u\n", count);
3130 	seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n",
3131 		   t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);
3132 
3133 	if (!count)
3134 		goto after_svc;
3135 	old_gen = atomic_read(&ipvs->svc_table_changes);
3136 	loops = 0;
3137 
3138 repeat_svc:
3139 	smp_rmb(); /* ipvs->svc_table and svc_table_changes */
3140 	memset(counts, 0, sizeof(counts));
3141 	ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) {
3142 		for (bucket = 0; bucket < t->size; bucket++) {
3143 			DECLARE_IP_VS_RHT_WALK_BUCKET_RCU();
3144 
3145 			count = 0;
3146 			resched_score++;
3147 			ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
3148 				count = 0;
3149 				hlist_bl_for_each_entry_rcu(svc, e, head,
3150 							    s_list) {
3151 					count++;
3152 					if (count >= ARRAY_SIZE(counts) - 1)
3153 						break;
3154 				}
3155 			}
3156 			resched_score += count;
3157 			if (resched_score >= 100) {
3158 				resched_score = 0;
3159 				cond_resched_rcu();
3160 				new_gen = atomic_read(&ipvs->svc_table_changes);
				/* New table installed? */
3162 				if (old_gen != new_gen) {
3163 					/* Too many changes? */
3164 					if (++loops >= 5)
3165 						goto after_svc;
3166 					old_gen = new_gen;
3167 					goto repeat_svc;
3168 				}
3169 			}
3170 			counts[count]++;
3171 		}
3172 	}
3173 	for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
3174 		sum += counts[i];
3175 	sum1 = sum - counts[0];
3176 	seq_printf(seq, "Service buckets empty:\t%u (%llu%%)\n",
3177 		   counts[0], div_u64((u64)counts[0] * 100U, max(sum, 1U)));
3178 	for (i = 1; i < ARRAY_SIZE(counts); i++) {
3179 		if (!counts[i])
3180 			continue;
3181 		seq_printf(seq, "Service buckets len-%d:\t%u (%llu%%)\n",
3182 			   i, counts[i],
3183 			   div_u64((u64)counts[i] * 100U, max(sum1, 1U)));
3184 	}
3185 
3186 after_svc:
3187 	seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n",
3188 		   ipvs->est_kt_count, ipvs->est_max_threads);
3189 	seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max);
3190 	seq_printf(seq, "Stats thread ests:\t%d\n",
3191 		   ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR *
3192 		   IPVS_EST_NTICKS);
3193 
3194 	rcu_read_unlock();
3195 	return 0;
3196 }
3197 
#endif /* CONFIG_PROC_FS */
3199 
3200 /*
 *	Set timeout values for tcp, tcpfin and udp in the timeout_table.
3202  */
3203 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
3204 {
3205 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
3206 	struct ip_vs_proto_data *pd;
3207 #endif
3208 
3209 	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
3210 		  u->tcp_timeout,
3211 		  u->tcp_fin_timeout,
3212 		  u->udp_timeout);
3213 
3214 #ifdef CONFIG_IP_VS_PROTO_TCP
3215 	if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) ||
3216 	    u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) {
3217 		return -EINVAL;
3218 	}
3219 #endif
3220 
3221 #ifdef CONFIG_IP_VS_PROTO_UDP
3222 	if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ))
3223 		return -EINVAL;
3224 #endif
3225 
3226 #ifdef CONFIG_IP_VS_PROTO_TCP
3227 	if (u->tcp_timeout) {
3228 		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
3229 		pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
3230 			= u->tcp_timeout * HZ;
3231 	}
3232 
3233 	if (u->tcp_fin_timeout) {
3234 		pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
3235 		pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
3236 			= u->tcp_fin_timeout * HZ;
3237 	}
3238 #endif
3239 
3240 #ifdef CONFIG_IP_VS_PROTO_UDP
3241 	if (u->udp_timeout) {
3242 		pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
3243 		pd->timeout_table[IP_VS_UDP_S_NORMAL]
3244 			= u->udp_timeout * HZ;
3245 	}
3246 #endif
3247 	return 0;
3248 }
3249 
#define CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
3251 
3252 struct ip_vs_svcdest_user {
3253 	struct ip_vs_service_user	s;
3254 	struct ip_vs_dest_user		d;
3255 };
3256 
3257 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
3258 	[CMDID(IP_VS_SO_SET_ADD)]         = sizeof(struct ip_vs_service_user),
3259 	[CMDID(IP_VS_SO_SET_EDIT)]        = sizeof(struct ip_vs_service_user),
3260 	[CMDID(IP_VS_SO_SET_DEL)]         = sizeof(struct ip_vs_service_user),
3261 	[CMDID(IP_VS_SO_SET_ADDDEST)]     = sizeof(struct ip_vs_svcdest_user),
3262 	[CMDID(IP_VS_SO_SET_DELDEST)]     = sizeof(struct ip_vs_svcdest_user),
3263 	[CMDID(IP_VS_SO_SET_EDITDEST)]    = sizeof(struct ip_vs_svcdest_user),
3264 	[CMDID(IP_VS_SO_SET_TIMEOUT)]     = sizeof(struct ip_vs_timeout_user),
3265 	[CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
3266 	[CMDID(IP_VS_SO_SET_STOPDAEMON)]  = sizeof(struct ip_vs_daemon_user),
3267 	[CMDID(IP_VS_SO_SET_ZERO)]        = sizeof(struct ip_vs_service_user),
3268 };
3269 
3270 union ip_vs_set_arglen {
3271 	struct ip_vs_service_user	field_IP_VS_SO_SET_ADD;
3272 	struct ip_vs_service_user	field_IP_VS_SO_SET_EDIT;
3273 	struct ip_vs_service_user	field_IP_VS_SO_SET_DEL;
3274 	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_ADDDEST;
3275 	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_DELDEST;
3276 	struct ip_vs_svcdest_user	field_IP_VS_SO_SET_EDITDEST;
3277 	struct ip_vs_timeout_user	field_IP_VS_SO_SET_TIMEOUT;
3278 	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STARTDAEMON;
3279 	struct ip_vs_daemon_user	field_IP_VS_SO_SET_STOPDAEMON;
3280 	struct ip_vs_service_user	field_IP_VS_SO_SET_ZERO;
3281 };
3282 
3283 #define MAX_SET_ARGLEN	sizeof(union ip_vs_set_arglen)
3284 
3285 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
3286 				  struct ip_vs_service_user *usvc_compat)
3287 {
3288 	memset(usvc, 0, sizeof(*usvc));
3289 
3290 	usvc->af		= AF_INET;
3291 	usvc->protocol		= usvc_compat->protocol;
3292 	usvc->addr.ip		= usvc_compat->addr;
3293 	usvc->port		= usvc_compat->port;
3294 	usvc->fwmark		= usvc_compat->fwmark;
3295 
3296 	/* Deep copy of sched_name is not needed here */
3297 	usvc->sched_name	= usvc_compat->sched_name;
3298 
3299 	usvc->flags		= usvc_compat->flags;
3300 	usvc->timeout		= usvc_compat->timeout;
3301 	usvc->netmask		= usvc_compat->netmask;
3302 }
3303 
3304 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
3305 				   struct ip_vs_dest_user *udest_compat)
3306 {
3307 	memset(udest, 0, sizeof(*udest));
3308 
3309 	udest->addr.ip		= udest_compat->addr;
3310 	udest->port		= udest_compat->port;
3311 	udest->conn_flags	= udest_compat->conn_flags;
3312 	udest->weight		= udest_compat->weight;
3313 	udest->u_threshold	= udest_compat->u_threshold;
3314 	udest->l_threshold	= udest_compat->l_threshold;
3315 	udest->af		= AF_INET;
3316 	udest->tun_type		= IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
3317 }
3318 
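/*
 *	Handler for the legacy setsockopt interface: validate the
 *	argument length for each command and dispatch under
 *	service_mutex; the sync daemon commands use their own locking.
 */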
3319 static int
3320 do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
3321 {
3322 	struct net *net = sock_net(sk);
3323 	int ret;
3324 	unsigned char arg[MAX_SET_ARGLEN];
3325 	struct ip_vs_service_user *usvc_compat;
3326 	struct ip_vs_service_user_kern usvc;
3327 	struct ip_vs_service *svc;
3328 	struct ip_vs_dest_user *udest_compat;
3329 	struct ip_vs_dest_user_kern udest;
3330 	struct netns_ipvs *ipvs = net_ipvs(net);
3331 
3332 	BUILD_BUG_ON(sizeof(arg) > 255);
3333 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3334 		return -EPERM;
3335 
3336 	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
3337 		return -EINVAL;
3338 	if (len != set_arglen[CMDID(cmd)]) {
3339 		IP_VS_DBG(1, "set_ctl: len %u != %u\n",
3340 			  len, set_arglen[CMDID(cmd)]);
3341 		return -EINVAL;
3342 	}
3343 
3344 	if (copy_from_sockptr(arg, ptr, len) != 0)
3345 		return -EFAULT;
3346 
3347 	/* Handle daemons since they have another lock */
3348 	if (cmd == IP_VS_SO_SET_STARTDAEMON ||
3349 	    cmd == IP_VS_SO_SET_STOPDAEMON) {
3350 		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
3351 
3352 		if (cmd == IP_VS_SO_SET_STARTDAEMON) {
3353 			struct ipvs_sync_daemon_cfg cfg;
3354 
3355 			memset(&cfg, 0, sizeof(cfg));
3356 			ret = -EINVAL;
3357 			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
3358 				    sizeof(cfg.mcast_ifn)) <= 0)
3359 				return ret;
3360 			cfg.syncid = dm->syncid;
3361 			ret = start_sync_thread(ipvs, &cfg, dm->state);
3362 		} else {
3363 			ret = stop_sync_thread(ipvs, dm->state);
3364 		}
3365 		return ret;
3366 	}
3367 
3368 	mutex_lock(&ipvs->service_mutex);
3369 	if (cmd == IP_VS_SO_SET_FLUSH) {
3370 		/* Flush the virtual service */
3371 		ret = ip_vs_flush(ipvs, false);
3372 		goto out_unlock;
3373 	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp, tcpfin, udp) */
3375 		ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
3376 		goto out_unlock;
3377 	} else if (!len) {
3378 		/* No more commands with len == 0 below */
3379 		ret = -EINVAL;
3380 		goto out_unlock;
3381 	}
3382 
3383 	usvc_compat = (struct ip_vs_service_user *)arg;
3384 	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
3385 
3386 	/* We only use the new structs internally, so copy userspace compat
3387 	 * structs to extended internal versions */
3388 	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
3389 	ip_vs_copy_udest_compat(&udest, udest_compat);
3390 
3391 	if (cmd == IP_VS_SO_SET_ZERO) {
3392 		/* if no service address is set, zero counters in all */
3393 		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
3394 			ret = ip_vs_zero_all(ipvs);
3395 			goto out_unlock;
3396 		}
3397 	}
3398 
3399 	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
3400 	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
3401 	    IP_VS_SCHEDNAME_MAXLEN) {
3402 		ret = -EINVAL;
3403 		goto out_unlock;
3404 	}
3405 
3406 	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
3407 	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
3408 	    usvc.protocol != IPPROTO_SCTP) {
3409 		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
3410 		       usvc.protocol, &usvc.addr.ip,
3411 		       ntohs(usvc.port));
3412 		ret = -EFAULT;
3413 		goto out_unlock;
3414 	}
3415 
3416 	/* Lookup the exact service by <protocol, addr, port> or fwmark */
3417 	rcu_read_lock();
3418 	if (usvc.fwmark == 0)
3419 		svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
3420 					   &usvc.addr, usvc.port);
3421 	else
3422 		svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
3423 	rcu_read_unlock();
3424 
	if (cmd != IP_VS_SO_SET_ADD &&
	    (svc == NULL || svc->protocol != usvc.protocol)) {
3427 		ret = -ESRCH;
3428 		goto out_unlock;
3429 	}
3430 
3431 	switch (cmd) {
3432 	case IP_VS_SO_SET_ADD:
3433 		if (svc != NULL)
3434 			ret = -EEXIST;
3435 		else
3436 			ret = ip_vs_add_service(ipvs, &usvc, &svc);
3437 		break;
3438 	case IP_VS_SO_SET_EDIT:
3439 		ret = ip_vs_edit_service(svc, &usvc);
3440 		break;
3441 	case IP_VS_SO_SET_DEL:
3442 		ret = ip_vs_del_service(svc);
3443 		if (!ret)
3444 			goto out_unlock;
3445 		break;
3446 	case IP_VS_SO_SET_ZERO:
3447 		ret = ip_vs_zero_service(svc);
3448 		break;
3449 	case IP_VS_SO_SET_ADDDEST:
3450 		ret = ip_vs_add_dest(svc, &udest);
3451 		break;
3452 	case IP_VS_SO_SET_EDITDEST:
3453 		ret = ip_vs_edit_dest(svc, &udest);
3454 		break;
3455 	case IP_VS_SO_SET_DELDEST:
3456 		ret = ip_vs_del_dest(svc, &udest);
3457 		break;
3458 	default:
3459 		WARN_ON_ONCE(1);
3460 		ret = -EINVAL;
3461 		break;
3462 	}
3463 
3464   out_unlock:
3465 	mutex_unlock(&ipvs->service_mutex);
3466 	return ret;
3467 }
3468 
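/* Fill a legacy service entry from a service */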
3470 static void
3471 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
3472 {
3473 	struct ip_vs_scheduler *sched;
3474 	struct ip_vs_kstats kstats;
3475 	char *sched_name;
3476 
3477 	sched = rcu_dereference_protected(src->scheduler, 1);
3478 	sched_name = sched ? sched->name : "none";
3479 	dst->protocol = src->protocol;
3480 	dst->addr = src->addr.ip;
3481 	dst->port = src->port;
3482 	dst->fwmark = src->fwmark;
3483 	strscpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
3484 	dst->flags = src->flags;
3485 	dst->timeout = src->timeout / HZ;
3486 	dst->netmask = src->netmask;
3487 	dst->num_dests = src->num_dests;
3488 	ip_vs_copy_stats(&kstats, &src->stats);
3489 	ip_vs_export_stats_user(&dst->stats, &kstats);
3490 }
3491 
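/*
 *	Dump all IPv4 services to userspace. The caller holds both
 *	svc_resize_sem and service_mutex, so the table cannot change
 *	under us.
 */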
3492 static inline int
3493 __ip_vs_get_service_entries(struct netns_ipvs *ipvs,
3494 			    const struct ip_vs_get_services *get,
3495 			    struct ip_vs_get_services __user *uptr)
3496 {
3497 	struct ip_vs_service_entry entry;
3498 	DECLARE_IP_VS_RHT_WALK_BUCKETS();
3499 	struct hlist_bl_head *head;
3500 	struct ip_vs_service *svc;
3501 	struct hlist_bl_node *e;
3502 	int count = 0;
3503 	int ret = 0;
3504 
3505 	lockdep_assert_held(&ipvs->svc_resize_sem);
3506 	/* All service modifications are disabled, go ahead */
3507 	ip_vs_rht_walk_buckets(ipvs->svc_table, head) {
3508 		hlist_bl_for_each_entry(svc, e, head, s_list) {
3509 			/* Only expose IPv4 entries to old interface */
3510 			if (svc->af != AF_INET)
3511 				continue;
3512 
3513 			if (count >= get->num_services)
3514 				goto out;
3515 			memset(&entry, 0, sizeof(entry));
3516 			ip_vs_copy_service(&entry, svc);
3517 			if (copy_to_user(&uptr->entrytable[count],
3518 					 &entry, sizeof(entry))) {
3519 				ret = -EFAULT;
3520 				goto out;
3521 			}
3522 			count++;
3523 		}
3524 	}
3525 
3526 out:
3527 	return ret;
3528 }
3529 
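/* Dump the destinations of an IPv4 service to userspace */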
3530 static inline int
3531 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
3532 			 struct ip_vs_get_dests __user *uptr)
3533 {
3534 	struct ip_vs_service *svc;
3535 	union nf_inet_addr addr = { .ip = get->addr };
3536 	int ret = 0;
3537 
3538 	rcu_read_lock();
3539 	if (get->fwmark)
3540 		svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
3541 	else
3542 		svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
3543 					   get->port);
3544 	rcu_read_unlock();
3545 
3546 	if (svc) {
3547 		int count = 0;
3548 		struct ip_vs_dest *dest;
3549 		struct ip_vs_dest_entry entry;
3550 		struct ip_vs_kstats kstats;
3551 
3552 		memset(&entry, 0, sizeof(entry));
3553 		list_for_each_entry(dest, &svc->destinations, n_list) {
3554 			if (count >= get->num_dests)
3555 				break;
3556 
3557 			/* Cannot expose heterogeneous members via sockopt
3558 			 * interface
3559 			 */
3560 			if (dest->af != svc->af)
3561 				continue;
3562 
3563 			entry.addr = dest->addr.ip;
3564 			entry.port = dest->port;
3565 			entry.conn_flags = atomic_read(&dest->conn_flags);
3566 			entry.weight = atomic_read(&dest->weight);
3567 			entry.u_threshold = dest->u_threshold;
3568 			entry.l_threshold = dest->l_threshold;
3569 			entry.activeconns = atomic_read(&dest->activeconns);
3570 			entry.inactconns = atomic_read(&dest->inactconns);
3571 			entry.persistconns = atomic_read(&dest->persistconns);
3572 			ip_vs_copy_stats(&kstats, &dest->stats);
3573 			ip_vs_export_stats_user(&entry.stats, &kstats);
3574 			if (copy_to_user(&uptr->entrytable[count],
3575 					 &entry, sizeof(entry))) {
3576 				ret = -EFAULT;
3577 				break;
3578 			}
3579 			count++;
3580 		}
	} else {
		ret = -ESRCH;
	}
3583 	return ret;
3584 }
3585 
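/* Get the current timeout values for tcp, tcpfin and udp */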
3586 static inline void
3587 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
3588 {
3589 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
3590 	struct ip_vs_proto_data *pd;
3591 #endif
3592 
	memset(u, 0, sizeof(*u));
3594 
3595 #ifdef CONFIG_IP_VS_PROTO_TCP
3596 	pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
3597 	u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
3598 	u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
3599 #endif
3600 #ifdef CONFIG_IP_VS_PROTO_UDP
3601 	pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
	u->udp_timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
3604 #endif
3605 }
3606 
3607 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
3608 	[CMDID(IP_VS_SO_GET_VERSION)]  = 64,
3609 	[CMDID(IP_VS_SO_GET_INFO)]     = sizeof(struct ip_vs_getinfo),
3610 	[CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
3611 	[CMDID(IP_VS_SO_GET_SERVICE)]  = sizeof(struct ip_vs_service_entry),
3612 	[CMDID(IP_VS_SO_GET_DESTS)]    = sizeof(struct ip_vs_get_dests),
3613 	[CMDID(IP_VS_SO_GET_TIMEOUT)]  = sizeof(struct ip_vs_timeout_user),
3614 	[CMDID(IP_VS_SO_GET_DAEMON)]   = 2 * sizeof(struct ip_vs_daemon_user),
3615 };
3616 
3617 union ip_vs_get_arglen {
3618 	char				field_IP_VS_SO_GET_VERSION[64];
3619 	struct ip_vs_getinfo		field_IP_VS_SO_GET_INFO;
3620 	struct ip_vs_get_services	field_IP_VS_SO_GET_SERVICES;
3621 	struct ip_vs_service_entry	field_IP_VS_SO_GET_SERVICE;
3622 	struct ip_vs_get_dests		field_IP_VS_SO_GET_DESTS;
3623 	struct ip_vs_timeout_user	field_IP_VS_SO_GET_TIMEOUT;
3624 	struct ip_vs_daemon_user	field_IP_VS_SO_GET_DAEMON[2];
3625 };
3626 
3627 #define MAX_GET_ARGLEN	sizeof(union ip_vs_get_arglen)
3628 
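/*
 *	Handler for the legacy getsockopt interface: validate the
 *	argument length for each command and copy the result to
 *	userspace.
 */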
3629 static int
3630 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
3631 {
3632 	unsigned char arg[MAX_GET_ARGLEN];
3633 	int ret = 0;
3634 	unsigned int copylen;
3635 	struct net *net = sock_net(sk);
3636 	struct netns_ipvs *ipvs = net_ipvs(net);
3637 
3638 	BUG_ON(!net);
3639 	BUILD_BUG_ON(sizeof(arg) > 255);
3640 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3641 		return -EPERM;
3642 
3643 	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
3644 		return -EINVAL;
3645 
3646 	copylen = get_arglen[CMDID(cmd)];
	if (*len < (int)copylen) {
3648 		IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
3649 		return -EINVAL;
3650 	}
3651 
3652 	if (copy_from_user(arg, user, copylen) != 0)
3653 		return -EFAULT;
3654 	/*
	 * Handle daemons first since they have their own locking
3656 	 */
3657 	if (cmd == IP_VS_SO_GET_DAEMON) {
3658 		struct ip_vs_daemon_user d[2];
3659 
3660 		memset(&d, 0, sizeof(d));
3661 		mutex_lock(&ipvs->sync_mutex);
3662 		if (ipvs->sync_state & IP_VS_STATE_MASTER) {
3663 			d[0].state = IP_VS_STATE_MASTER;
3664 			strscpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn,
3665 				sizeof(d[0].mcast_ifn));
3666 			d[0].syncid = ipvs->mcfg.syncid;
3667 		}
3668 		if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
3669 			d[1].state = IP_VS_STATE_BACKUP;
3670 			strscpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn,
3671 				sizeof(d[1].mcast_ifn));
3672 			d[1].syncid = ipvs->bcfg.syncid;
3673 		}
3674 		if (copy_to_user(user, &d, sizeof(d)) != 0)
3675 			ret = -EFAULT;
3676 		mutex_unlock(&ipvs->sync_mutex);
3677 		return ret;
3678 	}
3679 
3680 	if (cmd == IP_VS_SO_GET_SERVICES) {
3681 		struct ip_vs_get_services *get;
3682 		size_t size;
3683 
3684 		get = (struct ip_vs_get_services *)arg;
3685 		size = struct_size(get, entrytable, get->num_services);
3686 		if (*len != size) {
3687 			pr_err("length: %u != %zu\n", *len, size);
3688 			return -EINVAL;
3689 		}
3690 		/* Protect against table resizer moving the entries.
3691 		 * Try reverse locking, so that we do not hold the mutex
3692 		 * while waiting for semaphore.
3693 		 */
3694 		while (1) {
3695 			ret = down_read_killable(&ipvs->svc_resize_sem);
3696 			if (ret < 0)
3697 				return ret;
3698 			if (mutex_trylock(&ipvs->service_mutex))
3699 				break;
3700 			up_read(&ipvs->svc_resize_sem);
3701 			cond_resched();
3702 		}
3703 		ret = __ip_vs_get_service_entries(ipvs, get, user);
3704 		up_read(&ipvs->svc_resize_sem);
3705 		mutex_unlock(&ipvs->service_mutex);
3706 		return ret;
3707 	}
3708 
3709 	mutex_lock(&ipvs->service_mutex);
3710 	switch (cmd) {
3711 	case IP_VS_SO_GET_VERSION:
3712 	{
3713 		char buf[64];
3714 
3715 		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
3716 			NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs));
3717 		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
3718 			ret = -EFAULT;
3719 			goto out;
3720 		}
3721 		*len = strlen(buf)+1;
3722 	}
3723 	break;
3724 
3725 	case IP_VS_SO_GET_INFO:
3726 	{
3727 		struct ip_vs_getinfo info;
3728 
3729 		info.version = IP_VS_VERSION_CODE;
3730 		info.size = get_conn_tab_size(ipvs);
3731 		info.num_services =
3732 			atomic_read(&ipvs->num_services[IP_VS_AF_INET]);
3733 		if (copy_to_user(user, &info, sizeof(info)) != 0)
3734 			ret = -EFAULT;
3735 	}
3736 	break;
3737 
3738 	case IP_VS_SO_GET_SERVICE:
3739 	{
3740 		struct ip_vs_service_entry *entry;
3741 		struct ip_vs_service *svc;
3742 		union nf_inet_addr addr;
3743 
3744 		entry = (struct ip_vs_service_entry *)arg;
3745 		addr.ip = entry->addr;
3746 		rcu_read_lock();
3747 		if (entry->fwmark)
3748 			svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
3749 		else
3750 			svc = __ip_vs_service_find(ipvs, AF_INET,
3751 						   entry->protocol, &addr,
3752 						   entry->port);
3753 		rcu_read_unlock();
3754 		if (svc) {
3755 			ip_vs_copy_service(entry, svc);
3756 			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
3757 				ret = -EFAULT;
		} else {
			ret = -ESRCH;
		}
3760 	}
3761 	break;
3762 
3763 	case IP_VS_SO_GET_DESTS:
3764 	{
3765 		struct ip_vs_get_dests *get;
3766 		size_t size;
3767 
3768 		get = (struct ip_vs_get_dests *)arg;
3769 		size = struct_size(get, entrytable, get->num_dests);
3770 		if (*len != size) {
3771 			pr_err("length: %u != %zu\n", *len, size);
3772 			ret = -EINVAL;
3773 			goto out;
3774 		}
3775 		ret = __ip_vs_get_dest_entries(ipvs, get, user);
3776 	}
3777 	break;
3778 
3779 	case IP_VS_SO_GET_TIMEOUT:
3780 	{
3781 		struct ip_vs_timeout_user t;
3782 
3783 		__ip_vs_get_timeouts(ipvs, &t);
3784 		if (copy_to_user(user, &t, sizeof(t)) != 0)
3785 			ret = -EFAULT;
3786 	}
3787 	break;
3788 
3789 	default:
3790 		ret = -EINVAL;
3791 	}
3792 
3793 out:
3794 	mutex_unlock(&ipvs->service_mutex);
3795 	return ret;
3796 }
3797 
3799 static struct nf_sockopt_ops ip_vs_sockopts = {
3800 	.pf		= PF_INET,
3801 	.set_optmin	= IP_VS_BASE_CTL,
3802 	.set_optmax	= IP_VS_SO_SET_MAX+1,
3803 	.set		= do_ip_vs_set_ctl,
3804 	.get_optmin	= IP_VS_BASE_CTL,
3805 	.get_optmax	= IP_VS_SO_GET_MAX+1,
3806 	.get		= do_ip_vs_get_ctl,
3807 	.owner		= THIS_MODULE,
3808 };
3809 
3810 /*
3811  * Generic Netlink interface
3812  */
3813 
3814 /* IPVS genetlink family */
3815 static struct genl_family ip_vs_genl_family;
3816 
3817 /* Policy used for first-level command attributes */
3818 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
3819 	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
3820 	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
3821 	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
3822 	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
3823 	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
3824 	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
3825 };
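
/* Informal sketch of how these policies nest in a request (payloads
 * omitted; attribute names as defined in the IPVS uapi headers):
 *
 *	genlmsghdr (cmd = IPVS_CMD_NEW_DEST)
 *	  IPVS_CMD_ATTR_SERVICE (nested, ip_vs_svc_policy below)
 *	    IPVS_SVC_ATTR_AF, IPVS_SVC_ATTR_PROTOCOL, ...
 *	  IPVS_CMD_ATTR_DEST (nested, ip_vs_dest_policy below)
 *	    IPVS_DEST_ATTR_ADDR, IPVS_DEST_ATTR_PORT, ...
 */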
3826 
3827 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
3828 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
3829 	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
3830 	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
3831 					    .len = IP_VS_IFNAME_MAXLEN - 1 },
3832 	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
3833 	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
3834 	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
3835 	[IPVS_DAEMON_ATTR_MCAST_GROUP6]	= { .len = sizeof(struct in6_addr) },
3836 	[IPVS_DAEMON_ATTR_MCAST_PORT]	= { .type = NLA_U16 },
3837 	[IPVS_DAEMON_ATTR_MCAST_TTL]	= { .type = NLA_U8 },
3838 };
3839 
3840 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
3841 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
3842 	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
3843 	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
3844 	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
3845 					    .len = sizeof(union nf_inet_addr) },
3846 	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
3847 	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
3848 	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
3849 					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
3850 	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
3851 					    .len = IP_VS_PENAME_MAXLEN },
3852 	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
3853 					    .len = sizeof(struct ip_vs_flags) },
3854 	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
3855 	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
3856 	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
3857 };
3858 
3859 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
3860 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
3861 	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
3862 					    .len = sizeof(union nf_inet_addr) },
3863 	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
3864 	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
3865 	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
3866 	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
3867 	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
3868 	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
3869 	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
3870 	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
3871 	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
3872 	[IPVS_DEST_ATTR_ADDR_FAMILY]	= { .type = NLA_U16 },
3873 	[IPVS_DEST_ATTR_TUN_TYPE]	= { .type = NLA_U8 },
3874 	[IPVS_DEST_ATTR_TUN_PORT]	= { .type = NLA_U16 },
3875 	[IPVS_DEST_ATTR_TUN_FLAGS]	= { .type = NLA_U16 },
3876 };
3877 
3878 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
3879 				 struct ip_vs_kstats *kstats)
3880 {
3881 	struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
3882 
3883 	if (!nl_stats)
3884 		return -EMSGSIZE;
3885 
3886 	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
3887 	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
3888 	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
3889 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
3890 			      IPVS_STATS_ATTR_PAD) ||
3891 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
3892 			      IPVS_STATS_ATTR_PAD) ||
3893 	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
3894 	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
3895 	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
3896 	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
3897 	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
3898 		goto nla_put_failure;
3899 	nla_nest_end(skb, nl_stats);
3900 
3901 	return 0;
3902 
3903 nla_put_failure:
3904 	nla_nest_cancel(skb, nl_stats);
3905 	return -EMSGSIZE;
3906 }
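
/* The u32 container above serves old userspace and silently truncates
 * the 64-bit counters; ip_vs_genl_fill_stats64() below carries the same
 * values at full width, so newer userspace can prefer the *_STATS64
 * attributes when both are present.
 */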
3907 
3908 static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
3909 				   struct ip_vs_kstats *kstats)
3910 {
3911 	struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type);
3912 
3913 	if (!nl_stats)
3914 		return -EMSGSIZE;
3915 
3916 	if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns,
3917 			      IPVS_STATS_ATTR_PAD) ||
3918 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts,
3919 			      IPVS_STATS_ATTR_PAD) ||
3920 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts,
3921 			      IPVS_STATS_ATTR_PAD) ||
3922 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes,
3923 			      IPVS_STATS_ATTR_PAD) ||
3924 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes,
3925 			      IPVS_STATS_ATTR_PAD) ||
3926 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps,
3927 			      IPVS_STATS_ATTR_PAD) ||
3928 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps,
3929 			      IPVS_STATS_ATTR_PAD) ||
3930 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps,
3931 			      IPVS_STATS_ATTR_PAD) ||
3932 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps,
3933 			      IPVS_STATS_ATTR_PAD) ||
3934 	    nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps,
3935 			      IPVS_STATS_ATTR_PAD))
3936 		goto nla_put_failure;
3937 	nla_nest_end(skb, nl_stats);
3938 
3939 	return 0;
3940 
3941 nla_put_failure:
3942 	nla_nest_cancel(skb, nl_stats);
3943 	return -EMSGSIZE;
3944 }
3945 
3946 static int ip_vs_genl_fill_service(struct sk_buff *skb,
3947 				   struct ip_vs_service *svc)
3948 {
3949 	struct ip_vs_scheduler *sched;
3950 	struct ip_vs_pe *pe;
3951 	struct nlattr *nl_service;
3952 	struct ip_vs_flags flags = { .flags = svc->flags,
3953 				     .mask = ~0 };
3954 	struct ip_vs_kstats kstats;
3955 	char *sched_name;
3956 
3957 	nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE);
3958 	if (!nl_service)
3959 		return -EMSGSIZE;
3960 
3961 	if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
3962 		goto nla_put_failure;
3963 	if (svc->fwmark) {
3964 		if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
3965 			goto nla_put_failure;
3966 	} else {
3967 		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
3968 		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
3969 		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
3970 			goto nla_put_failure;
3971 	}
3972 
3973 	sched = rcu_dereference(svc->scheduler);
3974 	sched_name = sched ? sched->name : "none";
3975 	pe = rcu_dereference(svc->pe);
3976 	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
3977 	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
3978 	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
3979 	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
3980 	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
3981 		goto nla_put_failure;
3982 	ip_vs_copy_stats(&kstats, &svc->stats);
3983 	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
3984 		goto nla_put_failure;
3985 	if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
3986 		goto nla_put_failure;
3987 
3988 	nla_nest_end(skb, nl_service);
3989 
3990 	return 0;
3991 
3992 nla_put_failure:
3993 	nla_nest_cancel(skb, nl_service);
3994 	return -EMSGSIZE;
3995 }
3996 
3997 static int ip_vs_genl_dump_service(struct sk_buff *skb,
3998 				   struct ip_vs_service *svc,
3999 				   struct netlink_callback *cb)
4000 {
4001 	void *hdr;
4002 
4003 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
4004 			  &ip_vs_genl_family, NLM_F_MULTI,
4005 			  IPVS_CMD_NEW_SERVICE);
4006 	if (!hdr)
4007 		return -EMSGSIZE;
4008 
4009 	if (ip_vs_genl_fill_service(skb, svc) < 0)
4010 		goto nla_put_failure;
4011 
4012 	genlmsg_end(skb, hdr);
4013 	return 0;
4014 
4015 nla_put_failure:
4016 	genlmsg_cancel(skb, hdr);
4017 	return -EMSGSIZE;
4018 }
4019 
4020 static int ip_vs_genl_dump_services(struct sk_buff *skb,
4021 				    struct netlink_callback *cb)
4022 {
4023 	DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU();
4024 	struct net *net = sock_net(skb->sk);
4025 	struct netns_ipvs *ipvs = net_ipvs(net);
4026 	struct hlist_bl_head *head;
4027 	struct ip_vs_service *svc;
4028 	struct hlist_bl_node *e;
4029 	int start = cb->args[0];
4030 	int idx = 0;
4031 
4032 	down_read(&ipvs->svc_resize_sem);
4033 	rcu_read_lock();
4034 	ip_vs_rht_walk_buckets_safe_rcu(ipvs->svc_table, head) {
4035 		hlist_bl_for_each_entry_rcu(svc, e, head, s_list) {
4036 			if (++idx <= start)
4037 				continue;
4038 			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
4039 				idx--;
4040 				goto nla_put_failure;
4041 			}
4042 		}
4043 	}
4044 
4045 nla_put_failure:
4046 	rcu_read_unlock();
4047 	up_read(&ipvs->svc_resize_sem);
4048 	cb->args[0] = idx;
4049 
4050 	return skb->len;
4051 }
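
/* Note on resumption: netlink dumps are chunked, and cb->args[0] keeps
 * the count of services already emitted, so a follow-up invocation
 * skips them via the "++idx <= start" test above and continues where
 * the previous skb filled up. The dest and daemon dumpers below use
 * the same pattern.
 */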
4052 
4053 static bool ip_vs_is_af_valid(int af)
4054 {
4055 	if (af == AF_INET)
4056 		return true;
4057 #ifdef CONFIG_IP_VS_IPV6
4058 	if (af == AF_INET6 && ipv6_mod_enabled())
4059 		return true;
4060 #endif
4061 	return false;
4062 }
4063 
4064 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
4065 				    struct ip_vs_service_user_kern *usvc,
4066 				    struct nlattr *nla, bool full_entry,
4067 				    struct ip_vs_service **ret_svc)
4068 {
4069 	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
4070 	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
4071 	struct ip_vs_service *svc;
4072 
4073 	/* Parse mandatory identifying service fields first */
4074 	if (nla == NULL ||
4075 	    nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL))
4076 		return -EINVAL;
4077 
4078 	nla_af		= attrs[IPVS_SVC_ATTR_AF];
4079 	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
4080 	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
4081 	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
4082 	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];
4083 
4084 	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
4085 		return -EINVAL;
4086 
4087 	memset(usvc, 0, sizeof(*usvc));
4088 
4089 	usvc->af = nla_get_u16(nla_af);
4090 	if (!ip_vs_is_af_valid(usvc->af))
4091 		return -EAFNOSUPPORT;
4092 
4093 	if (nla_fwmark) {
4094 		usvc->protocol = IPPROTO_TCP;
4095 		usvc->fwmark = nla_get_u32(nla_fwmark);
4096 	} else {
4097 		usvc->protocol = nla_get_u16(nla_protocol);
4098 		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
4099 		usvc->port = nla_get_be16(nla_port);
4100 		usvc->fwmark = 0;
4101 	}
4102 
4103 	if (usvc->fwmark)
4104 		svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
4105 	else
4106 		svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
4107 					   &usvc->addr, usvc->port);
4108 	*ret_svc = svc;
4109 
4110 	/* If a full entry was requested, check for the additional fields */
4111 	if (full_entry) {
4112 		struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
4113 			      *nla_netmask;
4114 		struct ip_vs_flags flags;
4115 
4116 		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
4117 		nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
4118 		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
4119 		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
4120 		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
4121 
4122 		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
4123 			return -EINVAL;
4124 
4125 		nla_memcpy(&flags, nla_flags, sizeof(flags));
4126 
4127 		/* prefill flags from service if it already exists */
4128 		if (svc)
4129 			usvc->flags = svc->flags;
4130 
4131 		/* set new flags from userland */
4132 		usvc->flags = (usvc->flags & ~flags.mask) |
4133 			      (flags.flags & flags.mask);
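		/* e.g. existing flags 0x3, userland mask 0x1, flags 0x0:
		 * (0x3 & ~0x1) | (0x0 & 0x1) == 0x2, i.e. only the
		 * masked bit changes
		 */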
4134 		usvc->sched_name = nla_data(nla_sched);
4135 		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
4136 		usvc->timeout = nla_get_u32(nla_timeout);
4137 		usvc->netmask = nla_get_be32(nla_netmask);
4138 	}
4139 
4140 	return 0;
4141 }
4142 
4143 static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
4144 						     struct nlattr *nla)
4145 {
4146 	struct ip_vs_service_user_kern usvc;
4147 	struct ip_vs_service *svc;
4148 	int ret;
4149 
4150 	ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
4151 	return ret ? ERR_PTR(ret) : svc;
4152 }
4153 
4154 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
4155 {
4156 	struct nlattr *nl_dest;
4157 	struct ip_vs_kstats kstats;
4158 
4159 	nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST);
4160 	if (!nl_dest)
4161 		return -EMSGSIZE;
4162 
4163 	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
4164 	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
4165 	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
4166 			(atomic_read(&dest->conn_flags) &
4167 			 IP_VS_CONN_F_FWD_MASK)) ||
4168 	    nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
4169 			atomic_read(&dest->weight)) ||
4170 	    nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
4171 		       dest->tun_type) ||
4172 	    nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
4173 			 dest->tun_port) ||
4174 	    nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS,
4175 			dest->tun_flags) ||
4176 	    nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
4177 	    nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
4178 	    nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
4179 			atomic_read(&dest->activeconns)) ||
4180 	    nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
4181 			atomic_read(&dest->inactconns)) ||
4182 	    nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
4183 			atomic_read(&dest->persistconns)) ||
4184 	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
4185 		goto nla_put_failure;
4186 	ip_vs_copy_stats(&kstats, &dest->stats);
4187 	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
4188 		goto nla_put_failure;
4189 	if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
4190 		goto nla_put_failure;
4191 
4192 	nla_nest_end(skb, nl_dest);
4193 
4194 	return 0;
4195 
4196 nla_put_failure:
4197 	nla_nest_cancel(skb, nl_dest);
4198 	return -EMSGSIZE;
4199 }
4200 
4201 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
4202 				struct netlink_callback *cb)
4203 {
4204 	void *hdr;
4205 
4206 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
4207 			  &ip_vs_genl_family, NLM_F_MULTI,
4208 			  IPVS_CMD_NEW_DEST);
4209 	if (!hdr)
4210 		return -EMSGSIZE;
4211 
4212 	if (ip_vs_genl_fill_dest(skb, dest) < 0)
4213 		goto nla_put_failure;
4214 
4215 	genlmsg_end(skb, hdr);
4216 	return 0;
4217 
4218 nla_put_failure:
4219 	genlmsg_cancel(skb, hdr);
4220 	return -EMSGSIZE;
4221 }
4222 
4223 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
4224 				 struct netlink_callback *cb)
4225 {
4226 	int idx = 0;
4227 	int start = cb->args[0];
4228 	struct ip_vs_service *svc;
4229 	struct ip_vs_dest *dest;
4230 	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
4231 	struct net *net = sock_net(skb->sk);
4232 	struct netns_ipvs *ipvs = net_ipvs(net);
4233 
4234 	rcu_read_lock();
4235 
4236 	/* Try to find the service for which to dump destinations */
4237 	if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack))
4238 		goto out_err;
4239 
4241 	svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
4242 	if (IS_ERR_OR_NULL(svc))
4243 		goto out_err;
4244 
4245 	/* Dump the destinations */
4246 	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
4247 		if (++idx <= start)
4248 			continue;
4249 		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
4250 			idx--;
4251 			goto nla_put_failure;
4252 		}
4253 	}
4254 
4255 nla_put_failure:
4256 	cb->args[0] = idx;
4257 
4258 out_err:
4259 	rcu_read_unlock();
4260 
4261 	return skb->len;
4262 }
4263 
4264 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
4265 				 struct nlattr *nla, bool full_entry)
4266 {
4267 	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
4268 	struct nlattr *nla_addr, *nla_port;
4269 	struct nlattr *nla_addr_family;
4270 
4271 	/* Parse mandatory identifying destination fields first */
4272 	if (nla == NULL ||
4273 	    nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL))
4274 		return -EINVAL;
4275 
4276 	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
4277 	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
4278 	nla_addr_family	= attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
4279 
4280 	if (!(nla_addr && nla_port))
4281 		return -EINVAL;
4282 
4283 	memset(udest, 0, sizeof(*udest));
4284 
4285 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
4286 	udest->port = nla_get_be16(nla_port);
4287 
4288 	udest->af = nla_get_u16_default(nla_addr_family, 0);
4289 
4290 	/* If a full entry was requested, check for the additional fields */
4291 	if (full_entry) {
4292 		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
4293 			      *nla_l_thresh, *nla_tun_type, *nla_tun_port,
4294 			      *nla_tun_flags;
4295 
4296 		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
4297 		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
4298 		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
4299 		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
4300 		nla_tun_type	= attrs[IPVS_DEST_ATTR_TUN_TYPE];
4301 		nla_tun_port	= attrs[IPVS_DEST_ATTR_TUN_PORT];
4302 		nla_tun_flags	= attrs[IPVS_DEST_ATTR_TUN_FLAGS];
4303 
4304 		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
4305 			return -EINVAL;
4306 
4307 		udest->conn_flags = nla_get_u32(nla_fwd)
4308 				    & IP_VS_CONN_F_FWD_MASK;
4309 		udest->weight = nla_get_u32(nla_weight);
4310 		udest->u_threshold = nla_get_u32(nla_u_thresh);
4311 		udest->l_threshold = nla_get_u32(nla_l_thresh);
4312 
4313 		if (nla_tun_type)
4314 			udest->tun_type = nla_get_u8(nla_tun_type);
4315 
4316 		if (nla_tun_port)
4317 			udest->tun_port = nla_get_be16(nla_tun_port);
4318 
4319 		if (nla_tun_flags)
4320 			udest->tun_flags = nla_get_u16(nla_tun_flags);
4321 	}
4322 
4323 	return 0;
4324 }
4325 
4326 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
4327 				  struct ipvs_sync_daemon_cfg *c)
4328 {
4329 	struct nlattr *nl_daemon;
4330 
4331 	nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON);
4332 	if (!nl_daemon)
4333 		return -EMSGSIZE;
4334 
4335 	if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
4336 	    nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) ||
4337 	    nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) ||
4338 	    nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) ||
4339 	    nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) ||
4340 	    nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl))
4341 		goto nla_put_failure;
4342 #ifdef CONFIG_IP_VS_IPV6
4343 	if (c->mcast_af == AF_INET6) {
4344 		if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6,
4345 				     &c->mcast_group.in6))
4346 			goto nla_put_failure;
4347 	} else
4348 #endif
4349 		if (c->mcast_af == AF_INET &&
4350 		    nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP,
4351 				    c->mcast_group.ip))
4352 			goto nla_put_failure;
4353 	nla_nest_end(skb, nl_daemon);
4354 
4355 	return 0;
4356 
4357 nla_put_failure:
4358 	nla_nest_cancel(skb, nl_daemon);
4359 	return -EMSGSIZE;
4360 }
4361 
4362 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
4363 				  struct ipvs_sync_daemon_cfg *c,
4364 				  struct netlink_callback *cb)
4365 {
4366 	void *hdr;
4367 	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
4368 			  &ip_vs_genl_family, NLM_F_MULTI,
4369 			  IPVS_CMD_NEW_DAEMON);
4370 	if (!hdr)
4371 		return -EMSGSIZE;
4372 
4373 	if (ip_vs_genl_fill_daemon(skb, state, c))
4374 		goto nla_put_failure;
4375 
4376 	genlmsg_end(skb, hdr);
4377 	return 0;
4378 
4379 nla_put_failure:
4380 	genlmsg_cancel(skb, hdr);
4381 	return -EMSGSIZE;
4382 }
4383 
4384 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
4385 				   struct netlink_callback *cb)
4386 {
4387 	struct net *net = sock_net(skb->sk);
4388 	struct netns_ipvs *ipvs = net_ipvs(net);
4389 
4390 	mutex_lock(&ipvs->sync_mutex);
4391 	if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
4392 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
4393 					   &ipvs->mcfg, cb) < 0)
4394 			goto nla_put_failure;
4395 
4396 		cb->args[0] = 1;
4397 	}
4398 
4399 	if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
4400 		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
4401 					   &ipvs->bcfg, cb) < 0)
4402 			goto nla_put_failure;
4403 
4404 		cb->args[1] = 1;
4405 	}
4406 
4407 nla_put_failure:
4408 	mutex_unlock(&ipvs->sync_mutex);
4409 
4410 	return skb->len;
4411 }
4412 
4413 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
4414 {
4415 	struct ipvs_sync_daemon_cfg c;
4416 	struct nlattr *a;
4417 	int ret;
4418 
4419 	memset(&c, 0, sizeof(c));
4420 	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
4421 	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
4422 	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
4423 		return -EINVAL;
4424 	strscpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
4425 		sizeof(c.mcast_ifn));
4426 	c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]);
4427 
4428 	a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN];
4429 	if (a)
4430 		c.sync_maxlen = nla_get_u16(a);
4431 
4432 	a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP];
4433 	if (a) {
4434 		c.mcast_af = AF_INET;
4435 		c.mcast_group.ip = nla_get_in_addr(a);
4436 		if (!ipv4_is_multicast(c.mcast_group.ip))
4437 			return -EINVAL;
4438 	} else {
4439 		a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6];
4440 		if (a) {
4441 #ifdef CONFIG_IP_VS_IPV6
4442 			int addr_type;
4443 
4444 			c.mcast_af = AF_INET6;
4445 			c.mcast_group.in6 = nla_get_in6_addr(a);
4446 			addr_type = ipv6_addr_type(&c.mcast_group.in6);
4447 			if (!(addr_type & IPV6_ADDR_MULTICAST))
4448 				return -EINVAL;
4449 #else
4450 			return -EAFNOSUPPORT;
4451 #endif
4452 		}
4453 	}
4454 
4455 	a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT];
4456 	if (a)
4457 		c.mcast_port = nla_get_u16(a);
4458 
4459 	a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL];
4460 	if (a)
4461 		c.mcast_ttl = nla_get_u8(a);
4462 
4463 	/* The synchronization protocol is incompatible with mixed family
4464 	 * services.
4465 	 */
4466 	if (ipvs->mixed_address_family_dests > 0)
4467 		return -EINVAL;
4468 
4469 	ret = start_sync_thread(ipvs, &c,
4470 				nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
4471 	return ret;
4472 }
4473 
4474 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
4475 {
4476 	int ret;
4477 
4478 	if (!attrs[IPVS_DAEMON_ATTR_STATE])
4479 		return -EINVAL;
4480 
4481 	ret = stop_sync_thread(ipvs,
4482 			       nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
4483 	return ret;
4484 }
4485 
4486 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
4487 {
4488 	struct ip_vs_timeout_user t;
4489 
4490 	__ip_vs_get_timeouts(ipvs, &t);
4491 
4492 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
4493 		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
4494 
4495 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
4496 		t.tcp_fin_timeout =
4497 			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
4498 
4499 	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
4500 		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
4501 
4502 	return ip_vs_set_timeout(ipvs, &t);
4503 }
4504 
4505 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
4506 {
4507 	int ret = -EINVAL, cmd;
4508 	struct net *net = sock_net(skb->sk);
4509 	struct netns_ipvs *ipvs = net_ipvs(net);
4510 
4511 	cmd = info->genlhdr->cmd;
4512 
4513 	if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
4514 		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
4515 
4516 		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
4517 		    nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack))
4518 			goto out;
4519 
4520 		if (cmd == IPVS_CMD_NEW_DAEMON)
4521 			ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
4522 		else
4523 			ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
4524 	}
4525 
4526 out:
4527 	return ret;
4528 }
4529 
4530 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
4531 {
4532 	bool need_full_svc = false, need_full_dest = false;
4533 	struct ip_vs_service *svc = NULL;
4534 	struct ip_vs_service_user_kern usvc;
4535 	struct ip_vs_dest_user_kern udest;
4536 	int ret = 0, cmd;
4537 	struct net *net = sock_net(skb->sk);
4538 	struct netns_ipvs *ipvs = net_ipvs(net);
4539 
4540 	cmd = info->genlhdr->cmd;
4541 
4542 	mutex_lock(&ipvs->service_mutex);
4543 
4544 	if (cmd == IPVS_CMD_FLUSH) {
4545 		ret = ip_vs_flush(ipvs, false);
4546 		goto out;
4547 	} else if (cmd == IPVS_CMD_SET_CONFIG) {
4548 		ret = ip_vs_genl_set_config(ipvs, info->attrs);
4549 		goto out;
4550 	} else if (cmd == IPVS_CMD_ZERO &&
4551 		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
4552 		ret = ip_vs_zero_all(ipvs);
4553 		goto out;
4554 	}
4555 
4556 	/* All following commands require a service argument, so check that we
4557 	 * received a valid one. Adding or editing a service needs the full
4558 	 * specification; other commands need only the identifying members. */
4559 	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
4560 		need_full_svc = true;
4561 
4562 	/* We call a function that requires the RCU read lock (hlist_bl) */
4563 	rcu_read_lock();
4564 	ret = ip_vs_genl_parse_service(ipvs, &usvc,
4565 				       info->attrs[IPVS_CMD_ATTR_SERVICE],
4566 				       need_full_svc, &svc);
4567 	rcu_read_unlock();
4568 	if (ret)
4569 		goto out;
4570 
4571 	/* Unless we're adding a new service, the service must already exist */
4572 	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
4573 		ret = -ESRCH;
4574 		goto out;
4575 	}
4576 
4577 	/* Destination commands require a valid destination argument. For
4578 	 * adding / editing a destination, we need a full destination
4579 	 * specification. */
4580 	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
4581 	    cmd == IPVS_CMD_DEL_DEST) {
4582 		if (cmd != IPVS_CMD_DEL_DEST)
4583 			need_full_dest = true;
4584 
4585 		ret = ip_vs_genl_parse_dest(&udest,
4586 					    info->attrs[IPVS_CMD_ATTR_DEST],
4587 					    need_full_dest);
4588 		if (ret)
4589 			goto out;
4590 
4591 		/* Old protocols did not allow the user to specify address
4592 		 * family, so we set it to zero instead.  We also didn't
4593 		 * allow heterogeneous pools in the old code, so it's safe
4594 		 * to assume that this will have the same address family as
4595 		 * the service.
4596 		 */
4597 		if (udest.af == 0)
4598 			udest.af = svc->af;
4599 
4600 		if (!ip_vs_is_af_valid(udest.af)) {
4601 			ret = -EAFNOSUPPORT;
4602 			goto out;
4603 		}
4604 
4605 		if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) {
4606 			/* The synchronization protocol is incompatible
4607 			 * with mixed family services
4608 			 */
4609 			if (ipvs->sync_state) {
4610 				ret = -EINVAL;
4611 				goto out;
4612 			}
4613 
4614 			/* Which connection types do we support? */
4615 			switch (udest.conn_flags) {
4616 			case IP_VS_CONN_F_TUNNEL:
4617 				/* We are able to forward this */
4618 				break;
4619 			default:
4620 				ret = -EINVAL;
4621 				goto out;
4622 			}
4623 		}
4624 	}
4625 
4626 	switch (cmd) {
4627 	case IPVS_CMD_NEW_SERVICE:
4628 		if (svc == NULL)
4629 			ret = ip_vs_add_service(ipvs, &usvc, &svc);
4630 		else
4631 			ret = -EEXIST;
4632 		break;
4633 	case IPVS_CMD_SET_SERVICE:
4634 		ret = ip_vs_edit_service(svc, &usvc);
4635 		break;
4636 	case IPVS_CMD_DEL_SERVICE:
4637 		ret = ip_vs_del_service(svc);
4638 		/* do not use svc, it can be freed */
4639 		break;
4640 	case IPVS_CMD_NEW_DEST:
4641 		ret = ip_vs_add_dest(svc, &udest);
4642 		break;
4643 	case IPVS_CMD_SET_DEST:
4644 		ret = ip_vs_edit_dest(svc, &udest);
4645 		break;
4646 	case IPVS_CMD_DEL_DEST:
4647 		ret = ip_vs_del_dest(svc, &udest);
4648 		break;
4649 	case IPVS_CMD_ZERO:
4650 		ret = ip_vs_zero_service(svc);
4651 		break;
4652 	default:
4653 		ret = -EINVAL;
4654 	}
4655 
4656 out:
4657 	mutex_unlock(&ipvs->service_mutex);
4658 
4659 	return ret;
4660 }
4661 
4662 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
4663 {
4664 	struct sk_buff *msg;
4665 	void *reply;
4666 	int ret, cmd, reply_cmd;
4667 	struct net *net = sock_net(skb->sk);
4668 	struct netns_ipvs *ipvs = net_ipvs(net);
4669 
4670 	cmd = info->genlhdr->cmd;
4671 
4672 	if (cmd == IPVS_CMD_GET_SERVICE)
4673 		reply_cmd = IPVS_CMD_NEW_SERVICE;
4674 	else if (cmd == IPVS_CMD_GET_INFO)
4675 		reply_cmd = IPVS_CMD_SET_INFO;
4676 	else if (cmd == IPVS_CMD_GET_CONFIG)
4677 		reply_cmd = IPVS_CMD_SET_CONFIG;
4678 	else {
4679 		pr_err("unknown Generic Netlink command\n");
4680 		return -EINVAL;
4681 	}
4682 
4683 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
4684 	if (!msg)
4685 		return -ENOMEM;
4686 
4687 	rcu_read_lock();
4688 
4689 	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
4690 	if (reply == NULL)
4691 		goto nla_put_failure;
4692 
4693 	switch (cmd) {
4694 	case IPVS_CMD_GET_SERVICE:
4695 	{
4696 		struct ip_vs_service *svc;
4697 
4698 		svc = ip_vs_genl_find_service(ipvs,
4699 					      info->attrs[IPVS_CMD_ATTR_SERVICE]);
4700 		if (IS_ERR(svc)) {
4701 			ret = PTR_ERR(svc);
4702 			goto out_err;
4703 		} else if (svc) {
4704 			ret = ip_vs_genl_fill_service(msg, svc);
4705 			if (ret)
4706 				goto nla_put_failure;
4707 		} else {
4708 			ret = -ESRCH;
4709 			goto out_err;
4710 		}
4711 
4712 		break;
4713 	}
4714 
4715 	case IPVS_CMD_GET_CONFIG:
4716 	{
4717 		struct ip_vs_timeout_user t;
4718 
4719 		__ip_vs_get_timeouts(ipvs, &t);
4720 #ifdef CONFIG_IP_VS_PROTO_TCP
4721 		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
4722 				t.tcp_timeout) ||
4723 		    nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
4724 				t.tcp_fin_timeout))
4725 			goto nla_put_failure;
4726 #endif
4727 #ifdef CONFIG_IP_VS_PROTO_UDP
4728 		if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
4729 			goto nla_put_failure;
4730 #endif
4731 
4732 		break;
4733 	}
4734 
4735 	case IPVS_CMD_GET_INFO:
4736 		if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
4737 				IP_VS_VERSION_CODE) ||
4738 		    nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
4739 				get_conn_tab_size(ipvs)))
4740 			goto nla_put_failure;
4741 		break;
4742 	}
4743 
4744 	genlmsg_end(msg, reply);
4745 	ret = genlmsg_reply(msg, info);
4746 	goto out;
4747 
4748 nla_put_failure:
4749 	pr_err("not enough space in Netlink message\n");
4750 	ret = -EMSGSIZE;
4751 
4752 out_err:
4753 	nlmsg_free(msg);
4754 out:
4755 	rcu_read_unlock();
4756 
4757 	return ret;
4758 }
4759 
4760 
4761 static const struct genl_small_ops ip_vs_genl_ops[] = {
4762 	{
4763 		.cmd	= IPVS_CMD_NEW_SERVICE,
4764 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4765 		.flags	= GENL_ADMIN_PERM,
4766 		.doit	= ip_vs_genl_set_cmd,
4767 	},
4768 	{
4769 		.cmd	= IPVS_CMD_SET_SERVICE,
4770 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4771 		.flags	= GENL_ADMIN_PERM,
4772 		.doit	= ip_vs_genl_set_cmd,
4773 	},
4774 	{
4775 		.cmd	= IPVS_CMD_DEL_SERVICE,
4776 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4777 		.flags	= GENL_ADMIN_PERM,
4778 		.doit	= ip_vs_genl_set_cmd,
4779 	},
4780 	{
4781 		.cmd	= IPVS_CMD_GET_SERVICE,
4782 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4783 		.flags	= GENL_ADMIN_PERM,
4784 		.doit	= ip_vs_genl_get_cmd,
4785 		.dumpit	= ip_vs_genl_dump_services,
4786 	},
4787 	{
4788 		.cmd	= IPVS_CMD_NEW_DEST,
4789 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4790 		.flags	= GENL_ADMIN_PERM,
4791 		.doit	= ip_vs_genl_set_cmd,
4792 	},
4793 	{
4794 		.cmd	= IPVS_CMD_SET_DEST,
4795 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4796 		.flags	= GENL_ADMIN_PERM,
4797 		.doit	= ip_vs_genl_set_cmd,
4798 	},
4799 	{
4800 		.cmd	= IPVS_CMD_DEL_DEST,
4801 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4802 		.flags	= GENL_ADMIN_PERM,
4803 		.doit	= ip_vs_genl_set_cmd,
4804 	},
4805 	{
4806 		.cmd	= IPVS_CMD_GET_DEST,
4807 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4808 		.flags	= GENL_ADMIN_PERM,
4809 		.dumpit	= ip_vs_genl_dump_dests,
4810 	},
4811 	{
4812 		.cmd	= IPVS_CMD_NEW_DAEMON,
4813 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4814 		.flags	= GENL_ADMIN_PERM,
4815 		.doit	= ip_vs_genl_set_daemon,
4816 	},
4817 	{
4818 		.cmd	= IPVS_CMD_DEL_DAEMON,
4819 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4820 		.flags	= GENL_ADMIN_PERM,
4821 		.doit	= ip_vs_genl_set_daemon,
4822 	},
4823 	{
4824 		.cmd	= IPVS_CMD_GET_DAEMON,
4825 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4826 		.flags	= GENL_ADMIN_PERM,
4827 		.dumpit	= ip_vs_genl_dump_daemons,
4828 	},
4829 	{
4830 		.cmd	= IPVS_CMD_SET_CONFIG,
4831 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4832 		.flags	= GENL_ADMIN_PERM,
4833 		.doit	= ip_vs_genl_set_cmd,
4834 	},
4835 	{
4836 		.cmd	= IPVS_CMD_GET_CONFIG,
4837 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4838 		.flags	= GENL_ADMIN_PERM,
4839 		.doit	= ip_vs_genl_get_cmd,
4840 	},
4841 	{
4842 		.cmd	= IPVS_CMD_GET_INFO,
4843 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4844 		.flags	= GENL_ADMIN_PERM,
4845 		.doit	= ip_vs_genl_get_cmd,
4846 	},
4847 	{
4848 		.cmd	= IPVS_CMD_ZERO,
4849 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4850 		.flags	= GENL_ADMIN_PERM,
4851 		.doit	= ip_vs_genl_set_cmd,
4852 	},
4853 	{
4854 		.cmd	= IPVS_CMD_FLUSH,
4855 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
4856 		.flags	= GENL_ADMIN_PERM,
4857 		.doit	= ip_vs_genl_set_cmd,
4858 	},
4859 };
4860 
4861 static struct genl_family ip_vs_genl_family __ro_after_init = {
4862 	.hdrsize	= 0,
4863 	.name		= IPVS_GENL_NAME,
4864 	.version	= IPVS_GENL_VERSION,
4865 	.maxattr	= IPVS_CMD_ATTR_MAX,
4866 	.policy = ip_vs_cmd_policy,
4867 	.netnsok        = true,         /* Make ipvsadm work with netns */
4868 	.module		= THIS_MODULE,
4869 	.small_ops	= ip_vs_genl_ops,
4870 	.n_small_ops	= ARRAY_SIZE(ip_vs_genl_ops),
4871 	.resv_start_op	= IPVS_CMD_FLUSH + 1,
4872 	.parallel_ops	= 1,
4873 };
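
/* With .parallel_ops set, the genetlink core does not serialize the
 * handlers above; mutual exclusion comes from ipvs->service_mutex,
 * ipvs->sync_mutex and RCU inside the handlers themselves.
 */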
4874 
4875 static int __init ip_vs_genl_register(void)
4876 {
4877 	return genl_register_family(&ip_vs_genl_family);
4878 }
4879 
4880 static void ip_vs_genl_unregister(void)
4881 {
4882 	genl_unregister_family(&ip_vs_genl_family);
4883 }
4884 
4885 /* End of Generic Netlink interface definitions */
4886 
4887 /*
4888  * per-netns init/exit functions.
4889  */
4890 #ifdef CONFIG_SYSCTL
4891 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
4892 {
4893 	struct net *net = ipvs->net;
4894 	struct ctl_table *tbl;
4895 	int idx, ret;
4896 	size_t ctl_table_size = ARRAY_SIZE(vs_vars);
4897 	bool unpriv = net->user_ns != &init_user_ns;
4898 
4899 	atomic_set(&ipvs->dropentry, 0);
4900 	spin_lock_init(&ipvs->dropentry_lock);
4901 	spin_lock_init(&ipvs->droppacket_lock);
4902 	spin_lock_init(&ipvs->securetcp_lock);
4903 	INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
4904 	INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work,
4905 			  expire_nodest_conn_handler);
4906 	ipvs->est_stopped = 0;
4907 
4908 	if (!net_eq(net, &init_net)) {
4909 		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
4910 		if (tbl == NULL)
4911 			return -ENOMEM;
4912 	} else
4913 		tbl = vs_vars;
4914 	/* Initialize sysctl defaults */
4915 	for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
4916 		if (tbl[idx].proc_handler == proc_do_defense_mode)
4917 			tbl[idx].extra2 = ipvs;
4918 	}
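	/* The idx-based assignments below must follow the order of the
	 * corresponding entries in vs_vars[] exactly; a new sysctl has to
	 * be wired in at the matching position in both places.
	 */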
4919 	idx = 0;
4920 	ipvs->sysctl_amemthresh = 1024;
4921 	tbl[idx++].data = &ipvs->sysctl_amemthresh;
4922 	ipvs->sysctl_am_droprate = 10;
4923 	tbl[idx++].data = &ipvs->sysctl_am_droprate;
4924 	tbl[idx++].data = &ipvs->sysctl_drop_entry;
4925 	tbl[idx++].data = &ipvs->sysctl_drop_packet;
4926 #ifdef CONFIG_IP_VS_NFCT
4927 	tbl[idx++].data = &ipvs->sysctl_conntrack;
4928 #endif
4929 	tbl[idx++].data = &ipvs->sysctl_secure_tcp;
4930 	ipvs->sysctl_snat_reroute = 1;
4931 	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
4932 	ipvs->sysctl_sync_ver = 1;
4933 	tbl[idx++].data = &ipvs->sysctl_sync_ver;
4934 	ipvs->sysctl_sync_ports = 1;
4935 	tbl[idx++].data = &ipvs->sysctl_sync_ports;
4936 	tbl[idx++].data = &ipvs->sysctl_sync_persist_mode;
4937 
4938 	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
4939 	if (unpriv)
4940 		tbl[idx].mode = 0444;
4941 	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
4942 
4943 	ipvs->sysctl_sync_sock_size = 0;
4944 	if (unpriv)
4945 		tbl[idx].mode = 0444;
4946 	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
4947 
4948 	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
4949 	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
4950 	tbl[idx++].data = &ipvs->sysctl_sloppy_tcp;
4951 	tbl[idx++].data = &ipvs->sysctl_sloppy_sctp;
4952 	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
4953 	ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
4954 	ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
4955 	tbl[idx].data = &ipvs->sysctl_sync_threshold;
4956 	tbl[idx].extra2 = ipvs;
4957 	tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
4958 	ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
4959 	tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
4960 	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
4961 	tbl[idx++].data = &ipvs->sysctl_sync_retries;
4962 	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
4963 	ipvs->sysctl_pmtu_disc = 1;
4964 	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
4965 	tbl[idx++].data = &ipvs->sysctl_backup_only;
4966 	ipvs->sysctl_conn_reuse_mode = 1;
4967 	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
4968 	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
4969 	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
4970 
4971 	ipvs->sysctl_run_estimation = 1;
4972 	if (unpriv)
4973 		tbl[idx].mode = 0444;
4974 	tbl[idx].extra2 = ipvs;
4975 	tbl[idx++].data = &ipvs->sysctl_run_estimation;
4976 
4977 	ipvs->est_cpulist_valid = 0;
4978 	if (unpriv)
4979 		tbl[idx].mode = 0444;
4980 	tbl[idx].extra2 = ipvs;
4981 	tbl[idx++].data = &ipvs->sysctl_est_cpulist;
4982 
4983 	ipvs->sysctl_est_nice = IPVS_EST_NICE;
4984 	if (unpriv)
4985 		tbl[idx].mode = 0444;
4986 	tbl[idx].extra2 = ipvs;
4987 	tbl[idx++].data = &ipvs->sysctl_est_nice;
4988 
4989 	if (unpriv)
4990 		tbl[idx].mode = 0444;
4991 	tbl[idx].extra2 = ipvs;
4992 	tbl[idx++].data = &ipvs->sysctl_conn_lfactor;
4993 
4994 	if (unpriv)
4995 		tbl[idx].mode = 0444;
4996 	tbl[idx].extra2 = ipvs;
4997 	tbl[idx++].data = &ipvs->sysctl_svc_lfactor;
4998 
4999 #ifdef CONFIG_IP_VS_DEBUG
5000 	/* Global sysctls must be read-only in a non-init netns */
5001 	if (!net_eq(net, &init_net))
5002 		tbl[idx++].mode = 0444;
5003 #endif
5004 
5005 	ret = -ENOMEM;
5006 	ipvs->sysctl_hdr = register_net_sysctl_sz(net, "net/ipv4/vs", tbl,
5007 						  ctl_table_size);
5008 	if (!ipvs->sysctl_hdr)
5009 		goto err;
5010 	ipvs->sysctl_tbl = tbl;
5011 
5012 	ret = ip_vs_start_estimator(ipvs, &ipvs->tot_stats->s);
5013 	if (ret < 0)
5014 		goto err;
5015 
5016 	/* Schedule defense work */
5017 	queue_delayed_work(system_long_wq, &ipvs->defense_work,
5018 			   DEFENSE_TIMER_PERIOD);
5019 
5020 	return 0;
5021 
5022 err:
5023 	unregister_net_sysctl_table(ipvs->sysctl_hdr);
5024 	if (!net_eq(net, &init_net))
5025 		kfree(tbl);
5026 	return ret;
5027 }
5028 
5029 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
5030 {
5031 	struct net *net = ipvs->net;
5032 
5033 	cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work);
5034 	cancel_delayed_work_sync(&ipvs->defense_work);
5035 	cancel_work_sync(&ipvs->defense_work.work);
5036 	unregister_net_sysctl_table(ipvs->sysctl_hdr);
5037 	if (ipvs->tot_stats->s.est.ktid != -2) {
5038 		/* Not stopped yet? This happens only on netns init error and
5039 		 * we do not even need to hold the service_mutex in this case.
5040 		 */
5041 		mutex_lock(&ipvs->service_mutex);
5042 		ip_vs_stop_estimator(ipvs, &ipvs->tot_stats->s);
5043 		mutex_unlock(&ipvs->service_mutex);
5044 	}
5045 
5046 	if (ipvs->est_cpulist_valid)
5047 		free_cpumask_var(ipvs->sysctl_est_cpulist);
5048 
5049 	if (!net_eq(net, &init_net))
5050 		kfree(ipvs->sysctl_tbl);
5051 }
5052 
5053 #else
5054 
5055 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
5056 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
5057 
5058 #endif
5059 
5060 static struct notifier_block ip_vs_dst_notifier = {
5061 	.notifier_call = ip_vs_dst_event,
5062 #ifdef CONFIG_IP_VS_IPV6
5063 	.priority = ADDRCONF_NOTIFY_PRIORITY + 5,
5064 #endif
5065 };
5066 
5067 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
5068 {
5069 	int ret = -ENOMEM;
5070 	int idx;
5071 
5072 	/* Initialize service_mutex, svc_table per netns */
5073 	__mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key);
5074 	init_rwsem(&ipvs->svc_resize_sem);
5075 	INIT_DELAYED_WORK(&ipvs->svc_resize_work, svc_resize_work_handler);
5076 	atomic_set(&ipvs->svc_table_changes, 0);
5077 	RCU_INIT_POINTER(ipvs->svc_table, NULL);
5078 
5079 	/* Initialize rs_table */
5080 	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
5081 		INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
5082 
5083 	INIT_LIST_HEAD(&ipvs->dest_trash);
5084 	spin_lock_init(&ipvs->dest_trash_lock);
5085 	timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0);
5086 	for (idx = 0; idx < IP_VS_AF_MAX; idx++) {
5087 		atomic_set(&ipvs->num_services[idx], 0);
5088 		atomic_set(&ipvs->fwm_services[idx], 0);
5089 		atomic_set(&ipvs->nonfwm_services[idx], 0);
5090 		atomic_set(&ipvs->ftpsvc_counter[idx], 0);
5091 		atomic_set(&ipvs->nullsvc_counter[idx], 0);
5092 		atomic_set(&ipvs->conn_out_counter[idx], 0);
5093 	}
5094 
5095 	INIT_DELAYED_WORK(&ipvs->est_reload_work, est_reload_work_handler);
5096 	ipvs->sysctl_svc_lfactor = ip_vs_svc_default_load_factor(ipvs);
5097 
5098 	/* procfs stats */
5099 	ipvs->tot_stats = kzalloc_obj(*ipvs->tot_stats);
5100 	if (!ipvs->tot_stats)
5101 		goto out;
5102 	if (ip_vs_stats_init_alloc(&ipvs->tot_stats->s) < 0)
5103 		goto err_tot_stats;
5104 
5105 #ifdef CONFIG_PROC_FS
5106 	if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net,
5107 			     &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)))
5108 		goto err_vs;
5109 	if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
5110 				    ip_vs_stats_show, NULL))
5111 		goto err_stats;
5112 	if (!proc_create_net_single("ip_vs_stats_percpu", 0,
5113 				    ipvs->net->proc_net,
5114 				    ip_vs_stats_percpu_show, NULL))
5115 		goto err_percpu;
5116 	if (!proc_create_net_single("ip_vs_status", 0440, ipvs->net->proc_net,
5117 				    ip_vs_status_show, NULL))
5118 		goto err_status;
5119 #endif
5120 
5121 	ret = ip_vs_control_net_init_sysctl(ipvs);
5122 	if (ret < 0)
5123 		goto err;
5124 
5125 	return 0;
5126 
5127 err:
5128 #ifdef CONFIG_PROC_FS
5129 	remove_proc_entry("ip_vs_status", ipvs->net->proc_net);
5130 
5131 err_status:
5132 	remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
5133 
5134 err_percpu:
5135 	remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
5136 
5137 err_stats:
5138 	remove_proc_entry("ip_vs", ipvs->net->proc_net);
5139 
5140 err_vs:
5141 #endif
5142 	ip_vs_stats_release(&ipvs->tot_stats->s);
5143 
5144 err_tot_stats:
5145 	kfree(ipvs->tot_stats);
5146 
5147 out:
5148 	return ret;
5149 }
5150 
5151 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
5152 {
5153 	ip_vs_trash_cleanup(ipvs);
5154 	ip_vs_control_net_cleanup_sysctl(ipvs);
5155 	cancel_delayed_work_sync(&ipvs->est_reload_work);
5156 #ifdef CONFIG_PROC_FS
5157 	remove_proc_entry("ip_vs_status", ipvs->net->proc_net);
5158 	remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
5159 	remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
5160 	remove_proc_entry("ip_vs", ipvs->net->proc_net);
5161 #endif
5162 	call_rcu(&ipvs->tot_stats->rcu_head, ip_vs_stats_rcu_free);
5163 }
5164 
5165 int __init ip_vs_register_nl_ioctl(void)
5166 {
5167 	int ret;
5168 
5169 	ret = nf_register_sockopt(&ip_vs_sockopts);
5170 	if (ret) {
5171 		pr_err("cannot register sockopt.\n");
5172 		goto err_sock;
5173 	}
5174 
5175 	ret = ip_vs_genl_register();
5176 	if (ret) {
5177 		pr_err("cannot register Generic Netlink interface.\n");
5178 		goto err_genl;
5179 	}
5180 	return 0;
5181 
5182 err_genl:
5183 	nf_unregister_sockopt(&ip_vs_sockopts);
5184 err_sock:
5185 	return ret;
5186 }
5187 
5188 void ip_vs_unregister_nl_ioctl(void)
5189 {
5190 	ip_vs_genl_unregister();
5191 	nf_unregister_sockopt(&ip_vs_sockopts);
5192 }
5193 
5194 int __init ip_vs_control_init(void)
5195 {
5196 	int ret;
5197 
5198 	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
5199 	if (ret < 0)
5200 		return ret;
5201 
5202 	return 0;
5203 }
5204 
5205 
5206 void ip_vs_control_cleanup(void)
5207 {
5208 	unregister_netdevice_notifier(&ip_vs_dst_notifier);
5209 	/* relying on common rcu_barrier() in ip_vs_cleanup() */
5210 }
5211