xref: /linux/net/tipc/monitor.c (revision 0d240e7811c4ec1965760ee4643b5bbc9cfacbb3)
1 /*
2  * net/tipc/monitor.c
3  *
4  * Copyright (c) 2016, Ericsson AB
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the names of the copyright holders nor the names of its
16  *    contributors may be used to endorse or promote products derived from
17  *    this software without specific prior written permission.
18  *
19  * Alternatively, this software may be distributed under the terms of the
20  * GNU General Public License ("GPL") version 2 as published by the Free
21  * Software Foundation.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "core.h"
37 #include "addr.h"
38 #include "monitor.h"
39 
#define MAX_MON_DOMAIN       64     /* max members per domain record (bits in u64 up_map) */
#define MON_TIMEOUT          120000 /* base domain re-size timer interval [ms] */
#define MAX_PEER_DOWN_EVENTS 4      /* peer-down reports that trigger link reset */
43 
/* struct tipc_mon_domain: domain record to be transferred between peers
 * @len: actual size of domain record
 * @gen: current generation of sender's domain
 * @ack_gen: most recent generation of self's domain acked by peer
 * @member_cnt: number of domain member nodes described in this record
 * @up_map: bit map indicating which of the members the sender considers up
 * @members: identity of the domain members
 *
 * On the wire all fields are carried in network byte order (see the
 * htons()/cpu_to_be64() conversions when the cache is built); the 64-bit
 * @up_map is what limits MAX_MON_DOMAIN to 64.
 */
struct tipc_mon_domain {
	u16 len;
	u16 gen;
	u16 ack_gen;
	u16 member_cnt;
	u64 up_map;
	u32 members[MAX_MON_DOMAIN];
};
60 
/* struct tipc_peer: state of a peer node and its domain
 * @addr: tipc node identity of peer
 * @domain: most recent domain record from peer
 * @hash: position in hashed lookup list
 * @list: position in linked list, in circular ascending order by 'addr'
 * @applied: number of reported domain members applied on this monitor list
 * @down_cnt: number of other peers which have reported this peer lost
 * @is_up: peer is up as seen from this node
 * @is_head: peer is assigned domain head as seen from this node
 * @is_local: peer is in local domain and should be continuously monitored
 */
struct tipc_peer {
	u32 addr;
	struct tipc_mon_domain *domain;
	struct hlist_node hash;
	struct list_head list;
	u8 applied;
	u8 down_cnt;
	bool is_up;
	bool is_head;
	bool is_local;
};
84 
/* struct tipc_monitor: per-bearer monitor instance
 * @peers: hash table for direct lookup of peer records by node address
 * @peer_cnt: number of nodes in the cluster, self included
 * @self: peer record representing this node
 * @lock: protects all monitor state of this bearer
 * @cache: own domain record pre-converted to network byte order, ready
 *         to be copied out by tipc_mon_prep()
 * @list_gen: bumped on each change of the monitor list (role assignment)
 * @dom_gen: bumped on each change of own domain record
 * @net: network namespace this monitor belongs to
 * @timer: self-rearming timer driving periodic domain re-sizing
 * @timer_intv: randomized timer interval in jiffies
 */
struct tipc_monitor {
	struct hlist_head peers[NODE_HTABLE_SIZE];
	int peer_cnt;
	struct tipc_peer *self;
	rwlock_t lock;
	struct tipc_mon_domain cache;
	u16 list_gen;
	u16 dom_gen;
	struct net *net;
	struct timer_list timer;
	unsigned long timer_intv;
};
97 
/* tipc_monitor(): fetch the monitor instance of the given bearer */
static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id)
{
	return tipc_net(net)->monitors[bearer_id];
}
102 
/* Upper bound on the size of any domain record */
const int tipc_max_domain_size = sizeof(struct tipc_mon_domain);

/* dom_rec_len(): actual length of domain record for transport
 * Header size plus @mcnt member entries; the members[] array is only
 * partially transmitted.
 */
static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt)
{
	return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32));
}
111 
112 /* dom_size() : calculate size of own domain based on number of peers
113  */
114 static int dom_size(int peers)
115 {
116 	int i = 0;
117 
118 	while ((i * i) < peers)
119 		i++;
120 	return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN;
121 }
122 
123 static void map_set(u64 *up_map, int i, unsigned int v)
124 {
125 	*up_map &= ~(1 << i);
126 	*up_map |= (v << i);
127 }
128 
129 static int map_get(u64 up_map, int i)
130 {
131 	return (up_map & (1 << i)) >> i;
132 }
133 
/* peer_prev(): predecessor of @peer in the circular, addr-ordered list */
static struct tipc_peer *peer_prev(struct tipc_peer *peer)
{
	return list_last_entry(&peer->list, struct tipc_peer, list);
}
138 
/* peer_nxt(): successor of @peer in the circular, addr-ordered list */
static struct tipc_peer *peer_nxt(struct tipc_peer *peer)
{
	return list_first_entry(&peer->list, struct tipc_peer, list);
}
143 
144 static struct tipc_peer *peer_head(struct tipc_peer *peer)
145 {
146 	while (!peer->is_head)
147 		peer = peer_prev(peer);
148 	return peer;
149 }
150 
151 static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr)
152 {
153 	struct tipc_peer *peer;
154 	unsigned int thash = tipc_hashfn(addr);
155 
156 	hlist_for_each_entry(peer, &mon->peers[thash], hash) {
157 		if (peer->addr == addr)
158 			return peer;
159 	}
160 	return NULL;
161 }
162 
163 static struct tipc_peer *get_self(struct net *net, int bearer_id)
164 {
165 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
166 
167 	return mon->self;
168 }
169 
170 static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon)
171 {
172 	struct tipc_net *tn = tipc_net(net);
173 
174 	return mon->peer_cnt > tn->mon_threshold;
175 }
176 
/* mon_identify_lost_members() : - identify and mark potentially lost members
 * Compares the peer's previous domain record/applied span (@dom_bef,
 * @applied_bef) with its current one, and bumps down_cnt on members the
 * peer no longer reports as up, so active supervision of them kicks in
 * (down_cnt is read by tipc_mon_get_state()).
 */
static void mon_identify_lost_members(struct tipc_peer *peer,
				      struct tipc_mon_domain *dom_bef,
				      int applied_bef)
{
	struct tipc_peer *member = peer;
	struct tipc_mon_domain *dom_aft = peer->domain;
	int applied_aft = peer->applied;
	int i;

	for (i = 0; i < applied_bef; i++) {
		member = peer_nxt(member);

		/* Do nothing if self or peer already see member as down */
		if (!member->is_up || !map_get(dom_bef->up_map, i))
			continue;

		/* Loss of local node must be detected by active probing */
		if (member->is_local)
			continue;

		/* Start probing if member was removed from applied domain */
		if (!applied_aft || (applied_aft < i)) {
			member->down_cnt = 1;
			continue;
		}

		/* Member loss is confirmed if it is still in applied domain */
		if (!map_get(dom_aft->up_map, i))
			member->down_cnt++;
	}
}
210 
/* mon_apply_domain() : match a peer's domain record against monitor list
 * Sets peer->applied to the number of leading entries of the peer's
 * domain record that coincide, in order, with the consecutive successors
 * of @peer in the monitor list; the scan stops at the first mismatch.
 */
static void mon_apply_domain(struct tipc_monitor *mon,
			     struct tipc_peer *peer)
{
	struct tipc_mon_domain *dom = peer->domain;
	struct tipc_peer *member;
	u32 addr;
	int i;

	/* Nothing to apply if peer is down or has no domain record */
	if (!dom || !peer->is_up)
		return;

	/* Scan across domain members and match against monitor list */
	peer->applied = 0;
	member = peer_nxt(peer);
	for (i = 0; i < dom->member_cnt; i++) {
		addr = dom->members[i];
		if (addr != member->addr)
			return;
		peer->applied++;
		member = peer_nxt(member);
	}
}
235 
/* mon_update_local_domain() : update after peer addition/removal/up/down
 * Rebuilds self's domain record — and the network-byte-order cache copy
 * used by tipc_mon_prep() — from the first dom_size(peer_cnt) - 1
 * successors in the monitor list. The generation number is bumped only
 * when membership or up-state actually changed.
 */
static void mon_update_local_domain(struct tipc_monitor *mon)
{
	struct tipc_peer *self = mon->self;
	struct tipc_mon_domain *cache = &mon->cache;
	struct tipc_mon_domain *dom = self->domain;
	struct tipc_peer *peer = self;
	u64 prev_up_map = dom->up_map;
	u16 member_cnt, i;
	bool diff;

	/* Update local domain size based on current size of cluster */
	member_cnt = dom_size(mon->peer_cnt) - 1;
	self->applied = member_cnt;

	/* Update native and cached outgoing local domain records */
	dom->len = dom_rec_len(dom, member_cnt);
	diff = dom->member_cnt != member_cnt;
	dom->member_cnt = member_cnt;
	for (i = 0; i < member_cnt; i++) {
		peer = peer_nxt(peer);
		diff |= dom->members[i] != peer->addr;
		dom->members[i] = peer->addr;
		map_set(&dom->up_map, i, peer->is_up);
		cache->members[i] = htonl(peer->addr);
	}
	diff |= dom->up_map != prev_up_map;
	if (!diff)
		return;
	dom->gen = ++mon->dom_gen;
	cache->len = htons(dom->len);
	cache->gen = htons(dom->gen);
	cache->member_cnt = htons(member_cnt);
	cache->up_map = cpu_to_be64(dom->up_map);
	mon_apply_domain(mon, self);
}
273 
274 /* mon_update_neighbors() : update preceding neighbors of added/removed peer
275  */
276 static void mon_update_neighbors(struct tipc_monitor *mon,
277 				 struct tipc_peer *peer)
278 {
279 	int dz, i;
280 
281 	dz = dom_size(mon->peer_cnt);
282 	for (i = 0; i < dz; i++) {
283 		mon_apply_domain(mon, peer);
284 		peer = peer_prev(peer);
285 	}
286 }
287 
/* mon_assign_roles() : reassign peer roles after a network change
 * The monitor list is consistent at this stage; i.e., each peer is monitoring
 * a set of domain members as matched between domain record and the monitor list
 *
 * Walks the circle from just after @head back to self: peers inside the
 * current head's applied span are plain members (and marked 'local' when
 * self is their head); the first 'up' peer beyond that span becomes the
 * next domain head. The walk stops early at an already-assigned head.
 * list_gen is bumped so cached link states are refreshed.
 */
static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head)
{
	struct tipc_peer *peer = peer_nxt(head);
	struct tipc_peer *self = mon->self;
	int i = 0;

	for (; peer != self; peer = peer_nxt(peer)) {
		peer->is_local = false;

		/* Update domain member */
		if (i++ < head->applied) {
			peer->is_head = false;
			if (head == self)
				peer->is_local = true;
			continue;
		}
		/* Assign next domain head */
		if (!peer->is_up)
			continue;
		if (peer->is_head)
			break;
		head = peer;
		head->is_head = true;
		i = 0;
	}
	mon->list_gen++;
}
319 
/* tipc_mon_remove_peer(): remove peer from monitor list and rebalance
 * Frees the peer record, re-applies the domains of its preceding
 * neighbors, and — if the cluster dropped below the activation
 * threshold — drops all cached peer domains to revert to full-mesh
 * monitoring.
 */
void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id)
{
	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
	struct tipc_peer *self = get_self(net, bearer_id);
	struct tipc_peer *peer, *prev, *head;

	write_lock_bh(&mon->lock);
	peer = get_peer(mon, addr);
	if (!peer)
		goto exit;
	prev = peer_prev(peer);
	list_del(&peer->list);
	hlist_del(&peer->hash);
	kfree(peer->domain);
	kfree(peer);
	mon->peer_cnt--;
	head = peer_head(prev);
	if (head == self)
		mon_update_local_domain(mon);
	mon_update_neighbors(mon, prev);

	/* Revert to full-mesh monitoring if we reach threshold */
	if (!tipc_mon_is_active(net, mon)) {
		list_for_each_entry(peer, &self->list, list) {
			kfree(peer->domain);
			peer->domain = NULL;
			peer->applied = 0;
		}
	}
	mon_assign_roles(mon, head);
exit:
	write_unlock_bh(&mon->lock);
}
353 
/* tipc_mon_add_peer(): allocate and link a new peer record
 * Enters @addr into the hash table and into the circular iterator list
 * at its ascending-address position (handling the wrap-around seam of
 * the circle), then re-applies the domains of its preceding neighbors.
 * Returns false, with *peer set to NULL, on allocation failure.
 */
static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr,
			      struct tipc_peer **peer)
{
	struct tipc_peer *self = mon->self;
	struct tipc_peer *cur, *prev, *p;

	p = kzalloc(sizeof(*p), GFP_ATOMIC);
	*peer = p;
	if (!p)
		return false;
	p->addr = addr;

	/* Add new peer to lookup list */
	INIT_LIST_HEAD(&p->list);
	hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]);

	/* Sort new peer into iterator list, in ascending circular order */
	prev = self;
	list_for_each_entry(cur, &self->list, list) {
		/* Normal case: addr falls strictly between prev and cur */
		if ((addr > prev->addr) && (addr < cur->addr))
			break;
		/* Wrap-around case: insertion point is at the list's seam */
		if (((addr < cur->addr) || (addr > prev->addr)) &&
		    (prev->addr > cur->addr))
			break;
		prev = cur;
	}
	list_add_tail(&p->list, &cur->list);
	mon->peer_cnt++;
	mon_update_neighbors(mon, p);
	return true;
}
385 
/* tipc_mon_peer_up(): mark peer as up, creating its record on first
 * contact. Rebuilds the local domain when self heads the peer's domain,
 * and reassigns roles.
 */
void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id)
{
	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
	struct tipc_peer *self = get_self(net, bearer_id);
	struct tipc_peer *peer, *head;

	write_lock_bh(&mon->lock);
	peer = get_peer(mon, addr);
	/* Create peer on first contact; bail out silently on allocation failure */
	if (!peer && !tipc_mon_add_peer(mon, addr, &peer))
		goto exit;
	peer->is_up = true;
	head = peer_head(peer);
	if (head == self)
		mon_update_local_domain(mon);
	mon_assign_roles(mon, head);
exit:
	write_unlock_bh(&mon->lock);
}
404 
405 void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id)
406 {
407 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
408 	struct tipc_peer *self = get_self(net, bearer_id);
409 	struct tipc_peer *peer, *head;
410 	struct tipc_mon_domain *dom;
411 	int applied;
412 
413 	write_lock_bh(&mon->lock);
414 	peer = get_peer(mon, addr);
415 	if (!peer) {
416 		pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id);
417 		goto exit;
418 	}
419 	applied = peer->applied;
420 	peer->applied = 0;
421 	dom = peer->domain;
422 	peer->domain = NULL;
423 	if (peer->is_head)
424 		mon_identify_lost_members(peer, dom, applied);
425 	kfree(dom);
426 	peer->is_up = false;
427 	peer->is_head = false;
428 	peer->is_local = false;
429 	peer->down_cnt = 0;
430 	head = peer_head(peer);
431 	if (head == self)
432 		mon_update_local_domain(mon);
433 	mon_assign_roles(mon, head);
434 exit:
435 	write_unlock_bh(&mon->lock);
436 }
437 
438 /* tipc_mon_rcv - process monitor domain event message
439  */
440 void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
441 		  struct tipc_mon_state *state, int bearer_id)
442 {
443 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
444 	struct tipc_mon_domain *arrv_dom = data;
445 	struct tipc_mon_domain dom_bef;
446 	struct tipc_mon_domain *dom;
447 	struct tipc_peer *peer;
448 	u16 new_member_cnt = ntohs(arrv_dom->member_cnt);
449 	int new_dlen = dom_rec_len(arrv_dom, new_member_cnt);
450 	u16 new_gen = ntohs(arrv_dom->gen);
451 	u16 acked_gen = ntohs(arrv_dom->ack_gen);
452 	bool probing = state->probing;
453 	int i, applied_bef;
454 
455 	state->probing = false;
456 	if (!dlen)
457 		return;
458 
459 	/* Sanity check received domain record */
460 	if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
461 		pr_warn_ratelimited("Received illegal domain record\n");
462 		return;
463 	}
464 
465 	/* Synch generation numbers with peer if link just came up */
466 	if (!state->synched) {
467 		state->peer_gen = new_gen - 1;
468 		state->acked_gen = acked_gen;
469 		state->synched = true;
470 	}
471 
472 	if (more(acked_gen, state->acked_gen))
473 		state->acked_gen = acked_gen;
474 
475 	/* Drop duplicate unless we are waiting for a probe response */
476 	if (!more(new_gen, state->peer_gen) && !probing)
477 		return;
478 
479 	write_lock_bh(&mon->lock);
480 	peer = get_peer(mon, addr);
481 	if (!peer || !peer->is_up)
482 		goto exit;
483 
484 	/* Peer is confirmed, stop any ongoing probing */
485 	peer->down_cnt = 0;
486 
487 	/* Task is done for duplicate record */
488 	if (!more(new_gen, state->peer_gen))
489 		goto exit;
490 
491 	state->peer_gen = new_gen;
492 
493 	/* Cache current domain record for later use */
494 	dom_bef.member_cnt = 0;
495 	dom = peer->domain;
496 	if (dom)
497 		memcpy(&dom_bef, dom, dom->len);
498 
499 	/* Transform and store received domain record */
500 	if (!dom || (dom->len < new_dlen)) {
501 		kfree(dom);
502 		dom = kmalloc(new_dlen, GFP_ATOMIC);
503 		peer->domain = dom;
504 		if (!dom)
505 			goto exit;
506 	}
507 	dom->len = new_dlen;
508 	dom->gen = new_gen;
509 	dom->member_cnt = new_member_cnt;
510 	dom->up_map = be64_to_cpu(arrv_dom->up_map);
511 	for (i = 0; i < new_member_cnt; i++)
512 		dom->members[i] = ntohl(arrv_dom->members[i]);
513 
514 	/* Update peers affected by this domain record */
515 	applied_bef = peer->applied;
516 	mon_apply_domain(mon, peer);
517 	mon_identify_lost_members(peer, &dom_bef, applied_bef);
518 	mon_assign_roles(mon, peer_head(peer));
519 exit:
520 	write_unlock_bh(&mon->lock);
521 }
522 
/* tipc_mon_prep(): build outgoing domain record for a link state message
 * Writes the record into @data (at most tipc_max_domain_size bytes) and
 * returns the used length in *dlen. Does nothing while the monitor is
 * inactive (small cluster).
 */
void tipc_mon_prep(struct net *net, void *data, int *dlen,
		   struct tipc_mon_state *state, int bearer_id)
{
	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
	struct tipc_mon_domain *dom = data;
	u16 gen = mon->dom_gen;
	u16 len;

	if (!tipc_mon_is_active(net, mon))
		return;

	/* Send only a dummy record with ack if peer has acked our last sent */
	if (likely(state->acked_gen == gen)) {
		len = dom_rec_len(dom, 0);
		*dlen = len;
		dom->len = htons(len);
		dom->gen = htons(gen);
		dom->ack_gen = htons(state->peer_gen);
		dom->member_cnt = 0;
		return;
	}
	/* Send the full record */
	read_lock_bh(&mon->lock);
	len = ntohs(mon->cache.len);
	*dlen = len;
	memcpy(data, &mon->cache, len);
	read_unlock_bh(&mon->lock);
	dom->ack_gen = htons(state->peer_gen);
}
552 
/* tipc_mon_get_state(): refresh the link's cached monitor state flags
 * Fills in @state->probing/reset/monitoring for the link towards @addr.
 * The fast-path check reads mon->list_gen/dom_gen without the lock —
 * NOTE(review): presumably a benign race (a stale read only defers the
 * refresh to a later call); confirm against callers.
 */
void tipc_mon_get_state(struct net *net, u32 addr,
			struct tipc_mon_state *state,
			int bearer_id)
{
	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
	struct tipc_peer *peer;

	/* Used cached state if table has not changed */
	if (!state->probing &&
	    (state->list_gen == mon->list_gen) &&
	    (state->acked_gen == mon->dom_gen))
		return;

	read_lock_bh(&mon->lock);
	peer = get_peer(mon, addr);
	if (peer) {
		/* Probe while our latest domain record is unacked or the
		 * peer has been reported lost by others
		 */
		state->probing = state->acked_gen != mon->dom_gen;
		state->probing |= peer->down_cnt;
		state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS;
		state->monitoring = peer->is_local;
		state->monitoring |= peer->is_head;
		state->list_gen = mon->list_gen;
	}
	read_unlock_bh(&mon->lock);
}
578 
/* mon_timeout(): periodic re-evaluation of own domain size
 * Timer callback; @m carries the tipc_monitor pointer (pre-timer_setup()
 * style callback argument, matching the setup_timer() call in
 * tipc_mon_create()). If cluster growth/shrinkage means self's applied
 * member count no longer matches dom_size(), rebuild the local domain
 * and reassign roles. Always re-arms itself.
 */
static void mon_timeout(unsigned long m)
{
	struct tipc_monitor *mon = (void *)m;
	struct tipc_peer *self;
	int best_member_cnt = dom_size(mon->peer_cnt) - 1;

	write_lock_bh(&mon->lock);
	self = mon->self;
	if (self && (best_member_cnt != self->applied)) {
		mon_update_local_domain(mon);
		mon_assign_roles(mon, self);
	}
	write_unlock_bh(&mon->lock);
	mod_timer(&mon->timer, jiffies + mon->timer_intv);
}
594 
/* tipc_mon_create(): create and start a monitor instance for a bearer
 * Idempotent: returns 0 immediately if the monitor already exists.
 * Self is entered as the initial (and only) peer, and the domain timer
 * is armed with an interval randomized by tn->random.
 * Returns 0 on success, -ENOMEM if any allocation fails.
 */
int tipc_mon_create(struct net *net, int bearer_id)
{
	struct tipc_net *tn = tipc_net(net);
	struct tipc_monitor *mon;
	struct tipc_peer *self;
	struct tipc_mon_domain *dom;

	if (tn->monitors[bearer_id])
		return 0;

	mon = kzalloc(sizeof(*mon), GFP_ATOMIC);
	self = kzalloc(sizeof(*self), GFP_ATOMIC);
	dom = kzalloc(sizeof(*dom), GFP_ATOMIC);
	if (!mon || !self || !dom) {
		/* kfree(NULL) is a no-op, so partial allocations are safe */
		kfree(mon);
		kfree(self);
		kfree(dom);
		return -ENOMEM;
	}
	tn->monitors[bearer_id] = mon;
	rwlock_init(&mon->lock);
	mon->net = net;
	mon->peer_cnt = 1;
	mon->self = self;
	self->domain = dom;
	self->addr = tipc_own_addr(net);
	self->is_up = true;
	self->is_head = true;
	INIT_LIST_HEAD(&self->list);
	setup_timer(&mon->timer, mon_timeout, (unsigned long)mon);
	mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff));
	mod_timer(&mon->timer, jiffies + mon->timer_intv);
	return 0;
}
629 
630 void tipc_mon_delete(struct net *net, int bearer_id)
631 {
632 	struct tipc_net *tn = tipc_net(net);
633 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
634 	struct tipc_peer *self = get_self(net, bearer_id);
635 	struct tipc_peer *peer, *tmp;
636 
637 	write_lock_bh(&mon->lock);
638 	tn->monitors[bearer_id] = NULL;
639 	list_for_each_entry_safe(peer, tmp, &self->list, list) {
640 		list_del(&peer->list);
641 		hlist_del(&peer->hash);
642 		kfree(peer->domain);
643 		kfree(peer);
644 	}
645 	mon->self = NULL;
646 	write_unlock_bh(&mon->lock);
647 	del_timer_sync(&mon->timer);
648 	kfree(self->domain);
649 	kfree(self);
650 	kfree(mon);
651 }
652