1 /* 2 * net/tipc/monitor.c 3 * 4 * Copyright (c) 2016, Ericsson AB 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the names of the copyright holders nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 * 19 * Alternatively, this software may be distributed under the terms of the 20 * GNU General Public License ("GPL") version 2 as published by the Free 21 * Software Foundation. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 #include "core.h" 37 #include "addr.h" 38 #include "monitor.h" 39 40 #define MAX_MON_DOMAIN 64 41 #define MON_TIMEOUT 120000 42 #define MAX_PEER_DOWN_EVENTS 4 43 44 /* struct tipc_mon_domain: domain record to be transferred between peers 45 * @len: actual size of domain record 46 * @gen: current generation of sender's domain 47 * @ack_gen: most recent generation of self's domain acked by peer 48 * @member_cnt: number of domain member nodes described in this record 49 * @up_map: bit map indicating which of the members the sender considers up 50 * @members: identity of the domain members 51 */ 52 struct tipc_mon_domain { 53 u16 len; 54 u16 gen; 55 u16 ack_gen; 56 u16 member_cnt; 57 u64 up_map; 58 u32 members[MAX_MON_DOMAIN]; 59 }; 60 61 /* struct tipc_peer: state of a peer node and its domain 62 * @addr: tipc node identity of peer 63 * @head_map: shows which other nodes currently consider peer 'up' 64 * @domain: most recent domain record from peer 65 * @hash: position in hashed lookup list 66 * @list: position in linked list, in circular ascending order by 'addr' 67 * @applied: number of reported domain members applied on this monitor list 68 * @is_up: peer is up as seen from this node 69 * @is_head: peer is assigned domain head as seen from this node 70 * @is_local: peer is in local domain and should be continuously monitored 71 * @down_cnt: - numbers of other peers which have reported this on lost 72 */ 73 struct tipc_peer { 74 u32 addr; 75 struct tipc_mon_domain *domain; 76 struct hlist_node hash; 77 struct list_head list; 78 u8 applied; 79 u8 down_cnt; 80 bool is_up; 81 bool is_head; 82 bool is_local; 83 }; 84 85 struct tipc_monitor { 86 struct hlist_head peers[NODE_HTABLE_SIZE]; 87 int peer_cnt; 88 struct tipc_peer *self; 89 rwlock_t lock; 90 struct tipc_mon_domain cache; 91 u16 list_gen; 92 u16 dom_gen; 93 struct net *net; 94 struct timer_list timer; 95 unsigned long timer_intv; 96 }; 97 98 static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) 99 { 100 return tipc_net(net)->monitors[bearer_id]; 101 } 102 103 const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); 104 105 /* dom_rec_len(): actual length of domain record for transport 106 */ 107 static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) 108 { 109 return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32)); 110 } 111 112 /* dom_size() : calculate size of own domain based on number of peers 113 */ 114 static int dom_size(int peers) 115 { 116 int i = 0; 117 118 while ((i * i) < peers) 119 i++; 120 return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN; 121 } 122 123 static void map_set(u64 *up_map, int i, unsigned int v) 124 { 125 *up_map &= ~(1 << i); 126 *up_map |= (v << i); 127 } 128 129 static int map_get(u64 up_map, int i) 130 { 131 return (up_map & (1 << i)) >> i; 132 } 133 134 static struct tipc_peer *peer_prev(struct tipc_peer *peer) 135 { 136 return list_last_entry(&peer->list, struct tipc_peer, list); 137 } 138 139 static struct tipc_peer *peer_nxt(struct tipc_peer *peer) 140 { 141 return list_first_entry(&peer->list, struct tipc_peer, list); 142 } 143 144 static struct tipc_peer *peer_head(struct tipc_peer *peer) 145 { 146 while (!peer->is_head) 147 peer = peer_prev(peer); 148 return peer; 149 } 150 151 static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) 152 { 153 struct tipc_peer *peer; 154 unsigned int thash = tipc_hashfn(addr); 155 156 hlist_for_each_entry(peer, &mon->peers[thash], hash) { 157 if (peer->addr == addr) 158 return peer; 159 } 160 return NULL; 161 } 162 163 static struct tipc_peer *get_self(struct net *net, int bearer_id) 164 { 165 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 166 167 return mon->self; 168 } 169 170 static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) 171 { 172 struct tipc_net *tn = tipc_net(net); 173 174 return mon->peer_cnt > tn->mon_threshold; 175 } 176 177 /* mon_identify_lost_members() : - identify amd mark potentially lost members 178 */ 179 static void mon_identify_lost_members(struct tipc_peer *peer, 180 struct tipc_mon_domain *dom_bef, 181 int applied_bef) 182 { 183 struct tipc_peer *member = peer; 184 struct tipc_mon_domain *dom_aft = peer->domain; 185 int applied_aft = peer->applied; 186 int i; 187 188 for (i = 0; i < applied_bef; i++) { 189 member = peer_nxt(member); 190 191 /* Do nothing if self or peer already see member as down */ 192 if (!member->is_up || !map_get(dom_bef->up_map, i)) 193 continue; 194 195 /* Loss of local node must be detected by active probing */ 196 if (member->is_local) 197 continue; 198 199 /* Start probing if member was removed from applied domain */ 200 if (!applied_aft || (applied_aft < i)) { 201 member->down_cnt = 1; 202 continue; 203 } 204 205 /* Member loss is confirmed if it is still in applied domain */ 206 if (!map_get(dom_aft->up_map, i)) 207 member->down_cnt++; 208 } 209 } 210 211 /* mon_apply_domain() : match a peer's domain record against monitor list 212 */ 213 static void mon_apply_domain(struct tipc_monitor *mon, 214 struct tipc_peer *peer) 215 { 216 struct tipc_mon_domain *dom = peer->domain; 217 struct tipc_peer *member; 218 u32 addr; 219 int i; 220 221 if (!dom || !peer->is_up) 222 return; 223 224 /* Scan across domain members and match against monitor list */ 225 peer->applied = 0; 226 member = peer_nxt(peer); 227 for (i = 0; i < dom->member_cnt; i++) { 228 addr = dom->members[i]; 229 if (addr != member->addr) 230 return; 231 peer->applied++; 232 member = peer_nxt(member); 233 } 234 } 235 236 /* mon_update_local_domain() : update after peer addition/removal/up/down 237 */ 238 static void mon_update_local_domain(struct tipc_monitor *mon) 239 { 240 struct tipc_peer *self = mon->self; 241 struct tipc_mon_domain *cache = &mon->cache; 242 struct tipc_mon_domain *dom = self->domain; 243 struct tipc_peer *peer = self; 244 u64 prev_up_map = dom->up_map; 245 u16 member_cnt, i; 246 bool diff; 247 248 /* Update local domain size based on current size of cluster */ 249 member_cnt = dom_size(mon->peer_cnt) - 1; 250 self->applied = member_cnt; 251 252 /* Update native and cached outgoing local domain records */ 253 dom->len = dom_rec_len(dom, member_cnt); 254 diff = dom->member_cnt != member_cnt; 255 dom->member_cnt = member_cnt; 256 for (i = 0; i < member_cnt; i++) { 257 peer = peer_nxt(peer); 258 diff |= dom->members[i] != peer->addr; 259 dom->members[i] = peer->addr; 260 map_set(&dom->up_map, i, peer->is_up); 261 cache->members[i] = htonl(peer->addr); 262 } 263 diff |= dom->up_map != prev_up_map; 264 if (!diff) 265 return; 266 dom->gen = ++mon->dom_gen; 267 cache->len = htons(dom->len); 268 cache->gen = htons(dom->gen); 269 cache->member_cnt = htons(member_cnt); 270 cache->up_map = cpu_to_be64(dom->up_map); 271 mon_apply_domain(mon, self); 272 } 273 274 /* mon_update_neighbors() : update preceding neighbors of added/removed peer 275 */ 276 static void mon_update_neighbors(struct tipc_monitor *mon, 277 struct tipc_peer *peer) 278 { 279 int dz, i; 280 281 dz = dom_size(mon->peer_cnt); 282 for (i = 0; i < dz; i++) { 283 mon_apply_domain(mon, peer); 284 peer = peer_prev(peer); 285 } 286 } 287 288 /* mon_assign_roles() : reassign peer roles after a network change 289 * The monitor list is consistent at this stage; i.e., each peer is monitoring 290 * a set of domain members as matched between domain record and the monitor list 291 */ 292 static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) 293 { 294 struct tipc_peer *peer = peer_nxt(head); 295 struct tipc_peer *self = mon->self; 296 int i = 0; 297 298 for (; peer != self; peer = peer_nxt(peer)) { 299 peer->is_local = false; 300 301 /* Update domain member */ 302 if (i++ < head->applied) { 303 peer->is_head = false; 304 if (head == self) 305 peer->is_local = true; 306 continue; 307 } 308 /* Assign next domain head */ 309 if (!peer->is_up) 310 continue; 311 if (peer->is_head) 312 break; 313 head = peer; 314 head->is_head = true; 315 i = 0; 316 } 317 mon->list_gen++; 318 } 319 320 void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) 321 { 322 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 323 struct tipc_peer *self = get_self(net, bearer_id); 324 struct tipc_peer *peer, *prev, *head; 325 326 write_lock_bh(&mon->lock); 327 peer = get_peer(mon, addr); 328 if (!peer) 329 goto exit; 330 prev = peer_prev(peer); 331 list_del(&peer->list); 332 hlist_del(&peer->hash); 333 kfree(peer->domain); 334 kfree(peer); 335 mon->peer_cnt--; 336 head = peer_head(prev); 337 if (head == self) 338 mon_update_local_domain(mon); 339 mon_update_neighbors(mon, prev); 340 341 /* Revert to full-mesh monitoring if we reach threshold */ 342 if (!tipc_mon_is_active(net, mon)) { 343 list_for_each_entry(peer, &self->list, list) { 344 kfree(peer->domain); 345 peer->domain = NULL; 346 peer->applied = 0; 347 } 348 } 349 mon_assign_roles(mon, head); 350 exit: 351 write_unlock_bh(&mon->lock); 352 } 353 354 static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, 355 struct tipc_peer **peer) 356 { 357 struct tipc_peer *self = mon->self; 358 struct tipc_peer *cur, *prev, *p; 359 360 p = kzalloc(sizeof(*p), GFP_ATOMIC); 361 *peer = p; 362 if (!p) 363 return false; 364 p->addr = addr; 365 366 /* Add new peer to lookup list */ 367 INIT_LIST_HEAD(&p->list); 368 hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); 369 370 /* Sort new peer into iterator list, in ascending circular order */ 371 prev = self; 372 list_for_each_entry(cur, &self->list, list) { 373 if ((addr > prev->addr) && (addr < cur->addr)) 374 break; 375 if (((addr < cur->addr) || (addr > prev->addr)) && 376 (prev->addr > cur->addr)) 377 break; 378 prev = cur; 379 } 380 list_add_tail(&p->list, &cur->list); 381 mon->peer_cnt++; 382 mon_update_neighbors(mon, p); 383 return true; 384 } 385 386 void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) 387 { 388 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 389 struct tipc_peer *self = get_self(net, bearer_id); 390 struct tipc_peer *peer, *head; 391 392 write_lock_bh(&mon->lock); 393 peer = get_peer(mon, addr); 394 if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) 395 goto exit; 396 peer->is_up = true; 397 head = peer_head(peer); 398 if (head == self) 399 mon_update_local_domain(mon); 400 mon_assign_roles(mon, head); 401 exit: 402 write_unlock_bh(&mon->lock); 403 } 404 405 void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) 406 { 407 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 408 struct tipc_peer *self = get_self(net, bearer_id); 409 struct tipc_peer *peer, *head; 410 struct tipc_mon_domain *dom; 411 int applied; 412 413 write_lock_bh(&mon->lock); 414 peer = get_peer(mon, addr); 415 if (!peer) { 416 pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); 417 goto exit; 418 } 419 applied = peer->applied; 420 peer->applied = 0; 421 dom = peer->domain; 422 peer->domain = NULL; 423 if (peer->is_head) 424 mon_identify_lost_members(peer, dom, applied); 425 kfree(dom); 426 peer->is_up = false; 427 peer->is_head = false; 428 peer->is_local = false; 429 peer->down_cnt = 0; 430 head = peer_head(peer); 431 if (head == self) 432 mon_update_local_domain(mon); 433 mon_assign_roles(mon, head); 434 exit: 435 write_unlock_bh(&mon->lock); 436 } 437 438 /* tipc_mon_rcv - process monitor domain event message 439 */ 440 void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, 441 struct tipc_mon_state *state, int bearer_id) 442 { 443 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 444 struct tipc_mon_domain *arrv_dom = data; 445 struct tipc_mon_domain dom_bef; 446 struct tipc_mon_domain *dom; 447 struct tipc_peer *peer; 448 u16 new_member_cnt = ntohs(arrv_dom->member_cnt); 449 int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); 450 u16 new_gen = ntohs(arrv_dom->gen); 451 u16 acked_gen = ntohs(arrv_dom->ack_gen); 452 bool probing = state->probing; 453 int i, applied_bef; 454 455 state->probing = false; 456 if (!dlen) 457 return; 458 459 /* Sanity check received domain record */ 460 if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) { 461 pr_warn_ratelimited("Received illegal domain record\n"); 462 return; 463 } 464 465 /* Synch generation numbers with peer if link just came up */ 466 if (!state->synched) { 467 state->peer_gen = new_gen - 1; 468 state->acked_gen = acked_gen; 469 state->synched = true; 470 } 471 472 if (more(acked_gen, state->acked_gen)) 473 state->acked_gen = acked_gen; 474 475 /* Drop duplicate unless we are waiting for a probe response */ 476 if (!more(new_gen, state->peer_gen) && !probing) 477 return; 478 479 write_lock_bh(&mon->lock); 480 peer = get_peer(mon, addr); 481 if (!peer || !peer->is_up) 482 goto exit; 483 484 /* Peer is confirmed, stop any ongoing probing */ 485 peer->down_cnt = 0; 486 487 /* Task is done for duplicate record */ 488 if (!more(new_gen, state->peer_gen)) 489 goto exit; 490 491 state->peer_gen = new_gen; 492 493 /* Cache current domain record for later use */ 494 dom_bef.member_cnt = 0; 495 dom = peer->domain; 496 if (dom) 497 memcpy(&dom_bef, dom, dom->len); 498 499 /* Transform and store received domain record */ 500 if (!dom || (dom->len < new_dlen)) { 501 kfree(dom); 502 dom = kmalloc(new_dlen, GFP_ATOMIC); 503 peer->domain = dom; 504 if (!dom) 505 goto exit; 506 } 507 dom->len = new_dlen; 508 dom->gen = new_gen; 509 dom->member_cnt = new_member_cnt; 510 dom->up_map = be64_to_cpu(arrv_dom->up_map); 511 for (i = 0; i < new_member_cnt; i++) 512 dom->members[i] = ntohl(arrv_dom->members[i]); 513 514 /* Update peers affected by this domain record */ 515 applied_bef = peer->applied; 516 mon_apply_domain(mon, peer); 517 mon_identify_lost_members(peer, &dom_bef, applied_bef); 518 mon_assign_roles(mon, peer_head(peer)); 519 exit: 520 write_unlock_bh(&mon->lock); 521 } 522 523 void tipc_mon_prep(struct net *net, void *data, int *dlen, 524 struct tipc_mon_state *state, int bearer_id) 525 { 526 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 527 struct tipc_mon_domain *dom = data; 528 u16 gen = mon->dom_gen; 529 u16 len; 530 531 if (!tipc_mon_is_active(net, mon)) 532 return; 533 534 /* Send only a dummy record with ack if peer has acked our last sent */ 535 if (likely(state->acked_gen == gen)) { 536 len = dom_rec_len(dom, 0); 537 *dlen = len; 538 dom->len = htons(len); 539 dom->gen = htons(gen); 540 dom->ack_gen = htons(state->peer_gen); 541 dom->member_cnt = 0; 542 return; 543 } 544 /* Send the full record */ 545 read_lock_bh(&mon->lock); 546 len = ntohs(mon->cache.len); 547 *dlen = len; 548 memcpy(data, &mon->cache, len); 549 read_unlock_bh(&mon->lock); 550 dom->ack_gen = htons(state->peer_gen); 551 } 552 553 void tipc_mon_get_state(struct net *net, u32 addr, 554 struct tipc_mon_state *state, 555 int bearer_id) 556 { 557 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 558 struct tipc_peer *peer; 559 560 /* Used cached state if table has not changed */ 561 if (!state->probing && 562 (state->list_gen == mon->list_gen) && 563 (state->acked_gen == mon->dom_gen)) 564 return; 565 566 read_lock_bh(&mon->lock); 567 peer = get_peer(mon, addr); 568 if (peer) { 569 state->probing = state->acked_gen != mon->dom_gen; 570 state->probing |= peer->down_cnt; 571 state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; 572 state->monitoring = peer->is_local; 573 state->monitoring |= peer->is_head; 574 state->list_gen = mon->list_gen; 575 } 576 read_unlock_bh(&mon->lock); 577 } 578 579 static void mon_timeout(unsigned long m) 580 { 581 struct tipc_monitor *mon = (void *)m; 582 struct tipc_peer *self; 583 int best_member_cnt = dom_size(mon->peer_cnt) - 1; 584 585 write_lock_bh(&mon->lock); 586 self = mon->self; 587 if (self && (best_member_cnt != self->applied)) { 588 mon_update_local_domain(mon); 589 mon_assign_roles(mon, self); 590 } 591 write_unlock_bh(&mon->lock); 592 mod_timer(&mon->timer, jiffies + mon->timer_intv); 593 } 594 595 int tipc_mon_create(struct net *net, int bearer_id) 596 { 597 struct tipc_net *tn = tipc_net(net); 598 struct tipc_monitor *mon; 599 struct tipc_peer *self; 600 struct tipc_mon_domain *dom; 601 602 if (tn->monitors[bearer_id]) 603 return 0; 604 605 mon = kzalloc(sizeof(*mon), GFP_ATOMIC); 606 self = kzalloc(sizeof(*self), GFP_ATOMIC); 607 dom = kzalloc(sizeof(*dom), GFP_ATOMIC); 608 if (!mon || !self || !dom) { 609 kfree(mon); 610 kfree(self); 611 kfree(dom); 612 return -ENOMEM; 613 } 614 tn->monitors[bearer_id] = mon; 615 rwlock_init(&mon->lock); 616 mon->net = net; 617 mon->peer_cnt = 1; 618 mon->self = self; 619 self->domain = dom; 620 self->addr = tipc_own_addr(net); 621 self->is_up = true; 622 self->is_head = true; 623 INIT_LIST_HEAD(&self->list); 624 setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); 625 mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); 626 mod_timer(&mon->timer, jiffies + mon->timer_intv); 627 return 0; 628 } 629 630 void tipc_mon_delete(struct net *net, int bearer_id) 631 { 632 struct tipc_net *tn = tipc_net(net); 633 struct tipc_monitor *mon = tipc_monitor(net, bearer_id); 634 struct tipc_peer *self = get_self(net, bearer_id); 635 struct tipc_peer *peer, *tmp; 636 637 write_lock_bh(&mon->lock); 638 tn->monitors[bearer_id] = NULL; 639 list_for_each_entry_safe(peer, tmp, &self->list, list) { 640 list_del(&peer->list); 641 hlist_del(&peer->hash); 642 kfree(peer->domain); 643 kfree(peer); 644 } 645 mon->self = NULL; 646 write_unlock_bh(&mon->lock); 647 del_timer_sync(&mon->timer); 648 kfree(self->domain); 649 kfree(self); 650 kfree(mon); 651 } 652