// SPDX-License-Identifier: GPL-2.0
/*
 * Management Component Transport Protocol (MCTP) - routing
 * implementation.
 *
 * This is currently based on a simple routing table, with no dst cache. The
 * number of routes should stay fairly small, so the lookup cost is small.
 *
 * Copyright (c) 2021 Code Construct
 * Copyright (c) 2021 Google
 */

#include <linux/idr.h>
#include <linux/kconfig.h>
#include <linux/mctp.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

#include <kunit/static_stub.h>

#include <uapi/linux/if_arp.h>

#include <net/mctp.h>
#include <net/mctpdevice.h>
#include <net/netlink.h>
#include <net/sock.h>

#include <trace/events/mctp.h>

static const unsigned int mctp_message_maxlen = 64 * 1024;
static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ;

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev);

/* route output callbacks */
static int mctp_dst_discard(struct mctp_dst *dst, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

static struct mctp_sock *mctp_lookup_bind_details(struct net *net,
						  struct sk_buff *skb,
						  u8 type, u8 dest,
						  u8 src, bool allow_net_any)
{
	struct mctp_skb_cb *cb = mctp_cb(skb);
	struct sock *sk;
	u8 hash;

	WARN_ON_ONCE(!rcu_read_lock_held());

	hash = mctp_bind_hash(type, dest, src);

	sk_for_each_rcu(sk, &net->mctp.binds[hash]) {
		struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);

		if (!allow_net_any && msk->bind_net == MCTP_NET_ANY)
			continue;

		if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
			continue;

		if (msk->bind_type != type)
			continue;

		if (msk->bind_peer_set &&
		    !mctp_address_matches(msk->bind_peer_addr, src))
			continue;

		if (!mctp_address_matches(msk->bind_local_addr, dest))
			continue;

		return msk;
	}

	return NULL;
}

static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
{
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	u8 type;

	/* TODO: look up in skb->cb? */
	mh = mctp_hdr(skb);

	if (!skb_headlen(skb))
		return NULL;

	type = (*(u8 *)skb->data) & 0x7f;

	/* Look for binds in order of widening scope. A given destination or
	 * source address also implies matching on a particular network.
	 *
	 * - Matching destination and source
	 * - Matching destination
	 * - Matching source
	 * - Matching network, any address
	 * - Any network or address
	 */

	msk = mctp_lookup_bind_details(net, skb, type, mh->dest, mh->src,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY, mh->src,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, mh->dest, MCTP_ADDR_ANY,
				       false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
				       MCTP_ADDR_ANY, false);
	if (msk)
		return msk;
	msk = mctp_lookup_bind_details(net, skb, type, MCTP_ADDR_ANY,
				       MCTP_ADDR_ANY, true);
	if (msk)
		return msk;

	return NULL;
}
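
/* Illustrative only: the widening-scope lookup above means a bind to a
 * specific local address or network takes precedence over a catch-all.
 * A minimal sketch of the catch-all case from userspace, assuming the
 * documented AF_MCTP sockaddr layout:
 *
 *	struct sockaddr_mctp addr = {
 *		.smctp_family = AF_MCTP,
 *		.smctp_network = MCTP_NET_ANY,
 *		.smctp_addr.s_addr = MCTP_ADDR_ANY,
 *		.smctp_type = 1,
 *		.smctp_tag = MCTP_TAG_OWNER,
 *	};
 *	bind(sd, (struct sockaddr *)&addr, sizeof(addr));
 *
 * (type 1 being PLDM over MCTP). Such a socket only matches in the
 * final, allow_net_any pass. Note also that the type byte is masked
 * with 0x7f, dropping the message-integrity-check bit before
 * comparison.
 */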

/* A note on the key allocations.
 *
 * struct net->mctp.keys contains our set of currently-allocated keys for
 * MCTP tag management. The lookup tuple for these is the peer EID,
 * local EID and MCTP tag.
 *
 * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a
 * broadcast message is sent, we may receive responses from any peer EID.
 * Because the broadcast dest address is equivalent to ANY, we create
 * a key with (local = local-eid, peer = ANY). This allows a match on the
 * incoming broadcast responses from any peer.
 *
 * We perform lookups when packets are received, and when tags are allocated
 * in two scenarios:
 *
 * - when a packet is sent, with a locally-owned tag: we need to find an
 *   unused tag value for the (local, peer) EID pair.
 *
 * - when a tag is manually allocated: we need to find an unused tag value
 *   for the peer EID, but don't have a specific local EID at that stage.
 *
 * In the latter case, on successful allocation, we end up with a tag with
 * (local = ANY, peer = peer-eid).
 *
 * So, the key set allows both a local EID of ANY, as well as a peer EID of
 * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast.
 * The matching (in mctp_key_match()) during lookup allows the match value to
 * be ANY in either the dest or source addresses.
 *
 * When allocating (+ inserting) a tag, we need to check for conflicts amongst
 * the existing tag set. This requires matching either exactly on the local
 * and peer addresses, or either being ANY.
 */

static bool mctp_key_match(struct mctp_sk_key *key, unsigned int net,
			   mctp_eid_t local, mctp_eid_t peer, u8 tag)
{
	if (key->net != net)
		return false;

	if (!mctp_address_matches(key->local_addr, local))
		return false;

	if (!mctp_address_matches(key->peer_addr, peer))
		return false;

	if (key->tag != tag)
		return false;

	return true;
}
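
/* Worked example of the lookup tuple (illustrative values): sending a
 * request from local EID 8 to peer EID 9 might allocate a key of
 * (net 1, local 8, peer 9, tag 3). The TO bit is clear in key->tag,
 * since that is how the tag field will appear on the expected reply;
 * an incoming packet with src 9, dest 8 and tag 3 with TO clear then
 * matches this key in mctp_key_match().
 */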

/* returns a key (with key->lock held, and refcounted), or NULL if no such
 * key exists.
 */
static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
					   unsigned int netid, mctp_eid_t peer,
					   unsigned long *irqflags)
	__acquires(&key->lock)
{
	struct mctp_sk_key *key, *ret;
	unsigned long flags;
	struct mctp_hdr *mh;
	u8 tag;

	mh = mctp_hdr(skb);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

	ret = NULL;
	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	hlist_for_each_entry(key, &net->mctp.keys, hlist) {
		if (!mctp_key_match(key, netid, mh->dest, peer, tag))
			continue;

		spin_lock(&key->lock);
		if (key->valid) {
			refcount_inc(&key->refs);
			ret = key;
			break;
		}
		spin_unlock(&key->lock);
	}

	if (ret) {
		spin_unlock(&net->mctp.keys_lock);
		*irqflags = flags;
	} else {
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	return ret;
}

static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
					  unsigned int net,
					  mctp_eid_t local, mctp_eid_t peer,
					  u8 tag, gfp_t gfp)
{
	struct mctp_sk_key *key;

	key = kzalloc(sizeof(*key), gfp);
	if (!key)
		return NULL;

	key->net = net;
	key->peer_addr = peer;
	key->local_addr = local;
	key->tag = tag;
	key->sk = &msk->sk;
	key->valid = true;
	spin_lock_init(&key->lock);
	refcount_set(&key->refs, 1);
	sock_hold(key->sk);

	return key;
}

void mctp_key_unref(struct mctp_sk_key *key)
{
	unsigned long flags;

	if (!refcount_dec_and_test(&key->refs))
		return;

	/* even though no refs exist here, the lock allows us to stay
	 * consistent with the locking requirement of mctp_dev_release_key
	 */
	spin_lock_irqsave(&key->lock, flags);
	mctp_dev_release_key(key->dev, key);
	spin_unlock_irqrestore(&key->lock, flags);

	sock_put(key->sk);
	kfree(key);
}

static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
{
	struct net *net = sock_net(&msk->sk);
	struct mctp_sk_key *tmp;
	unsigned long flags;
	int rc = 0;

	spin_lock_irqsave(&net->mctp.keys_lock, flags);

	if (sock_flag(&msk->sk, SOCK_DEAD)) {
		rc = -EINVAL;
		goto out_unlock;
	}

	hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
		if (mctp_key_match(tmp, key->net, key->local_addr,
				   key->peer_addr, key->tag)) {
			spin_lock(&tmp->lock);
			if (tmp->valid)
				rc = -EEXIST;
			spin_unlock(&tmp->lock);
			if (rc)
				break;
		}
	}

	if (!rc) {
		refcount_inc(&key->refs);
		key->expiry = jiffies + mctp_key_lifetime;
		timer_reduce(&msk->key_expiry, key->expiry);

		hlist_add_head(&key->hlist, &net->mctp.keys);
		hlist_add_head(&key->sklist, &msk->keys);
	}

out_unlock:
	spin_unlock_irqrestore(&net->mctp.keys_lock, flags);

	return rc;
}
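
/* Expiry arithmetic, for reference: mctp_key_lifetime is 6 * CONFIG_HZ
 * jiffies, ie, six seconds regardless of the tick rate. With HZ=250
 * and jiffies at 1000 on insertion, key->expiry = 1000 + 1500 = 2500.
 * timer_reduce() only ever moves the socket's key_expiry timer
 * earlier, so it fires no later than the soonest-expiring key.
 */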

/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 */
static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net,
			       unsigned long flags, unsigned long reason)
	__releases(&key->lock)
{
	struct sk_buff *skb;

	trace_mctp_key_release(key, reason);
	skb = key->reasm_head;
	key->reasm_head = NULL;

	if (!key->manual_alloc) {
		key->reasm_dead = true;
		key->valid = false;
		mctp_dev_release_key(key->dev, key);
	}
	spin_unlock_irqrestore(&key->lock, flags);

	if (!key->manual_alloc) {
		spin_lock_irqsave(&net->mctp.keys_lock, flags);
		if (!hlist_unhashed(&key->hlist)) {
			hlist_del_init(&key->hlist);
			hlist_del_init(&key->sklist);
			mctp_key_unref(key);
		}
		spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
	}

	/* and one for the local reference */
	mctp_key_unref(key);

	kfree_skb(skb);
}

#ifdef CONFIG_MCTP_FLOWS
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
	struct mctp_flow *flow;

	flow = skb_ext_add(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	refcount_inc(&key->refs);
	flow->key = key;
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
	struct mctp_sk_key *key;
	struct mctp_flow *flow;

	flow = skb_ext_find(skb, SKB_EXT_MCTP);
	if (!flow)
		return;

	key = flow->key;

	if (key->dev) {
		WARN_ON(key->dev != dev);
		return;
	}

	mctp_dev_set_key(dev, key);
}
#else
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) {}
static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) {}
#endif

static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
{
	struct mctp_hdr *hdr = mctp_hdr(skb);
	u8 exp_seq, this_seq;

	this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
		& MCTP_HDR_SEQ_MASK;

	if (!key->reasm_head) {
		/* Since we're manipulating the shared frag_list, ensure it isn't
		 * shared with any other SKBs.
		 */
		key->reasm_head = skb_unshare(skb, GFP_ATOMIC);
		if (!key->reasm_head)
			return -ENOMEM;

		key->reasm_tailp = &(skb_shinfo(key->reasm_head)->frag_list);
		key->last_seq = this_seq;
		return 0;
	}

	exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;

	if (this_seq != exp_seq)
		return -EINVAL;

	if (key->reasm_head->len + skb->len > mctp_message_maxlen)
		return -EINVAL;

	skb->next = NULL;
	skb->sk = NULL;
	*key->reasm_tailp = skb;
	key->reasm_tailp = &skb->next;

	key->last_seq = this_seq;

	key->reasm_head->data_len += skb->len;
	key->reasm_head->len += skb->len;
	key->reasm_head->truesize += skb->truesize;

	return 0;
}
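
/* Sequence handling above, by example: the MCTP header carries a 2-bit
 * packet sequence number, so MCTP_HDR_SEQ_MASK is 0x3 and the expected
 * value wraps: after a fragment with seq 3, (3 + 1) & 0x3 == 0 is the
 * only acceptable successor; anything else returns -EINVAL and the
 * fragment is dropped by the caller.
 */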

static int mctp_dst_input(struct mctp_dst *dst, struct sk_buff *skb)
{
	struct mctp_sk_key *key, *any_key = NULL;
	struct net *net = dev_net(skb->dev);
	struct mctp_sock *msk;
	struct mctp_hdr *mh;
	unsigned int netid;
	unsigned long f;
	u8 tag, flags;
	int rc;

	msk = NULL;
	rc = -EINVAL;

	/* We may be receiving a locally-routed packet; drop source sk
	 * accounting.
	 *
	 * From here, we will either queue the skb - either to a frag_queue, or
	 * to a receiving socket. When that succeeds, we clear the skb pointer;
	 * a non-NULL skb on exit will be otherwise unowned, and hence
	 * kfree_skb()-ed.
	 */
	skb_orphan(skb);

	if (skb->pkt_type == PACKET_OUTGOING)
		skb->pkt_type = PACKET_LOOPBACK;

	/* ensure we have enough data for a header and a type */
	if (skb->len < sizeof(struct mctp_hdr) + 1)
		goto out;

	/* grab header, advance data ptr */
	mh = mctp_hdr(skb);
	netid = mctp_cb(skb)->net;
	skb_pull(skb, sizeof(struct mctp_hdr));

	if (mh->ver != 1)
		goto out;

	flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
	tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
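
	/* Layout of flags_seq_tag, for reference (per DSP0236): bit 7 SOM,
	 * bit 6 EOM, bits 5-4 packet sequence, bit 3 TO (tag owner),
	 * bits 2-0 the 3-bit tag. A single-packet request with tag 5 thus
	 * carries SOM | EOM | TO | 5 == 0xcd.
	 */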

	rcu_read_lock();

	/* lookup socket / reasm context, exactly matching (src,dest,tag).
	 * we hold a ref on the key, and key->lock held.
	 */
	key = mctp_lookup_key(net, skb, netid, mh->src, &f);

	if (flags & MCTP_HDR_FLAG_SOM) {
		if (key) {
			msk = container_of(key->sk, struct mctp_sock, sk);
		} else {
			/* first response to a broadcast? do a more general
			 * key lookup to find the socket, but don't use this
			 * key for reassembly - we'll create a more specific
			 * one for future packets if required (ie, !EOM).
			 *
			 * this lookup requires key->peer to be MCTP_ADDR_ANY,
			 * it doesn't match just any key->peer.
			 */
			any_key = mctp_lookup_key(net, skb, netid,
						  MCTP_ADDR_ANY, &f);
			if (any_key) {
				msk = container_of(any_key->sk,
						   struct mctp_sock, sk);
				spin_unlock_irqrestore(&any_key->lock, f);
			}
		}

		if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
			msk = mctp_lookup_bind(net, skb);

		if (!msk) {
			rc = -ENOENT;
			goto out_unlock;
		}

		/* single-packet message? deliver to socket, clean up any
		 * pending key.
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(&msk->sk, skb);
			if (!rc)
				skb = NULL;
			if (key) {
				/* we've hit a pending reassembly; not much we
				 * can do but drop it
				 */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_REPLIED);
				key = NULL;
			}
			goto out_unlock;
		}

		/* broadcast response or a bind() - create a key for further
		 * packets for this message
		 */
		if (!key) {
			key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
					     tag, GFP_ATOMIC);
			if (!key) {
				rc = -ENOMEM;
				goto out_unlock;
			}

			/* we can queue without the key lock here, as the
			 * key isn't observable yet
			 */
			mctp_frag_queue(key, skb);

			/* if the key_add fails, we've raced with another
			 * SOM packet with the same src, dest and tag. There's
			 * no way to distinguish future packets, so all we
			 * can do is drop; we'll free the skb on exit from
			 * this function.
			 */
			rc = mctp_key_add(key, msk);
			if (!rc) {
				trace_mctp_key_acquire(key);
				skb = NULL;
			}

			/* we don't need to release key->lock on exit, so
			 * clean up here and suppress the unlock via
			 * setting to NULL
			 */
			mctp_key_unref(key);
			key = NULL;

		} else {
			if (key->reasm_head || key->reasm_dead) {
				/* duplicate start? drop everything */
				__mctp_key_done_in(key, net, f,
						   MCTP_TRACE_KEY_INVALIDATED);
				rc = -EEXIST;
				key = NULL;
			} else {
				rc = mctp_frag_queue(key, skb);
				if (!rc)
					skb = NULL;
			}
		}

	} else if (key) {
		/* this packet continues a previous message; reassemble
		 * using the message-specific key
		 */

		/* we need to be continuing an existing reassembly... */
		if (!key->reasm_head)
			rc = -EINVAL;
		else
			rc = mctp_frag_queue(key, skb);

		if (rc)
			goto out_unlock;

		/* we've queued; the queue owns the skb now */
		skb = NULL;

		/* end of message? deliver to socket, and we're done with
		 * the reassembly/response key
		 */
		if (flags & MCTP_HDR_FLAG_EOM) {
			rc = sock_queue_rcv_skb(key->sk, key->reasm_head);
			if (!rc)
				key->reasm_head = NULL;
			__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
			key = NULL;
		}

	} else {
		/* not a start, no matching key */
		rc = -ENOENT;
	}

out_unlock:
	rcu_read_unlock();
	if (key) {
		spin_unlock_irqrestore(&key->lock, f);
		mctp_key_unref(key);
	}
	if (any_key)
		mctp_key_unref(any_key);
out:
	kfree_skb(skb);
	return rc;
}

static int mctp_dst_output(struct mctp_dst *dst, struct sk_buff *skb)
{
	char daddr_buf[MAX_ADDR_LEN];
	char *daddr = NULL;
	int rc;

	skb->protocol = htons(ETH_P_MCTP);
	skb->pkt_type = PACKET_OUTGOING;

	if (skb->len > dst->mtu) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* direct route; use the hwaddr we stashed in sendmsg */
	if (dst->halen) {
		if (dst->halen != skb->dev->addr_len) {
			/* sanity check, sendmsg should have already caught this */
			kfree_skb(skb);
			return -EMSGSIZE;
		}
		daddr = dst->haddr;
	} else {
		/* If lookup fails let the device handle daddr==NULL */
		if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0)
			daddr = daddr_buf;
	}

	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
			     daddr, skb->dev->dev_addr, skb->len);
	if (rc < 0) {
		kfree_skb(skb);
		return -EHOSTUNREACH;
	}

	mctp_flow_prepare_output(skb, dst->dev);

	rc = dev_queue_xmit(skb);
	if (rc)
		rc = net_xmit_errno(rc);

	return rc;
}

/* route alloc/release */
static void mctp_route_release(struct mctp_route *rt)
{
	if (refcount_dec_and_test(&rt->refs)) {
		if (rt->dst_type == MCTP_ROUTE_DIRECT)
			mctp_dev_put(rt->dev);
		kfree_rcu(rt, rcu);
	}
}

/* returns a route with the refcount at 1 */
static struct mctp_route *mctp_route_alloc(void)
{
	struct mctp_route *rt;

	rt = kzalloc(sizeof(*rt), GFP_KERNEL);
	if (!rt)
		return NULL;

	INIT_LIST_HEAD(&rt->list);
	refcount_set(&rt->refs, 1);
	rt->output = mctp_dst_discard;

	return rt;
}

unsigned int mctp_default_net(struct net *net)
{
	return READ_ONCE(net->mctp.default_net);
}

int mctp_default_net_set(struct net *net, unsigned int index)
{
	if (index == 0)
		return -EINVAL;
	WRITE_ONCE(net->mctp.default_net, index);
	return 0;
}

/* tag management */
static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
			     struct mctp_sock *msk)
{
	struct netns_mctp *mns = &net->mctp;

	lockdep_assert_held(&mns->keys_lock);

	key->expiry = jiffies + mctp_key_lifetime;
	timer_reduce(&msk->key_expiry, key->expiry);

	/* we hold the net->key_lock here, allowing updates to both
	 * the net and sk lists
	 */
	hlist_add_head_rcu(&key->hlist, &mns->keys);
	hlist_add_head_rcu(&key->sklist, &msk->keys);
	refcount_inc(&key->refs);
}
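
/* Allocation walk below, by example: if live keys for the same
 * (net, local, peer) tuple hold tags 0, 1 and 3, the loop clears those
 * bits, leaving tagbits == 0xf4; __ffs() then selects tag 2, the
 * lowest free value. If all eight bits are cleared, the caller sees
 * -EBUSY.
 */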

/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
					 unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		if (!tagbits)
			break;
	}

	if (tagbits) {
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	return key;
}

static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk,
						    unsigned int netid,
						    mctp_eid_t daddr,
						    u8 req_tag, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;

	req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER);
	key = NULL;

	spin_lock_irqsave(&mns->keys_lock, flags);

	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		if (tmp->net != netid)
			continue;

		if (tmp->tag != req_tag)
			continue;

		if (!mctp_address_matches(tmp->peer_addr, daddr))
			continue;

		if (!tmp->manual_alloc)
			continue;

		spin_lock(&tmp->lock);
		if (tmp->valid) {
			key = tmp;
			refcount_inc(&key->refs);
			spin_unlock(&tmp->lock);
			break;
		}
		spin_unlock(&tmp->lock);
	}
	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!key)
		return ERR_PTR(-ENOENT);

	if (tagp)
		*tagp = key->tag;

	return key;
}
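
/* For reference: the manually-allocated keys matched above are created
 * from userspace via the SIOCMCTPALLOCTAG ioctl (documented in
 * Documentation/networking/mctp.rst). A minimal sketch:
 *
 *	struct mctp_ioc_tag_ctl ctl = { .peer_addr = 9 };
 *
 *	ioctl(sd, SIOCMCTPALLOCTAG, &ctl);
 *
 * On return, ctl.tag holds the allocated tag value with MCTP_TAG_OWNER
 * and MCTP_TAG_PREALLOC set, suitable for passing back as smctp_tag on
 * subsequent sendmsg() calls, which then resolve the key via
 * mctp_lookup_prealloc_tag() rather than allocating per-message.
 */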

/* routing lookups */
static unsigned int mctp_route_netid(struct mctp_route *rt)
{
	return rt->dst_type == MCTP_ROUTE_DIRECT ?
		READ_ONCE(rt->dev->net) : rt->gateway.net;
}

static bool mctp_rt_match_eid(struct mctp_route *rt,
			      unsigned int net, mctp_eid_t eid)
{
	return mctp_route_netid(rt) == net &&
		rt->min <= eid && rt->max >= eid;
}

/* compares match, used for duplicate prevention */
static bool mctp_rt_compare_exact(struct mctp_route *rt1,
				  struct mctp_route *rt2)
{
	ASSERT_RTNL();
	return mctp_route_netid(rt1) == mctp_route_netid(rt2) &&
		rt1->min == rt2->min &&
		rt1->max == rt2->max;
}

/* must only be called on a direct route, as the final output hop */
static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid,
				unsigned int mtu, struct mctp_route *route)
{
	mctp_dev_hold(route->dev);
	dst->nexthop = eid;
	dst->dev = route->dev;
	dst->mtu = READ_ONCE(dst->dev->dev->mtu);
	if (mtu)
		dst->mtu = min(dst->mtu, mtu);
	dst->halen = 0;
	dst->output = route->output;
}

int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex,
			  unsigned char halen, const unsigned char *haddr)
{
	struct net_device *netdev;
	struct mctp_dev *dev;
	int rc = -ENOENT;

	if (halen > sizeof(dst->haddr))
		return -EINVAL;

	rcu_read_lock();

	netdev = dev_get_by_index_rcu(net, ifindex);
	if (!netdev)
		goto out_unlock;

	if (netdev->addr_len != halen) {
		rc = -EINVAL;
		goto out_unlock;
	}

	dev = __mctp_dev_get(netdev);
	if (!dev)
		goto out_unlock;

	dst->dev = dev;
	dst->mtu = READ_ONCE(netdev->mtu);
	dst->halen = halen;
	dst->output = mctp_dst_output;
	dst->nexthop = 0;
	memcpy(dst->haddr, haddr, halen);

	rc = 0;

out_unlock:
	rcu_read_unlock();
	return rc;
}

void mctp_dst_release(struct mctp_dst *dst)
{
	mctp_dev_put(dst->dev);
}

static struct mctp_route *mctp_route_lookup_single(struct net *net,
						   unsigned int dnet,
						   mctp_eid_t daddr)
{
	struct mctp_route *rt;

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (mctp_rt_match_eid(rt, dnet, daddr))
			return rt;
	}

	return NULL;
}

/* populates *dst on successful lookup, if set */
int mctp_route_lookup(struct net *net, unsigned int dnet,
		      mctp_eid_t daddr, struct mctp_dst *dst)
{
	const unsigned int max_depth = 32;
	unsigned int depth, mtu = 0;
	int rc = -EHOSTUNREACH;

	rcu_read_lock();

	for (depth = 0; depth < max_depth; depth++) {
		struct mctp_route *rt;

		rt = mctp_route_lookup_single(net, dnet, daddr);
		if (!rt)
			break;

		/* clamp mtu to the smallest in the path, allowing 0
		 * to specify no restrictions
		 */
		if (mtu && rt->mtu)
			mtu = min(mtu, rt->mtu);
		else
			mtu = mtu ?: rt->mtu;

		if (rt->dst_type == MCTP_ROUTE_DIRECT) {
			if (dst)
				mctp_dst_from_route(dst, daddr, mtu, rt);
			rc = 0;
			break;

		} else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
			daddr = rt->gateway.eid;
		}
	}

	rcu_read_unlock();

	return rc;
}
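
/* Gateway resolution, by example: given a gateway route for EIDs 32-47
 * via EID 9, plus a direct route for EID 9, a lookup of EID 40 takes
 * two iterations: the first rewrites daddr to 9, the second hits the
 * direct route and fills *dst with nexthop 9 (the packet's MCTP header
 * keeps the original destination EID). max_depth bounds chained
 * gateways, and breaks routing loops.
 */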

static int mctp_route_lookup_null(struct net *net, struct net_device *dev,
				  struct mctp_dst *dst)
{
	int rc = -EHOSTUNREACH;
	struct mctp_route *rt;

	rcu_read_lock();

	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL)
			continue;

		if (rt->dev->dev != dev)
			continue;

		mctp_dst_from_route(dst, 0, 0, rt);
		rc = 0;
		break;
	}

	rcu_read_unlock();

	return rc;
}

static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb,
				  unsigned int mtu, u8 tag)
{
	const unsigned int hlen = sizeof(struct mctp_hdr);
	struct mctp_hdr *hdr, *hdr2;
	unsigned int pos, size, headroom;
	struct sk_buff *skb2;
	int rc;
	u8 seq;

	hdr = mctp_hdr(skb);
	seq = 0;
	rc = 0;

	if (mtu < hlen + 1) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* keep same headroom as the original skb */
	headroom = skb_headroom(skb);

	/* we've got the header */
	skb_pull(skb, hlen);

	for (pos = 0; pos < skb->len;) {
		/* size of message payload */
		size = min(mtu - hlen, skb->len - pos);

		skb2 = alloc_skb(headroom + hlen + size, GFP_KERNEL);
		if (!skb2) {
			rc = -ENOMEM;
			break;
		}

		/* generic skb copy */
		skb2->protocol = skb->protocol;
		skb2->priority = skb->priority;
		skb2->dev = skb->dev;
		memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/* establish packet */
		skb_reserve(skb2, headroom);
		skb_reset_network_header(skb2);
		skb_put(skb2, hlen + size);
		skb2->transport_header = skb2->network_header + hlen;

		/* copy header fields, calculate SOM/EOM flags & seq */
		hdr2 = mctp_hdr(skb2);
		hdr2->ver = hdr->ver;
		hdr2->dest = hdr->dest;
		hdr2->src = hdr->src;
		hdr2->flags_seq_tag = tag &
			(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);

		if (pos == 0)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;

		if (pos + size == skb->len)
			hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;

		hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;

		/* copy message payload */
		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);

		/* we need to copy the extensions, for MCTP flow data */
		skb_ext_copy(skb2, skb);

		/* do route */
		rc = dst->output(dst, skb2);
		if (rc)
			break;

		seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
		pos += size;
	}

	consume_skb(skb);
	return rc;
}
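
/* Fragmentation arithmetic, for reference: with a 68-byte MTU and the
 * 4-byte MCTP header, each fragment carries at most 64 payload bytes.
 * A 150-byte message therefore becomes three packets with payloads of
 * 64, 64 and 22 bytes, SOM set on the first, EOM on the last, and
 * sequence numbers 0, 1, 2.
 */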

int mctp_local_output(struct sock *sk, struct mctp_dst *dst,
		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
{
	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
	struct mctp_sk_key *key;
	struct mctp_hdr *hdr;
	unsigned long flags;
	unsigned int netid;
	unsigned int mtu;
	mctp_eid_t saddr;
	int rc;
	u8 tag;

	KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr,
				   req_tag);

	rc = -ENODEV;

	spin_lock_irqsave(&dst->dev->addrs_lock, flags);
	if (dst->dev->num_addrs == 0) {
		rc = -EHOSTUNREACH;
	} else {
		/* use the outbound interface's first address as our source */
		saddr = dst->dev->addrs[0];
		rc = 0;
	}
	spin_unlock_irqrestore(&dst->dev->addrs_lock, flags);
	netid = READ_ONCE(dst->dev->net);

	if (rc)
		goto out_release;

	if (req_tag & MCTP_TAG_OWNER) {
		if (req_tag & MCTP_TAG_PREALLOC)
			key = mctp_lookup_prealloc_tag(msk, netid, daddr,
						       req_tag, &tag);
		else
			key = mctp_alloc_local_tag(msk, netid, saddr, daddr,
						   false, &tag);

		if (IS_ERR(key)) {
			rc = PTR_ERR(key);
			goto out_release;
		}
		mctp_skb_set_flow(skb, key);
		/* done with the key in this scope */
		mctp_key_unref(key);
		tag |= MCTP_HDR_FLAG_TO;
	} else {
		key = NULL;
		tag = req_tag & MCTP_TAG_MASK;
	}

	skb->pkt_type = PACKET_OUTGOING;
	skb->protocol = htons(ETH_P_MCTP);
	skb->priority = 0;
	skb_reset_transport_header(skb);
	skb_push(skb, sizeof(struct mctp_hdr));
	skb_reset_network_header(skb);
	skb->dev = dst->dev->dev;

	/* set up common header fields */
	hdr = mctp_hdr(skb);
	hdr->ver = 1;
	hdr->dest = daddr;
	hdr->src = saddr;

	mtu = dst->mtu;

	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
			MCTP_HDR_FLAG_EOM | tag;
		rc = dst->output(dst, skb);
	} else {
		rc = mctp_do_fragment_route(dst, skb, mtu, tag);
	}

	/* route output functions consume the skb, even on error */
	skb = NULL;

out_release:
	kfree_skb(skb);
	return rc;
}

/* route management */

/* mctp_route_add(): Add the provided route, previously allocated via
 * mctp_route_alloc(). On success, takes ownership of @rt, which includes a
 * hold on rt->dev for usage in the route table. On failure a caller will want
 * to mctp_route_release().
 *
 * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max,
 * rt->mtu and either rt->dev (with a reference held appropriately) or
 * rt->gateway. Other fields will be populated.
 */
static int mctp_route_add(struct net *net, struct mctp_route *rt)
{
	struct mctp_route *ert;

	if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max))
		return -EINVAL;

	if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev)
		return -EINVAL;

	if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid)
		return -EINVAL;

	switch (rt->type) {
	case RTN_LOCAL:
		rt->output = mctp_dst_input;
		break;
	case RTN_UNICAST:
		rt->output = mctp_dst_output;
		break;
	default:
		return -EINVAL;
	}

	ASSERT_RTNL();

	/* Prevent duplicate identical routes. */
	list_for_each_entry(ert, &net->mctp.routes, list) {
		if (mctp_rt_compare_exact(rt, ert))
			return -EEXIST;
	}

	list_add_rcu(&rt->list, &net->mctp.routes);

	return 0;
}
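
/* Range semantics, for reference: a route covers an inclusive EID
 * range, with min == max for a single endpoint. Over netlink the
 * extent is expressed as a count in rtm_dst_len rather than a prefix
 * length, so RTA_DST 32 with rtm_dst_len 15 gives min 32, max 47:
 * sixteen EIDs.
 */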

static int mctp_route_remove(struct net *net, unsigned int netid,
			     mctp_eid_t daddr_start, unsigned int daddr_extent,
			     unsigned char type)
{
	struct mctp_route *rt, *tmp;
	mctp_eid_t daddr_end;
	bool dropped;

	if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
		return -EINVAL;

	daddr_end = daddr_start + daddr_extent;
	dropped = false;

	ASSERT_RTNL();

	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (mctp_route_netid(rt) == netid &&
		    rt->min == daddr_start && rt->max == daddr_end &&
		    rt->type == type) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
			dropped = true;
		}
	}

	return dropped ? 0 : -ENOENT;
}

int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	struct mctp_route *rt;
	int rc;

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rt->min = addr;
	rt->max = addr;
	rt->dst_type = MCTP_ROUTE_DIRECT;
	rt->dev = mdev;
	rt->type = RTN_LOCAL;

	mctp_dev_hold(rt->dev);

	rc = mctp_route_add(dev_net(mdev->dev), rt);
	if (rc)
		mctp_route_release(rt);

	return rc;
}

int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
{
	return mctp_route_remove(dev_net(mdev->dev), mdev->net,
				 addr, 0, RTN_LOCAL);
}

/* removes all entries for a given device */
void mctp_route_remove_dev(struct mctp_dev *mdev)
{
	struct net *net = dev_net(mdev->dev);
	struct mctp_route *rt, *tmp;

	ASSERT_RTNL();
	list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
		if (rt->dst_type == MCTP_ROUTE_DIRECT && rt->dev == mdev) {
			list_del_rcu(&rt->list);
			/* TODO: immediate RTM_DELROUTE */
			mctp_route_release(rt);
		}
	}
}

/* Incoming packet-handling */

static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
				struct packet_type *pt,
				struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);
	struct mctp_dev *mdev;
	struct mctp_skb_cb *cb;
	struct mctp_dst dst;
	struct mctp_hdr *mh;
	int rc;

	rcu_read_lock();
	mdev = __mctp_dev_get(dev);
	rcu_read_unlock();
	if (!mdev) {
		/* basic non-data sanity checks */
		goto err_drop;
	}

	if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
		goto err_drop;

	skb_reset_transport_header(skb);
	skb_reset_network_header(skb);

	/* We have enough for a header; decode and route */
	mh = mctp_hdr(skb);
	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
		goto err_drop;

	/* source must be valid unicast or null; drop reserved ranges and
	 * broadcast
	 */
	if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src)))
		goto err_drop;

	/* dest address: as above, but allow broadcast */
	if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
	      mctp_address_broadcast(mh->dest)))
		goto err_drop;

	/* MCTP drivers must populate halen/haddr */
	if (dev->type == ARPHRD_MCTP) {
		cb = mctp_cb(skb);
	} else {
		cb = __mctp_cb(skb);
		cb->halen = 0;
	}
	cb->net = READ_ONCE(mdev->net);
	cb->ifindex = dev->ifindex;

	rc = mctp_route_lookup(net, cb->net, mh->dest, &dst);

	/* NULL EID, but addressed to our physical address */
	if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST)
		rc = mctp_route_lookup_null(net, dev, &dst);

	if (rc)
		goto err_drop;

	dst.output(&dst, skb);
	mctp_dst_release(&dst);
	mctp_dev_put(mdev);

	return NET_RX_SUCCESS;

err_drop:
	kfree_skb(skb);
	mctp_dev_put(mdev);
	return NET_RX_DROP;
}
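
/* Receive-path note: the NULL-EID fallback above supports endpoint
 * enumeration, where a bus owner addresses us purely by physical
 * address with a zero destination EID (eg, a Set Endpoint ID command)
 * before we have been assigned an address; such packets are steered to
 * the local route for the receiving device.
 */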

static struct packet_type mctp_packet_type = {
	.type = cpu_to_be16(ETH_P_MCTP),
	.func = mctp_pkttype_receive,
};

/* netlink interface */

static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
	[RTA_DST] = { .type = NLA_U8 },
	[RTA_METRICS] = { .type = NLA_NESTED },
	[RTA_OIF] = { .type = NLA_U32 },
	[RTA_GATEWAY] = NLA_POLICY_EXACT_LEN(sizeof(struct mctp_fq_addr)),
};

static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = {
	[RTAX_MTU] = { .type = NLA_U32 },
};

/* base parsing; common to both _lookup and _populate variants.
 *
 * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate
 * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev.
 */
static int mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack,
				     struct nlattr **tb, struct rtmsg **rtm,
				     struct mctp_dev **mdev,
				     struct mctp_fq_addr *gatewayp,
				     mctp_eid_t *daddr_start)
{
	struct mctp_fq_addr *gateway = NULL;
	unsigned int ifindex = 0;
	struct net_device *dev;
	int rc;

	rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
			 rta_mctp_policy, extack);
	if (rc < 0) {
		NL_SET_ERR_MSG(extack, "incorrect format");
		return rc;
	}

	if (!tb[RTA_DST]) {
		NL_SET_ERR_MSG(extack, "dst EID missing");
		return -EINVAL;
	}
	*daddr_start = nla_get_u8(tb[RTA_DST]);

	if (tb[RTA_OIF])
		ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_GATEWAY])
		gateway = nla_data(tb[RTA_GATEWAY]);

	if (ifindex && gateway) {
		NL_SET_ERR_MSG(extack,
			       "cannot specify both ifindex and gateway");
		return -EINVAL;

	} else if (ifindex) {
		dev = __dev_get_by_index(net, ifindex);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "bad ifindex");
			return -ENODEV;
		}
		*mdev = mctp_dev_get_rtnl(dev);
		if (!*mdev)
			return -ENODEV;
		gatewayp->eid = 0;

	} else if (gateway) {
		if (!mctp_address_unicast(gateway->eid)) {
			NL_SET_ERR_MSG(extack, "bad gateway");
			return -EINVAL;
		}

		gatewayp->eid = gateway->eid;
		gatewayp->net = gateway->net != MCTP_NET_ANY ?
			gateway->net :
			READ_ONCE(net->mctp.default_net);
		*mdev = NULL;

	} else {
		NL_SET_ERR_MSG(extack, "no route output provided");
		return -EINVAL;
	}

	*rtm = nlmsg_data(nlh);
	if ((*rtm)->rtm_family != AF_MCTP) {
		NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
		return -EINVAL;
	}

	if ((*rtm)->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
		return -EINVAL;
	}

	return 0;
}

/* Route parsing for lookup operations; we only need the "route target"
 * components (ie, network and dest-EID range).
 */
static int mctp_route_nlparse_lookup(struct net *net, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack,
				     unsigned char *type, unsigned int *netid,
				     mctp_eid_t *daddr_start,
				     unsigned int *daddr_extent)
{
	struct nlattr *tb[RTA_MAX + 1];
	struct mctp_fq_addr gw;
	struct mctp_dev *mdev;
	struct rtmsg *rtm;
	int rc;

	rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
				       &mdev, &gw, daddr_start);
	if (rc)
		return rc;

	if (mdev) {
		*netid = mdev->net;
	} else if (gw.eid) {
		*netid = gw.net;
	} else {
		/* bug: _nlparse_common should not allow this */
		return -1;
	}

	*type = rtm->rtm_type;
	*daddr_extent = rtm->rtm_dst_len;

	return 0;
}
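
/* Message shape, for reference: an RTM_NEWROUTE request carries
 * RTA_DST (the first EID of the range) with rtm_dst_len as the extent;
 * exactly one of RTA_OIF (direct route) or RTA_GATEWAY (a struct
 * mctp_fq_addr holding {net, eid}) selects the output, and an optional
 * RTA_METRICS/RTAX_MTU nest clamps the route MTU.
 */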

/* Full route parse for RTM_NEWROUTE: populate @rt. On success,
 * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference
 * to that dev.
 */
static int mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh,
				       struct netlink_ext_ack *extack,
				       struct mctp_route *rt)
{
	struct nlattr *tbx[RTAX_MAX + 1];
	struct nlattr *tb[RTA_MAX + 1];
	unsigned int daddr_extent;
	struct mctp_fq_addr gw;
	mctp_eid_t daddr_start;
	struct mctp_dev *dev;
	struct rtmsg *rtm;
	u32 mtu = 0;
	int rc;

	rc = mctp_route_nlparse_common(net, nlh, extack, tb, &rtm,
				       &dev, &gw, &daddr_start);
	if (rc)
		return rc;

	daddr_extent = rtm->rtm_dst_len;

	if (daddr_extent > 0xff || daddr_extent + daddr_start >= 255) {
		NL_SET_ERR_MSG(extack, "invalid eid range");
		return -EINVAL;
	}

	if (tb[RTA_METRICS]) {
		rc = nla_parse_nested(tbx, RTAX_MAX, tb[RTA_METRICS],
				      rta_metrics_policy, NULL);
		if (rc < 0) {
			NL_SET_ERR_MSG(extack, "incorrect RTA_METRICS format");
			return rc;
		}
		if (tbx[RTAX_MTU])
			mtu = nla_get_u32(tbx[RTAX_MTU]);
	}

	rt->type = rtm->rtm_type;
	rt->min = daddr_start;
	rt->max = daddr_start + daddr_extent;
	rt->mtu = mtu;
	if (gw.eid) {
		rt->dst_type = MCTP_ROUTE_GATEWAY;
		rt->gateway.eid = gw.eid;
		rt->gateway.net = gw.net;
	} else {
		rt->dst_type = MCTP_ROUTE_DIRECT;
		rt->dev = dev;
		mctp_dev_hold(rt->dev);
	}

	return 0;
}

static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int rc;

	rt = mctp_route_alloc();
	if (!rt)
		return -ENOMEM;

	rc = mctp_route_nlparse_populate(net, nlh, extack, rt);
	if (rc < 0)
		goto err_free;

	if (rt->dst_type == MCTP_ROUTE_DIRECT &&
	    rt->dev->dev->flags & IFF_LOOPBACK) {
		NL_SET_ERR_MSG(extack, "no routes to loopback");
		rc = -EINVAL;
		goto err_free;
	}

	rc = mctp_route_add(net, rt);
	if (!rc)
		return 0;

err_free:
	mctp_route_release(rt);
	return rc;
}

static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	unsigned int netid, daddr_extent;
	unsigned char type = RTN_UNSPEC;
	mctp_eid_t daddr_start;
	int rc;

	rc = mctp_route_nlparse_lookup(net, nlh, extack, &type, &netid,
				       &daddr_start, &daddr_extent);
	if (rc < 0)
		return rc;

	/* we only have unicast routes */
	if (type != RTN_UNICAST)
		return -EINVAL;

	rc = mctp_route_remove(net, netid, daddr_start, daddr_extent, type);
	return rc;
}

static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
			    u32 portid, u32 seq, int event, unsigned int flags)
{
	struct nlmsghdr *nlh;
	struct rtmsg *hdr;
	void *metrics;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->rtm_family = AF_MCTP;

	/* we use the _len fields as a number of EIDs, rather than
	 * a number of bits in the address
	 */
	hdr->rtm_dst_len = rt->max - rt->min;
	hdr->rtm_src_len = 0;
	hdr->rtm_tos = 0;
	hdr->rtm_table = RT_TABLE_DEFAULT;
	hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
	hdr->rtm_type = rt->type;

	if (nla_put_u8(skb, RTA_DST, rt->min))
		goto cancel;

	metrics = nla_nest_start_noflag(skb, RTA_METRICS);
	if (!metrics)
		goto cancel;

	if (rt->mtu) {
		if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
			goto cancel;
	}

	nla_nest_end(skb, metrics);

	if (rt->dst_type == MCTP_ROUTE_DIRECT) {
		hdr->rtm_scope = RT_SCOPE_LINK;
		if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
			goto cancel;
	} else if (rt->dst_type == MCTP_ROUTE_GATEWAY) {
		hdr->rtm_scope = RT_SCOPE_UNIVERSE;
		if (nla_put(skb, RTA_GATEWAY,
			    sizeof(rt->gateway), &rt->gateway))
			goto cancel;
	}

	nlmsg_end(skb, nlh);

	return 0;

cancel:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct mctp_route *rt;
	int s_idx, idx;

	/* TODO: allow filtering on route data, possibly under
	 * cb->strict_check
	 */

	/* TODO: change to struct overlay */
	s_idx = cb->args[0];
	idx = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
		if (idx++ < s_idx)
			continue;
		if (mctp_fill_rtinfo(skb, rt,
				     NETLINK_CB(cb->skb).portid,
				     cb->nlh->nlmsg_seq,
				     RTM_NEWROUTE, NLM_F_MULTI) < 0)
			break;
	}

	rcu_read_unlock();
	cb->args[0] = idx;

	return skb->len;
}

/* net namespace implementation */
static int __net_init mctp_routes_net_init(struct net *net)
{
	struct netns_mctp *ns = &net->mctp;

	INIT_LIST_HEAD(&ns->routes);
	hash_init(ns->binds);
	mutex_init(&ns->bind_lock);
	INIT_HLIST_HEAD(&ns->keys);
	spin_lock_init(&ns->keys_lock);
	WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
	return 0;
}

static void __net_exit mctp_routes_net_exit(struct net *net)
{
	struct mctp_route *rt;

	rcu_read_lock();
	list_for_each_entry_rcu(rt, &net->mctp.routes, list)
		mctp_route_release(rt);
	rcu_read_unlock();
}

static struct pernet_operations mctp_net_ops = {
	.init = mctp_routes_net_init,
	.exit = mctp_routes_net_exit,
};

static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = {
	{THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0},
	{THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0},
};

int __init mctp_routes_init(void)
{
	int err;

	dev_add_pack(&mctp_packet_type);

	err = register_pernet_subsys(&mctp_net_ops);
	if (err)
		goto err_pernet;

	err = rtnl_register_many(mctp_route_rtnl_msg_handlers);
	if (err)
		goto err_rtnl;

	return 0;

err_rtnl:
	unregister_pernet_subsys(&mctp_net_ops);
err_pernet:
	dev_remove_pack(&mctp_packet_type);
	return err;
}

void mctp_routes_exit(void)
{
	rtnl_unregister_many(mctp_route_rtnl_msg_handlers);
	unregister_pernet_subsys(&mctp_net_ops);
	dev_remove_pack(&mctp_packet_type);
}

#if IS_ENABLED(CONFIG_MCTP_TEST)
#include "test/route-test.c"
#endif