1 /* 2 * net/tipc/socket.c: TIPC socket API 3 * 4 * Copyright (c) 2001-2007, 2012-2015, Ericsson AB 5 * Copyright (c) 2004-2008, 2010-2013, Wind River Systems 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the names of the copyright holders nor the names of its 17 * contributors may be used to endorse or promote products derived from 18 * this software without specific prior written permission. 19 * 20 * Alternatively, this software may be distributed under the terms of the 21 * GNU General Public License ("GPL") version 2 as published by the Free 22 * Software Foundation. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 
35 */ 36 37 #include <linux/rhashtable.h> 38 #include "core.h" 39 #include "name_table.h" 40 #include "node.h" 41 #include "link.h" 42 #include "name_distr.h" 43 #include "socket.h" 44 #include "bcast.h" 45 #include "netlink.h" 46 47 #define SS_LISTENING -1 /* socket is listening */ 48 #define SS_READY -2 /* socket is connectionless */ 49 50 #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ 51 #define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ 52 #define TIPC_FWD_MSG 1 53 #define TIPC_CONN_OK 0 54 #define TIPC_CONN_PROBING 1 55 #define TIPC_MAX_PORT 0xffffffff 56 #define TIPC_MIN_PORT 1 57 58 /** 59 * struct tipc_sock - TIPC socket structure 60 * @sk: socket - interacts with 'port' and with user via the socket API 61 * @connected: non-zero if port is currently connected to a peer port 62 * @conn_type: TIPC type used when connection was established 63 * @conn_instance: TIPC instance used when connection was established 64 * @published: non-zero if port has one or more associated names 65 * @max_pkt: maximum packet size "hint" used when building messages sent by port 66 * @portid: unique port identity in TIPC socket hash table 67 * @phdr: preformatted message header used when sending messages 68 * @port_list: adjacent ports in TIPC's global list of ports 69 * @publications: list of publications for port 70 * @pub_count: total # of publications port has made during its lifetime 71 * @probing_state: 72 * @probing_intv: 73 * @conn_timeout: the time we can wait for an unresponded setup request 74 * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue 75 * @link_cong: non-zero if owner must sleep because of link congestion 76 * @sent_unacked: # messages sent by socket, and not yet acked by peer 77 * @rcv_unacked: # messages read by user, but not yet acked back to peer 78 * @remote: 'connected' peer for dgram/rdm 79 * @node: hash table node 80 * @rcu: rcu struct for tipc_sock 81 */ 82 struct tipc_sock { 83 
struct sock sk; 84 int connected; 85 u32 conn_type; 86 u32 conn_instance; 87 int published; 88 u32 max_pkt; 89 u32 portid; 90 struct tipc_msg phdr; 91 struct list_head sock_list; 92 struct list_head publications; 93 u32 pub_count; 94 u32 probing_state; 95 unsigned long probing_intv; 96 uint conn_timeout; 97 atomic_t dupl_rcvcnt; 98 bool link_cong; 99 u16 snt_unacked; 100 u16 snd_win; 101 u16 peer_caps; 102 u16 rcv_unacked; 103 u16 rcv_win; 104 struct sockaddr_tipc remote; 105 struct rhash_head node; 106 struct rcu_head rcu; 107 }; 108 109 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); 110 static void tipc_data_ready(struct sock *sk); 111 static void tipc_write_space(struct sock *sk); 112 static void tipc_sock_destruct(struct sock *sk); 113 static int tipc_release(struct socket *sock); 114 static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); 115 static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); 116 static void tipc_sk_timeout(unsigned long data); 117 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 118 struct tipc_name_seq const *seq); 119 static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, 120 struct tipc_name_seq const *seq); 121 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); 122 static int tipc_sk_insert(struct tipc_sock *tsk); 123 static void tipc_sk_remove(struct tipc_sock *tsk); 124 static int __tipc_send_stream(struct socket *sock, struct msghdr *m, 125 size_t dsz); 126 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); 127 128 static const struct proto_ops packet_ops; 129 static const struct proto_ops stream_ops; 130 static const struct proto_ops msg_ops; 131 static struct proto tipc_proto; 132 133 static const struct rhashtable_params tsk_rht_params; 134 135 /* 136 * Revised TIPC socket locking policy: 137 * 138 * Most socket operations take the standard socket lock when they start 139 * and hold it until they finish 
 * (or until they need to sleep).  Acquiring
 * this lock grants the owner exclusive access to the fields of the socket
 * data structures, with the exception of the backlog queue.  A few socket
 * operations can be done without taking the socket lock because they only
 * read socket information that never changes during the life of the socket.
 *
 * Socket operations may acquire the lock for the associated TIPC port if they
 * need to perform an operation on the port.  If any routine needs to acquire
 * both the socket lock and the port lock it must take the socket lock first
 * to avoid the risk of deadlock.
 *
 * The dispatcher handling incoming messages cannot grab the socket lock in
 * the standard fashion, since it runs at the BH level when invoked and
 * therefore cannot block.
 * Instead, it checks to see if the socket lock is currently owned by someone,
 * and either handles the message itself or adds it to the socket's backlog
 * queue; in the latter case the queued message is processed once the process
 * owning the socket lock releases it.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping overcomes
 * the problem of a blocked socket operation preventing any other operations
 * from occurring.  However, applications must be careful if they have
 * multiple threads trying to send (or receive) on the same socket, as these
 * operations might interfere with each other.  For example, doing a connect
 * and a receive at the same time might allow the receive to consume the
 * ACK message meant for the connect.  While additional work could be done
 * to try and overcome this, it doesn't seem to be worthwhile at the present.
 *
 * NOTE: Releasing the socket lock while an operation is sleeping also ensures
 * that another operation that must be performed in a non-blocking manner is
 * not delayed for very long because the lock has already been taken.
169 * 170 * NOTE: This code assumes that certain fields of a port/socket pair are 171 * constant over its lifetime; such fields can be examined without taking 172 * the socket lock and/or port lock, and do not need to be re-read even 173 * after resuming processing after waiting. These fields include: 174 * - socket type 175 * - pointer to socket sk structure (aka tipc_sock structure) 176 * - pointer to port structure 177 * - port reference 178 */ 179 180 static u32 tsk_own_node(struct tipc_sock *tsk) 181 { 182 return msg_prevnode(&tsk->phdr); 183 } 184 185 static u32 tsk_peer_node(struct tipc_sock *tsk) 186 { 187 return msg_destnode(&tsk->phdr); 188 } 189 190 static u32 tsk_peer_port(struct tipc_sock *tsk) 191 { 192 return msg_destport(&tsk->phdr); 193 } 194 195 static bool tsk_unreliable(struct tipc_sock *tsk) 196 { 197 return msg_src_droppable(&tsk->phdr) != 0; 198 } 199 200 static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable) 201 { 202 msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0); 203 } 204 205 static bool tsk_unreturnable(struct tipc_sock *tsk) 206 { 207 return msg_dest_droppable(&tsk->phdr) != 0; 208 } 209 210 static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable) 211 { 212 msg_set_dest_droppable(&tsk->phdr, unreturnable ? 
1 : 0); 213 } 214 215 static int tsk_importance(struct tipc_sock *tsk) 216 { 217 return msg_importance(&tsk->phdr); 218 } 219 220 static int tsk_set_importance(struct tipc_sock *tsk, int imp) 221 { 222 if (imp > TIPC_CRITICAL_IMPORTANCE) 223 return -EINVAL; 224 msg_set_importance(&tsk->phdr, (u32)imp); 225 return 0; 226 } 227 228 static struct tipc_sock *tipc_sk(const struct sock *sk) 229 { 230 return container_of(sk, struct tipc_sock, sk); 231 } 232 233 static bool tsk_conn_cong(struct tipc_sock *tsk) 234 { 235 return tsk->snt_unacked >= tsk->snd_win; 236 } 237 238 /* tsk_blocks(): translate a buffer size in bytes to number of 239 * advertisable blocks, taking into account the ratio truesize(len)/len 240 * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ 241 */ 242 static u16 tsk_adv_blocks(int len) 243 { 244 return len / FLOWCTL_BLK_SZ / 4; 245 } 246 247 /* tsk_inc(): increment counter for sent or received data 248 * - If block based flow control is not supported by peer we 249 * fall back to message based ditto, incrementing the counter 250 */ 251 static u16 tsk_inc(struct tipc_sock *tsk, int msglen) 252 { 253 if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) 254 return ((msglen / FLOWCTL_BLK_SZ) + 1); 255 return 1; 256 } 257 258 /** 259 * tsk_advance_rx_queue - discard first buffer in socket receive queue 260 * 261 * Caller must hold socket lock 262 */ 263 static void tsk_advance_rx_queue(struct sock *sk) 264 { 265 kfree_skb(__skb_dequeue(&sk->sk_receive_queue)); 266 } 267 268 /* tipc_sk_respond() : send response message back to sender 269 */ 270 static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err) 271 { 272 u32 selector; 273 u32 dnode; 274 u32 onode = tipc_own_addr(sock_net(sk)); 275 276 if (!tipc_msg_reverse(onode, &skb, err)) 277 return; 278 279 dnode = msg_destnode(buf_msg(skb)); 280 selector = msg_origport(buf_msg(skb)); 281 tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); 282 } 283 284 /** 285 * 
tsk_rej_rx_queue - reject all buffers in socket receive queue 286 * 287 * Caller must hold socket lock 288 */ 289 static void tsk_rej_rx_queue(struct sock *sk) 290 { 291 struct sk_buff *skb; 292 293 while ((skb = __skb_dequeue(&sk->sk_receive_queue))) 294 tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); 295 } 296 297 /* tsk_peer_msg - verify if message was sent by connected port's peer 298 * 299 * Handles cases where the node's network address has changed from 300 * the default of <0.0.0> to its configured setting. 301 */ 302 static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) 303 { 304 struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); 305 u32 peer_port = tsk_peer_port(tsk); 306 u32 orig_node; 307 u32 peer_node; 308 309 if (unlikely(!tsk->connected)) 310 return false; 311 312 if (unlikely(msg_origport(msg) != peer_port)) 313 return false; 314 315 orig_node = msg_orignode(msg); 316 peer_node = tsk_peer_node(tsk); 317 318 if (likely(orig_node == peer_node)) 319 return true; 320 321 if (!orig_node && (peer_node == tn->own_addr)) 322 return true; 323 324 if (!peer_node && (orig_node == tn->own_addr)) 325 return true; 326 327 return false; 328 } 329 330 /** 331 * tipc_sk_create - create a TIPC socket 332 * @net: network namespace (must be default network) 333 * @sock: pre-allocated socket structure 334 * @protocol: protocol indicator (must be 0) 335 * @kern: caused by kernel or by userspace? 336 * 337 * This routine creates additional data structures used by the TIPC socket, 338 * initializes them, and links them together. 
339 * 340 * Returns 0 on success, errno otherwise 341 */ 342 static int tipc_sk_create(struct net *net, struct socket *sock, 343 int protocol, int kern) 344 { 345 struct tipc_net *tn; 346 const struct proto_ops *ops; 347 socket_state state; 348 struct sock *sk; 349 struct tipc_sock *tsk; 350 struct tipc_msg *msg; 351 352 /* Validate arguments */ 353 if (unlikely(protocol != 0)) 354 return -EPROTONOSUPPORT; 355 356 switch (sock->type) { 357 case SOCK_STREAM: 358 ops = &stream_ops; 359 state = SS_UNCONNECTED; 360 break; 361 case SOCK_SEQPACKET: 362 ops = &packet_ops; 363 state = SS_UNCONNECTED; 364 break; 365 case SOCK_DGRAM: 366 case SOCK_RDM: 367 ops = &msg_ops; 368 state = SS_READY; 369 break; 370 default: 371 return -EPROTOTYPE; 372 } 373 374 /* Allocate socket's protocol area */ 375 sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, kern); 376 if (sk == NULL) 377 return -ENOMEM; 378 379 tsk = tipc_sk(sk); 380 tsk->max_pkt = MAX_PKT_DEFAULT; 381 INIT_LIST_HEAD(&tsk->publications); 382 msg = &tsk->phdr; 383 tn = net_generic(sock_net(sk), tipc_net_id); 384 tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, 385 NAMED_H_SIZE, 0); 386 387 /* Finish initializing socket data structures */ 388 sock->ops = ops; 389 sock->state = state; 390 sock_init_data(sock, sk); 391 if (tipc_sk_insert(tsk)) { 392 pr_warn("Socket create failed; port number exhausted\n"); 393 return -EINVAL; 394 } 395 msg_set_origport(msg, tsk->portid); 396 setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); 397 sk->sk_backlog_rcv = tipc_backlog_rcv; 398 sk->sk_rcvbuf = sysctl_tipc_rmem[1]; 399 sk->sk_data_ready = tipc_data_ready; 400 sk->sk_write_space = tipc_write_space; 401 sk->sk_destruct = tipc_sock_destruct; 402 tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; 403 atomic_set(&tsk->dupl_rcvcnt, 0); 404 405 /* Start out with safe limits until we receive an advertised window */ 406 tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); 407 tsk->rcv_win = tsk->snd_win; 408 409 if 
(sock->state == SS_READY) { 410 tsk_set_unreturnable(tsk, true); 411 if (sock->type == SOCK_DGRAM) 412 tsk_set_unreliable(tsk, true); 413 } 414 return 0; 415 } 416 417 static void tipc_sk_callback(struct rcu_head *head) 418 { 419 struct tipc_sock *tsk = container_of(head, struct tipc_sock, rcu); 420 421 sock_put(&tsk->sk); 422 } 423 424 /** 425 * tipc_release - destroy a TIPC socket 426 * @sock: socket to destroy 427 * 428 * This routine cleans up any messages that are still queued on the socket. 429 * For DGRAM and RDM socket types, all queued messages are rejected. 430 * For SEQPACKET and STREAM socket types, the first message is rejected 431 * and any others are discarded. (If the first message on a STREAM socket 432 * is partially-read, it is discarded and the next one is rejected instead.) 433 * 434 * NOTE: Rejected messages are not necessarily returned to the sender! They 435 * are returned or discarded according to the "destination droppable" setting 436 * specified for the message by the sender. 
437 * 438 * Returns 0 on success, errno otherwise 439 */ 440 static int tipc_release(struct socket *sock) 441 { 442 struct sock *sk = sock->sk; 443 struct net *net; 444 struct tipc_sock *tsk; 445 struct sk_buff *skb; 446 u32 dnode; 447 448 /* 449 * Exit if socket isn't fully initialized (occurs when a failed accept() 450 * releases a pre-allocated child socket that was never used) 451 */ 452 if (sk == NULL) 453 return 0; 454 455 net = sock_net(sk); 456 tsk = tipc_sk(sk); 457 lock_sock(sk); 458 459 /* 460 * Reject all unreceived messages, except on an active connection 461 * (which disconnects locally & sends a 'FIN+' to peer) 462 */ 463 dnode = tsk_peer_node(tsk); 464 while (sock->state != SS_DISCONNECTING) { 465 skb = __skb_dequeue(&sk->sk_receive_queue); 466 if (skb == NULL) 467 break; 468 if (TIPC_SKB_CB(skb)->handle != NULL) 469 kfree_skb(skb); 470 else { 471 if ((sock->state == SS_CONNECTING) || 472 (sock->state == SS_CONNECTED)) { 473 sock->state = SS_DISCONNECTING; 474 tsk->connected = 0; 475 tipc_node_remove_conn(net, dnode, tsk->portid); 476 } 477 tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); 478 } 479 } 480 481 tipc_sk_withdraw(tsk, 0, NULL); 482 sk_stop_timer(sk, &sk->sk_timer); 483 tipc_sk_remove(tsk); 484 if (tsk->connected) { 485 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, 486 TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, 487 tsk_own_node(tsk), tsk_peer_port(tsk), 488 tsk->portid, TIPC_ERR_NO_PORT); 489 if (skb) 490 tipc_node_xmit_skb(net, skb, dnode, tsk->portid); 491 tipc_node_remove_conn(net, dnode, tsk->portid); 492 } 493 494 /* Reject any messages that accumulated in backlog queue */ 495 sock->state = SS_DISCONNECTING; 496 release_sock(sk); 497 498 call_rcu(&tsk->rcu, tipc_sk_callback); 499 sock->sk = NULL; 500 501 return 0; 502 } 503 504 /** 505 * tipc_bind - associate or disassocate TIPC name(s) with a socket 506 * @sock: socket structure 507 * @uaddr: socket address describing name(s) and desired operation 508 * @uaddr_len: size of socket address 
data structure 509 * 510 * Name and name sequence binding is indicated using a positive scope value; 511 * a negative scope value unbinds the specified name. Specifying no name 512 * (i.e. a socket address length of 0) unbinds all names from the socket. 513 * 514 * Returns 0 on success, errno otherwise 515 * 516 * NOTE: This routine doesn't need to take the socket lock since it doesn't 517 * access any non-constant socket information. 518 */ 519 static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, 520 int uaddr_len) 521 { 522 struct sock *sk = sock->sk; 523 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 524 struct tipc_sock *tsk = tipc_sk(sk); 525 int res = -EINVAL; 526 527 lock_sock(sk); 528 if (unlikely(!uaddr_len)) { 529 res = tipc_sk_withdraw(tsk, 0, NULL); 530 goto exit; 531 } 532 533 if (uaddr_len < sizeof(struct sockaddr_tipc)) { 534 res = -EINVAL; 535 goto exit; 536 } 537 if (addr->family != AF_TIPC) { 538 res = -EAFNOSUPPORT; 539 goto exit; 540 } 541 542 if (addr->addrtype == TIPC_ADDR_NAME) 543 addr->addr.nameseq.upper = addr->addr.nameseq.lower; 544 else if (addr->addrtype != TIPC_ADDR_NAMESEQ) { 545 res = -EAFNOSUPPORT; 546 goto exit; 547 } 548 549 if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && 550 (addr->addr.nameseq.type != TIPC_TOP_SRV) && 551 (addr->addr.nameseq.type != TIPC_CFG_SRV)) { 552 res = -EACCES; 553 goto exit; 554 } 555 556 res = (addr->scope > 0) ? 
557 tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) : 558 tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq); 559 exit: 560 release_sock(sk); 561 return res; 562 } 563 564 /** 565 * tipc_getname - get port ID of socket or peer socket 566 * @sock: socket structure 567 * @uaddr: area for returned socket address 568 * @uaddr_len: area for returned length of socket address 569 * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID 570 * 571 * Returns 0 on success, errno otherwise 572 * 573 * NOTE: This routine doesn't need to take the socket lock since it only 574 * accesses socket information that is unchanging (or which changes in 575 * a completely predictable manner). 576 */ 577 static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, 578 int *uaddr_len, int peer) 579 { 580 struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; 581 struct tipc_sock *tsk = tipc_sk(sock->sk); 582 struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); 583 584 memset(addr, 0, sizeof(*addr)); 585 if (peer) { 586 if ((sock->state != SS_CONNECTED) && 587 ((peer != 2) || (sock->state != SS_DISCONNECTING))) 588 return -ENOTCONN; 589 addr->addr.id.ref = tsk_peer_port(tsk); 590 addr->addr.id.node = tsk_peer_node(tsk); 591 } else { 592 addr->addr.id.ref = tsk->portid; 593 addr->addr.id.node = tn->own_addr; 594 } 595 596 *uaddr_len = sizeof(*addr); 597 addr->addrtype = TIPC_ADDR_ID; 598 addr->family = AF_TIPC; 599 addr->scope = 0; 600 addr->addr.name.domain = 0; 601 602 return 0; 603 } 604 605 /** 606 * tipc_poll - read and possibly block on pollmask 607 * @file: file structure associated with the socket 608 * @sock: socket for which to calculate the poll bits 609 * @wait: ??? 610 * 611 * Returns pollmask value 612 * 613 * COMMENTARY: 614 * It appears that the usual socket locking mechanisms are not useful here 615 * since the pollmask info is potentially out-of-date the moment this routine 616 * exits. 
TCP and other protocols seem to rely on higher level poll routines 617 * to handle any preventable race conditions, so TIPC will do the same ... 618 * 619 * TIPC sets the returned events as follows: 620 * 621 * socket state flags set 622 * ------------ --------- 623 * unconnected no read flags 624 * POLLOUT if port is not congested 625 * 626 * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue 627 * no write flags 628 * 629 * connected POLLIN/POLLRDNORM if data in rx queue 630 * POLLOUT if port is not congested 631 * 632 * disconnecting POLLIN/POLLRDNORM/POLLHUP 633 * no write flags 634 * 635 * listening POLLIN if SYN in rx queue 636 * no write flags 637 * 638 * ready POLLIN/POLLRDNORM if data in rx queue 639 * [connectionless] POLLOUT (since port cannot be congested) 640 * 641 * IMPORTANT: The fact that a read or write operation is indicated does NOT 642 * imply that the operation will succeed, merely that it should be performed 643 * and will not block. 644 */ 645 static unsigned int tipc_poll(struct file *file, struct socket *sock, 646 poll_table *wait) 647 { 648 struct sock *sk = sock->sk; 649 struct tipc_sock *tsk = tipc_sk(sk); 650 u32 mask = 0; 651 652 sock_poll_wait(file, sk_sleep(sk), wait); 653 654 switch ((int)sock->state) { 655 case SS_UNCONNECTED: 656 if (!tsk->link_cong) 657 mask |= POLLOUT; 658 break; 659 case SS_READY: 660 case SS_CONNECTED: 661 if (!tsk->link_cong && !tsk_conn_cong(tsk)) 662 mask |= POLLOUT; 663 /* fall thru' */ 664 case SS_CONNECTING: 665 case SS_LISTENING: 666 if (!skb_queue_empty(&sk->sk_receive_queue)) 667 mask |= (POLLIN | POLLRDNORM); 668 break; 669 case SS_DISCONNECTING: 670 mask = (POLLIN | POLLRDNORM | POLLHUP); 671 break; 672 } 673 674 return mask; 675 } 676 677 /** 678 * tipc_sendmcast - send multicast message 679 * @sock: socket structure 680 * @seq: destination address 681 * @msg: message to send 682 * @dsz: total length of message data 683 * @timeo: timeout to wait for wakeup 684 * 685 * Called from function 
tipc_sendmsg(), which has done all sanity checks 686 * Returns the number of bytes sent on success, or errno 687 */ 688 static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, 689 struct msghdr *msg, size_t dsz, long timeo) 690 { 691 struct sock *sk = sock->sk; 692 struct tipc_sock *tsk = tipc_sk(sk); 693 struct net *net = sock_net(sk); 694 struct tipc_msg *mhdr = &tsk->phdr; 695 struct sk_buff_head pktchain; 696 struct iov_iter save = msg->msg_iter; 697 uint mtu; 698 int rc; 699 700 msg_set_type(mhdr, TIPC_MCAST_MSG); 701 msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); 702 msg_set_destport(mhdr, 0); 703 msg_set_destnode(mhdr, 0); 704 msg_set_nametype(mhdr, seq->type); 705 msg_set_namelower(mhdr, seq->lower); 706 msg_set_nameupper(mhdr, seq->upper); 707 msg_set_hdr_sz(mhdr, MCAST_H_SIZE); 708 709 skb_queue_head_init(&pktchain); 710 711 new_mtu: 712 mtu = tipc_bcast_get_mtu(net); 713 rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain); 714 if (unlikely(rc < 0)) 715 return rc; 716 717 do { 718 rc = tipc_bcast_xmit(net, &pktchain); 719 if (likely(!rc)) 720 return dsz; 721 722 if (rc == -ELINKCONG) { 723 tsk->link_cong = 1; 724 rc = tipc_wait_for_sndmsg(sock, &timeo); 725 if (!rc) 726 continue; 727 } 728 __skb_queue_purge(&pktchain); 729 if (rc == -EMSGSIZE) { 730 msg->msg_iter = save; 731 goto new_mtu; 732 } 733 break; 734 } while (1); 735 return rc; 736 } 737 738 /** 739 * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets 740 * @arrvq: queue with arriving messages, to be cloned after destination lookup 741 * @inputq: queue with cloned messages, delivered to socket after dest lookup 742 * 743 * Multi-threaded: parallel calls with reference to same queues may occur 744 */ 745 void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, 746 struct sk_buff_head *inputq) 747 { 748 struct tipc_msg *msg; 749 struct tipc_plist dports; 750 u32 portid; 751 u32 scope = TIPC_CLUSTER_SCOPE; 752 struct sk_buff_head tmpq; 753 uint 
hsz; 754 struct sk_buff *skb, *_skb; 755 756 __skb_queue_head_init(&tmpq); 757 tipc_plist_init(&dports); 758 759 skb = tipc_skb_peek(arrvq, &inputq->lock); 760 for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { 761 msg = buf_msg(skb); 762 hsz = skb_headroom(skb) + msg_hdr_sz(msg); 763 764 if (in_own_node(net, msg_orignode(msg))) 765 scope = TIPC_NODE_SCOPE; 766 767 /* Create destination port list and message clones: */ 768 tipc_nametbl_mc_translate(net, 769 msg_nametype(msg), msg_namelower(msg), 770 msg_nameupper(msg), scope, &dports); 771 portid = tipc_plist_pop(&dports); 772 for (; portid; portid = tipc_plist_pop(&dports)) { 773 _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); 774 if (_skb) { 775 msg_set_destport(buf_msg(_skb), portid); 776 __skb_queue_tail(&tmpq, _skb); 777 continue; 778 } 779 pr_warn("Failed to clone mcast rcv buffer\n"); 780 } 781 /* Append to inputq if not already done by other thread */ 782 spin_lock_bh(&inputq->lock); 783 if (skb_peek(arrvq) == skb) { 784 skb_queue_splice_tail_init(&tmpq, inputq); 785 kfree_skb(__skb_dequeue(arrvq)); 786 } 787 spin_unlock_bh(&inputq->lock); 788 __skb_queue_purge(&tmpq); 789 kfree_skb(skb); 790 } 791 tipc_sk_rcv(net, inputq); 792 } 793 794 /** 795 * tipc_sk_proto_rcv - receive a connection mng protocol message 796 * @tsk: receiving socket 797 * @skb: pointer to message buffer. 
798 */ 799 static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, 800 struct sk_buff_head *xmitq) 801 { 802 struct sock *sk = &tsk->sk; 803 u32 onode = tsk_own_node(tsk); 804 struct tipc_msg *hdr = buf_msg(skb); 805 int mtyp = msg_type(hdr); 806 bool conn_cong; 807 808 /* Ignore if connection cannot be validated: */ 809 if (!tsk_peer_msg(tsk, hdr)) 810 goto exit; 811 812 tsk->probing_state = TIPC_CONN_OK; 813 814 if (mtyp == CONN_PROBE) { 815 msg_set_type(hdr, CONN_PROBE_REPLY); 816 if (tipc_msg_reverse(onode, &skb, TIPC_OK)) 817 __skb_queue_tail(xmitq, skb); 818 return; 819 } else if (mtyp == CONN_ACK) { 820 conn_cong = tsk_conn_cong(tsk); 821 tsk->snt_unacked -= msg_conn_ack(hdr); 822 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) 823 tsk->snd_win = msg_adv_win(hdr); 824 if (conn_cong) 825 sk->sk_write_space(sk); 826 } else if (mtyp != CONN_PROBE_REPLY) { 827 pr_warn("Received unknown CONN_PROTO msg\n"); 828 } 829 exit: 830 kfree_skb(skb); 831 } 832 833 static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) 834 { 835 struct sock *sk = sock->sk; 836 struct tipc_sock *tsk = tipc_sk(sk); 837 DEFINE_WAIT(wait); 838 int done; 839 840 do { 841 int err = sock_error(sk); 842 if (err) 843 return err; 844 if (sock->state == SS_DISCONNECTING) 845 return -EPIPE; 846 if (!*timeo_p) 847 return -EAGAIN; 848 if (signal_pending(current)) 849 return sock_intr_errno(*timeo_p); 850 851 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 852 done = sk_wait_event(sk, timeo_p, !tsk->link_cong); 853 finish_wait(sk_sleep(sk), &wait); 854 } while (!done); 855 return 0; 856 } 857 858 /** 859 * tipc_sendmsg - send message in connectionless manner 860 * @sock: socket structure 861 * @m: message to send 862 * @dsz: amount of user data to be sent 863 * 864 * Message must have an destination specified explicitly. 865 * Used for SOCK_RDM and SOCK_DGRAM messages, 866 * and for 'SYN' messages on SOCK_SEQPACKET and SOCK_STREAM connections. 
867 * (Note: 'SYN+' is prohibited on SOCK_STREAM.) 868 * 869 * Returns the number of bytes sent on success, or errno otherwise 870 */ 871 static int tipc_sendmsg(struct socket *sock, 872 struct msghdr *m, size_t dsz) 873 { 874 struct sock *sk = sock->sk; 875 int ret; 876 877 lock_sock(sk); 878 ret = __tipc_sendmsg(sock, m, dsz); 879 release_sock(sk); 880 881 return ret; 882 } 883 884 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) 885 { 886 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); 887 struct sock *sk = sock->sk; 888 struct tipc_sock *tsk = tipc_sk(sk); 889 struct net *net = sock_net(sk); 890 struct tipc_msg *mhdr = &tsk->phdr; 891 u32 dnode, dport; 892 struct sk_buff_head pktchain; 893 struct sk_buff *skb; 894 struct tipc_name_seq *seq; 895 struct iov_iter save; 896 u32 mtu; 897 long timeo; 898 int rc; 899 900 if (dsz > TIPC_MAX_USER_MSG_SIZE) 901 return -EMSGSIZE; 902 if (unlikely(!dest)) { 903 if (tsk->connected && sock->state == SS_READY) 904 dest = &tsk->remote; 905 else 906 return -EDESTADDRREQ; 907 } else if (unlikely(m->msg_namelen < sizeof(*dest)) || 908 dest->family != AF_TIPC) { 909 return -EINVAL; 910 } 911 if (unlikely(sock->state != SS_READY)) { 912 if (sock->state == SS_LISTENING) 913 return -EPIPE; 914 if (sock->state != SS_UNCONNECTED) 915 return -EISCONN; 916 if (tsk->published) 917 return -EOPNOTSUPP; 918 if (dest->addrtype == TIPC_ADDR_NAME) { 919 tsk->conn_type = dest->addr.name.name.type; 920 tsk->conn_instance = dest->addr.name.name.instance; 921 } 922 } 923 seq = &dest->addr.nameseq; 924 timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); 925 926 if (dest->addrtype == TIPC_ADDR_MCAST) { 927 return tipc_sendmcast(sock, seq, m, dsz, timeo); 928 } else if (dest->addrtype == TIPC_ADDR_NAME) { 929 u32 type = dest->addr.name.name.type; 930 u32 inst = dest->addr.name.name.instance; 931 u32 domain = dest->addr.name.domain; 932 933 dnode = domain; 934 msg_set_type(mhdr, TIPC_NAMED_MSG); 935 
msg_set_hdr_sz(mhdr, NAMED_H_SIZE); 936 msg_set_nametype(mhdr, type); 937 msg_set_nameinst(mhdr, inst); 938 msg_set_lookup_scope(mhdr, tipc_addr_scope(domain)); 939 dport = tipc_nametbl_translate(net, type, inst, &dnode); 940 msg_set_destnode(mhdr, dnode); 941 msg_set_destport(mhdr, dport); 942 if (unlikely(!dport && !dnode)) 943 return -EHOSTUNREACH; 944 } else if (dest->addrtype == TIPC_ADDR_ID) { 945 dnode = dest->addr.id.node; 946 msg_set_type(mhdr, TIPC_DIRECT_MSG); 947 msg_set_lookup_scope(mhdr, 0); 948 msg_set_destnode(mhdr, dnode); 949 msg_set_destport(mhdr, dest->addr.id.ref); 950 msg_set_hdr_sz(mhdr, BASIC_H_SIZE); 951 } 952 953 skb_queue_head_init(&pktchain); 954 save = m->msg_iter; 955 new_mtu: 956 mtu = tipc_node_get_mtu(net, dnode, tsk->portid); 957 rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain); 958 if (rc < 0) 959 return rc; 960 961 do { 962 skb = skb_peek(&pktchain); 963 TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; 964 rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); 965 if (likely(!rc)) { 966 if (sock->state != SS_READY) 967 sock->state = SS_CONNECTING; 968 return dsz; 969 } 970 if (rc == -ELINKCONG) { 971 tsk->link_cong = 1; 972 rc = tipc_wait_for_sndmsg(sock, &timeo); 973 if (!rc) 974 continue; 975 } 976 __skb_queue_purge(&pktchain); 977 if (rc == -EMSGSIZE) { 978 m->msg_iter = save; 979 goto new_mtu; 980 } 981 break; 982 } while (1); 983 984 return rc; 985 } 986 987 static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) 988 { 989 struct sock *sk = sock->sk; 990 struct tipc_sock *tsk = tipc_sk(sk); 991 DEFINE_WAIT(wait); 992 int done; 993 994 do { 995 int err = sock_error(sk); 996 if (err) 997 return err; 998 if (sock->state == SS_DISCONNECTING) 999 return -EPIPE; 1000 else if (sock->state != SS_CONNECTED) 1001 return -ENOTCONN; 1002 if (!*timeo_p) 1003 return -EAGAIN; 1004 if (signal_pending(current)) 1005 return sock_intr_errno(*timeo_p); 1006 1007 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 1008 
done = sk_wait_event(sk, timeo_p, 1009 (!tsk->link_cong && 1010 !tsk_conn_cong(tsk)) || 1011 !tsk->connected); 1012 finish_wait(sk_sleep(sk), &wait); 1013 } while (!done); 1014 return 0; 1015 } 1016 1017 /** 1018 * tipc_send_stream - send stream-oriented data 1019 * @sock: socket structure 1020 * @m: data to send 1021 * @dsz: total length of data to be transmitted 1022 * 1023 * Used for SOCK_STREAM data. 1024 * 1025 * Returns the number of bytes sent on success (or partial success), 1026 * or errno if no data sent 1027 */ 1028 static int tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) 1029 { 1030 struct sock *sk = sock->sk; 1031 int ret; 1032 1033 lock_sock(sk); 1034 ret = __tipc_send_stream(sock, m, dsz); 1035 release_sock(sk); 1036 1037 return ret; 1038 } 1039 1040 static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) 1041 { 1042 struct sock *sk = sock->sk; 1043 struct net *net = sock_net(sk); 1044 struct tipc_sock *tsk = tipc_sk(sk); 1045 struct tipc_msg *mhdr = &tsk->phdr; 1046 struct sk_buff_head pktchain; 1047 DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); 1048 u32 portid = tsk->portid; 1049 int rc = -EINVAL; 1050 long timeo; 1051 u32 dnode; 1052 uint mtu, send, sent = 0; 1053 struct iov_iter save; 1054 int hlen = MIN_H_SIZE; 1055 1056 /* Handle implied connection establishment */ 1057 if (unlikely(dest)) { 1058 rc = __tipc_sendmsg(sock, m, dsz); 1059 hlen = msg_hdr_sz(mhdr); 1060 if (dsz && (dsz == rc)) 1061 tsk->snt_unacked = tsk_inc(tsk, dsz + hlen); 1062 return rc; 1063 } 1064 if (dsz > (uint)INT_MAX) 1065 return -EMSGSIZE; 1066 1067 if (unlikely(sock->state != SS_CONNECTED)) { 1068 if (sock->state == SS_DISCONNECTING) 1069 return -EPIPE; 1070 else 1071 return -ENOTCONN; 1072 } 1073 1074 timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); 1075 dnode = tsk_peer_node(tsk); 1076 skb_queue_head_init(&pktchain); 1077 1078 next: 1079 save = m->msg_iter; 1080 mtu = tsk->max_pkt; 1081 send = 
min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE); 1082 rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain); 1083 if (unlikely(rc < 0)) 1084 return rc; 1085 1086 do { 1087 if (likely(!tsk_conn_cong(tsk))) { 1088 rc = tipc_node_xmit(net, &pktchain, dnode, portid); 1089 if (likely(!rc)) { 1090 tsk->snt_unacked += tsk_inc(tsk, send + hlen); 1091 sent += send; 1092 if (sent == dsz) 1093 return dsz; 1094 goto next; 1095 } 1096 if (rc == -EMSGSIZE) { 1097 __skb_queue_purge(&pktchain); 1098 tsk->max_pkt = tipc_node_get_mtu(net, dnode, 1099 portid); 1100 m->msg_iter = save; 1101 goto next; 1102 } 1103 if (rc != -ELINKCONG) 1104 break; 1105 1106 tsk->link_cong = 1; 1107 } 1108 rc = tipc_wait_for_sndpkt(sock, &timeo); 1109 } while (!rc); 1110 1111 __skb_queue_purge(&pktchain); 1112 return sent ? sent : rc; 1113 } 1114 1115 /** 1116 * tipc_send_packet - send a connection-oriented message 1117 * @sock: socket structure 1118 * @m: message to send 1119 * @dsz: length of data to be transmitted 1120 * 1121 * Used for SOCK_SEQPACKET messages. 
1122 * 1123 * Returns the number of bytes sent on success, or errno otherwise 1124 */ 1125 static int tipc_send_packet(struct socket *sock, struct msghdr *m, size_t dsz) 1126 { 1127 if (dsz > TIPC_MAX_USER_MSG_SIZE) 1128 return -EMSGSIZE; 1129 1130 return tipc_send_stream(sock, m, dsz); 1131 } 1132 1133 /* tipc_sk_finish_conn - complete the setup of a connection 1134 */ 1135 static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, 1136 u32 peer_node) 1137 { 1138 struct sock *sk = &tsk->sk; 1139 struct net *net = sock_net(sk); 1140 struct tipc_msg *msg = &tsk->phdr; 1141 1142 msg_set_destnode(msg, peer_node); 1143 msg_set_destport(msg, peer_port); 1144 msg_set_type(msg, TIPC_CONN_MSG); 1145 msg_set_lookup_scope(msg, 0); 1146 msg_set_hdr_sz(msg, SHORT_H_SIZE); 1147 1148 tsk->probing_intv = CONN_PROBING_INTERVAL; 1149 tsk->probing_state = TIPC_CONN_OK; 1150 tsk->connected = 1; 1151 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); 1152 tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); 1153 tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); 1154 tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); 1155 if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) 1156 return; 1157 1158 /* Fall back to message based flow control */ 1159 tsk->rcv_win = FLOWCTL_MSG_WIN; 1160 tsk->snd_win = FLOWCTL_MSG_WIN; 1161 } 1162 1163 /** 1164 * set_orig_addr - capture sender's address for received message 1165 * @m: descriptor for message info 1166 * @msg: received message header 1167 * 1168 * Note: Address is not captured if not requested by receiver. 
 */
static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
{
	DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);

	/* addr is NULL when the caller supplied no name buffer */
	if (addr) {
		addr->family = AF_TIPC;
		addr->addrtype = TIPC_ADDR_ID;
		/* Clear the whole address union before filling in the id */
		memset(&addr->addr, 0, sizeof(addr->addr));
		addr->addr.id.ref = msg_origport(msg);
		addr->addr.id.node = msg_orignode(msg);
		addr->addr.name.domain = 0;	/* could leave uninitialized */
		addr->scope = 0;		/* could leave uninitialized */
		m->msg_namelen = sizeof(struct sockaddr_tipc);
	}
}

/**
 * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
 * @m: descriptor for message info
 * @msg: received message header
 * @tsk: TIPC port associated with message
 *
 * Note: Ancillary data is not captured if not requested by receiver.
 *
 * Up to three control messages are emitted at level SOL_TIPC:
 * TIPC_ERRINFO (error code and returned data size) and TIPC_RETDATA (the
 * returned data itself) for an errored message, and TIPC_DESTNAME (name type,
 * lower, upper) when the message carries or implies a destination name.
 *
 * Returns 0 if successful, otherwise errno
 */
static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
				 struct tipc_sock *tsk)
{
	u32 anc_data[3];
	u32 err;
	u32 dest_type;
	int has_name;
	int res;

	/* No control buffer supplied: nothing to capture */
	if (likely(m->msg_controllen == 0))
		return 0;

	/* Optionally capture errored message object(s) */
	err = msg ? msg_errcode(msg) : 0;
	if (unlikely(err)) {
		anc_data[0] = err;
		anc_data[1] = msg_data_sz(msg);
		/* 8 == the two u32 values filled in just above */
		res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
		if (res)
			return res;
		if (anc_data[1]) {
			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
				       msg_data(msg));
			if (res)
				return res;
		}
	}

	/* Optionally capture message destination object */
	dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
	switch (dest_type) {
	case TIPC_NAMED_MSG:
		/* Lower and upper are both reported as the header's lower value */
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_namelower(msg);
		break;
	case TIPC_MCAST_MSG:
		has_name = 1;
		anc_data[0] = msg_nametype(msg);
		anc_data[1] = msg_namelower(msg);
		anc_data[2] = msg_nameupper(msg);
		break;
	case TIPC_CONN_MSG:
		/* Report the name recorded on the socket, if one was recorded */
		has_name = (tsk->conn_type != 0);
		anc_data[0] = tsk->conn_type;
		anc_data[1] = tsk->conn_instance;
		anc_data[2] = tsk->conn_instance;
		break;
	default:
		has_name = 0;
	}
	if (has_name) {
		/* 12 == all three u32 entries of anc_data */
		res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
		if (res)
			return res;
	}

	return 0;
}

/* tipc_sk_send_ack - acknowledge consumed data to the connected peer
 * @tsk: connected socket
 *
 * Builds a CONN_MANAGER/CONN_ACK message carrying the current rcv_unacked
 * count, resets that count, and transmits the message to the peer node.
 * Does nothing if the socket is not connected or the message cannot be
 * created.
 */
static void tipc_sk_send_ack(struct tipc_sock *tsk)
{
	struct net *net = sock_net(&tsk->sk);
	struct sk_buff *skb = NULL;
	struct tipc_msg *msg;
	u32 peer_port = tsk_peer_port(tsk);
	u32 dnode = tsk_peer_node(tsk);

	if (!tsk->connected)
		return;
	skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
			      dnode, tsk_own_node(tsk), peer_port,
			      tsk->portid, TIPC_OK);
	if (!skb)
		return;
	msg = buf_msg(skb);
	msg_set_conn_ack(msg, tsk->rcv_unacked);
	tsk->rcv_unacked = 0;

	/* Adjust to and advertize the correct window limit */
	if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) {
		tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf);
		msg_set_adv_win(msg, tsk->rcv_win);
	}
	tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}

/* tipc_wait_for_rcvmsg - wait until the receive queue holds a message
 * @sock: socket structure
 * @timeop: in: time left to wait; out: time remaining after the wait
 *
 * Called with the socket lock held; the lock is released while sleeping and
 * re-taken before the queue is re-examined.
 *
 * Returns 0 when a message is queued, -ENOTCONN if the queue is empty and the
 * socket is SS_DISCONNECTING, -EAGAIN when no time is left, or the value of
 * sock_intr_errno() when a signal is pending.
 */
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	long timeo = *timeop;
	int err;

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			if (sock->state == SS_DISCONNECTING) {
				err = -ENOTCONN;
				break;
			}
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		/* Each err value below is returned only if its test breaks */
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	*timeop = timeo;
	return err;
}

/**
 * tipc_recvmsg - receive packet-oriented message
 * @sock: socket structure
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_DGRAM, SOCK_RDM, and SOCK_SEQPACKET messages.
 * If the complete message doesn't fit in user area, truncate it.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
			int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *buf;
	struct tipc_msg *msg;
	long timeo;
	unsigned int sz;
	u32 err;
	int res, hlen;

	/* Catch invalid receive requests */
	if (unlikely(!buf_len))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sock->state == SS_UNCONNECTED)) {
		res = -ENOTCONN;
		goto exit;
	}

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
restart:

	/* Look for a message in receive queue; wait if necessary */
	res = tipc_wait_for_rcvmsg(sock, &timeo);
	if (res)
		goto exit;

	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	hlen = msg_hdr_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
		tsk_advance_rx_queue(sk);
		goto restart;
	}

	/* Capture sender's address (optional) */
	set_orig_addr(m, msg);

	/* Capture ancillary data (optional) */
	res = tipc_sk_anc_data_recv(m, msg, tsk);
	if (res)
		goto exit;

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		if (unlikely(buf_len < sz)) {
			sz = buf_len;
			m->msg_flags |= MSG_TRUNC;
		}
		res = skb_copy_datagram_msg(buf, hlen, m, sz);
		if (res)
			goto exit;
		res = sz;
	} else {
		/* Errored message: 0 for connectionless sockets, a peer
		 * shutdown, or when a control buffer was supplied; otherwise
		 * the connection is reported as reset.
		 */
		if ((sock->state == SS_READY) ||
		    ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
			res = 0;
		else
			res = -ECONNRESET;
	}

	/* A peek leaves the message queued and unacknowledged */
	if (unlikely(flags & MSG_PEEK))
		goto exit;

	if (likely(sock->state != SS_READY)) {
		tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
		/* Ack once a quarter of the receive window has been consumed */
		if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
			tipc_sk_send_ack(tsk);
	}
	tsk_advance_rx_queue(sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_recv_stream - receive stream-oriented data
 * @m: descriptor for message info
 * @buf_len: total size of user buffer area
 * @flags: receive flags
 *
 * Used for SOCK_STREAM messages only.  If not enough data is available
 * will optionally wait for more; never truncates data.
 *
 * Returns size of returned message data, errno otherwise
 */
static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
			    size_t buf_len, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sk_buff *buf;
	struct tipc_msg *msg;
	long timeo;
	unsigned int sz;
	int sz_to_copy, target, needed;
	int sz_copied = 0;
	u32 err;
	int res = 0, hlen;

	/* Catch invalid receive attempts */
	if (unlikely(!buf_len))
		return -EINVAL;

	lock_sock(sk);

	if (unlikely(sock->state == SS_UNCONNECTED)) {
		res = -ENOTCONN;
		goto exit;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

restart:
	/* Look for a message in receive queue; wait if necessary */
	res = tipc_wait_for_rcvmsg(sock, &timeo);
	if (res)
		goto exit;

	/* Look at first message in receive queue */
	buf = skb_peek(&sk->sk_receive_queue);
	msg = buf_msg(buf);
	sz = msg_data_sz(msg);
	hlen = msg_hdr_sz(msg);
	err = msg_errcode(msg);

	/* Discard an empty non-errored message & try again */
	if ((!sz) && (!err)) {
		tsk_advance_rx_queue(sk);
		goto restart;
	}

	/* Optionally capture sender's address & ancillary data of first msg */
	if (sz_copied == 0) {
		set_orig_addr(m, msg);
		res = tipc_sk_anc_data_recv(m, msg, tsk);
		if (res)
			goto exit;
	}

	/* Capture message data (if valid) & compute return value (always) */
	if (!err) {
		/* handle stores how many bytes of this message an earlier
		 * call already delivered (see the partial-read branch below)
		 */
		u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);

		sz -= offset;
		needed = (buf_len - sz_copied);
		sz_to_copy = (sz <= needed) ? sz : needed;

		res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
		if (res)
			goto exit;

		sz_copied += sz_to_copy;

		/* User buffer is full: remember progress, keep msg queued */
		if (sz_to_copy < sz) {
			if (!(flags & MSG_PEEK))
				TIPC_SKB_CB(buf)->handle =
				(void *)(unsigned long)(offset + sz_to_copy);
			goto exit;
		}
	} else {
		if (sz_copied != 0)
			goto exit; /* can't add error msg to valid data */

		if ((err == TIPC_CONN_SHUTDOWN) || m->msg_control)
			res = 0;
		else
			res = -ECONNRESET;
	}

	if (unlikely(flags & MSG_PEEK))
		goto exit;

	/* NOTE(review): sz has already had offset subtracted here, so a
	 * message consumed over several calls is accounted with only its
	 * final remainder - confirm this matches the sender's accounting.
	 */
	tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
	if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
		tipc_sk_send_ack(tsk);
	tsk_advance_rx_queue(sk);

	/* Loop around if more data is required */
	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
	    (!skb_queue_empty(&sk->sk_receive_queue) ||
	    (sz_copied < target)) &&	/* and more is ready or required */
	    (!err))			/* and haven't reached a FIN */
		goto restart;

exit:
	release_sock(sk);
	/* Data already copied takes precedence over a later error */
	return sz_copied ? sz_copied : res;
}

/**
 * tipc_write_space - wake up thread if port congestion is released
 * @sk: socket
 *
 * Wakes sleepers on the socket wait queue with the POLLOUT, POLLWRNORM and
 * POLLWRBAND events.
 */
static void tipc_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);
	rcu_read_unlock();
}

/**
 * tipc_data_ready - wake up threads to indicate messages have been received
 * @sk: socket
 *
 * Wakes sleepers on the socket wait queue with the POLLIN, POLLRDNORM and
 * POLLRDBAND events.
 */
static void tipc_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
						POLLRDNORM | POLLRDBAND);
	rcu_read_unlock();
}

/* tipc_sock_destruct - free any buffers still on the socket's receive queue
 * @sk: socket
 */
static void tipc_sock_destruct(struct sock *sk)
{
	__skb_queue_purge(&sk->sk_receive_queue);
}

/**
 * filter_connect - Handle all incoming messages for a connection-based socket
 * @tsk: TIPC socket
 * @skb: pointer to message buffer.
 *       It is passed by value: this function never frees the buffer and
 *       cannot change the caller's pointer.
 *
 * May move the socket to SS_CONNECTED or SS_DISCONNECTING and set sk_err,
 * depending on the current state and the message's error code.
 *
 * Returns true if the message should be queued on the socket, false otherwise
 */
static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
	struct sock *sk = &tsk->sk;
	struct net *net = sock_net(sk);
	struct socket *sock = sk->sk_socket;
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(msg_mcast(hdr)))
		return false;

	/* Cast: this file's SS_LISTENING/SS_READY values are negative */
	switch ((int)sock->state) {
	case SS_CONNECTED:

		/* Accept only connection-based messages sent by peer */
		if (unlikely(!tsk_peer_msg(tsk, hdr)))
			return false;

		if (unlikely(msg_errcode(hdr))) {
			sock->state = SS_DISCONNECTING;
			tsk->connected = 0;
			/* Let timer expire on its own */
			tipc_node_remove_conn(net, tsk_peer_node(tsk),
					      tsk->portid);
		}
		return true;

	case SS_CONNECTING:

		/* Accept only ACK or NACK message */
		if (unlikely(!msg_connected(hdr)))
			return false;

		/* NACK: queue it so a reader sees it; connect() gets sk_err */
		if (unlikely(msg_errcode(hdr))) {
			sock->state = SS_DISCONNECTING;
			sk->sk_err = ECONNREFUSED;
			return true;
		}

		if (unlikely(!msg_isdata(hdr))) {
			sock->state = SS_DISCONNECTING;
			sk->sk_err = EINVAL;
			return true;
		}

		tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
		msg_set_importance(&tsk->phdr, msg_importance(hdr));
		sock->state = SS_CONNECTED;

		/* If 'ACK+' message, add to socket receive queue */
		if (msg_data_sz(hdr))
			return true;

		/* If empty 'ACK-' message, wake up sleeping connect() */
		if (waitqueue_active(sk_sleep(sk)))
			wake_up_interruptible(sk_sleep(sk));

		/* 'ACK-' message is neither accepted nor rejected: */
		msg_set_dest_droppable(hdr, 1);
		return false;

	case SS_LISTENING:
	case SS_UNCONNECTED:

		/* Accept only SYN message */
		if (!msg_connected(hdr) && !(msg_errcode(hdr)))
			return true;
		break;
	case SS_DISCONNECTING:
		break;
	default:
		pr_err("Unknown socket state %u\n", sock->state);
	}
	return false;
}

/**
 * rcvbuf_limit - get proper overload limit of socket receive queue
 * @sk: socket
 * @skb: message
 *
 * For connection oriented messages, irrespective of importance,
 * default queue limit is 2 MB.
 *
 * For connectionless messages, queue limits are based on message
 * importance as follows:
 *
 * TIPC_LOW_IMPORTANCE       (2 MB)
 * TIPC_MEDIUM_IMPORTANCE    (4 MB)
 * TIPC_HIGH_IMPORTANCE      (8 MB)
 * TIPC_CRITICAL_IMPORTANCE  (16 MB)
 *
 * NOTE(review): the sizes above assume a particular sk_rcvbuf value; what the
 * code computes is sk_rcvbuf << importance for connectionless messages,
 * sk_rcvbuf for connections whose peer has TIPC_BLOCK_FLOWCTL, and
 * FLOWCTL_MSG_LIM for the remaining connections.
 *
 * Returns overload limit according to corresponding message importance
 */
static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
{
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);

	if (unlikely(!msg_connected(hdr)))
		return sk->sk_rcvbuf << msg_importance(hdr);

	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
		return sk->sk_rcvbuf;

	return FLOWCTL_MSG_LIM;
}

/**
 * filter_rcv - validate incoming message
 * @sk: socket
 * @skb: pointer to message.
 *
 * Enqueues message on receive queue if acceptable; optionally handles
 * disconnect indication for a connected socket.
 *
 * A rejected message is reversed towards its sender and appended to the
 * xmitq list argument; the caller is responsible for transmitting it.
 *
 * Called with socket lock already taken
 *
 * Returns true if message was added to socket receive queue, otherwise false
 */
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
		       struct sk_buff_head *xmitq)
{
	struct socket *sock = sk->sk_socket;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct tipc_msg *hdr = buf_msg(skb);
	unsigned int limit = rcvbuf_limit(sk, skb);
	int err = TIPC_OK;
	int usr = msg_user(hdr);

	/* Connection protocol messages are handled by tipc_sk_proto_rcv() */
	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
		tipc_sk_proto_rcv(tsk, skb, xmitq);
		return false;
	}

	/* Wakeup message: clear the congestion flag and wake writers */
	if (unlikely(usr == SOCK_WAKEUP)) {
		kfree_skb(skb);
		tsk->link_cong = 0;
		sk->sk_write_space(sk);
		return false;
	}

	/* Drop if illegal message type */
	if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
		kfree_skb(skb);
		return false;
	}

	/* Reject if wrong message type for current socket state */
	if (unlikely(sock->state == SS_READY)) {
		if (msg_connected(hdr)) {
			err = TIPC_ERR_NO_PORT;
			goto reject;
		}
	} else if (unlikely(!filter_connect(tsk, skb))) {
		err = TIPC_ERR_NO_PORT;
		goto reject;
	}

	/* Reject message if there isn't room to queue it */
	if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
		err = TIPC_ERR_OVERLOAD;
		goto reject;
	}

	/* Enqueue message */
	TIPC_SKB_CB(skb)->handle = NULL;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	skb_set_owner_r(skb, sk);

	sk->sk_data_ready(sk);
	return true;

reject:
	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
		__skb_queue_tail(xmitq, skb);
	return false;
}

/**
 * tipc_backlog_rcv - handle incoming message from backlog queue
 * @sk: socket
 * @skb: message
 *
 * Caller must hold socket lock
 *
 * Returns 0
 */
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Sampled up front: skb may be consumed by filter_rcv() */
	unsigned int truesize = skb->truesize;
	struct sk_buff_head xmitq;
	u32 dnode, selector;

	__skb_queue_head_init(&xmitq);

	if (likely(filter_rcv(sk, skb, &xmitq))) {
		/* Record the bytes in dupl_rcvcnt; tipc_sk_enqueue() adds
		 * that counter to its backlog limit.
		 */
		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
		return 0;
	}

	if (skb_queue_empty(&xmitq))
		return 0;

	/* Send response/rejected message */
	skb = __skb_dequeue(&xmitq);
	dnode = msg_destnode(buf_msg(skb));
	selector = msg_origport(buf_msg(skb));
	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
	return 0;
}

/**
 * tipc_sk_enqueue - extract all buffers with destination 'dport' from
 *                   inputq and try adding them to socket or backlog queue
 * @inputq: list of incoming buffers with potentially different destinations
 * @sk: socket where the buffers should be enqueued
 * @dport: port number for the socket
 * @xmitq: list collecting response/rejected messages for the caller to send
 *
 * Caller must hold socket lock
 */
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
			    u32 dport, struct sk_buff_head *xmitq)
{
	/* Stop after at most two jiffies of work in one call */
	unsigned long time_limit = jiffies + 2;
	struct sk_buff *skb;
	unsigned int lim;
	atomic_t *dcnt;
	u32 onode;

	while (skb_queue_len(inputq)) {
		if (unlikely(time_after_eq(jiffies, time_limit)))
			return;

		skb = tipc_skb_dequeue(inputq, dport);
		if (unlikely(!skb))
			return;

		/* Add message directly to receive queue if possible */
		if (!sock_owned_by_user(sk)) {
			filter_rcv(sk, skb, xmitq);
			continue;
		}

		/* Try backlog, compensating for double-counted bytes */
		dcnt = &tipc_sk(sk)->dupl_rcvcnt;
		if (!sk->sk_backlog.len)
			atomic_set(dcnt, 0);
		lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
		if (likely(!sk_add_backlog(sk, skb, lim)))
			continue;

		/* Overload => reject message back to sender */
		onode = tipc_own_addr(sock_net(sk));
		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
			__skb_queue_tail(xmitq, skb);
		break;
	}
}

/**
 * tipc_sk_rcv - handle a chain of incoming buffers
 * @net: network namespace used for socket lookup and transmission
 * @inputq: buffer list containing the buffers
 * Consumes all buffers in list until inputq is empty
 * Note: may be called in multiple threads referring to the same queue
 */
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
	struct sk_buff_head xmitq;
	u32 dnode, dport = 0;
	int err;
	struct tipc_sock *tsk;
	struct sock *sk;
	struct sk_buff *skb;

	__skb_queue_head_init(&xmitq);
	while (skb_queue_len(inputq)) {
		dport = tipc_skb_peek_port(inputq, dport);
		/* A successful lookup holds a reference; see sock_put() below */
		tsk = tipc_sk_lookup(net, dport);

		if (likely(tsk)) {
			sk = &tsk->sk;
			/* If the lock is contended, skip; the loop retries */
			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
				spin_unlock_bh(&sk->sk_lock.slock);
			}
			/* Send pending response/rejected messages, if any */
			while ((skb = __skb_dequeue(&xmitq))) {
				dnode = msg_destnode(buf_msg(skb));
				tipc_node_xmit_skb(net, skb, dnode, dport);
			}
			sock_put(sk);
			continue;
		}

		/* No destination socket => dequeue skb if still there */
		skb = tipc_skb_dequeue(inputq, dport);
		if (!skb)
			return;

		/* Try secondary lookup if unresolved named message */
		err = TIPC_ERR_NO_PORT;
		if (tipc_msg_lookup_dest(net, skb, &err))
			goto xmit;

		/* Prepare for message rejection */
		if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
			continue;
xmit:
		dnode = msg_destnode(buf_msg(skb));
		tipc_node_xmit_skb(net, skb, dnode, dport);
	}
}

/* tipc_wait_for_connect - sleep until the socket leaves SS_CONNECTING
 * @sock: socket structure
 * @timeo_p: in/out: remaining time to wait, in jiffies
 *
 * Returns 0 once the state has changed, the pending socket error if one is
 * set, -ETIMEDOUT when no time is left, or the value of sock_intr_errno()
 * when a signal is pending.
 */
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int done;

	do {
		int err = sock_error(sk);
		if (err)
			return err;
		if (!*timeo_p)
			return -ETIMEDOUT;
		if (signal_pending(current))
			return sock_intr_errno(*timeo_p);

		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING);
		finish_wait(sk_sleep(sk), &wait);
	} while (!done);
	return 0;
}

/**
 * tipc_connect - establish a connection to another TIPC port
 * @sock: socket structure
 * @dest: socket address for destination port
 * @destlen: size of socket address data structure
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_connect(struct socket *sock, struct sockaddr *dest,
			int destlen, int flags)
{
	struct sock *sk = sock->sk;
	struct tipc_sock *tsk = tipc_sk(sk);
	struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
	struct msghdr m = {NULL,};
	/* conn_timeout is in ms here; converted to jiffies before waiting */
	long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
	socket_state previous;
	int res = 0;

	lock_sock(sk);

	/* NOTE(review): dst->family (below) and dst->addrtype (further down)
	 * are read before destlen has been compared with
	 * sizeof(struct sockaddr_tipc) - confirm callers always pass a buffer
	 * large enough for these reads.
	 */

	/* DGRAM/RDM connect(), just save the destaddr */
	if (sock->state == SS_READY) {
		if (dst->family == AF_UNSPEC) {
			/* AF_UNSPEC clears the saved destination */
			memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc));
			tsk->connected = 0;
		} else if (destlen != sizeof(struct sockaddr_tipc)) {
			res = -EINVAL;
		} else {
			memcpy(&tsk->remote, dest, destlen);
			tsk->connected = 1;
		}
		goto exit;
	}

	/*
	 * Reject connection attempt using multicast address
	 *
	 * Note: __tipc_sendmsg() is relied upon to validate the rest of the
	 *       address fields, so there's no need to do it here
	 */
	if (dst->addrtype == TIPC_ADDR_MCAST) {
		res = -EINVAL;
		goto exit;
	}

	previous = sock->state;
	switch (sock->state) {
	case SS_UNCONNECTED:
		/* Send a 'SYN-' to destination */
		m.msg_name = dest;
		m.msg_namelen = destlen;

		/* If connect is in non-blocking case, set MSG_DONTWAIT to
		 * indicate __tipc_sendmsg() is never blocked.
		 */
		if (!timeout)
			m.msg_flags = MSG_DONTWAIT;

		res = __tipc_sendmsg(sock, &m, 0);
		if ((res < 0) && (res != -EWOULDBLOCK))
			goto exit;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		res = -EINPROGRESS;
		/* fall through */
	case SS_CONNECTING:
		if (previous == SS_CONNECTING)
			res = -EALREADY;
		if (!timeout)
			goto exit;
		timeout = msecs_to_jiffies(timeout);
		/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
		res = tipc_wait_for_connect(sock, &timeout);
		break;
	case SS_CONNECTED:
		res = -EISCONN;
		break;
	default:
		res = -EINVAL;
		break;
	}
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_listen - allow socket to listen for incoming connections
 * @sock: socket structure
 * @len: (unused)
 *
 * Only a socket in state SS_UNCONNECTED can be put into SS_LISTENING.
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_listen(struct socket *sock, int len)
{
	struct sock *sk = sock->sk;
	int res;

	lock_sock(sk);

	if (sock->state != SS_UNCONNECTED)
		res = -EINVAL;
	else {
		sock->state = SS_LISTENING;
		res = 0;
	}

	release_sock(sk);
	return res;
}

/* tipc_wait_for_accept - wait for a message on a listening socket
 * @sock: listening socket
 * @timeo: maximum time to wait
 *
 * Called with the socket lock held; the lock is released while sleeping.
 *
 * Returns 0 when the receive queue is non-empty, -EINVAL if the socket is no
 * longer SS_LISTENING, -EAGAIN when no time is left, or the value of
 * sock_intr_errno() when a signal is pending.
 */
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
	struct sock *sk = sock->sk;
	DEFINE_WAIT(wait);
	int err;

	/* True wake-one mechanism for incoming connections: only
	 * one process gets woken up, not the 'whole herd'.
	 * Since we do not 'race & poll' for established sockets
	 * anymore, the common case will execute the loop only once.
	*/
	for (;;) {
		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
					  TASK_INTERRUPTIBLE);
		if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
			release_sock(sk);
			timeo = schedule_timeout(timeo);
			lock_sock(sk);
		}
		/* Each err value below is returned only if its test breaks */
		err = 0;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			break;
		err = -EINVAL;
		if (sock->state != SS_LISTENING)
			break;
		err = -EAGAIN;
		if (!timeo)
			break;
		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			break;
	}
	finish_wait(sk_sleep(sk), &wait);
	return err;
}

/**
 * tipc_accept - wait for connection request
 * @sock: listening socket
 * @new_sock: new socket that is to be connected
 * @flags: file-related flags associated with socket
 *
 * Returns 0 on success, errno otherwise
 */
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
	struct sock *new_sk, *sk = sock->sk;
	struct sk_buff *buf;
	struct tipc_sock *new_tsock;
	struct tipc_msg *msg;
	long timeo;
	int res;

	lock_sock(sk);

	if (sock->state != SS_LISTENING) {
		res = -EINVAL;
		goto exit;
	}
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	res = tipc_wait_for_accept(sock, timeo);
	if (res)
		goto exit;

	/* The connection request stays queued until the new socket exists */
	buf = skb_peek(&sk->sk_receive_queue);

	res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
	if (res)
		goto exit;
	security_sk_clone(sock->sk, new_sock->sk);

	new_sk = new_sock->sk;
	new_tsock = tipc_sk(new_sk);
	msg = buf_msg(buf);

	/* we lock on new_sk; but lockdep sees the lock on sk */
	lock_sock_nested(new_sk, SINGLE_DEPTH_NESTING);

	/*
	 * Reject any stray messages received by new socket
	 * before the socket lock was taken (very, very unlikely)
	 */
	tsk_rej_rx_queue(new_sk);

	/* Connect new socket to its peer */
	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
	new_sock->state = SS_CONNECTED;

	tsk_set_importance(new_tsock, msg_importance(msg));
	/* Record the name the peer used, if the request carried one */
	if (msg_named(msg)) {
		new_tsock->conn_type = msg_nametype(msg);
		new_tsock->conn_instance = msg_nameinst(msg);
	}

	/*
	 * Respond to 'SYN-' by discarding it & returning 'ACK'-.
	 * Respond to 'SYN+' by queuing it on new socket.
	 */
	if (!msg_data_sz(msg)) {
		struct msghdr m = {NULL,};

		tsk_advance_rx_queue(sk);
		/* The result of this empty send is not checked */
		__tipc_send_stream(new_sock, &m, 0);
	} else {
		__skb_dequeue(&sk->sk_receive_queue);
		__skb_queue_head(&new_sk->sk_receive_queue, buf);
		skb_set_owner_r(buf, new_sk);
	}
	release_sock(new_sk);
exit:
	release_sock(sk);
	return res;
}

/**
 * tipc_shutdown - shutdown socket connection
 * @sock: socket structure
 * @how: direction to close (must be SHUT_RDWR)
 *
 * Terminates connection (if necessary), then purges socket's receive queue.
2143 * 2144 * Returns 0 on success, errno otherwise 2145 */ 2146 static int tipc_shutdown(struct socket *sock, int how) 2147 { 2148 struct sock *sk = sock->sk; 2149 struct net *net = sock_net(sk); 2150 struct tipc_sock *tsk = tipc_sk(sk); 2151 struct sk_buff *skb; 2152 u32 dnode = tsk_peer_node(tsk); 2153 u32 dport = tsk_peer_port(tsk); 2154 u32 onode = tipc_own_addr(net); 2155 u32 oport = tsk->portid; 2156 int res; 2157 2158 if (how != SHUT_RDWR) 2159 return -EINVAL; 2160 2161 lock_sock(sk); 2162 2163 switch (sock->state) { 2164 case SS_CONNECTING: 2165 case SS_CONNECTED: 2166 2167 restart: 2168 dnode = tsk_peer_node(tsk); 2169 2170 /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ 2171 skb = __skb_dequeue(&sk->sk_receive_queue); 2172 if (skb) { 2173 if (TIPC_SKB_CB(skb)->handle != NULL) { 2174 kfree_skb(skb); 2175 goto restart; 2176 } 2177 tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN); 2178 } else { 2179 skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, 2180 TIPC_CONN_MSG, SHORT_H_SIZE, 2181 0, dnode, onode, dport, oport, 2182 TIPC_CONN_SHUTDOWN); 2183 tipc_node_xmit_skb(net, skb, dnode, tsk->portid); 2184 } 2185 tsk->connected = 0; 2186 sock->state = SS_DISCONNECTING; 2187 tipc_node_remove_conn(net, dnode, tsk->portid); 2188 /* fall through */ 2189 2190 case SS_DISCONNECTING: 2191 2192 /* Discard any unreceived messages */ 2193 __skb_queue_purge(&sk->sk_receive_queue); 2194 2195 /* Wake up anyone sleeping in poll */ 2196 sk->sk_state_change(sk); 2197 res = 0; 2198 break; 2199 2200 default: 2201 res = -ENOTCONN; 2202 } 2203 2204 release_sock(sk); 2205 return res; 2206 } 2207 2208 static void tipc_sk_timeout(unsigned long data) 2209 { 2210 struct tipc_sock *tsk = (struct tipc_sock *)data; 2211 struct sock *sk = &tsk->sk; 2212 struct sk_buff *skb = NULL; 2213 u32 peer_port, peer_node; 2214 u32 own_node = tsk_own_node(tsk); 2215 2216 bh_lock_sock(sk); 2217 if (!tsk->connected) { 2218 bh_unlock_sock(sk); 2219 goto exit; 2220 } 2221 peer_port = 
tsk_peer_port(tsk); 2222 peer_node = tsk_peer_node(tsk); 2223 2224 if (tsk->probing_state == TIPC_CONN_PROBING) { 2225 if (!sock_owned_by_user(sk)) { 2226 sk->sk_socket->state = SS_DISCONNECTING; 2227 tsk->connected = 0; 2228 tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), 2229 tsk_peer_port(tsk)); 2230 sk->sk_state_change(sk); 2231 } else { 2232 /* Try again later */ 2233 sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); 2234 } 2235 2236 } else { 2237 skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, 2238 INT_H_SIZE, 0, peer_node, own_node, 2239 peer_port, tsk->portid, TIPC_OK); 2240 tsk->probing_state = TIPC_CONN_PROBING; 2241 sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); 2242 } 2243 bh_unlock_sock(sk); 2244 if (skb) 2245 tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); 2246 exit: 2247 sock_put(sk); 2248 } 2249 2250 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, 2251 struct tipc_name_seq const *seq) 2252 { 2253 struct net *net = sock_net(&tsk->sk); 2254 struct publication *publ; 2255 u32 key; 2256 2257 if (tsk->connected) 2258 return -EINVAL; 2259 key = tsk->portid + tsk->pub_count + 1; 2260 if (key == tsk->portid) 2261 return -EADDRINUSE; 2262 2263 publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper, 2264 scope, tsk->portid, key); 2265 if (unlikely(!publ)) 2266 return -EINVAL; 2267 2268 list_add(&publ->pport_list, &tsk->publications); 2269 tsk->pub_count++; 2270 tsk->published = 1; 2271 return 0; 2272 } 2273 2274 static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, 2275 struct tipc_name_seq const *seq) 2276 { 2277 struct net *net = sock_net(&tsk->sk); 2278 struct publication *publ; 2279 struct publication *safe; 2280 int rc = -EINVAL; 2281 2282 list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) { 2283 if (seq) { 2284 if (publ->scope != scope) 2285 continue; 2286 if (publ->type != seq->type) 2287 continue; 2288 if (publ->lower != seq->lower) 2289 continue; 2290 if 
(publ->upper != seq->upper) 2291 break; 2292 tipc_nametbl_withdraw(net, publ->type, publ->lower, 2293 publ->ref, publ->key); 2294 rc = 0; 2295 break; 2296 } 2297 tipc_nametbl_withdraw(net, publ->type, publ->lower, 2298 publ->ref, publ->key); 2299 rc = 0; 2300 } 2301 if (list_empty(&tsk->publications)) 2302 tsk->published = 0; 2303 return rc; 2304 } 2305 2306 /* tipc_sk_reinit: set non-zero address in all existing sockets 2307 * when we go from standalone to network mode. 2308 */ 2309 void tipc_sk_reinit(struct net *net) 2310 { 2311 struct tipc_net *tn = net_generic(net, tipc_net_id); 2312 const struct bucket_table *tbl; 2313 struct rhash_head *pos; 2314 struct tipc_sock *tsk; 2315 struct tipc_msg *msg; 2316 int i; 2317 2318 rcu_read_lock(); 2319 tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); 2320 for (i = 0; i < tbl->size; i++) { 2321 rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { 2322 spin_lock_bh(&tsk->sk.sk_lock.slock); 2323 msg = &tsk->phdr; 2324 msg_set_prevnode(msg, tn->own_addr); 2325 msg_set_orignode(msg, tn->own_addr); 2326 spin_unlock_bh(&tsk->sk.sk_lock.slock); 2327 } 2328 } 2329 rcu_read_unlock(); 2330 } 2331 2332 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) 2333 { 2334 struct tipc_net *tn = net_generic(net, tipc_net_id); 2335 struct tipc_sock *tsk; 2336 2337 rcu_read_lock(); 2338 tsk = rhashtable_lookup_fast(&tn->sk_rht, &portid, tsk_rht_params); 2339 if (tsk) 2340 sock_hold(&tsk->sk); 2341 rcu_read_unlock(); 2342 2343 return tsk; 2344 } 2345 2346 static int tipc_sk_insert(struct tipc_sock *tsk) 2347 { 2348 struct sock *sk = &tsk->sk; 2349 struct net *net = sock_net(sk); 2350 struct tipc_net *tn = net_generic(net, tipc_net_id); 2351 u32 remaining = (TIPC_MAX_PORT - TIPC_MIN_PORT) + 1; 2352 u32 portid = prandom_u32() % remaining + TIPC_MIN_PORT; 2353 2354 while (remaining--) { 2355 portid++; 2356 if ((portid < TIPC_MIN_PORT) || (portid > TIPC_MAX_PORT)) 2357 portid = TIPC_MIN_PORT; 2358 tsk->portid = portid; 2359 
sock_hold(&tsk->sk); 2360 if (!rhashtable_lookup_insert_fast(&tn->sk_rht, &tsk->node, 2361 tsk_rht_params)) 2362 return 0; 2363 sock_put(&tsk->sk); 2364 } 2365 2366 return -1; 2367 } 2368 2369 static void tipc_sk_remove(struct tipc_sock *tsk) 2370 { 2371 struct sock *sk = &tsk->sk; 2372 struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); 2373 2374 if (!rhashtable_remove_fast(&tn->sk_rht, &tsk->node, tsk_rht_params)) { 2375 WARN_ON(atomic_read(&sk->sk_refcnt) == 1); 2376 __sock_put(sk); 2377 } 2378 } 2379 2380 static const struct rhashtable_params tsk_rht_params = { 2381 .nelem_hint = 192, 2382 .head_offset = offsetof(struct tipc_sock, node), 2383 .key_offset = offsetof(struct tipc_sock, portid), 2384 .key_len = sizeof(u32), /* portid */ 2385 .max_size = 1048576, 2386 .min_size = 256, 2387 .automatic_shrinking = true, 2388 }; 2389 2390 int tipc_sk_rht_init(struct net *net) 2391 { 2392 struct tipc_net *tn = net_generic(net, tipc_net_id); 2393 2394 return rhashtable_init(&tn->sk_rht, &tsk_rht_params); 2395 } 2396 2397 void tipc_sk_rht_destroy(struct net *net) 2398 { 2399 struct tipc_net *tn = net_generic(net, tipc_net_id); 2400 2401 /* Wait for socket readers to complete */ 2402 synchronize_net(); 2403 2404 rhashtable_destroy(&tn->sk_rht); 2405 } 2406 2407 /** 2408 * tipc_setsockopt - set socket option 2409 * @sock: socket structure 2410 * @lvl: option level 2411 * @opt: option identifier 2412 * @ov: pointer to new option value 2413 * @ol: length of option value 2414 * 2415 * For stream sockets only, accepts and ignores all IPPROTO_TCP options 2416 * (to ease compatibility). 
2417 * 2418 * Returns 0 on success, errno otherwise 2419 */ 2420 static int tipc_setsockopt(struct socket *sock, int lvl, int opt, 2421 char __user *ov, unsigned int ol) 2422 { 2423 struct sock *sk = sock->sk; 2424 struct tipc_sock *tsk = tipc_sk(sk); 2425 u32 value; 2426 int res; 2427 2428 if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) 2429 return 0; 2430 if (lvl != SOL_TIPC) 2431 return -ENOPROTOOPT; 2432 if (ol < sizeof(value)) 2433 return -EINVAL; 2434 res = get_user(value, (u32 __user *)ov); 2435 if (res) 2436 return res; 2437 2438 lock_sock(sk); 2439 2440 switch (opt) { 2441 case TIPC_IMPORTANCE: 2442 res = tsk_set_importance(tsk, value); 2443 break; 2444 case TIPC_SRC_DROPPABLE: 2445 if (sock->type != SOCK_STREAM) 2446 tsk_set_unreliable(tsk, value); 2447 else 2448 res = -ENOPROTOOPT; 2449 break; 2450 case TIPC_DEST_DROPPABLE: 2451 tsk_set_unreturnable(tsk, value); 2452 break; 2453 case TIPC_CONN_TIMEOUT: 2454 tipc_sk(sk)->conn_timeout = value; 2455 /* no need to set "res", since already 0 at this point */ 2456 break; 2457 default: 2458 res = -EINVAL; 2459 } 2460 2461 release_sock(sk); 2462 2463 return res; 2464 } 2465 2466 /** 2467 * tipc_getsockopt - get socket option 2468 * @sock: socket structure 2469 * @lvl: option level 2470 * @opt: option identifier 2471 * @ov: receptacle for option value 2472 * @ol: receptacle for length of option value 2473 * 2474 * For stream sockets only, returns 0 length result for all IPPROTO_TCP options 2475 * (to ease compatibility). 
2476 * 2477 * Returns 0 on success, errno otherwise 2478 */ 2479 static int tipc_getsockopt(struct socket *sock, int lvl, int opt, 2480 char __user *ov, int __user *ol) 2481 { 2482 struct sock *sk = sock->sk; 2483 struct tipc_sock *tsk = tipc_sk(sk); 2484 int len; 2485 u32 value; 2486 int res; 2487 2488 if ((lvl == IPPROTO_TCP) && (sock->type == SOCK_STREAM)) 2489 return put_user(0, ol); 2490 if (lvl != SOL_TIPC) 2491 return -ENOPROTOOPT; 2492 res = get_user(len, ol); 2493 if (res) 2494 return res; 2495 2496 lock_sock(sk); 2497 2498 switch (opt) { 2499 case TIPC_IMPORTANCE: 2500 value = tsk_importance(tsk); 2501 break; 2502 case TIPC_SRC_DROPPABLE: 2503 value = tsk_unreliable(tsk); 2504 break; 2505 case TIPC_DEST_DROPPABLE: 2506 value = tsk_unreturnable(tsk); 2507 break; 2508 case TIPC_CONN_TIMEOUT: 2509 value = tsk->conn_timeout; 2510 /* no need to set "res", since already 0 at this point */ 2511 break; 2512 case TIPC_NODE_RECVQ_DEPTH: 2513 value = 0; /* was tipc_queue_size, now obsolete */ 2514 break; 2515 case TIPC_SOCK_RECVQ_DEPTH: 2516 value = skb_queue_len(&sk->sk_receive_queue); 2517 break; 2518 default: 2519 res = -EINVAL; 2520 } 2521 2522 release_sock(sk); 2523 2524 if (res) 2525 return res; /* "get" failed */ 2526 2527 if (len < sizeof(value)) 2528 return -EINVAL; 2529 2530 if (copy_to_user(ov, &value, sizeof(value))) 2531 return -EFAULT; 2532 2533 return put_user(sizeof(value), ol); 2534 } 2535 2536 static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 2537 { 2538 struct sock *sk = sock->sk; 2539 struct tipc_sioc_ln_req lnr; 2540 void __user *argp = (void __user *)arg; 2541 2542 switch (cmd) { 2543 case SIOCGETLINKNAME: 2544 if (copy_from_user(&lnr, argp, sizeof(lnr))) 2545 return -EFAULT; 2546 if (!tipc_node_get_linkname(sock_net(sk), 2547 lnr.bearer_id & 0xffff, lnr.peer, 2548 lnr.linkname, TIPC_MAX_LINK_NAME)) { 2549 if (copy_to_user(argp, &lnr, sizeof(lnr))) 2550 return -EFAULT; 2551 return 0; 2552 } 2553 return 
-EADDRNOTAVAIL; 2554 default: 2555 return -ENOIOCTLCMD; 2556 } 2557 } 2558 2559 /* Protocol switches for the various types of TIPC sockets */ 2560 2561 static const struct proto_ops msg_ops = { 2562 .owner = THIS_MODULE, 2563 .family = AF_TIPC, 2564 .release = tipc_release, 2565 .bind = tipc_bind, 2566 .connect = tipc_connect, 2567 .socketpair = sock_no_socketpair, 2568 .accept = sock_no_accept, 2569 .getname = tipc_getname, 2570 .poll = tipc_poll, 2571 .ioctl = tipc_ioctl, 2572 .listen = sock_no_listen, 2573 .shutdown = tipc_shutdown, 2574 .setsockopt = tipc_setsockopt, 2575 .getsockopt = tipc_getsockopt, 2576 .sendmsg = tipc_sendmsg, 2577 .recvmsg = tipc_recvmsg, 2578 .mmap = sock_no_mmap, 2579 .sendpage = sock_no_sendpage 2580 }; 2581 2582 static const struct proto_ops packet_ops = { 2583 .owner = THIS_MODULE, 2584 .family = AF_TIPC, 2585 .release = tipc_release, 2586 .bind = tipc_bind, 2587 .connect = tipc_connect, 2588 .socketpair = sock_no_socketpair, 2589 .accept = tipc_accept, 2590 .getname = tipc_getname, 2591 .poll = tipc_poll, 2592 .ioctl = tipc_ioctl, 2593 .listen = tipc_listen, 2594 .shutdown = tipc_shutdown, 2595 .setsockopt = tipc_setsockopt, 2596 .getsockopt = tipc_getsockopt, 2597 .sendmsg = tipc_send_packet, 2598 .recvmsg = tipc_recvmsg, 2599 .mmap = sock_no_mmap, 2600 .sendpage = sock_no_sendpage 2601 }; 2602 2603 static const struct proto_ops stream_ops = { 2604 .owner = THIS_MODULE, 2605 .family = AF_TIPC, 2606 .release = tipc_release, 2607 .bind = tipc_bind, 2608 .connect = tipc_connect, 2609 .socketpair = sock_no_socketpair, 2610 .accept = tipc_accept, 2611 .getname = tipc_getname, 2612 .poll = tipc_poll, 2613 .ioctl = tipc_ioctl, 2614 .listen = tipc_listen, 2615 .shutdown = tipc_shutdown, 2616 .setsockopt = tipc_setsockopt, 2617 .getsockopt = tipc_getsockopt, 2618 .sendmsg = tipc_send_stream, 2619 .recvmsg = tipc_recv_stream, 2620 .mmap = sock_no_mmap, 2621 .sendpage = sock_no_sendpage 2622 }; 2623 2624 static const struct net_proto_family 
tipc_family_ops = { 2625 .owner = THIS_MODULE, 2626 .family = AF_TIPC, 2627 .create = tipc_sk_create 2628 }; 2629 2630 static struct proto tipc_proto = { 2631 .name = "TIPC", 2632 .owner = THIS_MODULE, 2633 .obj_size = sizeof(struct tipc_sock), 2634 .sysctl_rmem = sysctl_tipc_rmem 2635 }; 2636 2637 /** 2638 * tipc_socket_init - initialize TIPC socket interface 2639 * 2640 * Returns 0 on success, errno otherwise 2641 */ 2642 int tipc_socket_init(void) 2643 { 2644 int res; 2645 2646 res = proto_register(&tipc_proto, 1); 2647 if (res) { 2648 pr_err("Failed to register TIPC protocol type\n"); 2649 goto out; 2650 } 2651 2652 res = sock_register(&tipc_family_ops); 2653 if (res) { 2654 pr_err("Failed to register TIPC socket type\n"); 2655 proto_unregister(&tipc_proto); 2656 goto out; 2657 } 2658 out: 2659 return res; 2660 } 2661 2662 /** 2663 * tipc_socket_stop - stop TIPC socket interface 2664 */ 2665 void tipc_socket_stop(void) 2666 { 2667 sock_unregister(tipc_family_ops.family); 2668 proto_unregister(&tipc_proto); 2669 } 2670 2671 /* Caller should hold socket lock for the passed tipc socket. 
*/ 2672 static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) 2673 { 2674 u32 peer_node; 2675 u32 peer_port; 2676 struct nlattr *nest; 2677 2678 peer_node = tsk_peer_node(tsk); 2679 peer_port = tsk_peer_port(tsk); 2680 2681 nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); 2682 2683 if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) 2684 goto msg_full; 2685 if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port)) 2686 goto msg_full; 2687 2688 if (tsk->conn_type != 0) { 2689 if (nla_put_flag(skb, TIPC_NLA_CON_FLAG)) 2690 goto msg_full; 2691 if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type)) 2692 goto msg_full; 2693 if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance)) 2694 goto msg_full; 2695 } 2696 nla_nest_end(skb, nest); 2697 2698 return 0; 2699 2700 msg_full: 2701 nla_nest_cancel(skb, nest); 2702 2703 return -EMSGSIZE; 2704 } 2705 2706 /* Caller should hold socket lock for the passed tipc socket. */ 2707 static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb, 2708 struct tipc_sock *tsk) 2709 { 2710 int err; 2711 void *hdr; 2712 struct nlattr *attrs; 2713 struct net *net = sock_net(skb->sk); 2714 struct tipc_net *tn = net_generic(net, tipc_net_id); 2715 2716 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2717 &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET); 2718 if (!hdr) 2719 goto msg_cancel; 2720 2721 attrs = nla_nest_start(skb, TIPC_NLA_SOCK); 2722 if (!attrs) 2723 goto genlmsg_cancel; 2724 if (nla_put_u32(skb, TIPC_NLA_SOCK_REF, tsk->portid)) 2725 goto attr_msg_cancel; 2726 if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr)) 2727 goto attr_msg_cancel; 2728 2729 if (tsk->connected) { 2730 err = __tipc_nl_add_sk_con(skb, tsk); 2731 if (err) 2732 goto attr_msg_cancel; 2733 } else if (!list_empty(&tsk->publications)) { 2734 if (nla_put_flag(skb, TIPC_NLA_SOCK_HAS_PUBL)) 2735 goto attr_msg_cancel; 2736 } 2737 nla_nest_end(skb, attrs); 2738 genlmsg_end(skb, hdr); 2739 2740 return 0; 
2741 2742 attr_msg_cancel: 2743 nla_nest_cancel(skb, attrs); 2744 genlmsg_cancel: 2745 genlmsg_cancel(skb, hdr); 2746 msg_cancel: 2747 return -EMSGSIZE; 2748 } 2749 2750 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb) 2751 { 2752 int err; 2753 struct tipc_sock *tsk; 2754 const struct bucket_table *tbl; 2755 struct rhash_head *pos; 2756 struct net *net = sock_net(skb->sk); 2757 struct tipc_net *tn = net_generic(net, tipc_net_id); 2758 u32 tbl_id = cb->args[0]; 2759 u32 prev_portid = cb->args[1]; 2760 2761 rcu_read_lock(); 2762 tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); 2763 for (; tbl_id < tbl->size; tbl_id++) { 2764 rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) { 2765 spin_lock_bh(&tsk->sk.sk_lock.slock); 2766 if (prev_portid && prev_portid != tsk->portid) { 2767 spin_unlock_bh(&tsk->sk.sk_lock.slock); 2768 continue; 2769 } 2770 2771 err = __tipc_nl_add_sk(skb, cb, tsk); 2772 if (err) { 2773 prev_portid = tsk->portid; 2774 spin_unlock_bh(&tsk->sk.sk_lock.slock); 2775 goto out; 2776 } 2777 prev_portid = 0; 2778 spin_unlock_bh(&tsk->sk.sk_lock.slock); 2779 } 2780 } 2781 out: 2782 rcu_read_unlock(); 2783 cb->args[0] = tbl_id; 2784 cb->args[1] = prev_portid; 2785 2786 return skb->len; 2787 } 2788 2789 /* Caller should hold socket lock for the passed tipc socket. 
*/ 2790 static int __tipc_nl_add_sk_publ(struct sk_buff *skb, 2791 struct netlink_callback *cb, 2792 struct publication *publ) 2793 { 2794 void *hdr; 2795 struct nlattr *attrs; 2796 2797 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 2798 &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); 2799 if (!hdr) 2800 goto msg_cancel; 2801 2802 attrs = nla_nest_start(skb, TIPC_NLA_PUBL); 2803 if (!attrs) 2804 goto genlmsg_cancel; 2805 2806 if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key)) 2807 goto attr_msg_cancel; 2808 if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type)) 2809 goto attr_msg_cancel; 2810 if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower)) 2811 goto attr_msg_cancel; 2812 if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper)) 2813 goto attr_msg_cancel; 2814 2815 nla_nest_end(skb, attrs); 2816 genlmsg_end(skb, hdr); 2817 2818 return 0; 2819 2820 attr_msg_cancel: 2821 nla_nest_cancel(skb, attrs); 2822 genlmsg_cancel: 2823 genlmsg_cancel(skb, hdr); 2824 msg_cancel: 2825 return -EMSGSIZE; 2826 } 2827 2828 /* Caller should hold socket lock for the passed tipc socket. */ 2829 static int __tipc_nl_list_sk_publ(struct sk_buff *skb, 2830 struct netlink_callback *cb, 2831 struct tipc_sock *tsk, u32 *last_publ) 2832 { 2833 int err; 2834 struct publication *p; 2835 2836 if (*last_publ) { 2837 list_for_each_entry(p, &tsk->publications, pport_list) { 2838 if (p->key == *last_publ) 2839 break; 2840 } 2841 if (p->key != *last_publ) { 2842 /* We never set seq or call nl_dump_check_consistent() 2843 * this means that setting prev_seq here will cause the 2844 * consistence check to fail in the netlink callback 2845 * handler. Resulting in the last NLMSG_DONE message 2846 * having the NLM_F_DUMP_INTR flag set. 
2847 */ 2848 cb->prev_seq = 1; 2849 *last_publ = 0; 2850 return -EPIPE; 2851 } 2852 } else { 2853 p = list_first_entry(&tsk->publications, struct publication, 2854 pport_list); 2855 } 2856 2857 list_for_each_entry_from(p, &tsk->publications, pport_list) { 2858 err = __tipc_nl_add_sk_publ(skb, cb, p); 2859 if (err) { 2860 *last_publ = p->key; 2861 return err; 2862 } 2863 } 2864 *last_publ = 0; 2865 2866 return 0; 2867 } 2868 2869 int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) 2870 { 2871 int err; 2872 u32 tsk_portid = cb->args[0]; 2873 u32 last_publ = cb->args[1]; 2874 u32 done = cb->args[2]; 2875 struct net *net = sock_net(skb->sk); 2876 struct tipc_sock *tsk; 2877 2878 if (!tsk_portid) { 2879 struct nlattr **attrs; 2880 struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; 2881 2882 err = tipc_nlmsg_parse(cb->nlh, &attrs); 2883 if (err) 2884 return err; 2885 2886 if (!attrs[TIPC_NLA_SOCK]) 2887 return -EINVAL; 2888 2889 err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, 2890 attrs[TIPC_NLA_SOCK], 2891 tipc_nl_sock_policy); 2892 if (err) 2893 return err; 2894 2895 if (!sock[TIPC_NLA_SOCK_REF]) 2896 return -EINVAL; 2897 2898 tsk_portid = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); 2899 } 2900 2901 if (done) 2902 return 0; 2903 2904 tsk = tipc_sk_lookup(net, tsk_portid); 2905 if (!tsk) 2906 return -EINVAL; 2907 2908 lock_sock(&tsk->sk); 2909 err = __tipc_nl_list_sk_publ(skb, cb, tsk, &last_publ); 2910 if (!err) 2911 done = 1; 2912 release_sock(&tsk->sk); 2913 sock_put(&tsk->sk); 2914 2915 cb->args[0] = tsk_portid; 2916 cb->args[1] = last_publ; 2917 cb->args[2] = done; 2918 2919 return skb->len; 2920 } 2921