/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - non-blocking connect postponed
 *   - IPv6 support postponed
 *   - support for alternate links postponed
 *   - partial support for non-blocking sockets only
 *   - support for urgent data postponed
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_DESTROY_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	sock_hold(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	} else {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	sk->sk_prot->unhash(sk);
	release_sock(sk);

	sock_put(sk);
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
{
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* determine subnet and mask of internal TCP socket */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct sockaddr_in addr;
	int rc = -ENOENT;
	int len;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	for_ifa(dst->dev->ip_ptr) {
		if (ifa->ifa_address != addr.sin_addr.s_addr)
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(dst->dev->ip_ptr);

out_rel:
	dst_release(dst);
out:
	return rc;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);
	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	return rc;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	smc->conn.peer_conn_idx = clc->conn_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
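/* Client side of the CLC handshake, as implemented by smc_connect_rdma()
 * below: send an SMC CLC Proposal over the internal TCP (CLC) socket, wait
 * for the server's CLC Accept, create the connection/link group and RMBs,
 * and answer with a CLC Confirm; on first contact for a link group, the
 * LLC CONFIRM LINK exchange over the RoCE fabric follows.  Any setup
 * failure that leaves the peer waiting is answered with a CLC Decline,
 * and the connection falls back to plain TCP.
 */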
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
	struct smc_link *link;
	u8 srv_first_contact;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* do inband token exchange */
	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0) /* configuration error */
		goto decline_rdma;
	/* receive SMC Accept CLC message */
	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
				       SMC_CLC_ACCEPT);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0)
		goto decline_rdma;

	srv_first_contact = aclc.hdr.flag;
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
					ibport, &aclc.lcl, srv_first_contact);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma_unlock;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, &aclc);

	rc = smc_sndbuf_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}
	rc = smc_rmb_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &aclc);

	rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
		}
	}

	rc = smc_clc_send_confirm(smc);
	if (rc)
		goto out_err_unlock;

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(
			smc, &smcibdev->gid[ibport - 1]);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
		}
		if (reason_code > 0)
			goto decline_rdma_unlock;
	}

	mutex_unlock(&smc_create_lgr_pending);
	smc_tx_init(smc);
	smc_rx_init(smc);

out_connected:
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return rc ? rc : local_contact;

decline_rdma_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
out_err:
	return rc;
}
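/* smc_connect_rdma() returns a negative errno only on fatal errors; any
 * other return value (0 or the local_contact indicator) means the socket
 * is usable, either via SMC or via the TCP fallback.  For non-blocking
 * sockets, smc_connect() below merely starts the TCP-level connect; the
 * RDMA setup is completed later from smc_poll().
 */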
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;
	smc->addr = addr;	/* needed for nonblocking connect */

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	/* setup RDMA connection */
	rc = smc_connect_rdma(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct sock *sk = &lsmc->sk;
	struct socket *new_clcsock;
	struct sock *new_sk;
	int rc;

	release_sock(&lsmc->sk);
	new_sk = smc_sock_alloc(sock_net(sk), NULL);
	if (!new_sk) {
		rc = -ENOMEM;
		lsmc->sk.sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(&lsmc->sk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(&lsmc->sk);
	if (rc < 0) {
		lsmc->sk.sk_err = -rc;
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}
	if (lsmc->sk.sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk);
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk);
}
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			/* tbd in follow-on patch: close this sock */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback)
		smc_close_active(smc);
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_shutdown |= SHUTDOWN_MASK;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);
	sock_put(sk);
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
	}

	return rc;
}
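/* Server side of the CLC handshake, as implemented by smc_listen_work()
 * below: for each TCP connection accepted on the internal CLC socket, wait
 * for the client's CLC Proposal, verify that a matching RoCE device and
 * subnet exist, create or reuse a link group, send a CLC Accept, and wait
 * for the client's CLC Confirm.  First contact additionally requires the
 * LLC CONFIRM LINK request/response over the RoCE fabric, handled by
 * smc_serv_conf_first_link() above.  On any setup failure the connection
 * is declined and falls back to TCP.
 */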
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct smc_clc_msg_accept_confirm cclc;
	int local_contact = SMC_REUSE_CONTACT;
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal pclc;
	struct smc_ib_device *smcibdev;
	struct sockaddr_in peeraddr;
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0, len;
	__be32 subnet;
	u8 prefix_len;
	u8 ibport;

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
				       SMC_CLC_PROPOSAL);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}
	if ((pclc.outgoing_subnet != subnet) ||
	    (pclc.prefix_len != prefix_len)) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* get address of the peer connected to the internal TCP socket */
	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);

	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
					smcibdev, ibport, &pclc.lcl, 0);
	if (local_contact == SMC_REUSE_CONTACT)
		/* lock no longer needed, free it due to following
		 * smc_clc_wait_msg() call
		 */
		mutex_unlock(&smc_create_lgr_pending);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma;
	}
	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	rc = smc_sndbuf_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}
	rc = smc_rmb_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}

	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc)
		goto out_err;

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;
	smc_conn_save_peer_info(new_smc, &cclc);
	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &cclc);

	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code < 0) {
			/* peer is not aware of a problem */
			rc = reason_code;
			goto out_err;
		}
		if (reason_code > 0)
			goto decline_rdma;
	}

	smc_tx_init(new_smc);
	smc_rx_init(new_smc);

out_connected:
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
	if (local_contact == SMC_FIRST_CONTACT)
		mutex_unlock(&smc_create_lgr_pending);
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
	return;

decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(new_smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err:
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);
	goto enqueue; /* queue new sock with sk_err set */
}
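/* Listen handling is split in two work items: smc_tcp_listen_work() below
 * blocks in kernel_accept() on the CLC socket for as long as the SMC socket
 * stays in SMC_LISTEN state, and queues one smc_listen_work instance per
 * accepted TCP connection, so the CLC handshakes of several incoming
 * connections can proceed independently of each other.
 */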
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(&lsmc->sk);
	while (lsmc->sk.sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = false; /* assume rdma capability first */
		sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		schedule_work(&new_smc->smc_listen_work);
	}

out:
	release_sock(&lsmc->sk);
	lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	schedule_work(&smc->tcp_listen_work);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);

out:
	release_sock(sk);
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = smc_rx_recvmsg(smc, msg, len, flags);

out:
	release_sock(sk);
	return rc;
}

static unsigned int smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk;
	struct sock *sk;

	lock_sock(parent);
	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
		sk = (struct sock *)isk;

		if (sk->sk_state == SMC_ACTIVE) {
			release_sock(parent);
			return POLLIN | POLLRDNORM;
		}
	}
	release_sock(parent);

	return 0;
}
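/* While the SMC socket is in SMC_INIT state or has fallen back to TCP,
 * poll is delegated to the internal CLC socket.  This is also where a
 * postponed non-blocking connect is completed: once the TCP-level connect
 * signals POLLOUT, smc_poll() triggers the RDMA setup via
 * smc_connect_rdma().
 */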
static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_err = smc->clcsock->sk->sk_err;
			if (sk->sk_err) {
				mask |= POLLERR;
			} else {
				rc = smc_connect_rdma(smc);
				if (rc < 0)
					mask |= POLLERR;
				else
					/* success cases including fallback */
					mask |= POLLOUT | POLLWRNORM;
			}
		}
		release_sock(sk);
	} else {
		sock_poll_wait(file, sk_sleep(sk), wait);
		if (sk->sk_state == SMC_LISTEN)
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		if (sk->sk_err)
			mask |= POLLERR;
		if (atomic_read(&smc->conn.sndbuf_space) ||
		    (sk->sk_shutdown & SEND_SHUTDOWN)) {
			mask |= POLLOUT | POLLWRNORM;
		} else {
			sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		}
		if (atomic_read(&smc->conn.bytes_to_rcv))
			mask |= POLLIN | POLLRDNORM;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= POLLHUP;
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			mask |= POLLIN | POLLRDNORM | POLLRDHUP;
		if (sk->sk_state == SMC_APPCLOSEWAIT1)
			mask |= POLLIN;
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		if (sk->sk_state == SMC_LISTEN)
			rc = smc_close_active(smc);
		else
			rc = 0;
			/* nothing more to do because peer is not involved */
		break;
	}
	rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
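/* From user space, an AF_SMC socket is used like a TCP socket: only the
 * address family in socket() differs, while bind()/connect() still take an
 * AF_INET sockaddr_in.  Illustrative userspace sketch (not part of this
 * module; port and address are hypothetical examples):
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, IPPROTO_TCP);
 *	struct sockaddr_in sin = { .sin_family = AF_INET };
 *
 *	sin.sin_port = htons(12345);
 *	sin.sin_addr.s_addr = inet_addr("192.0.2.1");
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin));
 */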
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;	/* smc->clcsock is not valid here */
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_lgr_free(lgr); /* free link group */
	}
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);