/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

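/* release an smc socket: close the SMC connection actively (or, for a
 * fallback socket, mark it closed right away), drop the internal CLC/TCP
 * socket once the state machine reached SMC_CLOSED, and detach the sock
 */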
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_info && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);
	kfree(smc->connect_info);
	smc->connect_info = NULL;

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			mutex_lock(&smc->clcsock_release_lock);
			sock_release(smc->clcsock);
			smc->clcsock = NULL;
			mutex_unlock(&smc->clcsock_release_lock);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

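/* smc_bind() delegates the actual bind to the internal CLC/TCP socket after
 * replicating the inet_bind() argument checks; binding is only allowed
 * while the smc sock is still in state SMC_INIT
 */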
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options for which we don't get control via setsockopt */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	if (!rmb_desc->wr_reg) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
			rmb_desc->regerr = 1;
			return -EFAULT;
		}
		rmb_desc->wr_reg = 1;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}

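/* client side of the link confirmation for the first contact with a peer:
 * wait for the server's CONFIRM LINK request, bring the QP to RTS, register
 * the connection's RMB, send the CONFIRM LINK response, and reject the
 * subsequent ADD LINK request since only a single link is supported
 */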
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc->use_fallback = true;
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

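/* a negative reason_code is an internal error that rules out any fallback;
 * a positive one is a CLC decline reason that is sent to the peer (unless
 * the peer declined first) before falling back to TCP
 */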
/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	if (smc->conn.lgr->is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc_conn_free(&smc->conn);
	return reason_code;
}

/* check if there is an RDMA device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
			  u8 *ibport, unsigned short vlan_id, u8 gid[])
{
	int reason_code = 0;

	/* PNET table look-up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
				    gid);
	if (!(*ibdev))
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */

	return reason_code;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
	if (!(*ismdev))
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	return 0;
}

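/* SMC-D may run on a VLAN; the two helpers below register the VLAN ID with
 * the ISM device for the duration of the CLC handshake and drop it again
 * afterwards (it is re-registered during connection setup if ISM is used)
 */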
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smcd_dev *ismdev,
				      unsigned short vlan_id)
{
	if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
					struct smcd_dev *ismdev,
					unsigned short vlan_id)
{
	if (!is_smcd)
		return 0;
	if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_ib_device *ibdev, u8 ibport,
			   u8 gid[], struct smcd_dev *ismdev)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
				CLC_WAIT_TIME);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_ib_device *ibdev, u8 ibport)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	mutex_lock(&smc_client_lgr_pending);
	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
					ibport, ntoh24(aclc->qpn), &aclc->lcl,
					NULL, 0);
	if (local_contact < 0) {
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 local_contact);
	} else {
		if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

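/* client setup over ISM differs from the RDMA case: there is no link to
 * confirm, so buffers are created and the CLC CONFIRM is sent right away
 */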
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smcd_dev *ismdev)
{
	int local_contact = SMC_FIRST_CONTACT;
	int rc = 0;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
					NULL, ismdev, aclc->gid);
	if (local_contact < 0) {
		mutex_unlock(&smc_server_lgr_pending);
		return SMC_CLC_DECL_MEM;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(smc, true))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, local_contact);
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_ib_device *ibdev;
	struct smcd_dev *ismdev;
	u8 gid[SMC_GID_SIZE];
	unsigned short vlan;
	int smc_type;
	int rc = 0;
	u8 ibport;

	sock_hold(&smc->sk); /* sock put in passive closing */

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* check for VLAN ID */
	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* check if there is an ISM device available */
	if (!smc_check_ism(smc, &ismdev) &&
	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is an RDMA device available */
	if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA is supported, fall back */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, ismdev);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
		return smc_connect_decline_fallback(smc, rc);
	}

	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
	return 0;
}

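/* worker for a non-blocking connect: perform the TCP-level connect on the
 * internal CLC socket first, then run the regular __smc_connect() path;
 * any error is propagated to the smc sock via sk_err
 */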
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	int rc;

	lock_sock(&smc->sk);
	rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
			    smc->connect_info->alen, smc->connect_info->flags);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
		goto out;
	}
	if (rc < 0) {
		smc->sk.sk_err = -rc;
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (smc->sk.sk_err)
		smc->sk.sk_state_change(&smc->sk);
	else
		smc->sk.sk_write_space(&smc->sk);
	kfree(smc->connect_info);
	smc->connect_info = NULL;
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (flags & O_NONBLOCK) {
		if (smc->connect_info) {
			rc = -EALREADY;
			goto out;
		}
		smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
		if (!smc->connect_info) {
			rc = -ENOMEM;
			goto out;
		}
		smc->connect_info->alen = alen;
		smc->connect_info->flags = flags ^ O_NONBLOCK;
		memcpy(&smc->connect_info->addr, addr, alen);
		schedule_work(&smc->connect_work);
		rc = -EINPROGRESS;
	} else {
		rc = kernel_connect(smc->clcsock, addr, alen, flags);
		if (rc)
			goto out;

		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		new_sk->sk_prot->unhash(new_sk);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

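/* the accept queue is protected by the parent's accept_q_lock; each queued
 * sock holds an extra reference from enqueue until unlink
 */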
/* remove a socket from the accept queue of its parent listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback) {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
	} else {
		if (sk->sk_state == SMC_CLOSED)
			smc_conn_free(&smc->conn);
	}
	release_sock(sk);
	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
}

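/* server side counterpart of smc_clnt_conf_first_link(): register the RMB,
 * send the CONFIRM LINK request and wait for the client's response, then
 * send an ADD LINK request and wait for its response before the link is
 * marked active
 */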
static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_resp_rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	new_smc->fallback_rsn = reason_code;
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_CNFERR;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_clc_msg_proposal *pclc,
				struct smc_ib_device *ibdev, u8 ibport,
				int *local_contact)
{
	/* allocate connection / link group */
	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
					 &pclc->lcl, NULL, 0);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false))
		return SMC_CLC_DECL_MEM;

	return 0;
}

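/* For SMC-D the proposed peer GID must additionally be reachable through
 * the local ISM device; otherwise the just created link group is forgotten
 * again and the proposal is declined with a configuration error
 */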
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smcd_dev *ismdev,
			       int *local_contact)
{
	struct smc_clc_msg_smcd *pclc_smcd;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
					 ismdev, pclc_smcd->gid);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* Check if peer can be reached via ISM device */
	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
			    new_smc->conn.lgr->vlan_id,
			    new_smc->conn.lgr->smcd)) {
		if (*local_contact == SMC_FIRST_CONTACT)
			smc_lgr_forget(new_smc->conn.lgr);
		smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_CNFERR;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(new_smc, true)) {
		if (*local_contact == SMC_FIRST_CONTACT)
			smc_lgr_forget(new_smc->conn.lgr);
		smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}

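/* The proposal handling below prefers ISM over RDMA when the proposal
 * offers both (SMC_TYPE_B); if neither mode can be set up, the connection
 * is declined and falls back to TCP
 */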
/* set up an incoming SMC (-R or -D) connection on the server side */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *ibdev;
	bool ism_supported = false;
	struct smcd_dev *ismdev;
	u8 buf[SMC_CLC_MAX_LEN];
	int local_contact = 0;
	unsigned short vlan;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
				       SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (reason_code) {
		smc_listen_decline(new_smc, reason_code, 0);
		return;
	}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
		return;
	}

	mutex_lock(&smc_server_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if ISM is available */
	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
	    !smc_check_ism(new_smc, &ismdev) &&
	    !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
		ism_supported = true;
	}

	/* check if RDMA is available */
	if (!ism_supported &&
	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
	     smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
	     smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
	     smc_listen_rdma_check(new_smc, pclc) ||
	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
				  &local_contact) ||
	     smc_listen_rdma_reg(new_smc, local_contact))) {
		/* SMC not supported, decline */
		mutex_unlock(&smc_server_lgr_pending);
		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
				   local_contact);
		return;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		smc_listen_decline(new_smc, rc, local_contact);
		return;
	}

	/* SMC-D does not need this lock any more */
	if (ism_supported)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (reason_code) {
		if (!ism_supported)
			mutex_unlock(&smc_server_lgr_pending);
		smc_listen_decline(new_smc, reason_code, local_contact);
		return;
	}

	/* finish worker */
	if (!ism_supported) {
		rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
		mutex_unlock(&smc_server_lgr_pending);
		if (rc)
			return;
	}
	smc_conn_save_peer_info(new_smc, &cclc);
	smc_listen_out_connected(new_smc);
}

static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

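/* move the socket to state SMC_LISTEN: the internal CLC/TCP socket starts
 * listening, and a worker accepts incoming TCP connections on its behalf
 */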
static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we cannot apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_put in smc_tcp_listen_work() */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

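/* sendmsg on an smc socket: MSG_FASTOPEN forces a fallback to TCP while
 * still in state SMC_INIT, since TCP Fast Open is not supported by SMC
 */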
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;
	struct smc_sock *smc;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
		if (sk->sk_err)
			mask |= EPOLLERR;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask = smc_accept_poll(sk);
		} else {
			if (atomic_read(&smc->conn.sndbuf_space) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}

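/* shutdown is only valid in connected or peer-closing states; for fallback
 * sockets it is passed straight through to the internal CLC/TCP socket
 */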
static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}
	if (rc)
		return rc;

	if (optlen < sizeof(int))
		return -EINVAL;
	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			if (!smc->use_fallback)
				rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (!val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
	release_sock(sk);

	return rc;
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

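/* ioctl on an smc socket: fallback sockets delegate to the internal TCP
 * socket, while native SMC sockets answer queue-size and urgent-data
 * queries from the connection's RMB and send buffer state
 */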
static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not sent + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not sent only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}

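/* sendpage on an smc socket: only usable in state SMC_ACTIVE; native SMC
 * connections take the regular sendmsg path via sock_no_sendpage()
 */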
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}

/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
 * are delayed until the respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * has completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

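/* Usage sketch from user space (illustrative only, assuming the standard
 * socket API; this snippet is not part of this file): an AF_SMC socket is
 * used like a TCP socket and transparently falls back to TCP when the peer
 * is not SMC-capable, e.g.
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	write(fd, buf, len);
 *
 * SMCPROTO_SMC selects the IPv4 variant, SMCPROTO_SMC6 the IPv6 one; the
 * mapping to PF_INET/PF_INET6 for the internal socket happens below.
 */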
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	smc->fallback_rsn = 0;
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	smc_core_exit();
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);