1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * CLC (connection layer control) handshake over initial TCP socket to 6 * prepare for RDMA traffic 7 * 8 * Copyright IBM Corp. 2016 9 * 10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 11 */ 12 13 #include <linux/in.h> 14 #include <linux/if_ether.h> 15 #include <linux/sched/signal.h> 16 17 #include <net/sock.h> 18 #include <net/tcp.h> 19 20 #include "smc.h" 21 #include "smc_core.h" 22 #include "smc_clc.h" 23 #include "smc_ib.h" 24 25 /* Wait for data on the tcp-socket, analyze received data 26 * Returns: 27 * 0 if success and it was not a decline that we received. 28 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 29 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 30 */ 31 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 32 u8 expected_type) 33 { 34 struct sock *clc_sk = smc->clcsock->sk; 35 struct smc_clc_msg_hdr *clcm = buf; 36 struct msghdr msg = {NULL, 0}; 37 int reason_code = 0; 38 struct kvec vec; 39 int len, datlen; 40 int krflags; 41 42 /* peek the first few bytes to determine length of data to receive 43 * so we don't consume any subsequent CLC message or payload data 44 * in the TCP byte stream 45 */ 46 vec.iov_base = buf; 47 vec.iov_len = buflen; 48 krflags = MSG_PEEK | MSG_WAITALL; 49 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 50 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, 51 sizeof(struct smc_clc_msg_hdr), krflags); 52 if (signal_pending(current)) { 53 reason_code = -EINTR; 54 clc_sk->sk_err = EINTR; 55 smc->sk.sk_err = EINTR; 56 goto out; 57 } 58 if (clc_sk->sk_err) { 59 reason_code = -clc_sk->sk_err; 60 smc->sk.sk_err = clc_sk->sk_err; 61 goto out; 62 } 63 if (!len) { /* peer has performed orderly shutdown */ 64 smc->sk.sk_err = ECONNRESET; 65 reason_code = -ECONNRESET; 66 goto out; 67 } 68 if (len < 0) { 69 smc->sk.sk_err = -len; 70 reason_code = len; 71 goto out; 72 } 73 datlen = ntohs(clcm->length); 74 if ((len < sizeof(struct smc_clc_msg_hdr)) || 75 (datlen < sizeof(struct smc_clc_msg_decline)) || 76 (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || 77 memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || 78 ((clcm->type != SMC_CLC_DECLINE) && 79 (clcm->type != expected_type))) { 80 smc->sk.sk_err = EPROTO; 81 reason_code = -EPROTO; 82 goto out; 83 } 84 85 /* receive the complete CLC message */ 86 vec.iov_base = buf; 87 vec.iov_len = buflen; 88 memset(&msg, 0, sizeof(struct msghdr)); 89 krflags = MSG_WAITALL; 90 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 91 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); 92 if (len < datlen) { 93 smc->sk.sk_err = EPROTO; 94 reason_code = -EPROTO; 95 goto out; 96 } 97 if (clcm->type == SMC_CLC_DECLINE) { 98 reason_code = SMC_CLC_DECL_REPLY; 99 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { 100 smc->conn.lgr->sync_err = true; 101 smc_lgr_terminate(smc->conn.lgr); 102 } 103 } 104 105 out: 106 return reason_code; 107 } 108 109 /* send CLC DECLINE message across internal TCP socket */ 110 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) 111 { 112 struct smc_clc_msg_decline dclc; 113 struct msghdr msg; 114 struct kvec vec; 115 int len; 116 117 memset(&dclc, 0, sizeof(dclc)); 118 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 119 dclc.hdr.type = SMC_CLC_DECLINE; 120 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 121 dclc.hdr.version = SMC_CLC_V1; 122 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; 123 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); 124 dclc.peer_diagnosis = htonl(peer_diag_info); 125 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 126 127 memset(&msg, 0, sizeof(msg)); 128 vec.iov_base = &dclc; 129 vec.iov_len = sizeof(struct smc_clc_msg_decline); 130 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 131 sizeof(struct smc_clc_msg_decline)); 132 if (len < sizeof(struct smc_clc_msg_decline)) 133 smc->sk.sk_err = EPROTO; 134 if (len < 0) 135 smc->sk.sk_err = -len; 136 return len; 137 } 138 139 /* send CLC PROPOSAL message across internal TCP socket */ 140 int smc_clc_send_proposal(struct smc_sock *smc, 141 struct smc_ib_device *smcibdev, 142 u8 ibport) 143 { 144 struct smc_clc_msg_proposal pclc; 145 int reason_code = 0; 146 struct msghdr msg; 147 struct kvec vec; 148 int len, rc; 149 150 /* send SMC Proposal CLC message */ 151 memset(&pclc, 0, sizeof(pclc)); 152 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 153 pclc.hdr.type = SMC_CLC_PROPOSAL; 154 pclc.hdr.length = htons(sizeof(pclc)); 155 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 156 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 157 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 158 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 159 160 /* determine subnet and mask from internal TCP socket */ 161 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, 162 &pclc.prefix_len); 163 if (rc) 164 return SMC_CLC_DECL_CNFERR; /* configuration error */ 165 memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 166 memset(&msg, 0, sizeof(msg)); 167 vec.iov_base = &pclc; 168 vec.iov_len = sizeof(pclc); 169 /* due to the few bytes needed for clc-handshake this cannot block */ 170 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); 171 if (len < sizeof(pclc)) { 172 if (len >= 0) { 173 reason_code = -ENETUNREACH; 174 smc->sk.sk_err = -reason_code; 175 } else { 176 smc->sk.sk_err = smc->clcsock->sk->sk_err; 177 reason_code = -smc->sk.sk_err; 178 } 179 } 180 181 return reason_code; 182 } 183 184 /* send CLC CONFIRM message across internal TCP socket */ 185 int smc_clc_send_confirm(struct smc_sock *smc) 186 { 187 struct smc_connection *conn = &smc->conn; 188 struct smc_clc_msg_accept_confirm cclc; 189 struct smc_link *link; 190 int reason_code = 0; 191 struct msghdr msg; 192 struct kvec vec; 193 int len; 194 195 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 196 /* send SMC Confirm CLC msg */ 197 memset(&cclc, 0, sizeof(cclc)); 198 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 199 cclc.hdr.type = SMC_CLC_CONFIRM; 200 cclc.hdr.length = htons(sizeof(cclc)); 201 cclc.hdr.version = SMC_CLC_V1; /* SMC version */ 202 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 203 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 204 SMC_GID_SIZE); 205 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 206 hton24(cclc.qpn, link->roce_qp->qp_num); 207 cclc.rmb_rkey = 208 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 209 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ 210 cclc.rmbe_alert_token = htonl(conn->alert_token_local); 211 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); 212 cclc.rmbe_size = conn->rmbe_size_short; 213 cclc.rmb_dma_addr = cpu_to_be64( 214 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 215 hton24(cclc.psn, link->psn_initial); 216 217 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 218 219 memset(&msg, 0, sizeof(msg)); 220 vec.iov_base = &cclc; 221 vec.iov_len = sizeof(cclc); 222 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); 223 if (len < sizeof(cclc)) { 224 if (len >= 0) { 225 reason_code = -ENETUNREACH; 226 smc->sk.sk_err = -reason_code; 227 } else { 228 smc->sk.sk_err = smc->clcsock->sk->sk_err; 229 reason_code = -smc->sk.sk_err; 230 } 231 } 232 return reason_code; 233 } 234 235 /* send CLC ACCEPT message across internal TCP socket */ 236 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) 237 { 238 struct smc_connection *conn = &new_smc->conn; 239 struct smc_clc_msg_accept_confirm aclc; 240 struct smc_link *link; 241 struct msghdr msg; 242 struct kvec vec; 243 int rc = 0; 244 int len; 245 246 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 247 memset(&aclc, 0, sizeof(aclc)); 248 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 249 aclc.hdr.type = SMC_CLC_ACCEPT; 250 aclc.hdr.length = htons(sizeof(aclc)); 251 aclc.hdr.version = SMC_CLC_V1; /* SMC version */ 252 if (srv_first_contact) 253 aclc.hdr.flag = 1; 254 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 255 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 256 SMC_GID_SIZE); 257 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 258 hton24(aclc.qpn, link->roce_qp->qp_num); 259 aclc.rmb_rkey = 260 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 261 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ 262 aclc.rmbe_alert_token = htonl(conn->alert_token_local); 263 aclc.qp_mtu = link->path_mtu; 264 aclc.rmbe_size = conn->rmbe_size_short, 265 aclc.rmb_dma_addr = cpu_to_be64( 266 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 267 hton24(aclc.psn, link->psn_initial); 268 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 269 270 memset(&msg, 0, sizeof(msg)); 271 vec.iov_base = &aclc; 272 vec.iov_len = sizeof(aclc); 273 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); 274 if (len < sizeof(aclc)) { 275 if (len >= 0) 276 new_smc->sk.sk_err = EPROTO; 277 else 278 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err; 279 rc = sock_error(&new_smc->sk); 280 } 281 282 return rc; 283 } 284