1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * CLC (connection layer control) handshake over initial TCP socket to 6 * prepare for RDMA traffic 7 * 8 * Copyright IBM Corp. 2016 9 * 10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 11 */ 12 13 #include <linux/in.h> 14 #include <linux/if_ether.h> 15 #include <linux/sched/signal.h> 16 17 #include <net/sock.h> 18 #include <net/tcp.h> 19 20 #include "smc.h" 21 #include "smc_core.h" 22 #include "smc_clc.h" 23 #include "smc_ib.h" 24 25 /* check if received message has a correct header length and contains valid 26 * heading and trailing eyecatchers 27 */ 28 static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) 29 { 30 struct smc_clc_msg_proposal_prefix *pclc_prfx; 31 struct smc_clc_msg_accept_confirm *clc; 32 struct smc_clc_msg_proposal *pclc; 33 struct smc_clc_msg_decline *dclc; 34 struct smc_clc_msg_trail *trl; 35 36 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) 37 return false; 38 switch (clcm->type) { 39 case SMC_CLC_PROPOSAL: 40 pclc = (struct smc_clc_msg_proposal *)clcm; 41 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 42 if (ntohs(pclc->hdr.length) != 43 sizeof(*pclc) + ntohs(pclc->iparea_offset) + 44 sizeof(*pclc_prfx) + 45 pclc_prfx->ipv6_prefixes_cnt * 46 sizeof(struct smc_clc_ipv6_prefix) + 47 sizeof(*trl)) 48 return false; 49 trl = (struct smc_clc_msg_trail *) 50 ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); 51 break; 52 case SMC_CLC_ACCEPT: 53 case SMC_CLC_CONFIRM: 54 clc = (struct smc_clc_msg_accept_confirm *)clcm; 55 if (ntohs(clc->hdr.length) != sizeof(*clc)) 56 return false; 57 trl = &clc->trl; 58 break; 59 case SMC_CLC_DECLINE: 60 dclc = (struct smc_clc_msg_decline *)clcm; 61 if (ntohs(dclc->hdr.length) != sizeof(*dclc)) 62 return false; 63 trl = &dclc->trl; 64 break; 65 default: 66 return false; 67 } 68 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) 69 return false; 70 return true; 71 } 72 73 /* Wait for data on the tcp-socket, analyze received data 74 * Returns: 75 * 0 if success and it was not a decline that we received. 76 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. 77 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. 78 */ 79 int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 80 u8 expected_type) 81 { 82 struct sock *clc_sk = smc->clcsock->sk; 83 struct smc_clc_msg_hdr *clcm = buf; 84 struct msghdr msg = {NULL, 0}; 85 int reason_code = 0; 86 struct kvec vec; 87 int len, datlen; 88 int krflags; 89 90 /* peek the first few bytes to determine length of data to receive 91 * so we don't consume any subsequent CLC message or payload data 92 * in the TCP byte stream 93 */ 94 vec.iov_base = buf; 95 vec.iov_len = buflen; 96 krflags = MSG_PEEK | MSG_WAITALL; 97 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 98 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, 99 sizeof(struct smc_clc_msg_hdr), krflags); 100 if (signal_pending(current)) { 101 reason_code = -EINTR; 102 clc_sk->sk_err = EINTR; 103 smc->sk.sk_err = EINTR; 104 goto out; 105 } 106 if (clc_sk->sk_err) { 107 reason_code = -clc_sk->sk_err; 108 smc->sk.sk_err = clc_sk->sk_err; 109 goto out; 110 } 111 if (!len) { /* peer has performed orderly shutdown */ 112 smc->sk.sk_err = ECONNRESET; 113 reason_code = -ECONNRESET; 114 goto out; 115 } 116 if (len < 0) { 117 smc->sk.sk_err = -len; 118 reason_code = len; 119 goto out; 120 } 121 datlen = ntohs(clcm->length); 122 if ((len < sizeof(struct smc_clc_msg_hdr)) || 123 (datlen > buflen) || 124 ((clcm->type != SMC_CLC_DECLINE) && 125 (clcm->type != expected_type))) { 126 smc->sk.sk_err = EPROTO; 127 reason_code = -EPROTO; 128 goto out; 129 } 130 131 /* receive the complete CLC message */ 132 vec.iov_base = buf; 133 vec.iov_len = buflen; 134 memset(&msg, 0, sizeof(struct msghdr)); 135 krflags = MSG_WAITALL; 136 smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; 137 len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); 138 if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { 139 smc->sk.sk_err = EPROTO; 140 reason_code = -EPROTO; 141 goto out; 142 } 143 if (clcm->type == SMC_CLC_DECLINE) { 144 reason_code = SMC_CLC_DECL_REPLY; 145 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { 146 smc->conn.lgr->sync_err = true; 147 smc_lgr_terminate(smc->conn.lgr); 148 } 149 } 150 151 out: 152 return reason_code; 153 } 154 155 /* send CLC DECLINE message across internal TCP socket */ 156 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) 157 { 158 struct smc_clc_msg_decline dclc; 159 struct msghdr msg; 160 struct kvec vec; 161 int len; 162 163 memset(&dclc, 0, sizeof(dclc)); 164 memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 165 dclc.hdr.type = SMC_CLC_DECLINE; 166 dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); 167 dclc.hdr.version = SMC_CLC_V1; 168 dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; 169 memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); 170 dclc.peer_diagnosis = htonl(peer_diag_info); 171 memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 172 173 memset(&msg, 0, sizeof(msg)); 174 vec.iov_base = &dclc; 175 vec.iov_len = sizeof(struct smc_clc_msg_decline); 176 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, 177 sizeof(struct smc_clc_msg_decline)); 178 if (len < sizeof(struct smc_clc_msg_decline)) 179 smc->sk.sk_err = EPROTO; 180 if (len < 0) 181 smc->sk.sk_err = -len; 182 return sock_error(&smc->sk); 183 } 184 185 /* send CLC PROPOSAL message across internal TCP socket */ 186 int smc_clc_send_proposal(struct smc_sock *smc, 187 struct smc_ib_device *smcibdev, 188 u8 ibport) 189 { 190 struct smc_clc_msg_proposal_prefix pclc_prfx; 191 struct smc_clc_msg_proposal pclc; 192 struct smc_clc_msg_trail trl; 193 int reason_code = 0; 194 struct kvec vec[3]; 195 struct msghdr msg; 196 int len, plen, rc; 197 198 /* send SMC Proposal CLC message */ 199 plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); 200 memset(&pclc, 0, sizeof(pclc)); 201 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 202 pclc.hdr.type = SMC_CLC_PROPOSAL; 203 pclc.hdr.length = htons(plen); 204 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 205 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 206 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 207 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 208 pclc.iparea_offset = htons(0); 209 210 memset(&pclc_prfx, 0, sizeof(pclc_prfx)); 211 /* determine subnet and mask from internal TCP socket */ 212 rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, 213 &pclc_prfx.prefix_len); 214 if (rc) 215 return SMC_CLC_DECL_CNFERR; /* configuration error */ 216 pclc_prfx.ipv6_prefixes_cnt = 0; 217 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 218 memset(&msg, 0, sizeof(msg)); 219 vec[0].iov_base = &pclc; 220 vec[0].iov_len = sizeof(pclc); 221 vec[1].iov_base = &pclc_prfx; 222 vec[1].iov_len = sizeof(pclc_prfx); 223 vec[2].iov_base = &trl; 224 vec[2].iov_len = sizeof(trl); 225 /* due to the few bytes needed for clc-handshake this cannot block */ 226 len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); 227 if (len < sizeof(pclc)) { 228 if (len >= 0) { 229 reason_code = -ENETUNREACH; 230 smc->sk.sk_err = -reason_code; 231 } else { 232 smc->sk.sk_err = smc->clcsock->sk->sk_err; 233 reason_code = -smc->sk.sk_err; 234 } 235 } 236 237 return reason_code; 238 } 239 240 /* send CLC CONFIRM message across internal TCP socket */ 241 int smc_clc_send_confirm(struct smc_sock *smc) 242 { 243 struct smc_connection *conn = &smc->conn; 244 struct smc_clc_msg_accept_confirm cclc; 245 struct smc_link *link; 246 int reason_code = 0; 247 struct msghdr msg; 248 struct kvec vec; 249 int len; 250 251 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 252 /* send SMC Confirm CLC msg */ 253 memset(&cclc, 0, sizeof(cclc)); 254 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 255 cclc.hdr.type = SMC_CLC_CONFIRM; 256 cclc.hdr.length = htons(sizeof(cclc)); 257 cclc.hdr.version = SMC_CLC_V1; /* SMC version */ 258 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 259 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 260 SMC_GID_SIZE); 261 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 262 hton24(cclc.qpn, link->roce_qp->qp_num); 263 cclc.rmb_rkey = 264 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 265 cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ 266 cclc.rmbe_alert_token = htonl(conn->alert_token_local); 267 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); 268 cclc.rmbe_size = conn->rmbe_size_short; 269 cclc.rmb_dma_addr = cpu_to_be64( 270 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 271 hton24(cclc.psn, link->psn_initial); 272 273 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 274 275 memset(&msg, 0, sizeof(msg)); 276 vec.iov_base = &cclc; 277 vec.iov_len = sizeof(cclc); 278 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); 279 if (len < sizeof(cclc)) { 280 if (len >= 0) { 281 reason_code = -ENETUNREACH; 282 smc->sk.sk_err = -reason_code; 283 } else { 284 smc->sk.sk_err = smc->clcsock->sk->sk_err; 285 reason_code = -smc->sk.sk_err; 286 } 287 } 288 return reason_code; 289 } 290 291 /* send CLC ACCEPT message across internal TCP socket */ 292 int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) 293 { 294 struct smc_connection *conn = &new_smc->conn; 295 struct smc_clc_msg_accept_confirm aclc; 296 struct smc_link *link; 297 struct msghdr msg; 298 struct kvec vec; 299 int rc = 0; 300 int len; 301 302 link = &conn->lgr->lnk[SMC_SINGLE_LINK]; 303 memset(&aclc, 0, sizeof(aclc)); 304 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 305 aclc.hdr.type = SMC_CLC_ACCEPT; 306 aclc.hdr.length = htons(sizeof(aclc)); 307 aclc.hdr.version = SMC_CLC_V1; /* SMC version */ 308 if (srv_first_contact) 309 aclc.hdr.flag = 1; 310 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 311 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 312 SMC_GID_SIZE); 313 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 314 hton24(aclc.qpn, link->roce_qp->qp_num); 315 aclc.rmb_rkey = 316 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 317 aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ 318 aclc.rmbe_alert_token = htonl(conn->alert_token_local); 319 aclc.qp_mtu = link->path_mtu; 320 aclc.rmbe_size = conn->rmbe_size_short, 321 aclc.rmb_dma_addr = cpu_to_be64( 322 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 323 hton24(aclc.psn, link->psn_initial); 324 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 325 326 memset(&msg, 0, sizeof(msg)); 327 vec.iov_base = &aclc; 328 vec.iov_len = sizeof(aclc); 329 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); 330 if (len < sizeof(aclc)) { 331 if (len >= 0) 332 new_smc->sk.sk_err = EPROTO; 333 else 334 new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err; 335 rc = sock_error(&new_smc->sk); 336 } 337 338 return rc; 339 } 340