/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "sdp.h"

/* SDP protocol version 2.2: major version in the high nibble, minor in the low. */
#define SDP_MAJV_MINV 0x22

SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
		"type InfiniBand");

enum {
	SDP_HH_SIZE = 76,
	SDP_HAH_SIZE = 180,
};

static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
	struct socket *sk = data;

	sdp_dbg(sk, "QP Event: %s (%d)\n", ib_event_msg(event->event),
	    event->event);
}

/*
 * Return the maximum number of scatter/gather entries supported by the
 * device.  The value is cached in a static on first use, so it reflects
 * whichever device was queried first.
 */
static int
sdp_get_max_dev_sge(struct ib_device *dev)
{
	struct ib_device_attr *device_attr;
	static int max_sges = -1;

	if (max_sges > 0)
		goto out;

	device_attr = &dev->attrs;
	max_sges = device_attr->max_sge;

out:
	return max_sges;
}

/*
 * Create the RC QP for a connection: allocate the RX and TX rings (and
 * their completion queues) and bind them to a new queue pair on the CM id.
 */
static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.qp_context = sk,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = SDP_RX_SIZE,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	ssk->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n",
	    qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n",
	    qp_init_attr.cap.max_recv_sge);

	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!ssk->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(ssk, device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(ssk, device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = ssk->rx_ring.cq;
	qp_init_attr.send_cq = ssk->tx_ring.cq;

	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	ssk->qp = id->qp;
	ssk->ib_device = device;
	ssk->qp_active = 1;
	ssk->context.device = device;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(ssk);
err_tx:
	sdp_rx_ring_destroy(ssk);
err_rx:
	return rc;
}
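/*
 * Passive side: handle an incoming connection request on a listening
 * socket.  Spawn a child socket, bring up its QP, and seed the flow
 * control credits and transmit size goal from the peer's Hello header.
 */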
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		return rc;
	SDP_WLOCK(ssk);
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}

/*
 * Active side: the peer accepted our Hello and answered with a Hello
 * acknowledgement; record the negotiated credits and buffer sizes and
 * mark the socket connected.
 */
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}

/*
 * Transition the socket to ESTABLISHED once the CM reports the
 * connection up, unless it has already been dropped.
 */
static int
sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
{
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;

	sdp_set_default_moderation(ssk);

	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);

	if ((ssk->flags & SDP_DROPPED) == 0)
		soisconnected(sk);
	SDP_WUNLOCK(ssk);
	return 0;
}
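/*
 * Handle a disconnect reported by the CM.  A connection torn down in
 * TCPS_SYN_RECEIVED may still complete the handshake if data has
 * already been received; otherwise the connection is reset.
 */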
static int
sdp_disconnected_handler(struct socket *sk)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(sk);
	sdp_dbg(sk, "%s\n", __func__);

	SDP_WLOCK_ASSERT(ssk);
	if (ssk->state == TCPS_SYN_RECEIVED) {
		sdp_connected_handler(sk, NULL);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}
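/*
 * Central RDMA CM event dispatcher for SDP.  Active connections walk
 * ADDR_RESOLVED -> ROUTE_RESOLVED (send Hello) -> CONNECT_RESPONSE
 * (receive Hello ack); passive ones take CONNECT_REQUEST (reply with
 * Hello ack) -> ESTABLISHED.  A non-zero return detaches the cm_id
 * from the socket and notifies the socket of the error.
 */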
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;
	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		    event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
		    -EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		if (sdp_link_layer_ib_only &&
		    rdma_node_get_transport(id->device->node_type) ==
			RDMA_TRANSPORT_IB &&
		    rdma_port_get_link_layer(id->device, id->port_num) !=
			IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
			    "is allowed\n",
			    rdma_port_get_link_layer(id->device, id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		atomic_set(&sdp_sk(sk)->remote_credits,
		    rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		hh.ipv_cap = 0x40;	/* IP version (IPv4) in the high nibble */
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by spec,
					    but just in case */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);
			sdp_dbg(sk, "%s: waiting for InfiniBand tear down\n",
			    __func__);
		}
		ssk->qp_active = 0;
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
				    "TCPS_CLOSE_WAIT taking reference to "
				    "let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		    event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %s (%d) done. status %d\n",
	    rdma_event_msg(event->event), event->event, rc);

	if (rc) {
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}