/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "sdp.h"

/* SDP protocol version advertised in the hello header: major 2, minor 2. */
#define	SDP_MAJV_MINV	0x22

SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
    "type InfiniBand");

enum {
	SDP_HH_SIZE = 76,	/* On-wire hello header size. */
	SDP_HAH_SIZE = 180,	/* On-wire hello acknowledgement size. */
};

static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
}

/*
 * Return the device's SGE limit.  Note that the value is cached in a
 * static on first use, which assumes every RDMA device in the system
 * reports the same max_sge.
 */
static int
sdp_get_max_dev_sge(struct ib_device *dev)
{
	struct ib_device_attr *device_attr;
	static int max_sges = -1;

	if (max_sges > 0)
		goto out;

	device_attr = &dev->attrs;
	max_sges = device_attr->max_sge;

out:
	return max_sges;
}

/*
 * Create the receive/transmit rings and the RC queue pair for a
 * connection.  On failure the rings are torn down in reverse order
 * of creation.
 */
static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
	struct ib_qp_init_attr qp_init_attr = {
		.event_handler = sdp_qp_event_handler,
		.cap.max_send_wr = SDP_TX_SIZE,
		.cap.max_recv_wr = SDP_RX_SIZE,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type = IB_QPT_RC,
	};
	struct ib_device *device = id->device;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	ssk->max_sge = sdp_get_max_dev_sge(device);
	sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

	qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
	sdp_dbg(sk, "Setting max send sge to: %d\n",
	    qp_init_attr.cap.max_send_sge);

	qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
	sdp_dbg(sk, "Setting max recv sge to: %d\n",
	    qp_init_attr.cap.max_recv_sge);

	ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!ssk->sdp_dev) {
		sdp_warn(sk, "SDP not available on device %s\n", device->name);
		rc = -ENODEV;
		goto err_rx;
	}

	rc = sdp_rx_ring_create(ssk, device);
	if (rc)
		goto err_rx;

	rc = sdp_tx_ring_create(ssk, device);
	if (rc)
		goto err_tx;

	qp_init_attr.recv_cq = ssk->rx_ring.cq;
	qp_init_attr.send_cq = ssk->tx_ring.cq;

	rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
	if (rc) {
		sdp_warn(sk, "Unable to create QP: %d.\n", rc);
		goto err_qp;
	}
	ssk->qp = id->qp;
	ssk->ib_device = device;
	ssk->qp_active = 1;
	ssk->context.device = device;

	sdp_dbg(sk, "%s done\n", __func__);
	return 0;

err_qp:
	sdp_tx_ring_destroy(ssk);
err_tx:
	sdp_rx_ring_destroy(ssk);
err_rx:
	return rc;
}
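
/*
 * Connection establishment, as implemented by the handlers below, follows
 * the SDP hello exchange carried in the CM private data: the active side
 * sends a hello header (struct sdp_hh) with its posted buffer count and
 * receive size in rdma_connect(), and the passive side answers with a
 * hello acknowledgement (struct sdp_hah) in rdma_accept().  Each side
 * seeds its transmit credits from the peer's advertised "bufs" field.
 */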

/*
 * Passive side: handle an incoming connection request carrying the peer's
 * hello header.  Create the child socket and its QP, record the peer's
 * addresses, and seed transmit credits from the advertised buffer count.
 */
static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	struct sockaddr_in *src_addr;
	struct sockaddr_in *dst_addr;
	struct socket *child;
	const struct sdp_hh *h;
	struct sdp_sock *ssk;
	int rc;

	sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

	if (!h->max_adverts)
		return -EINVAL;

	child = sonewconn(sk, SS_ISCONNECTED);
	if (!child)
		return -ENOMEM;

	ssk = sdp_sk(child);
	rc = sdp_init_qp(child, id);
	if (rc)
		return rc;
	SDP_WLOCK(ssk);
	id->context = ssk;
	ssk->id = id;
	ssk->socket = child;
	ssk->cred = crhold(child->so_cred);
	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	ssk->lport = src_addr->sin_port;
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
	sdp_init_buffers(ssk, rcvbuf_initial_size);
	ssk->state = TCPS_SYN_RECEIVED;
	SDP_WUNLOCK(ssk);

	return 0;
}

/*
 * Active side: handle the connect response carrying the peer's hello
 * acknowledgement and mark the socket connected.
 */
static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
	const struct sdp_hah *h;
	struct sockaddr_in *dst_addr;
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;
	sdp_set_default_moderation(ssk);
	if (ssk->flags & SDP_DROPPED) {
		SDP_WUNLOCK(ssk);
		return 0;
	}
	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);
	h = event->param.conn.private_data;
	SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
	ssk->max_bufs = ntohs(h->bsdh.bufs);
	atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
	ssk->min_bufs = tx_credits(ssk) / 4;
	ssk->xmit_size_goal =
	    ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
	ssk->poll_cq = 1;

	dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
	ssk->fport = dst_addr->sin_port;
	ssk->faddr = dst_addr->sin_addr.s_addr;
	soisconnected(sk);
	SDP_WUNLOCK(ssk);

	return 0;
}

/*
 * Mark the socket established once the CM reports the connection up.
 */
static int
sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
{
	struct sdp_sock *ssk;

	sdp_dbg(sk, "%s\n", __func__);

	ssk = sdp_sk(sk);
	SDP_WLOCK(ssk);
	ssk->state = TCPS_ESTABLISHED;

	sdp_set_default_moderation(ssk);

	if (sk->so_options & SO_KEEPALIVE)
		sdp_start_keepalive_timer(sk);

	if ((ssk->flags & SDP_DROPPED) == 0)
		soisconnected(sk);
	SDP_WUNLOCK(ssk);
	return 0;
}

/*
 * Handle a peer disconnect.  Called with the socket write-locked.
 */
static int
sdp_disconnected_handler(struct socket *sk)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(sk);
	sdp_dbg(sk, "%s\n", __func__);

	SDP_WLOCK_ASSERT(ssk);
	if (sdp_sk(sk)->state == TCPS_SYN_RECEIVED) {
		sdp_connected_handler(sk, NULL);

		if (rcv_nxt(ssk))
			return 0;
	}

	return -ECONNRESET;
}
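
/*
 * sdp_cma_handler() below is the single rdma_cm event callback for both
 * active and passive connections.  Active side: ADDR_RESOLVED ->
 * ROUTE_RESOLVED (hello sent) -> CONNECT_RESPONSE (hello ack received) ->
 * ESTABLISHED.  Passive side: CONNECT_REQUEST (hello received, hello ack
 * sent) -> ESTABLISHED.  On any error the cleanup at the end of the
 * function detaches the cm_id from the socket and notifies it via
 * sdp_notify().
 */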
int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rdma_conn_param conn_param;
	struct socket *sk;
	struct sdp_sock *ssk;
	struct sdp_hah hah;
	struct sdp_hh hh;
	int rc = 0;

	ssk = id->context;
	sk = NULL;
	if (ssk)
		sk = ssk->socket;
	if (!ssk || !sk || !ssk->id) {
		sdp_dbg(sk,
		    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
		    event->event, ssk, sk, id);
		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
		    -EINVAL : 0;
	}

	sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

		if (sdp_link_layer_ib_only &&
		    rdma_node_get_transport(id->device->node_type) ==
		    RDMA_TRANSPORT_IB &&
		    rdma_port_get_link_layer(id->device, id->port_num) !=
		    IB_LINK_LAYER_INFINIBAND) {
			sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
			    "is allowed\n",
			    rdma_port_get_link_layer(id->device,
			    id->port_num));
			rc = -ENETUNREACH;
			break;
		}

		rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
		rc = sdp_init_qp(sk, id);
		if (rc)
			break;
		atomic_set(&sdp_sk(sk)->remote_credits,
		    rx_ring_posted(sdp_sk(sk)));
		memset(&hh, 0, sizeof hh);
		hh.bsdh.mid = SDP_MID_HELLO;
		hh.bsdh.len = htonl(sizeof(struct sdp_hh));
		hh.max_adverts = 1;
		hh.ipv_cap = 0x40;	/* IP version 4 in the high nibble. */
		hh.majv_minv = SDP_MAJV_MINV;
		sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
		hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
		hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hh;
		conn_param.private_data = &hh;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
		rc = rdma_connect(id, &conn_param);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
		rc = -ETIMEDOUT;
		break;
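	/*
	 * Passive side: a hello arrived in the connect request's private
	 * data.  sdp_connect_handler() creates the child socket and QP;
	 * the hello acknowledgement sent below advertises the number of
	 * receive buffers actually posted.
	 */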
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
		rc = sdp_connect_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
			break;
		}
		ssk = id->context;
		atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
		memset(&hah, 0, sizeof hah);
		hah.bsdh.mid = SDP_MID_HELLO_ACK;
		hah.bsdh.bufs = htons(rx_ring_posted(ssk));
		hah.bsdh.len = htonl(sizeof(struct sdp_hah));
		hah.majv_minv = SDP_MAJV_MINV;
		hah.ext_max_adverts = 1;	/* Doesn't seem to be mandated
						   by the spec, but just in
						   case. */
		hah.actrcvsz = htonl(ssk->recv_bytes);
		memset(&conn_param, 0, sizeof conn_param);
		conn_param.private_data_len = sizeof hah;
		conn_param.private_data = &hah;
		conn_param.responder_resources = 4 /* TODO */;
		conn_param.initiator_depth = 4 /* TODO */;
		conn_param.retry_count = SDP_RETRY_COUNT;
		SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
		rc = rdma_accept(id, &conn_param);
		if (rc) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
		}
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
		rc = sdp_response_handler(sk, id, event);
		if (rc) {
			sdp_dbg(sk, "Destroying qp\n");
			rdma_reject(id, NULL, 0);
		} else
			rc = rdma_accept(id, NULL);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
		rc = -ETIMEDOUT;
		break;
	case RDMA_CM_EVENT_UNREACHABLE:
		sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
		rc = -ENETUNREACH;
		break;
	case RDMA_CM_EVENT_REJECTED:
		sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
		rc = -ECONNREFUSED;
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
		sdp_sk(sk)->laddr =
		    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
		rc = sdp_connected_handler(sk, event);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:	/* A DREQ/DREP was received. */
		sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

		SDP_WLOCK(ssk);
		if (ssk->state == TCPS_LAST_ACK) {
			sdp_cancel_dreq_wait_timeout(ssk);

			sdp_dbg(sk, "%s: waiting for InfiniBand tear down\n",
			    __func__);
		}
		ssk->qp_active = 0;
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
		if (ssk->state != TCPS_TIME_WAIT) {
			if (ssk->state == TCPS_CLOSE_WAIT) {
				sdp_dbg(sk, "IB teardown while in "
				    "TCPS_CLOSE_WAIT taking reference to "
				    "let close() finish the work\n");
			}
			rc = sdp_disconnected_handler(sk);
			if (rc)
				rc = -EPIPE;
		}
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
		SDP_WLOCK(ssk);
		rc = sdp_disconnected_handler(sk);
		SDP_WUNLOCK(ssk);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
		rc = -ENETRESET;
		break;
	default:
		printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
		    event->event);
		rc = -ECONNABORTED;
		break;
	}

	sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);

	if (rc) {
		/* Detach the cm_id from the socket before notifying it. */
		SDP_WLOCK(ssk);
		if (ssk->id == id) {
			ssk->id = NULL;
			id->qp = NULL;
			id->context = NULL;
			if (sdp_notify(ssk, -rc))
				SDP_WUNLOCK(ssk);
		} else
			SDP_WUNLOCK(ssk);
	}

	return rc;
}
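
/*
 * Usage sketch (illustrative, not compiled): sdp_cma_handler is meant to
 * be installed as the rdma_cm event callback with the sdp_sock as the
 * id's context, which is what "ssk = id->context" at the top of the
 * handler relies on.  Assuming a Linux-style five-argument
 * rdma_create_id() signature:
 *
 *	id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP,
 *	    IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return (PTR_ERR(id));
 *	ssk->id = id;
 */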