/*
 * Copyright (c) 2006 Mellanox Technologies Ltd. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id$
 */
#include "sdp.h"

#define SDP_MAJV_MINV 0x22

SDP_MODPARAM_SINT(sdp_link_layer_ib_only, 1, "Support only link layer of "
    "type InfiniBand");

/* On-the-wire sizes of the SDP Hello and HelloAck messages. */
enum {
        SDP_HH_SIZE = 76,
        SDP_HAH_SIZE = 180,
};

static void
sdp_qp_event_handler(struct ib_event *event, void *data)
{
        /* QP async events are intentionally ignored. */
}

static int
sdp_get_max_dev_sge(struct ib_device *dev)
{
        struct ib_device_attr attr;
        static int max_sges = -1;

        /* Cached in a static, so the first device queried wins. */
        if (max_sges > 0)
                goto out;

        ib_query_device(dev, &attr);

        max_sges = attr.max_sge;

out:
        return max_sges;
}

static int
sdp_init_qp(struct socket *sk, struct rdma_cm_id *id)
{
        struct ib_qp_init_attr qp_init_attr = {
                .event_handler = sdp_qp_event_handler,
                .cap.max_send_wr = SDP_TX_SIZE,
                .cap.max_recv_wr = SDP_RX_SIZE,
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .qp_type = IB_QPT_RC,
        };
        struct ib_device *device = id->device;
        struct sdp_sock *ssk;
        int rc;

        sdp_dbg(sk, "%s\n", __func__);

        ssk = sdp_sk(sk);
        ssk->max_sge = sdp_get_max_dev_sge(device);
        sdp_dbg(sk, "Max sges: %d\n", ssk->max_sge);

        qp_init_attr.cap.max_send_sge = MIN(ssk->max_sge, SDP_MAX_SEND_SGES);
        sdp_dbg(sk, "Setting max send sge to: %d\n",
            qp_init_attr.cap.max_send_sge);

        qp_init_attr.cap.max_recv_sge = MIN(ssk->max_sge, SDP_MAX_RECV_SGES);
        sdp_dbg(sk, "Setting max recv sge to: %d\n",
            qp_init_attr.cap.max_recv_sge);

        ssk->sdp_dev = ib_get_client_data(device, &sdp_client);
        if (!ssk->sdp_dev) {
                sdp_warn(sk, "SDP not available on device %s\n", device->name);
                rc = -ENODEV;
                goto err_rx;
        }

        rc = sdp_rx_ring_create(ssk, device);
        if (rc)
                goto err_rx;

        rc = sdp_tx_ring_create(ssk, device);
        if (rc)
                goto err_tx;

        qp_init_attr.recv_cq = ssk->rx_ring.cq;
        qp_init_attr.send_cq = ssk->tx_ring.cq;

        rc = rdma_create_qp(id, ssk->sdp_dev->pd, &qp_init_attr);
        if (rc) {
                sdp_warn(sk, "Unable to create QP: %d.\n", rc);
                goto err_qp;
        }
        ssk->qp = id->qp;
        ssk->ib_device = device;
        ssk->qp_active = 1;
        ssk->context.device = device;

        sdp_dbg(sk, "%s done\n", __func__);
        return 0;

err_qp:
        sdp_tx_ring_destroy(ssk);
err_tx:
        sdp_rx_ring_destroy(ssk);
err_rx:
        return rc;
}
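
/*
 * Passive-side half of the SDP hello exchange.  A CONNECT_REQUEST
 * carries an SDP Hello header (struct sdp_hh) in the CM private data;
 * sdp_connect_handler() spawns a child socket with sonewconn(), builds
 * the child's QP, and seeds its flow control from the Hello:
 *
 *	tx credits     <- ntohs(h->bsdh.bufs)    (receives posted by peer)
 *	xmit_size_goal <- ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh)
 *
 * The child stays in TCPS_SYN_RECEIVED until the CM reports
 * RDMA_CM_EVENT_ESTABLISHED.
 */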

static int
sdp_connect_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
        struct sockaddr_in *src_addr;
        struct sockaddr_in *dst_addr;
        struct socket *child;
        const struct sdp_hh *h;
        struct sdp_sock *ssk;
        int rc;

        sdp_dbg(sk, "%s %p -> %p\n", __func__, sdp_sk(sk)->id, id);

        h = event->param.conn.private_data;
        SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);

        if (!h->max_adverts)
                return -EINVAL;

        child = sonewconn(sk, SS_ISCONNECTED);
        if (!child)
                return -ENOMEM;

        ssk = sdp_sk(child);
        rc = sdp_init_qp(child, id);
        if (rc)
                return rc;
        SDP_WLOCK(ssk);
        id->context = ssk;
        ssk->id = id;
        ssk->socket = child;
        ssk->cred = crhold(child->so_cred);
        dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
        src_addr = (struct sockaddr_in *)&id->route.addr.src_addr;
        ssk->fport = dst_addr->sin_port;
        ssk->faddr = dst_addr->sin_addr.s_addr;
        ssk->lport = src_addr->sin_port;
        ssk->max_bufs = ntohs(h->bsdh.bufs);
        atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
        ssk->min_bufs = tx_credits(ssk) / 4;
        ssk->xmit_size_goal = ntohl(h->localrcvsz) - sizeof(struct sdp_bsdh);
        sdp_init_buffers(ssk, rcvbuf_initial_size);
        ssk->state = TCPS_SYN_RECEIVED;
        SDP_WUNLOCK(ssk);

        return 0;
}

static int
sdp_response_handler(struct socket *sk, struct rdma_cm_id *id,
    struct rdma_cm_event *event)
{
        const struct sdp_hah *h;
        struct sockaddr_in *dst_addr;
        struct sdp_sock *ssk;

        sdp_dbg(sk, "%s\n", __func__);

        ssk = sdp_sk(sk);
        SDP_WLOCK(ssk);
        ssk->state = TCPS_ESTABLISHED;
        sdp_set_default_moderation(ssk);
        if (ssk->flags & SDP_DROPPED) {
                SDP_WUNLOCK(ssk);
                return 0;
        }
        if (sk->so_options & SO_KEEPALIVE)
                sdp_start_keepalive_timer(sk);
        h = event->param.conn.private_data;
        SDP_DUMP_PACKET(sk, "RX", NULL, &h->bsdh);
        ssk->max_bufs = ntohs(h->bsdh.bufs);
        atomic_set(&ssk->tx_ring.credits, ssk->max_bufs);
        ssk->min_bufs = tx_credits(ssk) / 4;
        ssk->xmit_size_goal = ntohl(h->actrcvsz) - sizeof(struct sdp_bsdh);
        ssk->poll_cq = 1;

        dst_addr = (struct sockaddr_in *)&id->route.addr.dst_addr;
        ssk->fport = dst_addr->sin_port;
        ssk->faddr = dst_addr->sin_addr.s_addr;
        soisconnected(sk);
        SDP_WUNLOCK(ssk);

        return 0;
}

static int
sdp_connected_handler(struct socket *sk, struct rdma_cm_event *event)
{
        struct sdp_sock *ssk;

        sdp_dbg(sk, "%s\n", __func__);

        ssk = sdp_sk(sk);
        SDP_WLOCK(ssk);
        ssk->state = TCPS_ESTABLISHED;

        sdp_set_default_moderation(ssk);

        if (sk->so_options & SO_KEEPALIVE)
                sdp_start_keepalive_timer(sk);

        if ((ssk->flags & SDP_DROPPED) == 0)
                soisconnected(sk);
        SDP_WUNLOCK(ssk);
        return 0;
}

static int
sdp_disconnected_handler(struct socket *sk)
{
        struct sdp_sock *ssk;

        ssk = sdp_sk(sk);
        sdp_dbg(sk, "%s\n", __func__);

        SDP_WLOCK_ASSERT(ssk);
        if (ssk->state == TCPS_SYN_RECEIVED) {
                sdp_connected_handler(sk, NULL);

                if (rcv_nxt(ssk))
                        return 0;
        }

        return -ECONNRESET;
}
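
/*
 * sdp_cma_handler() is the rdma_cm event callback shared by listening,
 * passive and active cm_ids; id->context points at the owning sdp_sock.
 * A non-zero return from an rdma_cm callback tells the CM to destroy the
 * id, which is why the error path at the bottom detaches ssk->id first.
 *
 * For reference, a minimal sketch of how such a callback is wired up on
 * the active side (illustrative only; the actual call site lives in the
 * socket layer of this driver, and the rdma_create_id() signature varies
 * across OFED versions):
 *
 *	id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
 *	if (IS_ERR(id))
 *		return PTR_ERR(id);
 *	rc = rdma_resolve_addr(id, (struct sockaddr *)&src,
 *	    (struct sockaddr *)&dst, timeout_ms);
 */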

int
sdp_cma_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
        struct rdma_conn_param conn_param;
        struct socket *sk;
        struct sdp_sock *ssk;
        struct sdp_hah hah;
        struct sdp_hh hh;
        int rc = 0;

        ssk = id->context;
        sk = NULL;
        if (ssk)
                sk = ssk->socket;
        if (!ssk || !sk || !ssk->id) {
                sdp_dbg(sk,
                    "cm_id is being torn down, event %d, ssk %p, sk %p, id %p\n",
                    event->event, ssk, sk, id);
                return event->event == RDMA_CM_EVENT_CONNECT_REQUEST ?
                    -EINVAL : 0;
        }

        sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id);
        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
                sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_RESOLVED\n");

                if (sdp_link_layer_ib_only &&
                    rdma_node_get_transport(id->device->node_type) ==
                        RDMA_TRANSPORT_IB &&
                    rdma_port_get_link_layer(id->device, id->port_num) !=
                        IB_LINK_LAYER_INFINIBAND) {
                        sdp_dbg(sk, "Link layer is: %d. Only IB link layer "
                            "is allowed\n",
                            rdma_port_get_link_layer(id->device,
                                id->port_num));
                        rc = -ENETUNREACH;
                        break;
                }

                rc = rdma_resolve_route(id, SDP_ROUTE_TIMEOUT);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
                sdp_dbg(sk, "RDMA_CM_EVENT_ADDR_ERROR\n");
                rc = -ENETUNREACH;
                break;
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_RESOLVED : %p\n", id);
                rc = sdp_init_qp(sk, id);
                if (rc)
                        break;
                atomic_set(&sdp_sk(sk)->remote_credits,
                    rx_ring_posted(sdp_sk(sk)));
                memset(&hh, 0, sizeof hh);
                hh.bsdh.mid = SDP_MID_HELLO;
                hh.bsdh.len = htonl(sizeof(struct sdp_hh));
                hh.max_adverts = 1;
                hh.ipv_cap = 0x40;
                hh.majv_minv = SDP_MAJV_MINV;
                sdp_init_buffers(sdp_sk(sk), rcvbuf_initial_size);
                hh.bsdh.bufs = htons(rx_ring_posted(sdp_sk(sk)));
                hh.localrcvsz = hh.desremrcvsz = htonl(sdp_sk(sk)->recv_bytes);
                sdp_sk(sk)->laddr =
                    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
                memset(&conn_param, 0, sizeof conn_param);
                conn_param.private_data_len = sizeof hh;
                conn_param.private_data = &hh;
                conn_param.responder_resources = 4 /* TODO */;
                conn_param.initiator_depth = 4 /* TODO */;
                conn_param.retry_count = SDP_RETRY_COUNT;
                SDP_DUMP_PACKET(NULL, "TX", NULL, &hh.bsdh);
                rc = rdma_connect(id, &conn_param);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
                sdp_dbg(sk, "RDMA_CM_EVENT_ROUTE_ERROR : %p\n", id);
                rc = -ETIMEDOUT;
                break;
        case RDMA_CM_EVENT_CONNECT_REQUEST:
                sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_REQUEST\n");
                rc = sdp_connect_handler(sk, id, event);
                if (rc) {
                        sdp_dbg(sk, "Destroying qp\n");
                        rdma_reject(id, NULL, 0);
                        break;
                }
                ssk = id->context;
                atomic_set(&ssk->remote_credits, rx_ring_posted(ssk));
                memset(&hah, 0, sizeof hah);
                hah.bsdh.mid = SDP_MID_HELLO_ACK;
                hah.bsdh.bufs = htons(rx_ring_posted(ssk));
                hah.bsdh.len = htonl(sizeof(struct sdp_hah));
                hah.majv_minv = SDP_MAJV_MINV;
                hah.ext_max_adverts = 1; /* Doesn't seem to be mandated by
                                            spec, but just in case */
                hah.actrcvsz = htonl(ssk->recv_bytes);
                memset(&conn_param, 0, sizeof conn_param);
                conn_param.private_data_len = sizeof hah;
                conn_param.private_data = &hah;
                conn_param.responder_resources = 4 /* TODO */;
                conn_param.initiator_depth = 4 /* TODO */;
                conn_param.retry_count = SDP_RETRY_COUNT;
                SDP_DUMP_PACKET(sk, "TX", NULL, &hah.bsdh);
                rc = rdma_accept(id, &conn_param);
                if (rc) {
                        ssk->id = NULL;
                        id->qp = NULL;
                        id->context = NULL;
                }
                break;
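        /*
         * The cases below finish the active-side handshake and handle
         * teardown.  On CONNECT_RESPONSE the peer's HelloAck arrives in
         * event->param.conn.private_data; after it is parsed,
         * rdma_accept(id, NULL) completes the CM exchange with no
         * further private data.
         */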
        case RDMA_CM_EVENT_CONNECT_RESPONSE:
                sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_RESPONSE\n");
                rc = sdp_response_handler(sk, id, event);
                if (rc) {
                        sdp_dbg(sk, "Destroying qp\n");
                        rdma_reject(id, NULL, 0);
                } else
                        rc = rdma_accept(id, NULL);
                break;
        case RDMA_CM_EVENT_CONNECT_ERROR:
                sdp_dbg(sk, "RDMA_CM_EVENT_CONNECT_ERROR\n");
                rc = -ETIMEDOUT;
                break;
        case RDMA_CM_EVENT_UNREACHABLE:
                sdp_dbg(sk, "RDMA_CM_EVENT_UNREACHABLE\n");
                rc = -ENETUNREACH;
                break;
        case RDMA_CM_EVENT_REJECTED:
                sdp_dbg(sk, "RDMA_CM_EVENT_REJECTED\n");
                rc = -ECONNREFUSED;
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                sdp_dbg(sk, "RDMA_CM_EVENT_ESTABLISHED\n");
                sdp_sk(sk)->laddr =
                    ((struct sockaddr_in *)&id->route.addr.src_addr)->sin_addr.s_addr;
                rc = sdp_connected_handler(sk, event);
                break;
        case RDMA_CM_EVENT_DISCONNECTED: /* This means DREQ/DREP received */
                sdp_dbg(sk, "RDMA_CM_EVENT_DISCONNECTED\n");

                SDP_WLOCK(ssk);
                if (ssk->state == TCPS_LAST_ACK) {
                        sdp_cancel_dreq_wait_timeout(ssk);
                        sdp_dbg(sk, "%s: waiting for InfiniBand tear down\n",
                            __func__);
                }
                ssk->qp_active = 0;
                SDP_WUNLOCK(ssk);
                rdma_disconnect(id);
                SDP_WLOCK(ssk);
                if (ssk->state != TCPS_TIME_WAIT) {
                        if (ssk->state == TCPS_CLOSE_WAIT) {
                                sdp_dbg(sk, "IB teardown while in "
                                    "TCPS_CLOSE_WAIT taking reference to "
                                    "let close() finish the work\n");
                        }
                        rc = sdp_disconnected_handler(sk);
                        if (rc)
                                rc = -EPIPE;
                }
                SDP_WUNLOCK(ssk);
                break;
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
                sdp_dbg(sk, "RDMA_CM_EVENT_TIMEWAIT_EXIT\n");
                SDP_WLOCK(ssk);
                rc = sdp_disconnected_handler(sk);
                SDP_WUNLOCK(ssk);
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                sdp_dbg(sk, "RDMA_CM_EVENT_DEVICE_REMOVAL\n");
                rc = -ENETRESET;
                break;
        default:
                printk(KERN_ERR "SDP: Unexpected CMA event: %d\n",
                    event->event);
                rc = -ECONNABORTED;
                break;
        }

        sdp_dbg(sk, "event %d done. status %d\n", event->event, rc);

        if (rc) {
                SDP_WLOCK(ssk);
                if (ssk->id == id) {
                        ssk->id = NULL;
                        id->qp = NULL;
                        id->context = NULL;
                        /* If sdp_notify() returns the ssk, the lock is
                         * still held and must be dropped here. */
                        if (sdp_notify(ssk, -rc))
                                SDP_WUNLOCK(ssk);
                } else
                        SDP_WUNLOCK(ssk);
        }

        return rc;
}
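
/*
 * Event flow, as implemented above:
 *
 * Active:   ADDR_RESOLVED -> ROUTE_RESOLVED (QP built, Hello sent via
 *           rdma_connect()) -> CONNECT_RESPONSE (HelloAck parsed,
 *           rdma_accept()) -> ESTABLISHED.
 * Passive:  CONNECT_REQUEST (child socket and QP built, HelloAck sent
 *           via rdma_accept()) -> ESTABLISHED.
 * Teardown: DISCONNECTED and TIMEWAIT_EXIT drive the TCP-like state
 *           machine; any non-zero rc detaches the cm_id from the socket
 *           and raises the error via sdp_notify().
 */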