1 /* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 
36 * 37 */ 38 #include <linux/dma-mapping.h> 39 #include <linux/err.h> 40 #include <linux/idr.h> 41 #include <linux/interrupt.h> 42 #include <linux/rbtree.h> 43 #include <linux/sched.h> 44 #include <linux/spinlock.h> 45 #include <linux/workqueue.h> 46 #include <linux/completion.h> 47 #include <linux/slab.h> 48 #include <linux/module.h> 49 #include <linux/sysctl.h> 50 51 #include <rdma/iw_cm.h> 52 #include <rdma/ib_addr.h> 53 #include <rdma/iw_portmap.h> 54 #include <rdma/rdma_netlink.h> 55 56 #include "iwcm.h" 57 58 MODULE_AUTHOR("Tom Tucker"); 59 MODULE_DESCRIPTION("iWARP CM"); 60 MODULE_LICENSE("Dual BSD/GPL"); 61 62 static const char * const iwcm_rej_reason_strs[] = { 63 [ECONNRESET] = "reset by remote host", 64 [ECONNREFUSED] = "refused by remote application", 65 [ETIMEDOUT] = "setup timeout", 66 }; 67 68 const char *__attribute_const__ iwcm_reject_msg(int reason) 69 { 70 size_t index; 71 72 /* iWARP uses negative errnos */ 73 index = -reason; 74 75 if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && 76 iwcm_rej_reason_strs[index]) 77 return iwcm_rej_reason_strs[index]; 78 else 79 return "unrecognized reason"; 80 } 81 EXPORT_SYMBOL(iwcm_reject_msg); 82 83 static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { 84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 87 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, 88 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, 89 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, 90 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, 91 [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} 92 }; 93 94 static struct workqueue_struct *iwcm_wq; 95 struct iwcm_work { 96 struct work_struct work; 97 struct iwcm_id_private *cm_id; 98 struct list_head list; 99 struct iw_cm_event event; 100 struct list_head free_list; 101 }; 102 103 static 
unsigned int default_backlog = 256; 104 105 static struct ctl_table_header *iwcm_ctl_table_hdr; 106 static struct ctl_table iwcm_ctl_table[] = { 107 { 108 .procname = "default_backlog", 109 .data = &default_backlog, 110 .maxlen = sizeof(default_backlog), 111 .mode = 0644, 112 .proc_handler = proc_dointvec, 113 }, 114 }; 115 116 /* 117 * The following services provide a mechanism for pre-allocating iwcm_work 118 * elements. The design pre-allocates them based on the cm_id type: 119 * LISTENING IDS: Get enough elements preallocated to handle the 120 * listen backlog. 121 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 122 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 123 * 124 * Allocating them in connect and listen avoids having to deal 125 * with allocation failures on the event upcall from the provider (which 126 * is called in the interrupt context). 127 * 128 * One exception is when creating the cm_id for incoming connection requests. 129 * There are two cases: 130 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 131 * the backlog is exceeded, then no more connection request events will 132 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 133 * to the provider to reject the connection request. 134 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 135 * If work elements cannot be allocated for the new connect request cm_id, 136 * then IWCM will call the provider reject method. This is ok since 137 * cm_conn_req_handler() runs in the workqueue thread context. 
138 */ 139 140 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 141 { 142 struct iwcm_work *work; 143 144 if (list_empty(&cm_id_priv->work_free_list)) 145 return NULL; 146 work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work, 147 free_list); 148 list_del_init(&work->free_list); 149 return work; 150 } 151 152 static void put_work(struct iwcm_work *work) 153 { 154 list_add(&work->free_list, &work->cm_id->work_free_list); 155 } 156 157 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 158 { 159 struct list_head *e, *tmp; 160 161 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { 162 list_del(e); 163 kfree(list_entry(e, struct iwcm_work, free_list)); 164 } 165 } 166 167 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 168 { 169 struct iwcm_work *work; 170 171 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 172 while (count--) { 173 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 174 if (!work) { 175 dealloc_work_entries(cm_id_priv); 176 return -ENOMEM; 177 } 178 work->cm_id = cm_id_priv; 179 INIT_LIST_HEAD(&work->list); 180 put_work(work); 181 } 182 return 0; 183 } 184 185 /* 186 * Save private data from incoming connection requests to 187 * iw_cm_event, so the low level driver doesn't have to. Adjust 188 * the event ptr to point to the local copy. 189 */ 190 static int copy_private_data(struct iw_cm_event *event) 191 { 192 void *p; 193 194 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 195 if (!p) 196 return -ENOMEM; 197 event->private_data = p; 198 return 0; 199 } 200 201 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 202 { 203 dealloc_work_entries(cm_id_priv); 204 kfree(cm_id_priv); 205 } 206 207 /* 208 * Release a reference on cm_id. If the last reference is being 209 * released, free the cm_id and return 'true'. 
210 */ 211 static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 212 { 213 if (refcount_dec_and_test(&cm_id_priv->refcount)) { 214 BUG_ON(!list_empty(&cm_id_priv->work_list)); 215 free_cm_id(cm_id_priv); 216 return true; 217 } 218 219 return false; 220 } 221 222 static void add_ref(struct iw_cm_id *cm_id) 223 { 224 struct iwcm_id_private *cm_id_priv; 225 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 226 refcount_inc(&cm_id_priv->refcount); 227 } 228 229 static void rem_ref(struct iw_cm_id *cm_id) 230 { 231 struct iwcm_id_private *cm_id_priv; 232 233 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 234 235 (void)iwcm_deref_id(cm_id_priv); 236 } 237 238 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 239 240 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 241 iw_cm_handler cm_handler, 242 void *context) 243 { 244 struct iwcm_id_private *cm_id_priv; 245 246 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 247 if (!cm_id_priv) 248 return ERR_PTR(-ENOMEM); 249 250 cm_id_priv->state = IW_CM_STATE_IDLE; 251 cm_id_priv->id.device = device; 252 cm_id_priv->id.cm_handler = cm_handler; 253 cm_id_priv->id.context = context; 254 cm_id_priv->id.event_handler = cm_event_handler; 255 cm_id_priv->id.add_ref = add_ref; 256 cm_id_priv->id.rem_ref = rem_ref; 257 spin_lock_init(&cm_id_priv->lock); 258 refcount_set(&cm_id_priv->refcount, 1); 259 init_waitqueue_head(&cm_id_priv->connect_wait); 260 init_completion(&cm_id_priv->destroy_comp); 261 INIT_LIST_HEAD(&cm_id_priv->work_list); 262 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 263 264 return &cm_id_priv->id; 265 } 266 EXPORT_SYMBOL(iw_create_cm_id); 267 268 269 static int iwcm_modify_qp_err(struct ib_qp *qp) 270 { 271 struct ib_qp_attr qp_attr; 272 273 if (!qp) 274 return -EINVAL; 275 276 qp_attr.qp_state = IB_QPS_ERR; 277 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 278 } 279 280 /* 281 * This is really the RDMAC CLOSING state. 
It is most similar to the 282 * IB SQD QP state. 283 */ 284 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 285 { 286 struct ib_qp_attr qp_attr; 287 288 BUG_ON(qp == NULL); 289 qp_attr.qp_state = IB_QPS_SQD; 290 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 291 } 292 293 /* 294 * CM_ID <-- CLOSING 295 * 296 * Block if a passive or active connection is currently being processed. Then 297 * process the event as follows: 298 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 299 * based on the abrupt flag 300 * - If the connection is already in the CLOSING or IDLE state, the peer is 301 * disconnecting concurrently with us and we've already seen the 302 * DISCONNECT event -- ignore the request and return 0 303 * - Disconnect on a listening endpoint returns -EINVAL 304 */ 305 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 306 { 307 struct iwcm_id_private *cm_id_priv; 308 unsigned long flags; 309 int ret = 0; 310 struct ib_qp *qp = NULL; 311 312 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 313 /* Wait if we're currently in a connect or accept downcall */ 314 wait_event(cm_id_priv->connect_wait, 315 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 316 317 spin_lock_irqsave(&cm_id_priv->lock, flags); 318 switch (cm_id_priv->state) { 319 case IW_CM_STATE_ESTABLISHED: 320 cm_id_priv->state = IW_CM_STATE_CLOSING; 321 322 /* QP could be <nul> for user-mode client */ 323 if (cm_id_priv->qp) 324 qp = cm_id_priv->qp; 325 else 326 ret = -EINVAL; 327 break; 328 case IW_CM_STATE_LISTEN: 329 ret = -EINVAL; 330 break; 331 case IW_CM_STATE_CLOSING: 332 /* remote peer closed first */ 333 case IW_CM_STATE_IDLE: 334 /* accept or connect returned !0 */ 335 break; 336 case IW_CM_STATE_CONN_RECV: 337 /* 338 * App called disconnect before/without calling accept after 339 * connect_request event delivered. 
340 */ 341 break; 342 case IW_CM_STATE_CONN_SENT: 343 /* Can only get here if wait above fails */ 344 default: 345 BUG(); 346 } 347 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 348 349 if (qp) { 350 if (abrupt) 351 ret = iwcm_modify_qp_err(qp); 352 else 353 ret = iwcm_modify_qp_sqd(qp); 354 355 /* 356 * If both sides are disconnecting the QP could 357 * already be in ERR or SQD states 358 */ 359 ret = 0; 360 } 361 362 return ret; 363 } 364 EXPORT_SYMBOL(iw_cm_disconnect); 365 366 /* 367 * CM_ID <-- DESTROYING 368 * 369 * Clean up all resources associated with the connection and release 370 * the initial reference taken by iw_create_cm_id. 371 * 372 * Returns true if and only if the last cm_id_priv reference has been dropped. 373 */ 374 static bool destroy_cm_id(struct iw_cm_id *cm_id) 375 { 376 struct iwcm_id_private *cm_id_priv; 377 struct ib_qp *qp; 378 unsigned long flags; 379 380 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 381 /* 382 * Wait if we're currently in a connect or accept downcall. A 383 * listening endpoint should never block here. 384 */ 385 wait_event(cm_id_priv->connect_wait, 386 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 387 388 /* 389 * Since we're deleting the cm_id, drop any events that 390 * might arrive before the last dereference. 
391 */ 392 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); 393 394 spin_lock_irqsave(&cm_id_priv->lock, flags); 395 qp = cm_id_priv->qp; 396 cm_id_priv->qp = NULL; 397 398 switch (cm_id_priv->state) { 399 case IW_CM_STATE_LISTEN: 400 cm_id_priv->state = IW_CM_STATE_DESTROYING; 401 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 402 /* destroy the listening endpoint */ 403 cm_id->device->ops.iw_destroy_listen(cm_id); 404 spin_lock_irqsave(&cm_id_priv->lock, flags); 405 break; 406 case IW_CM_STATE_ESTABLISHED: 407 cm_id_priv->state = IW_CM_STATE_DESTROYING; 408 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 409 /* Abrupt close of the connection */ 410 (void)iwcm_modify_qp_err(qp); 411 spin_lock_irqsave(&cm_id_priv->lock, flags); 412 break; 413 case IW_CM_STATE_IDLE: 414 case IW_CM_STATE_CLOSING: 415 cm_id_priv->state = IW_CM_STATE_DESTROYING; 416 break; 417 case IW_CM_STATE_CONN_RECV: 418 /* 419 * App called destroy before/without calling accept after 420 * receiving connection request event notification or 421 * returned non zero from the event callback function. 422 * In either case, must tell the provider to reject. 423 */ 424 cm_id_priv->state = IW_CM_STATE_DESTROYING; 425 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 426 cm_id->device->ops.iw_reject(cm_id, NULL, 0); 427 spin_lock_irqsave(&cm_id_priv->lock, flags); 428 break; 429 case IW_CM_STATE_CONN_SENT: 430 case IW_CM_STATE_DESTROYING: 431 default: 432 BUG(); 433 break; 434 } 435 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 436 if (qp) 437 cm_id_priv->id.device->ops.iw_rem_ref(qp); 438 439 if (cm_id->mapped) { 440 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 441 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 442 } 443 444 return iwcm_deref_id(cm_id_priv); 445 } 446 447 /* 448 * This function is only called by the application thread and cannot 449 * be called by the event thread. 
The function will wait for all 450 * references to be released on the cm_id and then kfree the cm_id 451 * object. 452 */ 453 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 454 { 455 if (!destroy_cm_id(cm_id)) 456 flush_workqueue(iwcm_wq); 457 } 458 EXPORT_SYMBOL(iw_destroy_cm_id); 459 460 /** 461 * iw_cm_check_wildcard - If IP address is 0 then use original 462 * @pm_addr: sockaddr containing the ip to check for wildcard 463 * @cm_addr: sockaddr containing the actual IP address 464 * @cm_outaddr: sockaddr to set IP addr which leaving port 465 * 466 * Checks the pm_addr for wildcard and then sets cm_outaddr's 467 * IP to the actual (cm_addr). 468 */ 469 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 470 struct sockaddr_storage *cm_addr, 471 struct sockaddr_storage *cm_outaddr) 472 { 473 if (pm_addr->ss_family == AF_INET) { 474 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 475 476 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 477 struct sockaddr_in *cm4_addr = 478 (struct sockaddr_in *)cm_addr; 479 struct sockaddr_in *cm4_outaddr = 480 (struct sockaddr_in *)cm_outaddr; 481 482 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 483 } 484 } else { 485 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 486 487 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 488 struct sockaddr_in6 *cm6_addr = 489 (struct sockaddr_in6 *)cm_addr; 490 struct sockaddr_in6 *cm6_outaddr = 491 (struct sockaddr_in6 *)cm_outaddr; 492 493 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 494 } 495 } 496 } 497 498 /** 499 * iw_cm_map - Use portmapper to map the ports 500 * @cm_id: connection manager pointer 501 * @active: Indicates the active side when true 502 * returns nonzero for error only if iwpm_create_mapinfo() fails 503 * 504 * Tries to add a mapping for a port using the Portmapper. 
If 505 * successful in mapping the IP/Port it will check the remote 506 * mapped IP address for a wildcard IP address and replace the 507 * zero IP address with the remote_addr. 508 */ 509 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 510 { 511 const char *devname = dev_name(&cm_id->device->dev); 512 const char *ifname = cm_id->device->iw_ifname; 513 struct iwpm_dev_data pm_reg_msg = {}; 514 struct iwpm_sa_data pm_msg; 515 int status; 516 517 if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || 518 strlen(ifname) >= sizeof(pm_reg_msg.if_name)) 519 return -EINVAL; 520 521 cm_id->m_local_addr = cm_id->local_addr; 522 cm_id->m_remote_addr = cm_id->remote_addr; 523 524 strcpy(pm_reg_msg.dev_name, devname); 525 strcpy(pm_reg_msg.if_name, ifname); 526 527 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 528 !iwpm_valid_pid()) 529 return 0; 530 531 cm_id->mapped = true; 532 pm_msg.loc_addr = cm_id->local_addr; 533 pm_msg.rem_addr = cm_id->remote_addr; 534 pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? 535 IWPM_FLAGS_NO_PORT_MAP : 0; 536 if (active) 537 status = iwpm_add_and_query_mapping(&pm_msg, 538 RDMA_NL_IWCM); 539 else 540 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 541 542 if (!status) { 543 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 544 if (active) { 545 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 546 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 547 &cm_id->remote_addr, 548 &cm_id->m_remote_addr); 549 } 550 } 551 552 return iwpm_create_mapinfo(&cm_id->local_addr, 553 &cm_id->m_local_addr, 554 RDMA_NL_IWCM, pm_msg.flags); 555 } 556 557 /* 558 * CM_ID <-- LISTEN 559 * 560 * Start listening for connect requests. Generates one CONNECT_REQUEST 561 * event for each inbound connect request. 
562 */ 563 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 564 { 565 struct iwcm_id_private *cm_id_priv; 566 unsigned long flags; 567 int ret; 568 569 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 570 571 if (!backlog) 572 backlog = default_backlog; 573 574 ret = alloc_work_entries(cm_id_priv, backlog); 575 if (ret) 576 return ret; 577 578 spin_lock_irqsave(&cm_id_priv->lock, flags); 579 switch (cm_id_priv->state) { 580 case IW_CM_STATE_IDLE: 581 cm_id_priv->state = IW_CM_STATE_LISTEN; 582 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 583 ret = iw_cm_map(cm_id, false); 584 if (!ret) 585 ret = cm_id->device->ops.iw_create_listen(cm_id, 586 backlog); 587 if (ret) 588 cm_id_priv->state = IW_CM_STATE_IDLE; 589 spin_lock_irqsave(&cm_id_priv->lock, flags); 590 break; 591 default: 592 ret = -EINVAL; 593 } 594 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 595 596 return ret; 597 } 598 EXPORT_SYMBOL(iw_cm_listen); 599 600 /* 601 * CM_ID <-- IDLE 602 * 603 * Rejects an inbound connection request. No events are generated. 
604 */ 605 int iw_cm_reject(struct iw_cm_id *cm_id, 606 const void *private_data, 607 u8 private_data_len) 608 { 609 struct iwcm_id_private *cm_id_priv; 610 unsigned long flags; 611 int ret; 612 613 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 614 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 615 616 spin_lock_irqsave(&cm_id_priv->lock, flags); 617 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 618 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 619 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 620 wake_up_all(&cm_id_priv->connect_wait); 621 return -EINVAL; 622 } 623 cm_id_priv->state = IW_CM_STATE_IDLE; 624 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 625 626 ret = cm_id->device->ops.iw_reject(cm_id, private_data, 627 private_data_len); 628 629 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 630 wake_up_all(&cm_id_priv->connect_wait); 631 632 return ret; 633 } 634 EXPORT_SYMBOL(iw_cm_reject); 635 636 /* 637 * CM_ID <-- ESTABLISHED 638 * 639 * Accepts an inbound connection request and generates an ESTABLISHED 640 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 641 * until the ESTABLISHED event is received from the provider. 
642 */ 643 int iw_cm_accept(struct iw_cm_id *cm_id, 644 struct iw_cm_conn_param *iw_param) 645 { 646 struct iwcm_id_private *cm_id_priv; 647 struct ib_qp *qp; 648 unsigned long flags; 649 int ret; 650 651 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 652 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 653 654 spin_lock_irqsave(&cm_id_priv->lock, flags); 655 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 656 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 657 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 658 wake_up_all(&cm_id_priv->connect_wait); 659 return -EINVAL; 660 } 661 /* Get the ib_qp given the QPN */ 662 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 663 if (!qp) { 664 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 665 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 666 wake_up_all(&cm_id_priv->connect_wait); 667 return -EINVAL; 668 } 669 cm_id->device->ops.iw_add_ref(qp); 670 cm_id_priv->qp = qp; 671 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 672 673 ret = cm_id->device->ops.iw_accept(cm_id, iw_param); 674 if (ret) { 675 /* An error on accept precludes provider events */ 676 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 677 cm_id_priv->state = IW_CM_STATE_IDLE; 678 spin_lock_irqsave(&cm_id_priv->lock, flags); 679 qp = cm_id_priv->qp; 680 cm_id_priv->qp = NULL; 681 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 682 if (qp) 683 cm_id->device->ops.iw_rem_ref(qp); 684 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 685 wake_up_all(&cm_id_priv->connect_wait); 686 } 687 688 return ret; 689 } 690 EXPORT_SYMBOL(iw_cm_accept); 691 692 /* 693 * Active Side: CM_ID <-- CONN_SENT 694 * 695 * If successful, results in the generation of a CONNECT_REPLY 696 * event. iw_cm_disconnect and iw_cm_destroy will block until the 697 * CONNECT_REPLY event is received from the provider. 
 *
 * Returns 0 on success, -EINVAL if the cm_id is not IDLE or the QPN in
 * @iw_param does not resolve to a QP, or the error reported by
 * alloc_work_entries(), iw_cm_map() or the provider's iw_connect().
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;
	unsigned long flags;
	struct ib_qp *qp = NULL;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	/* Active IDs need 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE */
	ret = alloc_work_entries(cm_id_priv, 4);
	if (ret)
		return ret;

	/* Make disconnect/destroy callers wait until the connect resolves */
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
		ret = -EINVAL;
		goto err;
	}

	/* Get the ib_qp given the QPN */
	qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		ret = -EINVAL;
		goto err;
	}
	cm_id->device->ops.iw_add_ref(qp);
	cm_id_priv->qp = qp;
	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = iw_cm_map(cm_id, true);
	if (!ret)
		ret = cm_id->device->ops.iw_connect(cm_id, iw_param);
	if (!ret)
		return 0;	/* success */

	/* Mapping or connect failed: detach the QP and return to IDLE */
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	qp = cm_id_priv->qp;
	cm_id_priv->qp = NULL;
	cm_id_priv->state = IW_CM_STATE_IDLE;
err:
	/* Reached with the lock held; qp is NULL when no reference was taken */
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	if (qp)
		cm_id->device->ops.iw_rem_ref(qp);
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);
	return ret;
}
EXPORT_SYMBOL(iw_cm_connect);

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request. The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event. The
 * listen_cm_id contains the client cm_handler, context and
 * device. These are copied when the device is cloned. The event
 * contains the new four tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
				struct iw_cm_event *iw_event)
{
	unsigned long flags;
	struct iw_cm_id *cm_id;
	struct iwcm_id_private *cm_id_priv;
	int ret;

	/*
	 * The provider should never generate a connection request
	 * event with a bad status.
	 */
	BUG_ON(iw_event->status);

	cm_id = iw_create_cm_id(listen_id_priv->id.device,
				listen_id_priv->id.cm_handler,
				listen_id_priv->id.context);
	/* If the cm_id could not be created, ignore the request */
	if (IS_ERR(cm_id))
		goto out;

	cm_id->provider_data = iw_event->provider_data;
	cm_id->m_local_addr = iw_event->local_addr;
	cm_id->m_remote_addr = iw_event->remote_addr;
	cm_id->local_addr = listen_id_priv->id.local_addr;

	ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr,
				   &iw_event->remote_addr,
				   &cm_id->remote_addr,
				   RDMA_NL_IWCM);
	if (ret) {
		/* No remote info available: use the address from the event */
		cm_id->remote_addr = iw_event->remote_addr;
	} else {
		iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr,
				     &iw_event->local_addr,
				     &cm_id->local_addr);
		/* Hand the client the cm_id's addresses, not the event's */
		iw_event->local_addr = cm_id->local_addr;
		iw_event->remote_addr = cm_id->remote_addr;
	}

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	cm_id_priv->state = IW_CM_STATE_CONN_RECV;

	/*
	 * We could be destroying the listening id. If so, ignore this
	 * upcall.
	 */
	spin_lock_irqsave(&listen_id_priv->lock, flags);
	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}
	spin_unlock_irqrestore(&listen_id_priv->lock, flags);

	/* Passive IDs need 3: ESTABLISHED, DISCONNECT, CLOSE */
	ret = alloc_work_entries(cm_id_priv, 3);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}

	/* Call the client CM handler */
	ret = cm_id->cm_handler(cm_id, iw_event);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
	}

out:
	/* Free the private data copy made by cm_event_handler() */
	if (iw_event->private_data_len)
		kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state. If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event. If the app closes, it will block until we wake
 * it up after processing this event.
 *
 * Returns the value returned by the client's cm_handler.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	/*
	 * We clear the CONNECT_WAIT bit here to allow the callback
	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
	 * from a callback handler is not allowed.
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the established event to
 * post its requests to the server. This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
 *
 * Returns the value returned by the client's cm_handler.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	struct ib_qp *qp = NULL;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	/*
	 * Clear the connect wait bit so a callback function calling
	 * iw_cm_disconnect will not wait and deadlock this thread
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
	if (iw_event->status == 0) {
		/*
		 * Keep the addresses from the event as the mapped ones and
		 * report the cm_id's own addresses to the client.
		 */
		cm_id_priv->id.m_local_addr = iw_event->local_addr;
		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
		iw_event->local_addr = cm_id_priv->id.local_addr;
		iw_event->remote_addr = cm_id_priv->id.remote_addr;
		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	} else {
		/* REJECTED or RESET */
		qp = cm_id_priv->qp;
		cm_id_priv->qp = NULL;
		cm_id_priv->state = IW_CM_STATE_IDLE;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	if (qp)
		cm_id_priv->id.device->ops.iw_rem_ref(qp);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

	/* Free the private data copy made by cm_event_handler() */
	if (iw_event->private_data_len)
		kfree(iw_event->private_data);

	/* Wake up waiters on connect complete */
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state,
 * move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
		cm_id_priv->state = IW_CM_STATE_CLOSING;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously.
 * This is the last reference that will result in waking up
 * the app thread blocked in iw_destroy_cm_id.
 *
 * Returns the client cm_handler's result when the client is notified,
 * otherwise 0.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	struct ib_qp *qp;
	unsigned long flags;
	int ret = 0, notify_event = 0;
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	/* Detach the QP now; its reference is dropped after unlocking */
	qp = cm_id_priv->qp;
	cm_id_priv->qp = NULL;

	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_IDLE;
		notify_event = 1;
		break;
	case IW_CM_STATE_DESTROYING:
		/* The cm_id is being destroyed: the client is not notified */
		break;
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	if (qp)
		cm_id_priv->id.device->ops.iw_rem_ref(qp);
	if (notify_event)
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
	return ret;
}

/*
 * Dispatch one event to the handler for its type. Returns that handler's
 * result, or 0 for the handlers that do not return a value.
 */
static int process_event(struct iwcm_id_private *cm_id_priv,
			 struct iw_cm_event *iw_event)
{
	int ret = 0;

	switch (iw_event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		cm_conn_req_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_ESTABLISHED:
		ret = cm_conn_est_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_DISCONNECT:
		cm_disconnect_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CLOSE:
		ret = cm_close_handler(cm_id_priv, iw_event);
		break;
	default:
		BUG();
	}

	return ret;
}

/*
 * Process events on the work_list for the cm_id. If the callback
 * function requests that the cm_id be deleted, a flag is set in the
 * cm_id flags to indicate that when the last reference is
 * removed, the cm_id is to be destroyed. This is necessary to
 * distinguish between an object that will be destroyed by the app
 * thread asleep on the destroy_comp list vs. an object destroyed
 * here synchronously when the last reference is removed.
 */
static void cm_work_handler(struct work_struct *_work)
{
	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
	struct iw_cm_event levent;
	struct iwcm_id_private *cm_id_priv = work->cm_id;
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	while (!list_empty(&cm_id_priv->work_list)) {
		work = list_first_entry(&cm_id_priv->work_list,
					struct iwcm_work, list);
		list_del_init(&work->list);
		/* Copy the event so the work element can be recycled now */
		levent = work->event;
		put_work(work);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);

		if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
			ret = process_event(cm_id_priv, &levent);
			/* A non-zero result asks for the cm_id to be destroyed */
			if (ret)
				WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id));
		} else
			pr_debug("dropping event %d\n", levent.event);
		/* Drop the reference taken when the event was queued */
		if (iwcm_deref_id(cm_id_priv))
			return;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * This function is called on interrupt context. Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block.
Events are queued to a per-CM_ID 1051 * work_list. If this is the first event on the work_list, the work 1052 * element is also queued on the iwcm_wq thread. 1053 * 1054 * Each event holds a reference on the cm_id. Until the last posted 1055 * event has been delivered and processed, the cm_id cannot be 1056 * deleted. 1057 * 1058 * Returns: 1059 * 0 - the event was handled. 1060 * -ENOMEM - the event was not handled due to lack of resources. 1061 */ 1062 static int cm_event_handler(struct iw_cm_id *cm_id, 1063 struct iw_cm_event *iw_event) 1064 { 1065 struct iwcm_work *work; 1066 struct iwcm_id_private *cm_id_priv; 1067 unsigned long flags; 1068 int ret = 0; 1069 1070 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1071 1072 spin_lock_irqsave(&cm_id_priv->lock, flags); 1073 work = get_work(cm_id_priv); 1074 if (!work) { 1075 ret = -ENOMEM; 1076 goto out; 1077 } 1078 1079 INIT_WORK(&work->work, cm_work_handler); 1080 work->cm_id = cm_id_priv; 1081 work->event = *iw_event; 1082 1083 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1084 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1085 work->event.private_data_len) { 1086 ret = copy_private_data(&work->event); 1087 if (ret) { 1088 put_work(work); 1089 goto out; 1090 } 1091 } 1092 1093 refcount_inc(&cm_id_priv->refcount); 1094 list_add_tail(&work->list, &cm_id_priv->work_list); 1095 queue_work(iwcm_wq, &work->work); 1096 out: 1097 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1098 return ret; 1099 } 1100 1101 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1102 struct ib_qp_attr *qp_attr, 1103 int *qp_attr_mask) 1104 { 1105 unsigned long flags; 1106 int ret; 1107 1108 spin_lock_irqsave(&cm_id_priv->lock, flags); 1109 switch (cm_id_priv->state) { 1110 case IW_CM_STATE_IDLE: 1111 case IW_CM_STATE_CONN_SENT: 1112 case IW_CM_STATE_CONN_RECV: 1113 case IW_CM_STATE_ESTABLISHED: 1114 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1115 qp_attr->qp_access_flags = 
IB_ACCESS_REMOTE_WRITE| 1116 IB_ACCESS_REMOTE_READ; 1117 ret = 0; 1118 break; 1119 default: 1120 ret = -EINVAL; 1121 break; 1122 } 1123 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1124 return ret; 1125 } 1126 1127 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1128 struct ib_qp_attr *qp_attr, 1129 int *qp_attr_mask) 1130 { 1131 unsigned long flags; 1132 int ret; 1133 1134 spin_lock_irqsave(&cm_id_priv->lock, flags); 1135 switch (cm_id_priv->state) { 1136 case IW_CM_STATE_IDLE: 1137 case IW_CM_STATE_CONN_SENT: 1138 case IW_CM_STATE_CONN_RECV: 1139 case IW_CM_STATE_ESTABLISHED: 1140 *qp_attr_mask = 0; 1141 ret = 0; 1142 break; 1143 default: 1144 ret = -EINVAL; 1145 break; 1146 } 1147 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1148 return ret; 1149 } 1150 1151 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1152 struct ib_qp_attr *qp_attr, 1153 int *qp_attr_mask) 1154 { 1155 struct iwcm_id_private *cm_id_priv; 1156 int ret; 1157 1158 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1159 switch (qp_attr->qp_state) { 1160 case IB_QPS_INIT: 1161 case IB_QPS_RTR: 1162 ret = iwcm_init_qp_init_attr(cm_id_priv, 1163 qp_attr, qp_attr_mask); 1164 break; 1165 case IB_QPS_RTS: 1166 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1167 qp_attr, qp_attr_mask); 1168 break; 1169 default: 1170 ret = -EINVAL; 1171 break; 1172 } 1173 return ret; 1174 } 1175 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1176 1177 static int __init iw_cm_init(void) 1178 { 1179 int ret; 1180 1181 ret = iwpm_init(RDMA_NL_IWCM); 1182 if (ret) 1183 return ret; 1184 1185 iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); 1186 if (!iwcm_wq) 1187 goto err_alloc; 1188 1189 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1190 iwcm_ctl_table); 1191 if (!iwcm_ctl_table_hdr) { 1192 pr_err("iw_cm: couldn't register sysctl paths\n"); 1193 goto err_sysctl; 1194 } 1195 1196 rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); 1197 return 0; 1198 1199 err_sysctl: 1200 
destroy_workqueue(iwcm_wq); 1201 err_alloc: 1202 iwpm_exit(RDMA_NL_IWCM); 1203 return -ENOMEM; 1204 } 1205 1206 static void __exit iw_cm_cleanup(void) 1207 { 1208 rdma_nl_unregister(RDMA_NL_IWCM); 1209 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1210 destroy_workqueue(iwcm_wq); 1211 iwpm_exit(RDMA_NL_IWCM); 1212 } 1213 1214 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); 1215 1216 module_init(iw_cm_init); 1217 module_exit(iw_cm_cleanup); 1218