/*
 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

static const char * const iwcm_rej_reason_strs[] = {
        [ECONNRESET]    = "reset by remote host",
        [ECONNREFUSED]  = "refused by remote application",
        [ETIMEDOUT]     = "setup timeout",
};

const char *__attribute_const__ iwcm_reject_msg(int reason)
{
        size_t index;

        /* iWARP uses negative errnos */
        index = -reason;

        if (index < ARRAY_SIZE(iwcm_rej_reason_strs) &&
            iwcm_rej_reason_strs[index])
                return iwcm_rej_reason_strs[index];
        else
                return "unrecognized reason";
}
EXPORT_SYMBOL(iwcm_reject_msg);
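
/*
 * Illustrative note (not part of this module's logic): iwcm_reject_msg()
 * is meant for callers that want to log why a connection attempt failed,
 * e.g. while handling a CONNECT_REPLY in a client's cm_handler. A minimal,
 * purely hypothetical sketch:
 *
 *      if (event->status)
 *              pr_info("iWARP connect failed: %s (%d)\n",
 *                      iwcm_reject_msg(event->status), event->status);
 *
 * "event" is assumed to be the struct iw_cm_event handed to the handler;
 * providers report these statuses as negative errnos, which is why the
 * lookup above negates the value.
 */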

static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
        [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
        [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
        [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
        [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
        [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
        [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
        [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb},
        [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb}
};

static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
        struct work_struct work;
        struct iwcm_id_private *cm_id;
        struct list_head list;
        struct iw_cm_event event;
        struct list_head free_list;
};

static unsigned int default_backlog = 256;

static struct ctl_table_header *iwcm_ctl_table_hdr;
static struct ctl_table iwcm_ctl_table[] = {
        {
                .procname     = "default_backlog",
                .data         = &default_backlog,
                .maxlen       = sizeof(default_backlog),
                .mode         = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1       = SYSCTL_ZERO,
                .extra2       = SYSCTL_INT_MAX,
        },
};

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements. The design pre-allocates them based on the cm_id type:
 *      LISTENING IDS:  Get enough elements preallocated to handle the
 *                      listen backlog.
 *      ACTIVE IDS:     4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
 *      PASSIVE IDS:    3: ESTABLISHED, DISCONNECT, CLOSE
 *
 * Allocating them in connect and listen avoids having to deal
 * with allocation failures on the event upcall from the provider (which
 * is called in interrupt context).
 *
 * One exception is when creating the cm_id for incoming connection requests.
 * There are two cases:
 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If
 *    the backlog is exceeded, then no more connection request events will
 *    be processed. cm_event_handler() returns -ENOMEM in this case. It's up
 *    to the provider to reject the connection request.
 * 2) in the connection request workqueue handler, cm_conn_req_handler().
 *    If work elements cannot be allocated for the new connect request cm_id,
 *    then IWCM will call the provider reject method. This is ok since
 *    cm_conn_req_handler() runs in the workqueue thread context.
 */

static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
{
        struct iwcm_work *work;

        if (list_empty(&cm_id_priv->work_free_list))
                return NULL;
        work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work,
                                free_list);
        list_del_init(&work->free_list);
        return work;
}

static void put_work(struct iwcm_work *work)
{
        list_add(&work->free_list, &work->cm_id->work_free_list);
}

static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
        struct list_head *e, *tmp;

        list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) {
                list_del(e);
                kfree(list_entry(e, struct iwcm_work, free_list));
        }
}

static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
{
        struct iwcm_work *work;

        BUG_ON(!list_empty(&cm_id_priv->work_free_list));
        while (count--) {
                work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
                if (!work) {
                        dealloc_work_entries(cm_id_priv);
                        return -ENOMEM;
                }
                work->cm_id = cm_id_priv;
                INIT_LIST_HEAD(&work->list);
                put_work(work);
        }
        return 0;
}

/*
 * Save private data from incoming connection requests to
 * iw_cm_event, so the low level driver doesn't have to. Adjust
 * the event ptr to point to the local copy.
 */
static int copy_private_data(struct iw_cm_event *event)
{
        void *p;

        p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
        if (!p)
                return -ENOMEM;
        event->private_data = p;
        return 0;
}

static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
        dealloc_work_entries(cm_id_priv);
        kfree(cm_id_priv);
}

/*
 * Release a reference on cm_id. If the last reference is being
 * released, free the cm_id and return 'true'.
 */
static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
        if (refcount_dec_and_test(&cm_id_priv->refcount)) {
                BUG_ON(!list_empty(&cm_id_priv->work_list));
                free_cm_id(cm_id_priv);
                return true;
        }

        return false;
}

static void add_ref(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        refcount_inc(&cm_id_priv->refcount);
}

static void rem_ref(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        (void)iwcm_deref_id(cm_id_priv);
}

static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);

struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
                                 iw_cm_handler cm_handler,
                                 void *context)
{
        struct iwcm_id_private *cm_id_priv;

        cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
        if (!cm_id_priv)
                return ERR_PTR(-ENOMEM);

        cm_id_priv->state = IW_CM_STATE_IDLE;
        cm_id_priv->id.device = device;
        cm_id_priv->id.cm_handler = cm_handler;
        cm_id_priv->id.context = context;
        cm_id_priv->id.event_handler = cm_event_handler;
        cm_id_priv->id.add_ref = add_ref;
        cm_id_priv->id.rem_ref = rem_ref;
        spin_lock_init(&cm_id_priv->lock);
        refcount_set(&cm_id_priv->refcount, 1);
        init_waitqueue_head(&cm_id_priv->connect_wait);
        init_completion(&cm_id_priv->destroy_comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
        INIT_LIST_HEAD(&cm_id_priv->work_free_list);

        return &cm_id_priv->id;
}
EXPORT_SYMBOL(iw_create_cm_id);
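
/*
 * Illustrative sketch (not part of this module): a kernel client passes a
 * cm_handler to iw_create_cm_id() and later receives the events that
 * process_event() below dispatches. The "my_client" handler here is
 * hypothetical; only the event types come from this file:
 *
 *      static int my_client_cm_handler(struct iw_cm_id *cm_id,
 *                                      struct iw_cm_event *event)
 *      {
 *              switch (event->event) {
 *              case IW_CM_EVENT_CONNECT_REPLY:
 *                      // event->status != 0 means the connect failed
 *                      break;
 *              case IW_CM_EVENT_ESTABLISHED:
 *              case IW_CM_EVENT_DISCONNECT:
 *              case IW_CM_EVENT_CLOSE:
 *                      break;
 *              default:
 *                      break;
 *              }
 *              return 0;       // nonzero asks the IWCM to destroy this cm_id
 *      }
 */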

static int iwcm_modify_qp_err(struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;

        if (!qp)
                return -EINVAL;

        qp_attr.qp_state = IB_QPS_ERR;
        return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * This is really the RDMAC CLOSING state. It is most similar to the
 * IB SQD QP state.
 */
static int iwcm_modify_qp_sqd(struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;

        BUG_ON(qp == NULL);
        qp_attr.qp_state = IB_QPS_SQD;
        return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * CM_ID <-- CLOSING
 *
 * Block if a passive or active connection is currently being processed. Then
 * process the event as follows:
 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
 *   based on the abrupt flag
 * - If the connection is already in the CLOSING or IDLE state, the peer is
 *   disconnecting concurrently with us and we've already seen the
 *   DISCONNECT event -- ignore the request and return 0
 * - Disconnect on a listening endpoint returns -EINVAL
 */
int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret = 0;
        struct ib_qp *qp = NULL;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        /* Wait if we're currently in a connect or accept downcall */
        wait_event(cm_id_priv->connect_wait,
                   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
                cm_id_priv->state = IW_CM_STATE_CLOSING;

                /* QP could be NULL for a user-mode client */
                if (cm_id_priv->qp)
                        qp = cm_id_priv->qp;
                else
                        ret = -EINVAL;
                break;
        case IW_CM_STATE_LISTEN:
                ret = -EINVAL;
                break;
        case IW_CM_STATE_CLOSING:
                /* remote peer closed first */
        case IW_CM_STATE_IDLE:
                /* accept or connect returned !0 */
                break;
        case IW_CM_STATE_CONN_RECV:
                /*
                 * App called disconnect before/without calling accept after
                 * connect_request event delivered.
                 */
                break;
        case IW_CM_STATE_CONN_SENT:
                /* Can only get here if wait above fails */
        default:
                BUG();
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        if (qp) {
                if (abrupt)
                        ret = iwcm_modify_qp_err(qp);
                else
                        ret = iwcm_modify_qp_sqd(qp);

                /*
                 * If both sides are disconnecting the QP could
                 * already be in ERR or SQD states
                 */
                ret = 0;
        }

        return ret;
}
EXPORT_SYMBOL(iw_cm_disconnect);

/*
 * CM_ID <-- DESTROYING
 *
 * Clean up all resources associated with the connection and release
 * the initial reference taken by iw_create_cm_id.
 *
 * Returns true if and only if the last cm_id_priv reference has been dropped.
 */
static bool destroy_cm_id(struct iw_cm_id *cm_id)
{
        struct iwcm_id_private *cm_id_priv;
        struct ib_qp *qp;
        unsigned long flags;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        /*
         * Wait if we're currently in a connect or accept downcall. A
         * listening endpoint should never block here.
         */
        wait_event(cm_id_priv->connect_wait,
                   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

        /*
         * Since we're deleting the cm_id, drop any events that
         * might arrive before the last dereference.
         */
        set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        qp = cm_id_priv->qp;
        cm_id_priv->qp = NULL;

        switch (cm_id_priv->state) {
        case IW_CM_STATE_LISTEN:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* destroy the listening endpoint */
                cm_id->device->ops.iw_destroy_listen(cm_id);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_ESTABLISHED:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                /* Abrupt close of the connection */
                (void)iwcm_modify_qp_err(qp);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_IDLE:
        case IW_CM_STATE_CLOSING:
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                break;
        case IW_CM_STATE_CONN_RECV:
                /*
                 * App called destroy before/without calling accept after
                 * receiving connection request event notification or
                 * returned a nonzero value from the event callback function.
                 * In either case, must tell the provider to reject.
                 */
                cm_id_priv->state = IW_CM_STATE_DESTROYING;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_id->device->ops.iw_reject(cm_id, NULL, 0);
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_DESTROYING:
        default:
                BUG();
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        if (qp)
                cm_id_priv->id.device->ops.iw_rem_ref(qp);

        if (cm_id->mapped) {
                iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
                iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
        }

        return iwcm_deref_id(cm_id_priv);
}

/*
 * This function is only called by the application thread and cannot
 * be called by the event thread. The function will wait for all
 * references to be released on the cm_id and then kfree the cm_id
 * object.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
        if (!destroy_cm_id(cm_id))
                flush_workqueue(iwcm_wq);
}
EXPORT_SYMBOL(iw_destroy_cm_id);

/**
 * iw_cm_check_wildcard - If the IP address is a wildcard, use the original
 * @pm_addr: sockaddr containing the IP address to check for a wildcard
 * @cm_addr: sockaddr containing the actual IP address
 * @cm_outaddr: sockaddr whose IP address is set, leaving the port untouched
 *
 * Checks pm_addr for a wildcard address and, if found, sets cm_outaddr's
 * IP address to the actual one (cm_addr).
 */
static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
                                 struct sockaddr_storage *cm_addr,
                                 struct sockaddr_storage *cm_outaddr)
{
        if (pm_addr->ss_family == AF_INET) {
                struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;

                if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) {
                        struct sockaddr_in *cm4_addr =
                                (struct sockaddr_in *)cm_addr;
                        struct sockaddr_in *cm4_outaddr =
                                (struct sockaddr_in *)cm_outaddr;

                        cm4_outaddr->sin_addr = cm4_addr->sin_addr;
                }
        } else {
                struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr;

                if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) {
                        struct sockaddr_in6 *cm6_addr =
                                (struct sockaddr_in6 *)cm_addr;
                        struct sockaddr_in6 *cm6_outaddr =
                                (struct sockaddr_in6 *)cm_outaddr;

                        cm6_outaddr->sin6_addr = cm6_addr->sin6_addr;
                }
        }
}

/**
 * iw_cm_map - Use portmapper to map the ports
 * @cm_id: connection manager pointer
 * @active: Indicates the active side when true
 *
 * Tries to add a mapping for a port using the Portmapper. If
 * successful in mapping the IP/Port it will check the remote
 * mapped IP address for a wildcard IP address and replace the
 * zero IP address with the remote_addr.
 *
 * Returns nonzero for error only if iwpm_create_mapinfo() fails.
 */
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
        const char *devname = dev_name(&cm_id->device->dev);
        const char *ifname = cm_id->device->iw_ifname;
        struct iwpm_dev_data pm_reg_msg = {};
        struct iwpm_sa_data pm_msg;
        int status;

        if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) ||
            strlen(ifname) >= sizeof(pm_reg_msg.if_name))
                return -EINVAL;

        cm_id->m_local_addr = cm_id->local_addr;
        cm_id->m_remote_addr = cm_id->remote_addr;

        strcpy(pm_reg_msg.dev_name, devname);
        strcpy(pm_reg_msg.if_name, ifname);

        if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
            !iwpm_valid_pid())
                return 0;

        cm_id->mapped = true;
        pm_msg.loc_addr = cm_id->local_addr;
        pm_msg.rem_addr = cm_id->remote_addr;
        pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ?
                       IWPM_FLAGS_NO_PORT_MAP : 0;
        if (active)
                status = iwpm_add_and_query_mapping(&pm_msg,
                                                    RDMA_NL_IWCM);
        else
                status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM);

        if (!status) {
                cm_id->m_local_addr = pm_msg.mapped_loc_addr;
                if (active) {
                        cm_id->m_remote_addr = pm_msg.mapped_rem_addr;
                        iw_cm_check_wildcard(&pm_msg.mapped_rem_addr,
                                             &cm_id->remote_addr,
                                             &cm_id->m_remote_addr);
                }
        }

        return iwpm_create_mapinfo(&cm_id->local_addr,
                                   &cm_id->m_local_addr,
                                   RDMA_NL_IWCM, pm_msg.flags);
}
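
/*
 * Illustrative sketch (not part of this module): a kernel ULP that accepts
 * iWARP connections creates a cm_id, fills in local_addr and calls
 * iw_cm_listen(). The "srv" and "listen_ss" names are hypothetical; a
 * backlog of 0 falls back to default_backlog, which is tunable through the
 * sysctl registered in iw_cm_init() (net.iw_cm.default_backlog):
 *
 *      srv_cm_id = iw_create_cm_id(ib_dev, srv_cm_handler, srv_ctx);
 *      if (IS_ERR(srv_cm_id))
 *              return PTR_ERR(srv_cm_id);
 *      srv_cm_id->local_addr = listen_ss;      // struct sockaddr_storage
 *      ret = iw_cm_listen(srv_cm_id, 0);       // 0 => default_backlog
 *
 * Each inbound request then reaches srv_cm_handler() as an
 * IW_CM_EVENT_CONNECT_REQUEST on a newly created child cm_id.
 */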

/*
 * CM_ID <-- LISTEN
 *
 * Start listening for connect requests. Generates one CONNECT_REQUEST
 * event for each inbound connect request.
 */
int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        if (!backlog)
                backlog = default_backlog;

        ret = alloc_work_entries(cm_id_priv, backlog);
        if (ret)
                return ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_IDLE:
                cm_id_priv->state = IW_CM_STATE_LISTEN;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                ret = iw_cm_map(cm_id, false);
                if (!ret)
                        ret = cm_id->device->ops.iw_create_listen(cm_id,
                                                                  backlog);
                if (ret)
                        cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                break;
        default:
                ret = -EINVAL;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        return ret;
}
EXPORT_SYMBOL(iw_cm_listen);

/*
 * CM_ID <-- IDLE
 *
 * Rejects an inbound connection request. No events are generated.
 */
int iw_cm_reject(struct iw_cm_id *cm_id,
                 const void *private_data,
                 u8 private_data_len)
{
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        cm_id_priv->state = IW_CM_STATE_IDLE;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        ret = cm_id->device->ops.iw_reject(cm_id, private_data,
                                           private_data_len);

        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        wake_up_all(&cm_id_priv->connect_wait);

        return ret;
}
EXPORT_SYMBOL(iw_cm_reject);
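
/*
 * Illustrative sketch (not part of this module): inside a client's
 * cm_handler, an IW_CM_EVENT_CONNECT_REQUEST is normally answered with
 * either iw_cm_accept() or iw_cm_reject() on the child cm_id passed to
 * the handler. The QP number and private data below are hypothetical
 * placeholders:
 *
 *      struct iw_cm_conn_param param = {
 *              .qpn = my_qpn,
 *              .private_data = my_pdata,
 *              .private_data_len = my_pdata_len,
 *      };
 *
 *      if (want_connection)
 *              ret = iw_cm_accept(cm_id, &param);
 *      else
 *              ret = iw_cm_reject(cm_id, my_pdata, my_pdata_len);
 *
 * iw_cm_accept() moves the cm_id toward ESTABLISHED, while iw_cm_reject()
 * returns it to IDLE without generating further events.
 */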

/*
 * CM_ID <-- ESTABLISHED
 *
 * Accepts an inbound connection request and generates an ESTABLISHED
 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
 * until the ESTABLISHED event is received from the provider.
 */
int iw_cm_accept(struct iw_cm_id *cm_id,
                 struct iw_cm_conn_param *iw_param)
{
        struct iwcm_id_private *cm_id_priv;
        struct ib_qp *qp;
        unsigned long flags;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        /* Get the ib_qp given the QPN */
        qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
        if (!qp) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
                return -EINVAL;
        }
        cm_id->device->ops.iw_add_ref(qp);
        cm_id_priv->qp = qp;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        ret = cm_id->device->ops.iw_accept(cm_id, iw_param);
        if (ret) {
                /* An error on accept precludes provider events */
                BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
                cm_id_priv->state = IW_CM_STATE_IDLE;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
                qp = cm_id_priv->qp;
                cm_id_priv->qp = NULL;
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                if (qp)
                        cm_id->device->ops.iw_rem_ref(qp);
                clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
                wake_up_all(&cm_id_priv->connect_wait);
        }

        return ret;
}
EXPORT_SYMBOL(iw_cm_accept);

/*
 * Active Side: CM_ID <-- CONN_SENT
 *
 * If successful, results in the generation of a CONNECT_REPLY
 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
 * CONNECT_REPLY event is received from the provider.
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
        struct iwcm_id_private *cm_id_priv;
        int ret;
        unsigned long flags;
        struct ib_qp *qp = NULL;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        ret = alloc_work_entries(cm_id_priv, 4);
        if (ret)
                return ret;

        set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        spin_lock_irqsave(&cm_id_priv->lock, flags);

        if (cm_id_priv->state != IW_CM_STATE_IDLE) {
                ret = -EINVAL;
                goto err;
        }

        /* Get the ib_qp given the QPN */
        qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
        if (!qp) {
                ret = -EINVAL;
                goto err;
        }
        cm_id->device->ops.iw_add_ref(qp);
        cm_id_priv->qp = qp;
        cm_id_priv->state = IW_CM_STATE_CONN_SENT;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        ret = iw_cm_map(cm_id, true);
        if (!ret)
                ret = cm_id->device->ops.iw_connect(cm_id, iw_param);
        if (!ret)
                return 0;       /* success */

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        qp = cm_id_priv->qp;
        cm_id_priv->qp = NULL;
        cm_id_priv->state = IW_CM_STATE_IDLE;
err:
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        if (qp)
                cm_id->device->ops.iw_rem_ref(qp);
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        wake_up_all(&cm_id_priv->connect_wait);
        return ret;
}
EXPORT_SYMBOL(iw_cm_connect);
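
/*
 * Illustrative sketch (not part of this module): the active side fills in
 * the local and remote addresses plus an iw_cm_conn_param (as in the accept
 * sketch above) and calls iw_cm_connect(); the outcome arrives later as an
 * IW_CM_EVENT_CONNECT_REPLY whose status is nonzero on failure. The "cli"
 * names are hypothetical:
 *
 *      cli_cm_id = iw_create_cm_id(ib_dev, cli_cm_handler, cli_ctx);
 *      cli_cm_id->local_addr = src_ss;         // struct sockaddr_storage
 *      cli_cm_id->remote_addr = dst_ss;
 *      ret = iw_cm_connect(cli_cm_id, &param);
 *
 * iw_cm_connect() transitions the cm_id to CONN_SENT; iw_cm_disconnect()
 * and iw_destroy_cm_id() block until the CONNECT_REPLY has been delivered.
 */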

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request. The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event. The
 * listen_cm_id contains the client cm_handler, context and
 * device. These are copied when the cm_id is cloned. The event
 * contains the new four tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
                                struct iw_cm_event *iw_event)
{
        unsigned long flags;
        struct iw_cm_id *cm_id;
        struct iwcm_id_private *cm_id_priv;
        int ret;

        /*
         * The provider should never generate a connection request
         * event with a bad status.
         */
        BUG_ON(iw_event->status);

        cm_id = iw_create_cm_id(listen_id_priv->id.device,
                                listen_id_priv->id.cm_handler,
                                listen_id_priv->id.context);
        /* If the cm_id could not be created, ignore the request */
        if (IS_ERR(cm_id))
                goto out;

        cm_id->provider_data = iw_event->provider_data;
        cm_id->m_local_addr = iw_event->local_addr;
        cm_id->m_remote_addr = iw_event->remote_addr;
        cm_id->local_addr = listen_id_priv->id.local_addr;

        ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr,
                                   &iw_event->remote_addr,
                                   &cm_id->remote_addr,
                                   RDMA_NL_IWCM);
        if (ret) {
                cm_id->remote_addr = iw_event->remote_addr;
        } else {
                iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr,
                                     &iw_event->local_addr,
                                     &cm_id->local_addr);
                iw_event->local_addr = cm_id->local_addr;
                iw_event->remote_addr = cm_id->remote_addr;
        }

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        cm_id_priv->state = IW_CM_STATE_CONN_RECV;

        /*
         * We could be destroying the listening id. If so, ignore this
         * upcall.
         */
        spin_lock_irqsave(&listen_id_priv->lock, flags);
        if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
                spin_unlock_irqrestore(&listen_id_priv->lock, flags);
                iw_cm_reject(cm_id, NULL, 0);
                iw_destroy_cm_id(cm_id);
                goto out;
        }
        spin_unlock_irqrestore(&listen_id_priv->lock, flags);

        ret = alloc_work_entries(cm_id_priv, 3);
        if (ret) {
                iw_cm_reject(cm_id, NULL, 0);
                iw_destroy_cm_id(cm_id);
                goto out;
        }

        /* Call the client CM handler */
        ret = cm_id->cm_handler(cm_id, iw_event);
        if (ret) {
                iw_cm_reject(cm_id, NULL, 0);
                iw_destroy_cm_id(cm_id);
        }

out:
        if (iw_event->private_data_len)
                kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state. If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event. If the app closes, it will block until we wake
 * it up after processing this event.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
                               struct iw_cm_event *iw_event)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);

        /*
         * We clear the CONNECT_WAIT bit here to allow the callback
         * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
         * from a callback handler is not allowed.
         */
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
        cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
        wake_up_all(&cm_id_priv->connect_wait);

        return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the established event to
 * post its requests to the server. This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
                               struct iw_cm_event *iw_event)
{
        struct ib_qp *qp = NULL;
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        /*
         * Clear the connect wait bit so a callback function calling
         * iw_cm_disconnect will not wait and deadlock this thread
         */
        clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
        BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
        if (iw_event->status == 0) {
                cm_id_priv->id.m_local_addr = iw_event->local_addr;
                cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
                iw_event->local_addr = cm_id_priv->id.local_addr;
                iw_event->remote_addr = cm_id_priv->id.remote_addr;
                cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
        } else {
                /* REJECTED or RESET */
                qp = cm_id_priv->qp;
                cm_id_priv->qp = NULL;
                cm_id_priv->state = IW_CM_STATE_IDLE;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        if (qp)
                cm_id_priv->id.device->ops.iw_rem_ref(qp);
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

        if (iw_event->private_data_len)
                kfree(iw_event->private_data);

        /* Wake up waiters on connect complete */
        wake_up_all(&cm_id_priv->connect_wait);

        return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state, move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
                                  struct iw_cm_event *iw_event)
{
        unsigned long flags;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
                cm_id_priv->state = IW_CM_STATE_CLOSING;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously.
 * This is the last reference that will result in waking up
 * the app thread blocked in iw_destroy_cm_id.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
                            struct iw_cm_event *iw_event)
{
        struct ib_qp *qp;
        unsigned long flags;
        int ret = 0, notify_event = 0;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        qp = cm_id_priv->qp;
        cm_id_priv->qp = NULL;

        switch (cm_id_priv->state) {
        case IW_CM_STATE_ESTABLISHED:
        case IW_CM_STATE_CLOSING:
                cm_id_priv->state = IW_CM_STATE_IDLE;
                notify_event = 1;
                break;
        case IW_CM_STATE_DESTROYING:
                break;
        default:
                BUG();
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        if (qp)
                cm_id_priv->id.device->ops.iw_rem_ref(qp);
        if (notify_event)
                ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
        return ret;
}

static int process_event(struct iwcm_id_private *cm_id_priv,
                         struct iw_cm_event *iw_event)
{
        int ret = 0;

        switch (iw_event->event) {
        case IW_CM_EVENT_CONNECT_REQUEST:
                cm_conn_req_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_CONNECT_REPLY:
                ret = cm_conn_rep_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_ESTABLISHED:
                ret = cm_conn_est_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_DISCONNECT:
                cm_disconnect_handler(cm_id_priv, iw_event);
                break;
        case IW_CM_EVENT_CLOSE:
                ret = cm_close_handler(cm_id_priv, iw_event);
                break;
        default:
                BUG();
        }

        return ret;
}

/*
 * Process events on the work_list for the cm_id. If the callback
 * function requests that the cm_id be deleted, a flag is set in the
 * cm_id flags to indicate that when the last reference is
 * removed, the cm_id is to be destroyed. This is necessary to
 * distinguish between an object that will be destroyed by the app
 * thread asleep on the destroy_comp list vs. an object destroyed
 * here synchronously when the last reference is removed.
 */
static void cm_work_handler(struct work_struct *_work)
{
        struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
        struct iw_cm_event levent;
        struct iwcm_id_private *cm_id_priv = work->cm_id;
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        while (!list_empty(&cm_id_priv->work_list)) {
                work = list_first_entry(&cm_id_priv->work_list,
                                        struct iwcm_work, list);
                list_del_init(&work->list);
                levent = work->event;
                put_work(work);
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);

                if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
                        ret = process_event(cm_id_priv, &levent);
                        if (ret)
                                WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id));
                } else
                        pr_debug("dropping event %d\n", levent.event);
                if (iwcm_deref_id(cm_id_priv))
                        return;
                spin_lock_irqsave(&cm_id_priv->lock, flags);
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
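
/*
 * Illustrative sketch (not part of this module): cm_event_handler() below
 * is installed as id.event_handler by iw_create_cm_id(), so an iWARP
 * provider driver reports activity by filling in a struct iw_cm_event and
 * invoking that callback. The values shown are hypothetical:
 *
 *      struct iw_cm_event event = {
 *              .event  = IW_CM_EVENT_ESTABLISHED,
 *              .status = 0,
 *      };
 *
 *      ret = cm_id->event_handler(cm_id, &event);
 *      // -ENOMEM means no pre-allocated work element was available
 *
 * The upcall may run in interrupt context; the event is copied and queued
 * to iwcm_wq rather than handled inline.
 */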

/*
 * This function is called in interrupt context. Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block. Events are queued to a per-CM_ID
 * work_list. If this is the first event on the work_list, the work
 * element is also queued on the iwcm_wq thread.
 *
 * Each event holds a reference on the cm_id. Until the last posted
 * event has been delivered and processed, the cm_id cannot be
 * deleted.
 *
 * Returns:
 *            0 - the event was handled.
 *      -ENOMEM - the event was not handled due to lack of resources.
 */
static int cm_event_handler(struct iw_cm_id *cm_id,
                            struct iw_cm_event *iw_event)
{
        struct iwcm_work *work;
        struct iwcm_id_private *cm_id_priv;
        unsigned long flags;
        int ret = 0;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        work = get_work(cm_id_priv);
        if (!work) {
                ret = -ENOMEM;
                goto out;
        }

        INIT_WORK(&work->work, cm_work_handler);
        work->cm_id = cm_id_priv;
        work->event = *iw_event;

        if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
             work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
            work->event.private_data_len) {
                ret = copy_private_data(&work->event);
                if (ret) {
                        put_work(work);
                        goto out;
                }
        }

        refcount_inc(&cm_id_priv->refcount);
        list_add_tail(&work->list, &cm_id_priv->work_list);
        queue_work(iwcm_wq, &work->work);
out:
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
                                  struct ib_qp_attr *qp_attr,
                                  int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_IDLE:
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_CONN_RECV:
        case IW_CM_STATE_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
                qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE |
                                           IB_ACCESS_REMOTE_READ;
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
                                 struct ib_qp_attr *qp_attr,
                                 int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->state) {
        case IW_CM_STATE_IDLE:
        case IW_CM_STATE_CONN_SENT:
        case IW_CM_STATE_CONN_RECV:
        case IW_CM_STATE_ESTABLISHED:
                *qp_attr_mask = 0;
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
                       struct ib_qp_attr *qp_attr,
                       int *qp_attr_mask)
{
        struct iwcm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
        switch (qp_attr->qp_state) {
        case IB_QPS_INIT:
        case IB_QPS_RTR:
                ret = iwcm_init_qp_init_attr(cm_id_priv,
                                             qp_attr, qp_attr_mask);
                break;
        case IB_QPS_RTS:
                ret = iwcm_init_qp_rts_attr(cm_id_priv,
                                            qp_attr, qp_attr_mask);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}
EXPORT_SYMBOL(iw_cm_init_qp_attr);

static int __init iw_cm_init(void)
{
        int ret;

        ret = iwpm_init(RDMA_NL_IWCM);
        if (ret)
                return ret;

        iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
        if (!iwcm_wq)
                goto err_alloc;

        iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
                                                 iwcm_ctl_table);
        if (!iwcm_ctl_table_hdr) {
                pr_err("iw_cm: couldn't register sysctl paths\n");
                goto err_sysctl;
        }

        rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
        return 0;

err_sysctl:
        destroy_workqueue(iwcm_wq);
err_alloc:
        iwpm_exit(RDMA_NL_IWCM);
        return -ENOMEM;
}

static void __exit iw_cm_cleanup(void)
{
        rdma_nl_unregister(RDMA_NL_IWCM);
        unregister_net_sysctl_table(iwcm_ctl_table_hdr);
        destroy_workqueue(iwcm_wq);
        iwpm_exit(RDMA_NL_IWCM);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2);

module_init(iw_cm_init);
module_exit(iw_cm_cleanup);