1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ 4 /* Fredy Neeser */ 5 /* Greg Joyce <greg@opengridcomputing.com> */ 6 /* Copyright (c) 2008-2019, IBM Corporation */ 7 /* Copyright (c) 2017, Open Grid Computing, Inc. */ 8 9 #include <linux/errno.h> 10 #include <linux/types.h> 11 #include <linux/net.h> 12 #include <linux/inetdevice.h> 13 #include <net/addrconf.h> 14 #include <linux/workqueue.h> 15 #include <net/sock.h> 16 #include <net/tcp.h> 17 #include <linux/inet.h> 18 #include <linux/tcp.h> 19 #include <trace/events/sock.h> 20 21 #include <rdma/iw_cm.h> 22 #include <rdma/ib_verbs.h> 23 #include <rdma/ib_user_verbs.h> 24 25 #include "siw.h" 26 #include "siw_cm.h" 27 28 /* 29 * Set to any combination of 30 * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR 31 */ 32 static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR; 33 static const bool relaxed_ird_negotiation = true; 34 35 static void siw_cm_llp_state_change(struct sock *s); 36 static void siw_cm_llp_data_ready(struct sock *s); 37 static void siw_cm_llp_write_space(struct sock *s); 38 static void siw_cm_llp_error_report(struct sock *s); 39 static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, 40 int status); 41 42 43 #ifdef CONFIG_DEBUG_LOCK_ALLOC 44 /* 45 * lockdep can detect false positive circular dependencies 46 * when there are user-space socket API users or in kernel 47 * users switching between a tcp and rdma transport. 48 * Maybe also switching between siw and rxe may cause 49 * problems as per default sockets are only classified 50 * by family and not by ip protocol. And there might 51 * be different locks used between the application 52 * and the low level sockets. 53 * 54 * Problems were seen with ksmbd.ko and cifs.ko, 55 * switching transports, use git blame to find 56 * more details. 
57 */ 58 static struct lock_class_key siw_sk_key[2]; 59 static struct lock_class_key siw_slock_key[2]; 60 #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 61 62 static inline void siw_reclassify_socket(struct socket *sock) 63 { 64 #ifdef CONFIG_DEBUG_LOCK_ALLOC 65 struct sock *sk = sock->sk; 66 67 if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) 68 return; 69 70 switch (sk->sk_family) { 71 case AF_INET: 72 sock_lock_init_class_and_name(sk, 73 "slock-AF_INET-RDMA-SIW", 74 &siw_slock_key[0], 75 "sk_lock-AF_INET-RDMA-SIW", 76 &siw_sk_key[0]); 77 break; 78 case AF_INET6: 79 sock_lock_init_class_and_name(sk, 80 "slock-AF_INET6-RDMA-SIW", 81 &siw_slock_key[1], 82 "sk_lock-AF_INET6-RDMA-SIW", 83 &siw_sk_key[1]); 84 break; 85 default: 86 WARN_ON_ONCE(1); 87 } 88 #endif /* CONFIG_DEBUG_LOCK_ALLOC */ 89 } 90 91 static void siw_sk_assign_cm_upcalls(struct sock *sk) 92 { 93 struct siw_cep *cep = sk_to_cep(sk); 94 95 write_lock_bh(&sk->sk_callback_lock); 96 cep->sk_state_change = sk->sk_state_change; 97 cep->sk_data_ready = sk->sk_data_ready; 98 cep->sk_write_space = sk->sk_write_space; 99 cep->sk_error_report = sk->sk_error_report; 100 101 sk->sk_state_change = siw_cm_llp_state_change; 102 sk->sk_data_ready = siw_cm_llp_data_ready; 103 sk->sk_write_space = siw_cm_llp_write_space; 104 sk->sk_error_report = siw_cm_llp_error_report; 105 write_unlock_bh(&sk->sk_callback_lock); 106 } 107 108 static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep) 109 { 110 sk->sk_state_change = cep->sk_state_change; 111 sk->sk_data_ready = cep->sk_data_ready; 112 sk->sk_write_space = cep->sk_write_space; 113 sk->sk_error_report = cep->sk_error_report; 114 sk->sk_user_data = NULL; 115 } 116 117 static void siw_qp_socket_assoc(struct siw_cep *cep, struct siw_qp *qp) 118 { 119 struct socket *s = cep->sock; 120 struct sock *sk = s->sk; 121 122 write_lock_bh(&sk->sk_callback_lock); 123 124 qp->attrs.sk = s; 125 sk->sk_data_ready = siw_qp_llp_data_ready; 126 sk->sk_write_space = 
siw_qp_llp_write_space; 127 128 write_unlock_bh(&sk->sk_callback_lock); 129 } 130 131 static void siw_socket_disassoc(struct socket *s) 132 { 133 struct sock *sk = s->sk; 134 struct siw_cep *cep; 135 136 if (sk) { 137 write_lock_bh(&sk->sk_callback_lock); 138 cep = sk_to_cep(sk); 139 if (cep) { 140 siw_sk_restore_upcalls(sk, cep); 141 cep->sock = NULL; 142 siw_cep_put(cep); 143 } else { 144 pr_warn("siw: cannot restore sk callbacks: no ep\n"); 145 } 146 write_unlock_bh(&sk->sk_callback_lock); 147 } else { 148 pr_warn("siw: cannot restore sk callbacks: no sk\n"); 149 } 150 } 151 152 static void siw_rtr_data_ready(struct sock *sk) 153 { 154 struct siw_cep *cep; 155 struct siw_qp *qp = NULL; 156 read_descriptor_t rd_desc; 157 158 trace_sk_data_ready(sk); 159 160 read_lock(&sk->sk_callback_lock); 161 162 cep = sk_to_cep(sk); 163 if (!cep) { 164 WARN(1, "No connection endpoint\n"); 165 goto out; 166 } 167 qp = sk_to_qp(sk); 168 169 memset(&rd_desc, 0, sizeof(rd_desc)); 170 rd_desc.arg.data = qp; 171 rd_desc.count = 1; 172 173 tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data); 174 /* 175 * Check if first frame was successfully processed. 176 * Signal connection full establishment if yes. 177 * Failed data processing would have already scheduled 178 * connection drop. 
179 */ 180 if (!qp->rx_stream.rx_suspend) 181 siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0); 182 out: 183 read_unlock(&sk->sk_callback_lock); 184 if (qp) 185 siw_qp_socket_assoc(cep, qp); 186 } 187 188 static void siw_sk_assign_rtr_upcalls(struct siw_cep *cep) 189 { 190 struct sock *sk = cep->sock->sk; 191 192 write_lock_bh(&sk->sk_callback_lock); 193 sk->sk_data_ready = siw_rtr_data_ready; 194 sk->sk_write_space = siw_qp_llp_write_space; 195 write_unlock_bh(&sk->sk_callback_lock); 196 } 197 198 static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s) 199 { 200 cep->sock = s; 201 siw_cep_get(cep); 202 s->sk->sk_user_data = cep; 203 204 siw_sk_assign_cm_upcalls(s->sk); 205 } 206 207 static struct siw_cep *siw_cep_alloc(struct siw_device *sdev) 208 { 209 struct siw_cep *cep = kzalloc_obj(*cep); 210 unsigned long flags; 211 212 if (!cep) 213 return NULL; 214 215 INIT_LIST_HEAD(&cep->listenq); 216 INIT_LIST_HEAD(&cep->devq); 217 INIT_LIST_HEAD(&cep->work_freelist); 218 219 kref_init(&cep->ref); 220 cep->state = SIW_EPSTATE_IDLE; 221 init_waitqueue_head(&cep->waitq); 222 spin_lock_init(&cep->lock); 223 cep->sdev = sdev; 224 cep->enhanced_rdma_conn_est = false; 225 226 spin_lock_irqsave(&sdev->lock, flags); 227 list_add_tail(&cep->devq, &sdev->cep_list); 228 spin_unlock_irqrestore(&sdev->lock, flags); 229 230 siw_dbg_cep(cep, "new endpoint\n"); 231 return cep; 232 } 233 234 static void siw_cm_free_work(struct siw_cep *cep) 235 { 236 struct list_head *w, *tmp; 237 struct siw_cm_work *work; 238 239 list_for_each_safe(w, tmp, &cep->work_freelist) { 240 work = list_entry(w, struct siw_cm_work, list); 241 list_del(&work->list); 242 kfree(work); 243 } 244 } 245 246 static void siw_cancel_mpatimer(struct siw_cep *cep) 247 { 248 spin_lock_bh(&cep->lock); 249 if (cep->mpa_timer) { 250 if (cancel_delayed_work(&cep->mpa_timer->work)) { 251 siw_cep_put(cep); 252 kfree(cep->mpa_timer); /* not needed again */ 253 } 254 cep->mpa_timer = NULL; 255 } 256 
spin_unlock_bh(&cep->lock); 257 } 258 259 static void siw_put_work(struct siw_cm_work *work) 260 { 261 INIT_LIST_HEAD(&work->list); 262 spin_lock_bh(&work->cep->lock); 263 list_add(&work->list, &work->cep->work_freelist); 264 spin_unlock_bh(&work->cep->lock); 265 } 266 267 static void siw_cep_set_inuse(struct siw_cep *cep) 268 { 269 unsigned long flags; 270 retry: 271 spin_lock_irqsave(&cep->lock, flags); 272 273 if (cep->in_use) { 274 spin_unlock_irqrestore(&cep->lock, flags); 275 wait_event_interruptible(cep->waitq, !cep->in_use); 276 if (signal_pending(current)) 277 flush_signals(current); 278 goto retry; 279 } else { 280 cep->in_use = 1; 281 spin_unlock_irqrestore(&cep->lock, flags); 282 } 283 } 284 285 static void siw_cep_set_free(struct siw_cep *cep) 286 { 287 unsigned long flags; 288 289 spin_lock_irqsave(&cep->lock, flags); 290 cep->in_use = 0; 291 spin_unlock_irqrestore(&cep->lock, flags); 292 293 wake_up(&cep->waitq); 294 } 295 296 static void __siw_cep_dealloc(struct kref *ref) 297 { 298 struct siw_cep *cep = container_of(ref, struct siw_cep, ref); 299 struct siw_device *sdev = cep->sdev; 300 unsigned long flags; 301 302 WARN_ON(cep->listen_cep); 303 304 /* kfree(NULL) is safe */ 305 kfree(cep->mpa.pdata); 306 spin_lock_bh(&cep->lock); 307 if (!list_empty(&cep->work_freelist)) 308 siw_cm_free_work(cep); 309 spin_unlock_bh(&cep->lock); 310 311 spin_lock_irqsave(&sdev->lock, flags); 312 list_del(&cep->devq); 313 spin_unlock_irqrestore(&sdev->lock, flags); 314 315 siw_dbg_cep(cep, "free endpoint\n"); 316 kfree(cep); 317 } 318 319 static struct siw_cm_work *siw_get_work(struct siw_cep *cep) 320 { 321 struct siw_cm_work *work = NULL; 322 323 spin_lock_bh(&cep->lock); 324 if (!list_empty(&cep->work_freelist)) { 325 work = list_entry(cep->work_freelist.next, struct siw_cm_work, 326 list); 327 list_del_init(&work->list); 328 } 329 spin_unlock_bh(&cep->lock); 330 return work; 331 } 332 333 static int siw_cm_alloc_work(struct siw_cep *cep, int num) 334 { 335 
struct siw_cm_work *work; 336 337 while (num--) { 338 work = kmalloc_obj(*work); 339 if (!work) { 340 if (!(list_empty(&cep->work_freelist))) 341 siw_cm_free_work(cep); 342 return -ENOMEM; 343 } 344 work->cep = cep; 345 INIT_LIST_HEAD(&work->list); 346 list_add(&work->list, &cep->work_freelist); 347 } 348 return 0; 349 } 350 351 /* 352 * siw_cm_upcall() 353 * 354 * Upcall to IWCM to inform about async connection events 355 */ 356 static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, 357 int status) 358 { 359 struct iw_cm_event event; 360 struct iw_cm_id *id; 361 362 memset(&event, 0, sizeof(event)); 363 event.status = status; 364 event.event = reason; 365 366 if (reason == IW_CM_EVENT_CONNECT_REQUEST) { 367 event.provider_data = cep; 368 id = cep->listen_cep->cm_id; 369 } else { 370 id = cep->cm_id; 371 } 372 /* Signal IRD and ORD */ 373 if (reason == IW_CM_EVENT_ESTABLISHED || 374 reason == IW_CM_EVENT_CONNECT_REPLY) { 375 /* Signal negotiated IRD/ORD values we will use */ 376 event.ird = cep->ird; 377 event.ord = cep->ord; 378 } else if (reason == IW_CM_EVENT_CONNECT_REQUEST) { 379 event.ird = cep->ord; 380 event.ord = cep->ird; 381 } 382 /* Signal private data and address information */ 383 if (reason == IW_CM_EVENT_CONNECT_REQUEST || 384 reason == IW_CM_EVENT_CONNECT_REPLY) { 385 u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len); 386 387 if (pd_len) { 388 /* 389 * hand over MPA private data 390 */ 391 event.private_data_len = pd_len; 392 event.private_data = cep->mpa.pdata; 393 394 /* Hide MPA V2 IRD/ORD control */ 395 if (cep->enhanced_rdma_conn_est) { 396 event.private_data_len -= 397 sizeof(struct mpa_v2_data); 398 event.private_data += 399 sizeof(struct mpa_v2_data); 400 } 401 } 402 getname_local(cep->sock, &event.local_addr); 403 getname_peer(cep->sock, &event.remote_addr); 404 } 405 siw_dbg_cep(cep, "[QP %u]: reason=%d, status=%d\n", 406 cep->qp ? 
qp_id(cep->qp) : UINT_MAX, reason, status); 407 408 return id->event_handler(id, &event); 409 } 410 411 static void siw_free_cm_id(struct siw_cep *cep) 412 { 413 if (!cep->cm_id) 414 return; 415 416 cep->cm_id->rem_ref(cep->cm_id); 417 cep->cm_id = NULL; 418 } 419 420 static void siw_destroy_cep_sock(struct siw_cep *cep) 421 { 422 struct socket *s = cep->sock; 423 424 if (s) { 425 siw_socket_disassoc(s); 426 sock_release(s); 427 } 428 } 429 430 /* 431 * siw_qp_cm_drop() 432 * 433 * Drops established LLP connection if present and not already 434 * scheduled for dropping. Called from user context, SQ workqueue 435 * or receive IRQ. Caller signals if socket can be immediately 436 * closed (basically, if not in IRQ). 437 */ 438 void siw_qp_cm_drop(struct siw_qp *qp, int schedule) 439 { 440 struct siw_cep *cep = qp->cep; 441 442 qp->rx_stream.rx_suspend = 1; 443 qp->tx_ctx.tx_suspend = 1; 444 445 if (!qp->cep) 446 return; 447 448 if (schedule) { 449 siw_cm_queue_work(cep, SIW_CM_WORK_CLOSE_LLP); 450 } else { 451 siw_cep_set_inuse(cep); 452 453 if (cep->state == SIW_EPSTATE_CLOSED) { 454 siw_dbg_cep(cep, "already closed\n"); 455 goto out; 456 } 457 siw_dbg_cep(cep, "immediate close, state %d\n", cep->state); 458 459 siw_send_terminate(qp); 460 461 if (cep->cm_id) { 462 switch (cep->state) { 463 case SIW_EPSTATE_AWAIT_MPAREP: 464 siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 465 -EINVAL); 466 break; 467 468 case SIW_EPSTATE_RDMA_MODE: 469 siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); 470 break; 471 472 case SIW_EPSTATE_IDLE: 473 case SIW_EPSTATE_LISTENING: 474 case SIW_EPSTATE_CONNECTING: 475 case SIW_EPSTATE_AWAIT_MPAREQ: 476 case SIW_EPSTATE_RECVD_MPAREQ: 477 case SIW_EPSTATE_CLOSED: 478 default: 479 break; 480 } 481 siw_free_cm_id(cep); 482 siw_cep_put(cep); 483 } 484 cep->state = SIW_EPSTATE_CLOSED; 485 486 siw_destroy_cep_sock(cep); 487 if (cep->qp) { 488 cep->qp = NULL; 489 siw_qp_put(qp); 490 } 491 out: 492 siw_cep_set_free(cep); 493 } 494 } 495 496 void 
siw_cep_put(struct siw_cep *cep) 497 { 498 WARN_ON(kref_read(&cep->ref) < 1); 499 kref_put(&cep->ref, __siw_cep_dealloc); 500 } 501 502 static void siw_cep_set_free_and_put(struct siw_cep *cep) 503 { 504 siw_cep_set_free(cep); 505 siw_cep_put(cep); 506 } 507 508 void siw_cep_get(struct siw_cep *cep) 509 { 510 kref_get(&cep->ref); 511 } 512 513 /* 514 * Expects params->pd_len in host byte order 515 */ 516 static int siw_send_mpareqrep(struct siw_cep *cep, const void *pdata, u8 pd_len) 517 { 518 struct socket *s = cep->sock; 519 struct mpa_rr *rr = &cep->mpa.hdr; 520 struct kvec iov[3]; 521 struct msghdr msg; 522 int rv; 523 int iovec_num = 0; 524 int mpa_len; 525 526 memset(&msg, 0, sizeof(msg)); 527 528 iov[iovec_num].iov_base = rr; 529 iov[iovec_num].iov_len = sizeof(*rr); 530 mpa_len = sizeof(*rr); 531 532 if (cep->enhanced_rdma_conn_est) { 533 iovec_num++; 534 iov[iovec_num].iov_base = &cep->mpa.v2_ctrl; 535 iov[iovec_num].iov_len = sizeof(cep->mpa.v2_ctrl); 536 mpa_len += sizeof(cep->mpa.v2_ctrl); 537 } 538 if (pd_len) { 539 iovec_num++; 540 iov[iovec_num].iov_base = (char *)pdata; 541 iov[iovec_num].iov_len = pd_len; 542 mpa_len += pd_len; 543 } 544 if (cep->enhanced_rdma_conn_est) 545 pd_len += sizeof(cep->mpa.v2_ctrl); 546 547 rr->params.pd_len = cpu_to_be16(pd_len); 548 549 rv = kernel_sendmsg(s, &msg, iov, iovec_num + 1, mpa_len); 550 551 return rv < 0 ? rv : 0; 552 } 553 554 /* 555 * Receive MPA Request/Reply header. 556 * 557 * Returns 0 if complete MPA Request/Reply header including 558 * eventual private data was received. Returns -EAGAIN if 559 * header was partially received or negative error code otherwise. 
560 * 561 * Context: May be called in process context only 562 */ 563 static int siw_recv_mpa_rr(struct siw_cep *cep) 564 { 565 struct mpa_rr *hdr = &cep->mpa.hdr; 566 struct socket *s = cep->sock; 567 u16 pd_len; 568 int rcvd, to_rcv; 569 570 if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) { 571 rcvd = ksock_recv(s, (char *)hdr + cep->mpa.bytes_rcvd, 572 sizeof(struct mpa_rr) - cep->mpa.bytes_rcvd, 573 0); 574 if (rcvd <= 0) 575 return -ECONNABORTED; 576 577 cep->mpa.bytes_rcvd += rcvd; 578 579 if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) 580 return -EAGAIN; 581 582 if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA) 583 return -EPROTO; 584 } 585 pd_len = be16_to_cpu(hdr->params.pd_len); 586 587 /* 588 * At least the MPA Request/Reply header (frame not including 589 * private data) has been received. 590 * Receive (or continue receiving) any private data. 591 */ 592 to_rcv = pd_len - (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr)); 593 594 if (!to_rcv) { 595 /* 596 * We must have hdr->params.pd_len == 0 and thus received a 597 * complete MPA Request/Reply frame. 598 * Check against peer protocol violation. 599 */ 600 u32 word; 601 602 rcvd = ksock_recv(s, (char *)&word, sizeof(word), MSG_DONTWAIT); 603 if (rcvd == -EAGAIN) 604 return 0; 605 606 if (rcvd == 0) { 607 siw_dbg_cep(cep, "peer EOF\n"); 608 return -EPIPE; 609 } 610 if (rcvd < 0) { 611 siw_dbg_cep(cep, "error: %d\n", rcvd); 612 return rcvd; 613 } 614 siw_dbg_cep(cep, "peer sent extra data: %d\n", rcvd); 615 616 return -EPROTO; 617 } 618 619 /* 620 * At this point, we must have hdr->params.pd_len != 0. 621 * A private data buffer gets allocated if hdr->params.pd_len != 0. 
622 */ 623 if (!cep->mpa.pdata) { 624 cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL); 625 if (!cep->mpa.pdata) 626 return -ENOMEM; 627 } 628 rcvd = ksock_recv( 629 s, cep->mpa.pdata + cep->mpa.bytes_rcvd - sizeof(struct mpa_rr), 630 to_rcv + 4, MSG_DONTWAIT); 631 632 if (rcvd < 0) 633 return rcvd; 634 635 if (rcvd > to_rcv) 636 return -EPROTO; 637 638 cep->mpa.bytes_rcvd += rcvd; 639 640 if (to_rcv == rcvd) { 641 siw_dbg_cep(cep, "%d bytes private data received\n", pd_len); 642 return 0; 643 } 644 return -EAGAIN; 645 } 646 647 /* 648 * siw_proc_mpareq() 649 * 650 * Read MPA Request from socket and signal new connection to IWCM 651 * if success. Caller must hold lock on corresponding listening CEP. 652 */ 653 static int siw_proc_mpareq(struct siw_cep *cep) 654 { 655 struct mpa_rr *req; 656 int version, rv; 657 u16 pd_len; 658 659 rv = siw_recv_mpa_rr(cep); 660 if (rv) 661 return rv; 662 663 req = &cep->mpa.hdr; 664 665 version = __mpa_rr_revision(req->params.bits); 666 pd_len = be16_to_cpu(req->params.pd_len); 667 668 if (version > MPA_REVISION_2) 669 /* allow for 0, 1, and 2 only */ 670 return -EPROTO; 671 672 if (memcmp(req->key, MPA_KEY_REQ, 16)) 673 return -EPROTO; 674 675 /* Prepare for sending MPA reply */ 676 memcpy(req->key, MPA_KEY_REP, 16); 677 678 if (version == MPA_REVISION_2 && 679 (req->params.bits & MPA_RR_FLAG_ENHANCED)) { 680 /* 681 * MPA version 2 must signal IRD/ORD values and P2P mode 682 * in private data if header flag MPA_RR_FLAG_ENHANCED 683 * is set. 684 */ 685 if (pd_len < sizeof(struct mpa_v2_data)) 686 goto reject_conn; 687 688 cep->enhanced_rdma_conn_est = true; 689 } 690 691 /* MPA Markers: currently not supported. Marker TX to be added. */ 692 if (req->params.bits & MPA_RR_FLAG_MARKERS) 693 goto reject_conn; 694 695 if (req->params.bits & MPA_RR_FLAG_CRC) { 696 /* 697 * RFC 5044, page 27: CRC MUST be used if peer requests it. 
698 * siw specific: 'mpa_crc_strict' parameter to reject 699 * connection with CRC if local CRC off enforced by 700 * 'mpa_crc_strict' module parameter. 701 */ 702 if (!mpa_crc_required && mpa_crc_strict) 703 goto reject_conn; 704 705 /* Enable CRC if requested by module parameter */ 706 if (mpa_crc_required) 707 req->params.bits |= MPA_RR_FLAG_CRC; 708 } 709 if (cep->enhanced_rdma_conn_est) { 710 struct mpa_v2_data *v2 = (struct mpa_v2_data *)cep->mpa.pdata; 711 712 /* 713 * Peer requested ORD becomes requested local IRD, 714 * peer requested IRD becomes requested local ORD. 715 * IRD and ORD get limited by global maximum values. 716 */ 717 cep->ord = ntohs(v2->ird) & MPA_IRD_ORD_MASK; 718 cep->ord = min(cep->ord, SIW_MAX_ORD_QP); 719 cep->ird = ntohs(v2->ord) & MPA_IRD_ORD_MASK; 720 cep->ird = min(cep->ird, SIW_MAX_IRD_QP); 721 722 /* May get overwritten by locally negotiated values */ 723 cep->mpa.v2_ctrl.ird = htons(cep->ird); 724 cep->mpa.v2_ctrl.ord = htons(cep->ord); 725 726 /* 727 * Support for peer sent zero length Write or Read to 728 * let local side enter RTS. Writes are preferred. 729 * Sends would require pre-posting a Receive and are 730 * not supported. 731 * Propose zero length Write if none of Read and Write 732 * is indicated. 733 */ 734 if (v2->ird & MPA_V2_PEER_TO_PEER) { 735 cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER; 736 737 if (v2->ord & MPA_V2_RDMA_WRITE_RTR) 738 cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR; 739 else if (v2->ord & MPA_V2_RDMA_READ_RTR) 740 cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_READ_RTR; 741 else 742 cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR; 743 } 744 } 745 746 cep->state = SIW_EPSTATE_RECVD_MPAREQ; 747 748 /* Keep reference until IWCM accepts/rejects */ 749 siw_cep_get(cep); 750 rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0); 751 if (rv) 752 siw_cep_put(cep); 753 754 return rv; 755 756 reject_conn: 757 siw_dbg_cep(cep, "reject: crc %d:%d:%d, m %d:%d\n", 758 req->params.bits & MPA_RR_FLAG_CRC ? 
1 : 0, 759 mpa_crc_required, mpa_crc_strict, 760 req->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0); 761 762 req->params.bits &= ~MPA_RR_FLAG_MARKERS; 763 req->params.bits |= MPA_RR_FLAG_REJECT; 764 765 if (!mpa_crc_required && mpa_crc_strict) 766 req->params.bits &= ~MPA_RR_FLAG_CRC; 767 768 if (pd_len) 769 kfree(cep->mpa.pdata); 770 771 cep->mpa.pdata = NULL; 772 773 siw_send_mpareqrep(cep, NULL, 0); 774 775 return -EOPNOTSUPP; 776 } 777 778 static int siw_proc_mpareply(struct siw_cep *cep) 779 { 780 struct siw_qp_attrs qp_attrs; 781 enum siw_qp_attr_mask qp_attr_mask; 782 struct siw_qp *qp = cep->qp; 783 struct mpa_rr *rep; 784 int rv; 785 u16 rep_ord; 786 u16 rep_ird; 787 bool ird_insufficient = false; 788 enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR; 789 790 rv = siw_recv_mpa_rr(cep); 791 if (rv) 792 goto out_err; 793 794 siw_cancel_mpatimer(cep); 795 796 rep = &cep->mpa.hdr; 797 798 if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) { 799 /* allow for 0, 1, and 2 only */ 800 rv = -EPROTO; 801 goto out_err; 802 } 803 if (memcmp(rep->key, MPA_KEY_REP, 16)) { 804 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, LLP_ETYPE_MPA, 805 LLP_ECODE_INVALID_REQ_RESP, 0); 806 siw_send_terminate(qp); 807 rv = -EPROTO; 808 goto out_err; 809 } 810 if (rep->params.bits & MPA_RR_FLAG_REJECT) { 811 siw_dbg_cep(cep, "got mpa reject\n"); 812 siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET); 813 814 return -ECONNRESET; 815 } 816 if (try_gso && rep->params.bits & MPA_RR_FLAG_GSO_EXP) { 817 siw_dbg_cep(cep, "peer allows GSO on TX\n"); 818 qp->tx_ctx.gso_seg_limit = 0; 819 } 820 if ((rep->params.bits & MPA_RR_FLAG_MARKERS) || 821 (mpa_crc_required && !(rep->params.bits & MPA_RR_FLAG_CRC)) || 822 (mpa_crc_strict && !mpa_crc_required && 823 (rep->params.bits & MPA_RR_FLAG_CRC))) { 824 siw_dbg_cep(cep, "reply unsupp: crc %d:%d:%d, m %d:%d\n", 825 rep->params.bits & MPA_RR_FLAG_CRC ? 
1 : 0,
			    mpa_crc_required, mpa_crc_strict,
			    rep->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);

		siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);

		return -EINVAL;
	}
	if (cep->enhanced_rdma_conn_est) {
		struct mpa_v2_data *v2;

		if (__mpa_rr_revision(rep->params.bits) < MPA_REVISION_2 ||
		    !(rep->params.bits & MPA_RR_FLAG_ENHANCED)) {
			/*
			 * Protocol failure: The responder MUST reply with
			 * MPA version 2 and MUST set MPA_RR_FLAG_ENHANCED.
			 */
			siw_dbg_cep(cep, "mpa reply error: vers %d, enhcd %d\n",
				    __mpa_rr_revision(rep->params.bits),
				    rep->params.bits & MPA_RR_FLAG_ENHANCED ?
					    1 :
					    0);

			siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
				      -ECONNRESET);
			return -EINVAL;
		}
		/*
		 * An enhanced reply must carry the MPA v2 control block
		 * at the start of its private data. Without this check,
		 * a reply announcing no private data leaves
		 * cep->mpa.pdata unallocated and the accesses through v2
		 * below dereference a NULL pointer; a reply shorter than
		 * the control block would make them read bytes that
		 * were never received.
		 */
		if (be16_to_cpu(rep->params.pd_len) <
		    sizeof(struct mpa_v2_data)) {
			siw_dbg_cep(cep, "mpa reply error: pd_len %d\n",
				    be16_to_cpu(rep->params.pd_len));
			/*
			 * Hide the truncated private data from the upcall,
			 * which would otherwise strip a control block
			 * that is not there.
			 */
			rep->params.pd_len = 0;
			rv = -EPROTO;
			goto out_err;
		}
		v2 = (struct mpa_v2_data *)cep->mpa.pdata;
		rep_ird = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
		rep_ord = ntohs(v2->ord) & MPA_IRD_ORD_MASK;

		/*
		 * With relaxed negotiation, a peer ORD above our IRD is
		 * only fatal if it also exceeds the device's IRD limit.
		 */
		if (cep->ird < rep_ord &&
		    (relaxed_ird_negotiation == false ||
		     rep_ord > cep->sdev->attrs.max_ird)) {
			/* Report the limit the check above tested against */
			siw_dbg_cep(cep, "ird %d, rep_ord %d, max_ird %d\n",
				    cep->ird, rep_ord,
				    cep->sdev->attrs.max_ird);
			ird_insufficient = true;
		}
		if (cep->ord > rep_ird && relaxed_ird_negotiation == false) {
			siw_dbg_cep(cep, "ord %d, rep_ird %d\n", cep->ord,
				    rep_ird);
			ird_insufficient = true;
		}
		/*
		 * Always report negotiated peer values to user,
		 * even if IRD/ORD negotiation failed
		 */
		cep->ird = rep_ord;
		cep->ord = rep_ird;

		if (ird_insufficient) {
			/*
			 * If the initiator IRD is insufficient for the
			 * responder ORD, send a TERM.
880 */ 881 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, 882 LLP_ETYPE_MPA, 883 LLP_ECODE_INSUFFICIENT_IRD, 0); 884 siw_send_terminate(qp); 885 rv = -ENOMEM; 886 goto out_err; 887 } 888 if (cep->mpa.v2_ctrl_req.ird & MPA_V2_PEER_TO_PEER) 889 mpa_p2p_mode = 890 cep->mpa.v2_ctrl_req.ord & 891 (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR); 892 893 /* 894 * Check if we requested P2P mode, and if peer agrees 895 */ 896 if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) { 897 if ((mpa_p2p_mode & v2->ord) == 0) { 898 /* 899 * We requested RTR mode(s), but the peer 900 * did not pick any mode we support. 901 */ 902 siw_dbg_cep(cep, 903 "rtr mode: req %2x, got %2x\n", 904 mpa_p2p_mode, 905 v2->ord & (MPA_V2_RDMA_WRITE_RTR | 906 MPA_V2_RDMA_READ_RTR)); 907 908 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, 909 LLP_ETYPE_MPA, 910 LLP_ECODE_NO_MATCHING_RTR, 911 0); 912 siw_send_terminate(qp); 913 rv = -EPROTO; 914 goto out_err; 915 } 916 mpa_p2p_mode = v2->ord & (MPA_V2_RDMA_WRITE_RTR | 917 MPA_V2_RDMA_READ_RTR); 918 } 919 } 920 memset(&qp_attrs, 0, sizeof(qp_attrs)); 921 922 if (rep->params.bits & MPA_RR_FLAG_CRC) 923 qp_attrs.flags = SIW_MPA_CRC; 924 925 qp_attrs.irq_size = cep->ird; 926 qp_attrs.orq_size = cep->ord; 927 qp_attrs.sk = cep->sock; 928 qp_attrs.state = SIW_QP_STATE_RTS; 929 930 qp_attr_mask = SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE | 931 SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA; 932 933 /* Move socket RX/TX under QP control */ 934 down_write(&qp->state_lock); 935 if (qp->attrs.state > SIW_QP_STATE_RTR) { 936 rv = -EINVAL; 937 up_write(&qp->state_lock); 938 goto out_err; 939 } 940 rv = siw_qp_modify(qp, &qp_attrs, qp_attr_mask); 941 942 siw_qp_socket_assoc(cep, qp); 943 944 up_write(&qp->state_lock); 945 946 /* Send extra RDMA frame to trigger peer RTS if negotiated */ 947 if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) { 948 rv = siw_qp_mpa_rts(qp, mpa_p2p_mode); 949 if (rv) 950 goto out_err; 951 } 952 if (!rv) { 953 rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 
0); 954 if (!rv) 955 cep->state = SIW_EPSTATE_RDMA_MODE; 956 957 return 0; 958 } 959 960 out_err: 961 if (rv != -EAGAIN) 962 siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); 963 964 return rv; 965 } 966 967 /* 968 * siw_accept_newconn - accept an incoming pending connection 969 * 970 */ 971 static void siw_accept_newconn(struct siw_cep *cep) 972 { 973 struct socket *s = cep->sock; 974 struct socket *new_s = NULL; 975 struct siw_cep *new_cep = NULL; 976 int rv = 0; /* debug only. should disappear */ 977 978 if (cep->state != SIW_EPSTATE_LISTENING) 979 goto error; 980 981 new_cep = siw_cep_alloc(cep->sdev); 982 if (!new_cep) 983 goto error; 984 985 /* 986 * 4: Allocate a sufficient number of work elements 987 * to allow concurrent handling of local + peer close 988 * events, MPA header processing + MPA timeout. 989 */ 990 if (siw_cm_alloc_work(new_cep, 4) != 0) 991 goto error; 992 993 /* 994 * Copy saved socket callbacks from listening CEP 995 * and assign new socket with new CEP 996 */ 997 new_cep->sk_state_change = cep->sk_state_change; 998 new_cep->sk_data_ready = cep->sk_data_ready; 999 new_cep->sk_write_space = cep->sk_write_space; 1000 new_cep->sk_error_report = cep->sk_error_report; 1001 1002 rv = kernel_accept(s, &new_s, O_NONBLOCK); 1003 if (rv != 0) { 1004 /* 1005 * Connection already aborted by peer..? 1006 */ 1007 siw_dbg_cep(cep, "kernel_accept() error: %d\n", rv); 1008 goto error; 1009 } 1010 new_cep->sock = new_s; 1011 siw_cep_get(new_cep); 1012 new_s->sk->sk_user_data = new_cep; 1013 1014 if (siw_tcp_nagle == false) 1015 tcp_sock_set_nodelay(new_s->sk); 1016 new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ; 1017 1018 rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT); 1019 if (rv) 1020 goto error; 1021 /* 1022 * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep. 
1023 */ 1024 new_cep->listen_cep = cep; 1025 siw_cep_get(cep); 1026 1027 if (atomic_read(&new_s->sk->sk_rmem_alloc)) { 1028 /* 1029 * MPA REQ already queued 1030 */ 1031 siw_dbg_cep(cep, "immediate mpa request\n"); 1032 1033 siw_cep_set_inuse(new_cep); 1034 rv = siw_proc_mpareq(new_cep); 1035 if (rv != -EAGAIN) { 1036 siw_cep_put(cep); 1037 new_cep->listen_cep = NULL; 1038 if (rv) { 1039 siw_cancel_mpatimer(new_cep); 1040 siw_cep_set_free(new_cep); 1041 goto error; 1042 } 1043 } 1044 siw_cep_set_free(new_cep); 1045 } 1046 return; 1047 1048 error: 1049 if (new_cep) 1050 siw_cep_put(new_cep); 1051 1052 if (new_s) { 1053 siw_socket_disassoc(new_s); 1054 sock_release(new_s); 1055 } 1056 siw_dbg_cep(cep, "error %d\n", rv); 1057 } 1058 1059 static void siw_cm_work_handler(struct work_struct *w) 1060 { 1061 struct siw_cm_work *work; 1062 struct siw_cep *cep; 1063 int release_cep = 0, rv = 0; 1064 1065 work = container_of(w, struct siw_cm_work, work.work); 1066 cep = work->cep; 1067 1068 siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n", 1069 cep->qp ? qp_id(cep->qp) : UINT_MAX, 1070 work->type, cep->state); 1071 1072 siw_cep_set_inuse(cep); 1073 1074 switch (work->type) { 1075 case SIW_CM_WORK_ACCEPT: 1076 siw_accept_newconn(cep); 1077 break; 1078 1079 case SIW_CM_WORK_READ_MPAHDR: 1080 if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { 1081 if (cep->listen_cep) { 1082 siw_cep_set_inuse(cep->listen_cep); 1083 1084 if (cep->listen_cep->state == 1085 SIW_EPSTATE_LISTENING) 1086 rv = siw_proc_mpareq(cep); 1087 else 1088 rv = -EFAULT; 1089 1090 siw_cep_set_free(cep->listen_cep); 1091 1092 if (rv != -EAGAIN) { 1093 siw_cep_put(cep->listen_cep); 1094 cep->listen_cep = NULL; 1095 if (rv) 1096 siw_cep_put(cep); 1097 } 1098 } 1099 } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { 1100 rv = siw_proc_mpareply(cep); 1101 } else { 1102 /* 1103 * CEP already moved out of MPA handshake. 1104 * any connection management already done. 1105 * silently ignore the mpa packet. 
1106 */ 1107 if (cep->state == SIW_EPSTATE_RDMA_MODE) { 1108 cep->sock->sk->sk_data_ready(cep->sock->sk); 1109 siw_dbg_cep(cep, "already in RDMA mode"); 1110 } else { 1111 siw_dbg_cep(cep, "out of state: %d\n", 1112 cep->state); 1113 } 1114 } 1115 if (rv && rv != -EAGAIN) 1116 release_cep = 1; 1117 break; 1118 1119 case SIW_CM_WORK_CLOSE_LLP: 1120 /* 1121 * QP scheduled LLP close 1122 */ 1123 if (cep->qp) 1124 siw_send_terminate(cep->qp); 1125 1126 if (cep->cm_id) 1127 siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); 1128 1129 release_cep = 1; 1130 break; 1131 1132 case SIW_CM_WORK_PEER_CLOSE: 1133 if (cep->cm_id) { 1134 if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { 1135 /* 1136 * MPA reply not received, but connection drop 1137 */ 1138 siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 1139 -ECONNRESET); 1140 } else if (cep->state == SIW_EPSTATE_RDMA_MODE) { 1141 /* 1142 * NOTE: IW_CM_EVENT_DISCONNECT is given just 1143 * to transition IWCM into CLOSING. 1144 */ 1145 siw_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0); 1146 siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); 1147 } 1148 /* 1149 * for other states there is no connection 1150 * known to the IWCM. 1151 */ 1152 } else { 1153 if (cep->state == SIW_EPSTATE_RECVD_MPAREQ) { 1154 /* 1155 * Wait for the ulp/CM to call accept/reject 1156 */ 1157 siw_dbg_cep(cep, 1158 "mpa req recvd, wait for ULP\n"); 1159 } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { 1160 /* 1161 * Socket close before MPA request received. 
1162 */ 1163 if (cep->listen_cep) { 1164 siw_dbg_cep(cep, 1165 "no mpareq: drop listener\n"); 1166 siw_cep_put(cep->listen_cep); 1167 cep->listen_cep = NULL; 1168 } 1169 } 1170 } 1171 release_cep = 1; 1172 break; 1173 1174 case SIW_CM_WORK_MPATIMEOUT: 1175 cep->mpa_timer = NULL; 1176 1177 if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) { 1178 /* 1179 * MPA request timed out: 1180 * Hide any partially received private data and signal 1181 * timeout 1182 */ 1183 cep->mpa.hdr.params.pd_len = 0; 1184 1185 if (cep->cm_id) 1186 siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 1187 -ETIMEDOUT); 1188 release_cep = 1; 1189 1190 } else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) { 1191 /* 1192 * No MPA request received after peer TCP stream setup. 1193 */ 1194 if (cep->listen_cep) { 1195 siw_cep_put(cep->listen_cep); 1196 cep->listen_cep = NULL; 1197 } 1198 release_cep = 1; 1199 } 1200 break; 1201 1202 default: 1203 WARN(1, "Undefined CM work type: %d\n", work->type); 1204 } 1205 if (release_cep) { 1206 struct socket *s = cep->sock; 1207 1208 siw_dbg_cep(cep, 1209 "release: timer=%s, QP[%u]\n", 1210 cep->mpa_timer ? "y" : "n", 1211 cep->qp ? 
qp_id(cep->qp) : UINT_MAX); 1212 1213 siw_cancel_mpatimer(cep); 1214 1215 cep->state = SIW_EPSTATE_CLOSED; 1216 1217 if (cep->qp) { 1218 struct siw_qp *qp = cep->qp; 1219 /* 1220 * Serialize a potential race with application 1221 * closing the QP and calling siw_qp_cm_drop() 1222 */ 1223 siw_qp_get(qp); 1224 siw_cep_set_free(cep); 1225 1226 siw_qp_llp_close(qp); 1227 siw_qp_put(qp); 1228 1229 siw_cep_set_inuse(cep); 1230 cep->qp = NULL; 1231 siw_qp_put(qp); 1232 } 1233 if (s) { 1234 siw_socket_disassoc(s); 1235 sock_release(s); 1236 } 1237 if (cep->cm_id) { 1238 siw_free_cm_id(cep); 1239 siw_cep_put(cep); 1240 } 1241 } 1242 siw_cep_set_free(cep); 1243 siw_put_work(work); 1244 siw_cep_put(cep); 1245 } 1246 1247 static struct workqueue_struct *siw_cm_wq; 1248 1249 int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type) 1250 { 1251 struct siw_cm_work *work = siw_get_work(cep); 1252 unsigned long delay = 0; 1253 1254 if (!work) { 1255 siw_dbg_cep(cep, "failed with no work available\n"); 1256 return -ENOMEM; 1257 } 1258 work->type = type; 1259 work->cep = cep; 1260 1261 siw_cep_get(cep); 1262 1263 INIT_DELAYED_WORK(&work->work, siw_cm_work_handler); 1264 1265 if (type == SIW_CM_WORK_MPATIMEOUT) { 1266 cep->mpa_timer = work; 1267 1268 if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) 1269 delay = MPAREQ_TIMEOUT; 1270 else 1271 delay = MPAREP_TIMEOUT; 1272 } 1273 siw_dbg_cep(cep, "[QP %u]: work type: %d, timeout %lu\n", 1274 cep->qp ? 
qp_id(cep->qp) : -1, type, delay); 1275 1276 queue_delayed_work(siw_cm_wq, &work->work, delay); 1277 1278 return 0; 1279 } 1280 1281 static void siw_cm_llp_data_ready(struct sock *sk) 1282 { 1283 struct siw_cep *cep; 1284 1285 trace_sk_data_ready(sk); 1286 1287 read_lock(&sk->sk_callback_lock); 1288 1289 cep = sk_to_cep(sk); 1290 if (!cep) 1291 goto out; 1292 1293 siw_dbg_cep(cep, "cep state: %d, socket state %d\n", 1294 cep->state, sk->sk_state); 1295 1296 if (sk->sk_state != TCP_ESTABLISHED) 1297 goto out; 1298 1299 switch (cep->state) { 1300 case SIW_EPSTATE_RDMA_MODE: 1301 case SIW_EPSTATE_LISTENING: 1302 break; 1303 1304 case SIW_EPSTATE_AWAIT_MPAREQ: 1305 case SIW_EPSTATE_AWAIT_MPAREP: 1306 siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR); 1307 break; 1308 1309 default: 1310 siw_dbg_cep(cep, "unexpected data, state %d\n", cep->state); 1311 break; 1312 } 1313 out: 1314 read_unlock(&sk->sk_callback_lock); 1315 } 1316 1317 static void siw_cm_llp_write_space(struct sock *sk) 1318 { 1319 struct siw_cep *cep = sk_to_cep(sk); 1320 1321 if (cep) 1322 siw_dbg_cep(cep, "state: %d\n", cep->state); 1323 } 1324 1325 static void siw_cm_llp_error_report(struct sock *sk) 1326 { 1327 struct siw_cep *cep = sk_to_cep(sk); 1328 1329 if (cep) { 1330 siw_dbg_cep(cep, "error %d, socket state: %d, cep state: %d\n", 1331 sk->sk_err, sk->sk_state, cep->state); 1332 cep->sk_error_report(sk); 1333 } 1334 } 1335 1336 static void siw_cm_llp_state_change(struct sock *sk) 1337 { 1338 struct siw_cep *cep; 1339 void (*orig_state_change)(struct sock *s); 1340 1341 read_lock(&sk->sk_callback_lock); 1342 1343 cep = sk_to_cep(sk); 1344 if (!cep) { 1345 /* endpoint already disassociated */ 1346 read_unlock(&sk->sk_callback_lock); 1347 return; 1348 } 1349 orig_state_change = cep->sk_state_change; 1350 1351 siw_dbg_cep(cep, "state: %d\n", cep->state); 1352 1353 switch (sk->sk_state) { 1354 case TCP_ESTABLISHED: 1355 /* 1356 * handle accepting socket as special case where only 1357 * new connection is 
possible 1358 */ 1359 siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT); 1360 break; 1361 1362 case TCP_CLOSE: 1363 case TCP_CLOSE_WAIT: 1364 if (cep->qp) 1365 cep->qp->tx_ctx.tx_suspend = 1; 1366 siw_cm_queue_work(cep, SIW_CM_WORK_PEER_CLOSE); 1367 break; 1368 1369 default: 1370 siw_dbg_cep(cep, "unexpected socket state %d\n", sk->sk_state); 1371 } 1372 read_unlock(&sk->sk_callback_lock); 1373 orig_state_change(sk); 1374 } 1375 1376 static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr, 1377 struct sockaddr *raddr, bool afonly) 1378 { 1379 int rv, flags = 0; 1380 size_t size = laddr->sa_family == AF_INET ? 1381 sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); 1382 1383 /* 1384 * Make address available again asap. 1385 */ 1386 sock_set_reuseaddr(s->sk); 1387 1388 if (afonly) { 1389 rv = ip6_sock_set_v6only(s->sk); 1390 if (rv) 1391 return rv; 1392 } 1393 1394 rv = s->ops->bind(s, (struct sockaddr_unsized *)laddr, size); 1395 if (rv < 0) 1396 return rv; 1397 1398 rv = s->ops->connect(s, (struct sockaddr_unsized *)raddr, size, flags); 1399 1400 return rv < 0 ? rv : 0; 1401 } 1402 1403 int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params) 1404 { 1405 struct siw_device *sdev = to_siw_dev(id->device); 1406 struct siw_qp *qp; 1407 struct siw_cep *cep = NULL; 1408 struct socket *s = NULL; 1409 struct sockaddr *laddr = (struct sockaddr *)&id->local_addr, 1410 *raddr = (struct sockaddr *)&id->remote_addr; 1411 bool p2p_mode = peer_to_peer, v4 = true; 1412 u16 pd_len = params->private_data_len; 1413 int version = mpa_version, rv; 1414 1415 if (pd_len > MPA_MAX_PRIVDATA) 1416 return -EINVAL; 1417 1418 if (params->ird > sdev->attrs.max_ird || 1419 params->ord > sdev->attrs.max_ord) 1420 return -ENOMEM; 1421 1422 if (laddr->sa_family == AF_INET6) 1423 v4 = false; 1424 else if (laddr->sa_family != AF_INET) 1425 return -EAFNOSUPPORT; 1426 1427 /* 1428 * Respect any iwarp port mapping: Use mapped remote address 1429 * if valid. 
Local address must not be mapped, since siw 1430 * uses kernel TCP stack. 1431 */ 1432 if ((v4 && to_sockaddr_in(id->remote_addr).sin_port != 0) || 1433 to_sockaddr_in6(id->remote_addr).sin6_port != 0) 1434 raddr = (struct sockaddr *)&id->m_remote_addr; 1435 1436 qp = siw_qp_id2obj(sdev, params->qpn); 1437 if (!qp) { 1438 WARN(1, "[QP %u] does not exist\n", params->qpn); 1439 rv = -EINVAL; 1440 goto error; 1441 } 1442 siw_dbg_qp(qp, "pd_len %d, laddr %pISp, raddr %pISp\n", pd_len, laddr, 1443 raddr); 1444 1445 rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s); 1446 if (rv < 0) 1447 goto error; 1448 siw_reclassify_socket(s); 1449 1450 /* 1451 * NOTE: For simplification, connect() is called in blocking 1452 * mode. Might be reconsidered for async connection setup at 1453 * TCP level. 1454 */ 1455 rv = kernel_bindconnect(s, laddr, raddr, id->afonly); 1456 if (rv != 0) { 1457 siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv); 1458 goto error; 1459 } 1460 if (siw_tcp_nagle == false) 1461 tcp_sock_set_nodelay(s->sk); 1462 cep = siw_cep_alloc(sdev); 1463 if (!cep) { 1464 rv = -ENOMEM; 1465 goto error; 1466 } 1467 siw_cep_set_inuse(cep); 1468 1469 /* Associate QP with CEP */ 1470 siw_cep_get(cep); 1471 qp->cep = cep; 1472 1473 /* siw_qp_get(qp) already done by QP lookup */ 1474 cep->qp = qp; 1475 1476 id->add_ref(id); 1477 cep->cm_id = id; 1478 1479 /* 1480 * 4: Allocate a sufficient number of work elements 1481 * to allow concurrent handling of local + peer close 1482 * events, MPA header processing + MPA timeout. 
1483 */ 1484 rv = siw_cm_alloc_work(cep, 4); 1485 if (rv != 0) { 1486 rv = -ENOMEM; 1487 goto error; 1488 } 1489 cep->ird = params->ird; 1490 cep->ord = params->ord; 1491 1492 if (p2p_mode && cep->ord == 0) 1493 cep->ord = 1; 1494 1495 cep->state = SIW_EPSTATE_CONNECTING; 1496 1497 /* 1498 * Associate CEP with socket 1499 */ 1500 siw_cep_socket_assoc(cep, s); 1501 1502 cep->state = SIW_EPSTATE_AWAIT_MPAREP; 1503 1504 /* 1505 * Set MPA Request bits: CRC if required, no MPA Markers, 1506 * MPA Rev. according to module parameter 'mpa_version', Key 'Request'. 1507 */ 1508 cep->mpa.hdr.params.bits = 0; 1509 if (version > MPA_REVISION_2) { 1510 pr_warn("Setting MPA version to %u\n", MPA_REVISION_2); 1511 version = MPA_REVISION_2; 1512 /* Adjust also module parameter */ 1513 mpa_version = MPA_REVISION_2; 1514 } 1515 __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, version); 1516 1517 if (try_gso) 1518 cep->mpa.hdr.params.bits |= MPA_RR_FLAG_GSO_EXP; 1519 1520 if (mpa_crc_required) 1521 cep->mpa.hdr.params.bits |= MPA_RR_FLAG_CRC; 1522 1523 /* 1524 * If MPA version == 2: 1525 * o Include ORD and IRD. 1526 * o Indicate peer-to-peer mode, if required by module 1527 * parameter 'peer_to_peer'. 1528 */ 1529 if (version == MPA_REVISION_2) { 1530 cep->enhanced_rdma_conn_est = true; 1531 cep->mpa.hdr.params.bits |= MPA_RR_FLAG_ENHANCED; 1532 1533 cep->mpa.v2_ctrl.ird = htons(cep->ird); 1534 cep->mpa.v2_ctrl.ord = htons(cep->ord); 1535 1536 if (p2p_mode) { 1537 cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER; 1538 cep->mpa.v2_ctrl.ord |= rtr_type; 1539 } 1540 /* Remember own P2P mode requested */ 1541 cep->mpa.v2_ctrl_req.ird = cep->mpa.v2_ctrl.ird; 1542 cep->mpa.v2_ctrl_req.ord = cep->mpa.v2_ctrl.ord; 1543 } 1544 memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, 16); 1545 1546 rv = siw_send_mpareqrep(cep, params->private_data, pd_len); 1547 /* 1548 * Reset private data. 
1549 */ 1550 cep->mpa.hdr.params.pd_len = 0; 1551 1552 if (rv >= 0) { 1553 rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT); 1554 if (!rv) { 1555 siw_dbg_cep(cep, "[QP %u]: exit\n", qp_id(qp)); 1556 siw_cep_set_free(cep); 1557 return 0; 1558 } 1559 } 1560 error: 1561 siw_dbg(id->device, "failed: %d\n", rv); 1562 1563 if (cep) { 1564 siw_socket_disassoc(s); 1565 sock_release(s); 1566 1567 cep->qp = NULL; 1568 1569 cep->cm_id = NULL; 1570 id->rem_ref(id); 1571 1572 qp->cep = NULL; 1573 siw_cep_put(cep); 1574 1575 cep->state = SIW_EPSTATE_CLOSED; 1576 1577 siw_cep_set_free_and_put(cep); 1578 1579 } else if (s) { 1580 sock_release(s); 1581 } 1582 if (qp) 1583 siw_qp_put(qp); 1584 1585 return rv; 1586 } 1587 1588 /* 1589 * siw_accept - Let SoftiWARP accept an RDMA connection request 1590 * 1591 * @id: New connection management id to be used for accepted 1592 * connection request 1593 * @params: Connection parameters provided by ULP for accepting connection 1594 * 1595 * Transition QP to RTS state, associate new CM id @id with accepted CEP 1596 * and get prepared for TCP input by installing socket callbacks. 1597 * Then send MPA Reply and generate the "connection established" event. 1598 * Socket callbacks must be installed before sending MPA Reply, because 1599 * the latter may cause a first RDMA message to arrive from the RDMA Initiator 1600 * side very quickly, at which time the socket callbacks must be ready. 
*/
int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct siw_device *sdev = to_siw_dev(id->device);
	struct siw_cep *cep = (struct siw_cep *)id->provider_data;
	struct siw_qp *qp;
	struct siw_qp_attrs qp_attrs;
	/* rv stays -EINVAL for every failure that does not set it explicitly */
	int rv = -EINVAL, max_priv_data = MPA_MAX_PRIVDATA;
	bool wait_for_peer_rts = false;

	/*
	 * NOTE(review): the put right after set_inuse presumably drops the
	 * reference held on behalf of the pending connect request upcall;
	 * confirm against siw_cm_upcall().
	 */
	siw_cep_set_inuse(cep);
	siw_cep_put(cep);

	/* Free lingering inbound private data */
	if (cep->mpa.hdr.params.pd_len) {
		cep->mpa.hdr.params.pd_len = 0;
		kfree(cep->mpa.pdata);
		cep->mpa.pdata = NULL;
	}
	siw_cancel_mpatimer(cep);

	/* Accept is only valid while an MPA request is pending on the CEP */
	if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
		siw_dbg_cep(cep, "out of state\n");
		rv = -ECONNRESET;
		goto free_cep;
	}
	qp = siw_qp_id2obj(sdev, params->qpn);
	if (!qp) {
		WARN(1, "[QP %d] does not exist\n", params->qpn);
		goto free_cep;
	}
	/* QP state is checked and changed under the QP state lock */
	down_write(&qp->state_lock);
	if (qp->attrs.state > SIW_QP_STATE_RTR)
		goto error_unlock;
	siw_dbg_cep(cep, "[QP %d]\n", params->qpn);

	if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) {
		siw_dbg_cep(cep, "peer allows GSO on TX\n");
		qp->tx_ctx.gso_seg_limit = 0;
	}
	/* Requested ORD/IRD must not exceed the device limits */
	if (params->ord > sdev->attrs.max_ord ||
	    params->ird > sdev->attrs.max_ird) {
		siw_dbg_cep(
			cep,
			"[QP %u]: ord %d (max %d), ird %d (max %d)\n",
			qp_id(qp), params->ord, sdev->attrs.max_ord,
			params->ird, sdev->attrs.max_ird);
		goto error_unlock;
	}
	/*
	 * With enhanced connection setup, the size of struct mpa_v2_data
	 * is taken off the space available for ULP private data.
	 */
	if (cep->enhanced_rdma_conn_est)
		max_priv_data -= sizeof(struct mpa_v2_data);

	if (params->private_data_len > max_priv_data) {
		siw_dbg_cep(
			cep,
			"[QP %u]: private data length: %d (max %d)\n",
			qp_id(qp), params->private_data_len, max_priv_data);
		goto error_unlock;
	}
	if (cep->enhanced_rdma_conn_est) {
		/*
		 * ULP asks for more ORD than stored in the CEP: clamp it
		 * if relaxed negotiation is enabled, otherwise fail.
		 */
		if (params->ord > cep->ord) {
			if (relaxed_ird_negotiation) {
				params->ord = cep->ord;
			} else {
				cep->ird = params->ird;
				cep->ord = params->ord;
				goto error_unlock;
			}
		}
		/*
		 * ULP offers less IRD than stored in the CEP: raise it to
		 * the CEP value if relaxed negotiation is enabled and the
		 * device limit allows, otherwise fail with -ENOMEM.
		 */
		if (params->ird < cep->ird) {
			if (relaxed_ird_negotiation &&
			    cep->ird <= sdev->attrs.max_ird)
				params->ird = cep->ird;
			else {
				rv = -ENOMEM;
				goto error_unlock;
			}
		}
		/*
		 * If an RTR type is flagged in the control word, the
		 * "established" event is not generated in this function;
		 * RTR socket upcalls get installed instead (see below).
		 */
		if (cep->mpa.v2_ctrl.ord &
		    (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR))
			wait_for_peer_rts = true;
		/*
		 * Signal back negotiated IRD and ORD values
		 */
		cep->mpa.v2_ctrl.ord =
			htons(params->ord & MPA_IRD_ORD_MASK) |
			(cep->mpa.v2_ctrl.ord & ~MPA_V2_MASK_IRD_ORD);
		cep->mpa.v2_ctrl.ird =
			htons(params->ird & MPA_IRD_ORD_MASK) |
			(cep->mpa.v2_ctrl.ird & ~MPA_V2_MASK_IRD_ORD);
	}
	cep->ird = params->ird;
	cep->ord = params->ord;

	/* Bind the new CM id to the CEP; reference dropped via siw_free_cm_id() */
	cep->cm_id = id;
	id->add_ref(id);

	/* Attributes for moving the QP to RTS on top of the CEP's socket */
	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.orq_size = cep->ord;
	qp_attrs.irq_size = cep->ird;
	qp_attrs.sk = cep->sock;
	if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC)
		qp_attrs.flags = SIW_MPA_CRC;
	qp_attrs.state = SIW_QP_STATE_RTS;

	siw_dbg_cep(cep, "[QP%u]: moving to rts\n", qp_id(qp));

	/* Associate QP with CEP */
	siw_cep_get(cep);
	qp->cep = cep;

	/* siw_qp_get(qp) already done by QP lookup */
	cep->qp = qp;

	cep->state = SIW_EPSTATE_RDMA_MODE;

	/* Move socket RX/TX under QP control */
	rv = siw_qp_modify(qp, &qp_attrs,
			   SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
				   SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD |
				   SIW_QP_ATTR_MPA);
	up_write(&qp->state_lock);
	if (rv)
		goto error;

	siw_dbg_cep(cep, "[QP %u]: send mpa reply, %d byte pdata\n",
		    qp_id(qp), params->private_data_len);

	rv = siw_send_mpareqrep(cep, params->private_data,
				params->private_data_len);
	if (rv != 0)
		goto error;

	if (wait_for_peer_rts) {
		siw_sk_assign_rtr_upcalls(cep);
	} else {
		siw_qp_socket_assoc(cep, qp);
		rv = siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
		if (rv)
			goto error;
	}
	siw_cep_set_free(cep);

	return 0;

	/* Failure while still holding the QP state lock */
error_unlock:
	up_write(&qp->state_lock);
	/* Failure with a valid QP: undo socket, CM id and QP association */
error:
	siw_destroy_cep_sock(cep);

	cep->state = SIW_EPSTATE_CLOSED;

	siw_free_cm_id(cep);
	/* Drop the CEP reference held by the QP, if already associated */
	if (qp->cep) {
		siw_cep_put(cep);
		qp->cep = NULL;
	}
	cep->qp = NULL;
	/* Drop the QP reference taken by siw_qp_id2obj() */
	siw_qp_put(qp);
	/* Failure before a QP reference was taken */
free_cep:
	siw_cep_set_free_and_put(cep);
	return rv;
}

/*
 * siw_reject - Reject a pending connection request
 *
 * @id:		IWCM id of the connection request to reject
 * @pdata:	Private data to send back to the peer
 * @pd_len:	Length of @pdata in bytes
 *
 * Local connection reject case. Send private data back to peer,
 * close connection and dereference connection id.
 *
 * Returns 0 if the request was rejected, -ECONNRESET if the CEP is
 * not waiting for the ULP's decision on an MPA request anymore.
 */
int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
{
	struct siw_cep *cep = (struct siw_cep *)id->provider_data;

	siw_cep_set_inuse(cep);
	siw_cep_put(cep);

	siw_cancel_mpatimer(cep);

	if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
		siw_dbg_cep(cep, "out of state\n");

		siw_cep_set_free_and_put(cep); /* put last reference */

		return -ECONNRESET;
	}
	siw_dbg_cep(cep, "cep->state %d, pd_len %d\n", cep->state,
		    pd_len);

	/*
	 * An MPA reply with the reject flag set is only sent for
	 * MPA revision 1 or later; its send result is not evaluated.
	 */
	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) {
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
		siw_send_mpareqrep(cep, pdata, pd_len);
	}
	siw_destroy_cep_sock(cep);

	cep->state = SIW_EPSTATE_CLOSED;

	siw_cep_set_free_and_put(cep);

	return 0;
}

/*
 * siw_create_listen - Create resources for a listener's IWCM ID @id
 *
 * Starts listen on the socket address id->local_addr.
1807 * 1808 */ 1809 int siw_create_listen(struct iw_cm_id *id, int backlog) 1810 { 1811 struct socket *s; 1812 struct siw_cep *cep = NULL; 1813 struct net_device *ndev = NULL; 1814 struct siw_device *sdev = to_siw_dev(id->device); 1815 int addr_family = id->local_addr.ss_family; 1816 int rv = 0; 1817 1818 if (addr_family != AF_INET && addr_family != AF_INET6) 1819 return -EAFNOSUPPORT; 1820 1821 rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); 1822 if (rv < 0) 1823 return rv; 1824 siw_reclassify_socket(s); 1825 1826 /* 1827 * Allow binding local port when still in TIME_WAIT from last close. 1828 */ 1829 sock_set_reuseaddr(s->sk); 1830 1831 if (addr_family == AF_INET) { 1832 struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); 1833 1834 /* For wildcard addr, limit binding to current device only */ 1835 if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) { 1836 ndev = ib_device_get_netdev(id->device, SIW_PORT); 1837 if (ndev) { 1838 s->sk->sk_bound_dev_if = ndev->ifindex; 1839 } else { 1840 rv = -ENODEV; 1841 goto error; 1842 } 1843 } 1844 rv = s->ops->bind(s, (struct sockaddr_unsized *)laddr, 1845 sizeof(struct sockaddr_in)); 1846 } else { 1847 struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); 1848 1849 if (id->afonly) { 1850 rv = ip6_sock_set_v6only(s->sk); 1851 if (rv) { 1852 siw_dbg(id->device, 1853 "ip6_sock_set_v6only erro: %d\n", rv); 1854 goto error; 1855 } 1856 } 1857 1858 /* For wildcard addr, limit binding to current device only */ 1859 if (ipv6_addr_any(&laddr->sin6_addr)) { 1860 ndev = ib_device_get_netdev(id->device, SIW_PORT); 1861 if (ndev) { 1862 s->sk->sk_bound_dev_if = ndev->ifindex; 1863 } else { 1864 rv = -ENODEV; 1865 goto error; 1866 } 1867 } 1868 rv = s->ops->bind(s, (struct sockaddr_unsized *)laddr, 1869 sizeof(struct sockaddr_in6)); 1870 } 1871 if (rv) { 1872 siw_dbg(id->device, "socket bind error: %d\n", rv); 1873 goto error; 1874 } 1875 cep = siw_cep_alloc(sdev); 1876 if (!cep) { 1877 rv = -ENOMEM; 1878 goto 
error; 1879 } 1880 siw_cep_socket_assoc(cep, s); 1881 1882 rv = siw_cm_alloc_work(cep, backlog); 1883 if (rv) { 1884 siw_dbg(id->device, 1885 "alloc_work error %d, backlog %d\n", 1886 rv, backlog); 1887 goto error; 1888 } 1889 rv = s->ops->listen(s, backlog); 1890 if (rv) { 1891 siw_dbg(id->device, "listen error %d\n", rv); 1892 goto error; 1893 } 1894 cep->cm_id = id; 1895 id->add_ref(id); 1896 1897 /* 1898 * In case of a wildcard rdma_listen on a multi-homed device, 1899 * a listener's IWCM id is associated with more than one listening CEP. 1900 * 1901 * We currently use id->provider_data in three different ways: 1902 * 1903 * o For a listener's IWCM id, id->provider_data points to 1904 * the list_head of the list of listening CEPs. 1905 * Uses: siw_create_listen(), siw_destroy_listen() 1906 * 1907 * o For each accepted passive-side IWCM id, id->provider_data 1908 * points to the CEP itself. This is a consequence of 1909 * - siw_cm_upcall() setting event.provider_data = cep and 1910 * - the IWCM's cm_conn_req_handler() setting provider_data of the 1911 * new passive-side IWCM id equal to event.provider_data 1912 * Uses: siw_accept(), siw_reject() 1913 * 1914 * o For an active-side IWCM id, id->provider_data is not used at all. 
1915 * 1916 */ 1917 if (!id->provider_data) { 1918 id->provider_data = 1919 kmalloc_obj(struct list_head); 1920 if (!id->provider_data) { 1921 rv = -ENOMEM; 1922 goto error; 1923 } 1924 INIT_LIST_HEAD((struct list_head *)id->provider_data); 1925 } 1926 list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); 1927 cep->state = SIW_EPSTATE_LISTENING; 1928 dev_put(ndev); 1929 1930 siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr); 1931 1932 return 0; 1933 1934 error: 1935 siw_dbg(id->device, "failed: %d\n", rv); 1936 1937 if (cep) { 1938 siw_cep_set_inuse(cep); 1939 1940 siw_free_cm_id(cep); 1941 siw_socket_disassoc(s); 1942 cep->state = SIW_EPSTATE_CLOSED; 1943 1944 siw_cep_set_free_and_put(cep); 1945 } 1946 sock_release(s); 1947 dev_put(ndev); 1948 1949 return rv; 1950 } 1951 1952 static void siw_drop_listeners(struct iw_cm_id *id) 1953 { 1954 struct list_head *p, *tmp; 1955 1956 /* 1957 * In case of a wildcard rdma_listen on a multi-homed device, 1958 * a listener's IWCM id is associated with more than one listening CEP. 
1959 */ 1960 list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) { 1961 struct siw_cep *cep = list_entry(p, struct siw_cep, listenq); 1962 struct socket *s = cep->sock; 1963 1964 list_del(p); 1965 1966 siw_dbg_cep(cep, "drop cep, state %d\n", cep->state); 1967 1968 siw_cep_set_inuse(cep); 1969 1970 siw_free_cm_id(cep); 1971 if (s) { 1972 siw_socket_disassoc(s); 1973 sock_release(s); 1974 } 1975 cep->state = SIW_EPSTATE_CLOSED; 1976 siw_cep_set_free_and_put(cep); 1977 } 1978 } 1979 1980 int siw_destroy_listen(struct iw_cm_id *id) 1981 { 1982 if (!id->provider_data) { 1983 siw_dbg(id->device, "no cep(s)\n"); 1984 return 0; 1985 } 1986 siw_drop_listeners(id); 1987 kfree(id->provider_data); 1988 id->provider_data = NULL; 1989 1990 return 0; 1991 } 1992 1993 int siw_cm_init(void) 1994 { 1995 /* 1996 * create_single_workqueue for strict ordering 1997 */ 1998 siw_cm_wq = create_singlethread_workqueue("siw_cm_wq"); 1999 if (!siw_cm_wq) 2000 return -ENOMEM; 2001 2002 return 0; 2003 } 2004 2005 void siw_cm_exit(void) 2006 { 2007 if (siw_cm_wq) 2008 destroy_workqueue(siw_cm_wq); 2009 } 2010