1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/conf.h> 30 #include <sys/modctl.h> 31 #include <sys/stat.h> 32 #include <sys/stream.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/priv_names.h> 37 #include <inet/common.h> 38 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/tiuser.h> 43 #include <sys/suntpi.h> 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/mi.h> 47 #include <sys/ib/clients/rds/rds.h> 48 #include <sys/policy.h> 49 #include <inet/ipclassifier.h> 50 #include <sys/ib/clients/rds/rds_kstat.h> 51 #include "sys/random.h" 52 #include <sys/ib/clients/rds/rds_transport.h> 53 #include <sys/ib/ibtl/ibti.h> 54 55 56 #define RDS_NAME "rds" 57 #define RDS_STRTAB rdsinfo 58 #define RDS_DEVDESC "RDS STREAMS driver %I%" 59 #define RDS_DEVMINOR 0 60 #define RDS_DEVMTFLAGS D_MP | D_SYNCSTR 61 #define RDS_DEFAULT_PRIV_MODE 0666 62 63 #define rds_smallest_port 1 64 #define rds_largest_port 65535 65 66 #define RDS_RECV_HIWATER (56 * 1024) 67 #define RDS_RECV_LOWATER 128 68 #define RDS_XMIT_HIWATER (56 * 1024) 69 #define RDS_XMIT_LOWATER 1024 70 71 #define RDS_DPRINTF2 0 && 72 #define LABEL "RDS" 73 74 typedef struct rdsahdr_s { 75 in_port_t uha_src_port; /* Source port */ 76 in_port_t uha_dst_port; /* Destination port */ 77 } rdsha_t; 78 79 #define RDSH_SIZE 4 80 81 int rds_recv_hiwat = RDS_RECV_HIWATER; 82 int rds_recv_lowat = RDS_RECV_LOWATER; 83 int rds_xmit_hiwat = RDS_XMIT_HIWATER; 84 int rds_xmit_lowat = RDS_XMIT_LOWATER; 85 86 int rdsdebug; 87 88 static dev_info_t *rds_dev_info; 89 90 /* Hint not protected by any lock */ 91 static in_port_t rds_next_port_to_try; 92 93 ldi_ident_t rds_li; 94 static int loopmax = rds_largest_port - rds_smallest_port + 1; 95 96 /* global configuration variables */ 97 uint_t UserBufferSize; 98 uint_t rds_rx_pkts_pending_hwm; 99 100 extern void rds_ioctl(queue_t *, mblk_t *); 101 extern void rds_ioctl_copyin_done(queue_t *q, mblk_t *mp); 102 103 int rds_open_transport_driver(); 104 int rds_close_transport_driver(); 105 106 #define RDS_CURRENT_PORT_QUOTA() \ 107 (rds_rx_pkts_pending_hwm/RDS_GET_NPORT()) 108 109 krwlock_t rds_transport_lock; 110 ldi_handle_t rds_transport_handle = NULL; 111 rds_transport_ops_t *rds_transport_ops = NULL; 112 113 static int 114 rds_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) 115 { 116 int ret; 117 118 if (cmd != DDI_ATTACH) 119 return (DDI_FAILURE); 120 121 rds_dev_info = devi; 122 123 ret = ddi_create_minor_node(devi, RDS_NAME, S_IFCHR, 124 RDS_DEVMINOR, DDI_PSEUDO, 0); 125 if (ret != DDI_SUCCESS) { 126 return (ret); 127 } 128 129 return (DDI_SUCCESS); 130 } 131 132 static int 133 rds_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) 134 { 135 if (cmd != DDI_DETACH) 136 return (DDI_FAILURE); 137 138 ASSERT(devi == rds_dev_info); 139 140 ddi_remove_minor_node(devi, NULL); 141 142 return (DDI_SUCCESS); 143 } 144 145 /* ARGSUSED */ 146 static int 147 rds_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 148 { 149 int error = DDI_FAILURE; 150 151 switch (cmd) { 152 case DDI_INFO_DEVT2DEVINFO: 153 if (rds_dev_info != NULL) { 154 *result = (void *)rds_dev_info; 155 error = DDI_SUCCESS; 156 } 157 break; 158 159 case DDI_INFO_DEVT2INSTANCE: 160 *result = NULL; 161 error = DDI_SUCCESS; 162 break; 163 164 default: 165 break; 166 } 167 168 return (error); 169 } 170 171 172 /*ARGSUSED*/ 173 static int 174 rds_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 175 { 176 rds_t *rds; 177 int ret; 178 179 if (is_system_labeled()) { 180 /* 181 * RDS socket is not supported on labeled systems 182 */ 183 return (ESOCKTNOSUPPORT); 184 } 185 186 /* Open the transport driver if IB HW is present */ 187 rw_enter(&rds_transport_lock, RW_READER); 188 if (rds_transport_handle == NULL) { 189 rw_exit(&rds_transport_lock); 190 ret = rds_open_transport_driver(); 191 rw_enter(&rds_transport_lock, RW_READER); 192 193 if (ret != 0) { 194 /* Transport driver failed to load */ 195 rw_exit(&rds_transport_lock); 196 return (ret); 197 } 198 } 199 rw_exit(&rds_transport_lock); 200 201 if (sflag == MODOPEN) { 202 return (EINVAL); 203 } 204 205 /* Reopen not supported */ 206 if (q->q_ptr != NULL) { 207 dprint(2, ("%s: Reopen is not supported: %p", LABEL, q->q_ptr)); 208 return (0); 209 } 210 211 rds = rds_create(q, credp); 212 if (rds == NULL) { 213 dprint(2, ("%s: rds_create failed", LABEL)); 214 return (0); 215 } 216 217 q->q_ptr = WR(q)->q_ptr = rds; 218 rds->rds_state = TS_UNBND; 219 rds->rds_family = AF_INET_OFFLOAD; 220 221 q->q_hiwat = rds_recv_hiwat; 222 q->q_lowat = rds_recv_lowat; 223 224 qprocson(q); 225 226 WR(q)->q_hiwat = rds_xmit_hiwat; 227 WR(q)->q_lowat = rds_xmit_lowat; 228 229 /* Set the Stream head watermarks */ 230 (void) mi_set_sth_hiwat(q, rds_recv_hiwat); 231 (void) mi_set_sth_lowat(q, rds_recv_lowat); 232 233 return (0); 234 } 235 236 static int 237 rds_close(queue_t *q) 238 { 239 rds_t *rdsp = (rds_t *)q->q_ptr; 240 241 qprocsoff(q); 242 243 /* 244 * NPORT should be decremented only if this socket was previously 245 * bound to an RDS port. 246 */ 247 if (rdsp->rds_state >= TS_IDLE) { 248 RDS_DECR_NPORT(); 249 RDS_SET_PORT_QUOTA(RDS_CURRENT_PORT_QUOTA()); 250 rds_transport_ops-> 251 rds_transport_resume_port(ntohs(rdsp->rds_port)); 252 } 253 254 /* close the transport driver if this is the last socket */ 255 if (RDS_GET_NPORT() == 1) { 256 (void) rds_close_transport_driver(); 257 } 258 259 /* 260 * We set the flags without holding a lock as this is 261 * just a hint for the fanout lookup to skip this rds. 262 * We dont free the struct until it's out of the hash and 263 * the ref count goes down. 264 */ 265 rdsp->rds_flags |= RDS_CLOSING; 266 rds_bind_hash_remove(rdsp, B_FALSE); 267 mutex_enter(&rdsp->rds_lock); 268 ASSERT(rdsp->rds_refcnt > 0); 269 if (rdsp->rds_refcnt != 1) { 270 cv_wait(&rdsp->rds_refcv, &rdsp->rds_lock); 271 } 272 mutex_exit(&rdsp->rds_lock); 273 RDS_DEC_REF_CNT(rdsp); 274 RD(q)->q_ptr = NULL; 275 WR(q)->q_ptr = NULL; 276 return (0); 277 } 278 279 /* 280 * Add a new message to the socket 281 */ 282 int 283 rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr, 284 in_port_t local_port, in_port_t rem_port, zoneid_t zoneid) 285 { 286 rds_t *rds; 287 struct T_unitdata_ind *tudi; 288 int udi_size; /* Size of T_unitdata_ind */ 289 mblk_t *mp1; 290 sin_t *sin; 291 int error = 0; 292 293 local_port = htons(local_port); 294 rem_port = htons(rem_port); 295 296 ASSERT(mp->b_datap->db_type == M_DATA); 297 rds = rds_fanout(local_addr, rem_addr, local_port, rem_port, zoneid); 298 if (rds == NULL) { 299 dprint(2, ("%s: rds_fanout failed: (0x%x 0x%x %d %d)", LABEL, 300 local_addr, rem_addr, ntohs(local_port), ntohs(rem_port))); 301 freemsg(mp); 302 return (error); 303 } 304 305 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 306 307 /* Allocate a message block for the T_UNITDATA_IND structure. */ 308 mp1 = allocb(udi_size, BPRI_MED); 309 if (mp1 == NULL) { 310 dprint(2, ("%s: allocb failed", LABEL)); 311 freemsg(mp); 312 return (ENOMEM); 313 } 314 315 mp1->b_cont = mp; 316 mp = mp1; 317 mp->b_datap->db_type = M_PROTO; 318 tudi = (struct T_unitdata_ind *)(uintptr_t)mp->b_rptr; 319 mp->b_wptr = (uchar_t *)tudi + udi_size; 320 tudi->PRIM_type = T_UNITDATA_IND; 321 tudi->SRC_length = sizeof (sin_t); 322 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 323 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 324 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 325 tudi->OPT_length = udi_size; 326 sin = (sin_t *)&tudi[1]; 327 sin->sin_addr.s_addr = rem_addr; 328 sin->sin_port = ntohs(rem_port); 329 sin->sin_family = rds->rds_family; 330 *(uint32_t *)(uintptr_t)&sin->sin_zero[0] = 0; 331 *(uint32_t *)(uintptr_t)&sin->sin_zero[4] = 0; 332 333 putnext(rds->rds_ulpd, mp); 334 335 /* check port quota */ 336 if (RDS_GET_RXPKTS_PEND() > rds_rx_pkts_pending_hwm) { 337 ulong_t current_port_quota = RDS_GET_PORT_QUOTA(); 338 if (rds->rds_port_quota > current_port_quota) { 339 /* this may result in stalling the port */ 340 rds->rds_port_quota = current_port_quota; 341 (void) mi_set_sth_hiwat(rds->rds_ulpd, 342 rds->rds_port_quota * UserBufferSize); 343 RDS_INCR_PORT_QUOTA_ADJUSTED(); 344 } 345 } 346 347 /* 348 * canputnext() check is done after putnext as the protocol does 349 * not allow dropping any received packet. 350 */ 351 if (!canputnext(rds->rds_ulpd)) { 352 error = ENOSPC; 353 } 354 355 RDS_DEC_REF_CNT(rds); 356 return (error); 357 } 358 359 360 /* Default structure copied into T_INFO_ACK messages */ 361 static struct T_info_ack rds_g_t_info_ack_ipv4 = { 362 T_INFO_ACK, 363 65535, /* TSDU_size. Excl. headers */ 364 T_INVALID, /* ETSU_size. rds does not support expedited data. */ 365 T_INVALID, /* CDATA_size. rds does not support connect data. */ 366 T_INVALID, /* DDATA_size. rds does not support disconnect data. */ 367 sizeof (sin_t), /* ADDR_size. */ 368 0, /* OPT_size - not initialized here */ 369 65535, /* TIDU_size. Excl. headers */ 370 T_CLTS, /* SERV_type. rds supports connection-less. */ 371 TS_UNBND, /* CURRENT_state. This is set from rds_state. */ 372 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 373 }; 374 375 static in_port_t 376 rds_update_next_port(in_port_t port) 377 { 378 (void) random_get_pseudo_bytes((uint8_t *)&port, sizeof (in_port_t)); 379 if (port < rds_smallest_port) 380 port = rds_smallest_port; 381 return (port); 382 } 383 384 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 385 static void 386 rds_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 387 { 388 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 389 qreply(q, mp); 390 } 391 392 static void 393 rds_capability_req(queue_t *q, mblk_t *mp) 394 { 395 t_uscalar_t cap_bits1; 396 struct T_capability_ack *tcap; 397 398 cap_bits1 = 399 ((struct T_capability_req *)(uintptr_t)mp->b_rptr)->CAP_bits1; 400 401 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 402 mp->b_datap->db_type, T_CAPABILITY_ACK); 403 if (mp == NULL) 404 return; 405 tcap = (struct T_capability_ack *)(uintptr_t)mp->b_rptr; 406 tcap->CAP_bits1 = 0; 407 408 if (cap_bits1 & TC1_INFO) { 409 tcap->CAP_bits1 |= TC1_INFO; 410 *(&tcap->INFO_ack) = rds_g_t_info_ack_ipv4; 411 } 412 413 qreply(q, mp); 414 } 415 416 static void 417 rds_info_req(queue_t *q, mblk_t *omp) 418 { 419 rds_t *rds = (rds_t *)q->q_ptr; 420 struct T_info_ack *tap; 421 mblk_t *mp; 422 423 /* Create a T_INFO_ACK message. */ 424 mp = tpi_ack_alloc(omp, sizeof (struct T_info_ack), M_PCPROTO, 425 T_INFO_ACK); 426 if (mp == NULL) 427 return; 428 tap = (struct T_info_ack *)(uintptr_t)mp->b_rptr; 429 *tap = rds_g_t_info_ack_ipv4; 430 tap->CURRENT_state = rds->rds_state; 431 tap->OPT_size = 128; 432 qreply(q, mp); 433 } 434 435 /* 436 * NO locking protection here as sockfs will only send down 437 * one bind operation at a time. 438 */ 439 static void 440 rds_bind(queue_t *q, mblk_t *mp) 441 { 442 sin_t *sin; 443 rds_t *rds; 444 struct T_bind_req *tbr; 445 in_port_t port; /* Host byte order */ 446 in_port_t requested_port; /* Host byte order */ 447 struct T_bind_ack *tba; 448 int count; 449 rds_bf_t *rdsbf; 450 in_port_t lport; /* Network byte order */ 451 452 rds = (rds_t *)q->q_ptr; 453 if (((uintptr_t)mp->b_wptr - (uintptr_t)mp->b_rptr) < sizeof (*tbr)) { 454 rds_err_ack(q, mp, TPROTO, 0); 455 return; 456 } 457 458 /* 459 * We don't allow multiple binds 460 */ 461 if (rds->rds_state != TS_UNBND) { 462 rds_err_ack(q, mp, TOUTSTATE, 0); 463 return; 464 } 465 466 tbr = (struct T_bind_req *)(uintptr_t)mp->b_rptr; 467 switch (tbr->ADDR_length) { 468 case sizeof (sin_t): /* Complete IPv4 address */ 469 sin = (sin_t *)(uintptr_t)mi_offset_param(mp, tbr->ADDR_offset, 470 sizeof (sin_t)); 471 if (sin == NULL || !OK_32PTR((char *)sin)) { 472 rds_err_ack(q, mp, TSYSERR, EINVAL); 473 return; 474 } 475 if (rds->rds_family != AF_INET_OFFLOAD || 476 sin->sin_family != AF_INET_OFFLOAD) { 477 rds_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 478 return; 479 } 480 if (sin->sin_addr.s_addr == INADDR_ANY) { 481 rds_err_ack(q, mp, TBADADDR, 0); 482 return; 483 } 484 485 /* 486 * verify that the address is hosted on IB 487 * only exception is the loopback address. 488 */ 489 if ((sin->sin_addr.s_addr != INADDR_LOOPBACK) && 490 !rds_verify_bind_address(sin->sin_addr.s_addr)) { 491 rds_err_ack(q, mp, TBADADDR, 0); 492 return; 493 } 494 495 port = ntohs(sin->sin_port); 496 break; 497 default: /* Invalid request */ 498 rds_err_ack(q, mp, TBADADDR, 0); 499 return; 500 } 501 502 requested_port = port; 503 504 /* 505 * TPI only sends down T_BIND_REQ for AF_INET and AF_INET6 506 * since RDS socket is of type AF_INET_OFFLOAD a O_T_BIND_REQ 507 * will be sent down. Treat O_T_BIND_REQ as T_BIND_REQ 508 */ 509 510 if (requested_port == 0) { 511 /* 512 * If the application passed in zero for the port number, it 513 * doesn't care which port number we bind to. Get one in the 514 * valid range. 515 */ 516 port = rds_update_next_port(rds_next_port_to_try); 517 } 518 519 ASSERT(port != 0); 520 count = 0; 521 for (;;) { 522 rds_t *rds1; 523 ASSERT(sin->sin_addr.s_addr != INADDR_ANY); 524 /* 525 * Walk through the list of rds streams bound to 526 * requested port with the same IP address. 527 */ 528 lport = htons(port); 529 rdsbf = &rds_bind_fanout[RDS_BIND_HASH(lport)]; 530 mutex_enter(&rdsbf->rds_bf_lock); 531 for (rds1 = rdsbf->rds_bf_rds; rds1 != NULL; 532 rds1 = rds1->rds_bind_hash) { 533 if (lport != rds1->rds_port || 534 rds1->rds_src != sin->sin_addr.s_addr || 535 rds1->rds_zoneid != rds->rds_zoneid) 536 537 continue; 538 break; 539 } 540 541 if (rds1 == NULL) { 542 /* 543 * No other stream has this IP address 544 * and port number. We can use it. 545 */ 546 break; 547 } 548 mutex_exit(&rdsbf->rds_bf_lock); 549 if (requested_port != 0) { 550 /* 551 * We get here only when requested port 552 * is bound (and only first of the for() 553 * loop iteration). 554 * 555 * The semantics of this bind request 556 * require it to fail so we return from 557 * the routine (and exit the loop). 558 * 559 */ 560 rds_err_ack(q, mp, TADDRBUSY, 0); 561 return; 562 } 563 564 port = rds_update_next_port(port + 1); 565 566 if (++count >= loopmax) { 567 /* 568 * We've tried every possible port number and 569 * there are none available, so send an error 570 * to the user. 571 */ 572 rds_err_ack(q, mp, TNOADDR, 0); 573 return; 574 } 575 } 576 577 /* 578 * Copy the source address into our rds structure. 579 */ 580 rds->rds_src = sin->sin_addr.s_addr; 581 rds->rds_port = lport; 582 583 /* 584 * reset the next port if we choose the port 585 */ 586 if (requested_port == 0) { 587 rds_next_port_to_try = port + 1; 588 } 589 590 rds->rds_state = TS_IDLE; 591 rds_bind_hash_insert(rdsbf, rds); 592 mutex_exit(&rdsbf->rds_bf_lock); 593 594 /* Reset the message type in preparation for shipping it back. */ 595 mp->b_datap->db_type = M_PCPROTO; 596 tba = (struct T_bind_ack *)(uintptr_t)mp->b_rptr; 597 tba->PRIM_type = T_BIND_ACK; 598 599 /* Increment the number of ports and set the port quota */ 600 RDS_INCR_NPORT(); 601 rds->rds_port_quota = RDS_CURRENT_PORT_QUOTA(); 602 RDS_SET_PORT_QUOTA(rds->rds_port_quota); 603 (void) mi_set_sth_hiwat(RD(q), rds->rds_port_quota * UserBufferSize); 604 605 qreply(q, mp); 606 } 607 608 static void 609 rds_wput_other(queue_t *q, mblk_t *mp) 610 { 611 rds_t *rds = (rds_t *)q->q_ptr; 612 uchar_t *rptr = mp->b_rptr; 613 struct datab *db; 614 cred_t *cr; 615 616 cr = DB_CREDDEF(mp, rds->rds_cred); 617 db = mp->b_datap; 618 switch (db->db_type) { 619 case M_DATA: 620 /* Not connected */ 621 freemsg(mp); 622 return; 623 case M_PROTO: 624 case M_PCPROTO: 625 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr < 626 sizeof (t_scalar_t)) { 627 freemsg(mp); 628 return; 629 } 630 switch (((union T_primitives *)(uintptr_t)rptr)->type) { 631 case T_CAPABILITY_REQ: 632 rds_capability_req(q, mp); 633 return; 634 635 case T_INFO_REQ: 636 rds_info_req(q, mp); 637 return; 638 case O_T_BIND_REQ: 639 case T_BIND_REQ: 640 rds_bind(q, mp); 641 return; 642 case T_SVR4_OPTMGMT_REQ: 643 (void) svr4_optcom_req(q, mp, cr, &rds_opt_obj, 644 B_FALSE); 645 return; 646 case T_OPTMGMT_REQ: 647 (void) tpi_optcom_req(q, mp, cr, &rds_opt_obj, B_FALSE); 648 return; 649 case T_CONN_REQ: 650 /* 651 * We should not receive T_CONN_REQ as sockfs only 652 * sends down T_CONN_REQ if family == AF_INET/AF_INET6 653 * and type == SOCK_DGRAM/SOCK_RAW. For all others 654 * it simply calls soisconnected. see sotpi_connect() 655 * for details. 656 */ 657 /* FALLTHRU */ 658 default: 659 cmn_err(CE_PANIC, "type %d \n", 660 ((union T_primitives *)(uintptr_t)rptr)->type); 661 } 662 break; 663 case M_FLUSH: 664 if (*rptr & FLUSHW) 665 flushq(q, FLUSHDATA); 666 break; 667 case M_IOCTL: 668 rds_ioctl(q, mp); 669 break; 670 case M_IOCDATA: 671 /* IOCTL continuation following copyin or copyout. */ 672 if (mi_copy_state(q, mp, NULL) == -1) { 673 /* 674 * The copy operation failed. mi_copy_state already 675 * cleaned up, so we're out of here. 676 */ 677 return; 678 } 679 /* 680 * If we just completed a copy in, continue processing 681 * in rds_ioctl_copyin_done. If it was a copy out, we call 682 * mi_copyout again. If there is nothing more to copy out, 683 * it will complete the IOCTL. 684 */ 685 686 if (MI_COPY_DIRECTION(mp) == MI_COPY_IN) 687 rds_ioctl_copyin_done(q, mp); 688 else 689 mi_copyout(q, mp); 690 return; 691 692 default: 693 cmn_err(CE_PANIC, "types %d \n", db->db_type); 694 } 695 } 696 697 static int 698 rds_wput(queue_t *q, mblk_t *mp) 699 { 700 struct datab *db; 701 uchar_t *rptr = mp->b_rptr; 702 703 db = mp->b_datap; 704 switch (db->db_type) { 705 case M_PROTO: 706 case M_PCPROTO: 707 ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <= 708 (uintptr_t)INT_MAX); 709 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >= 710 sizeof (struct T_unitdata_req)) { 711 if (((union T_primitives *)(uintptr_t)rptr)->type 712 == T_UNITDATA_REQ) { 713 /* 714 * We should never come here for T_UNITDATA_REQ 715 */ 716 cmn_err(CE_PANIC, "rds_wput T_UNITDATA_REQ \n"); 717 } 718 } 719 /* FALLTHRU */ 720 default: 721 rds_wput_other(q, mp); 722 return (0); 723 } 724 } 725 726 static int 727 rds_wput_data(queue_t *q, mblk_t *mp, uio_t *uiop) 728 { 729 uchar_t *rptr = mp->b_rptr; 730 rds_t *rds; 731 mblk_t *mp1; 732 sin_t *sin; 733 ipaddr_t dst; 734 uint16_t port; 735 int ret = 0; 736 737 #define tudr ((struct T_unitdata_req *)(uintptr_t)rptr) 738 739 rds = (rds_t *)q->q_ptr; 740 /* Handle UNITDATA_REQ messages here */ 741 if (rds->rds_state == TS_UNBND) { 742 /* If a port has not been bound to the stream, fail. */ 743 dprint(2, ("%s: socket is not bound to a port", LABEL)); 744 freemsg(mp); 745 return (EPROTO); 746 } 747 748 mp1 = mp->b_cont; 749 mp->b_cont = NULL; 750 if (mp1 == NULL) { 751 dprint(2, ("%s: No message to send", LABEL)); 752 freemsg(mp); 753 return (EPROTO); 754 } 755 756 /* 757 * No options allowed 758 */ 759 if (tudr->OPT_length != 0) { 760 ret = EINVAL; 761 goto done; 762 } 763 764 ASSERT(mp1->b_datap->db_ref == 1); 765 766 if ((rptr + tudr->DEST_offset + tudr->DEST_length) > 767 mp->b_wptr) { 768 ret = EDESTADDRREQ; 769 goto done; 770 } 771 772 sin = (sin_t *)(uintptr_t)&rptr[tudr->DEST_offset]; 773 if (!OK_32PTR((char *)sin) || tudr->DEST_length != 774 sizeof (sin_t) || sin->sin_family != AF_INET_OFFLOAD) { 775 ret = EDESTADDRREQ; 776 goto done; 777 } 778 /* Extract port and ipaddr */ 779 port = sin->sin_port; 780 dst = sin->sin_addr.s_addr; 781 782 if (port == 0 || dst == INADDR_ANY) { 783 ret = EDESTADDRREQ; 784 goto done; 785 } 786 787 ASSERT(rds_transport_ops != NULL); 788 ret = rds_transport_ops->rds_transport_sendmsg(uiop, rds->rds_src, dst, 789 ntohs(rds->rds_port), ntohs(port), rds->rds_zoneid); 790 if (ret != 0) { 791 if ((ret != ENOBUFS) && (ret != ENOMEM)) { 792 /* ENOMEM is actually EWOULDBLOCK */ 793 dprint(2, ("%s: rds_sendmsg returned %d", LABEL, ret)); 794 goto done; 795 } 796 } 797 done: 798 freemsg(mp1); 799 freemsg(mp); 800 return (ret); 801 } 802 803 /* 804 * Make sure we dont return EINVAL and EWOULDBLOCK as it has 805 * special meanings for the synchronous streams (rwnext()). 806 * We should return ENOMEM which is changed to EWOULDBLOCK by kstrputmsg() 807 */ 808 static int 809 rds_wrw(queue_t *q, struiod_t *dp) 810 { 811 mblk_t *mp = dp->d_mp; 812 int error = 0; 813 struct datab *db; 814 uchar_t *rptr; 815 816 db = mp->b_datap; 817 rptr = mp->b_rptr; 818 switch (db->db_type) { 819 case M_PROTO: 820 case M_PCPROTO: 821 ASSERT(((uintptr_t)mp->b_wptr - (uintptr_t)rptr) <= 822 (uintptr_t)INT_MAX); 823 if ((uintptr_t)mp->b_wptr - (uintptr_t)rptr >= 824 sizeof (struct T_unitdata_req)) { 825 /* Detect valid T_UNITDATA_REQ here */ 826 if (((union T_primitives *)(uintptr_t)rptr)->type 827 == T_UNITDATA_REQ) 828 break; 829 } 830 /* FALLTHRU */ 831 default: 832 833 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 834 /* 835 * Uio error of some sort, so just return the error. 836 */ 837 goto done; 838 } 839 dp->d_mp = 0; 840 rds_wput_other(q, mp); 841 return (0); 842 } 843 844 dp->d_mp = 0; 845 error = rds_wput_data(q, mp, &dp->d_uio); 846 done: 847 if (error == EWOULDBLOCK || error == EINVAL) 848 error = EIO; 849 850 return (error); 851 } 852 853 static void 854 rds_rsrv(queue_t *q) 855 { 856 rds_t *rds = (rds_t *)q->q_ptr; 857 ulong_t current_port_quota; 858 859 /* update the port quota to the current level */ 860 current_port_quota = RDS_GET_PORT_QUOTA(); 861 if (rds->rds_port_quota != current_port_quota) { 862 rds->rds_port_quota = current_port_quota; 863 (void) mi_set_sth_hiwat(q, 864 rds->rds_port_quota * UserBufferSize); 865 } 866 867 /* No more messages in the q, unstall the socket */ 868 rds_transport_ops->rds_transport_resume_port(ntohs(rds->rds_port)); 869 } 870 871 int 872 rds_close_transport_driver() 873 { 874 ASSERT(rds_transport_ops != NULL); 875 876 rw_enter(&rds_transport_lock, RW_WRITER); 877 if (rds_transport_handle != NULL) { 878 rds_transport_ops->rds_transport_close_ib(); 879 (void) ldi_close(rds_transport_handle, FNDELAY, kcred); 880 rds_transport_handle = NULL; 881 } 882 rw_exit(&rds_transport_lock); 883 884 return (0); 885 } 886 887 888 int 889 rds_open_transport_driver() 890 { 891 int ret = 0; 892 893 rw_enter(&rds_transport_lock, RW_WRITER); 894 if (rds_transport_handle != NULL) { 895 /* 896 * Someone beat us to it. 897 */ 898 goto done; 899 } 900 901 if (ibt_hw_is_present() == 0) { 902 ret = ENODEV; 903 goto done; 904 } 905 906 if (rds_li == NULL) { 907 ret = EPROTONOSUPPORT; 908 goto done; 909 } 910 911 ret = ldi_open_by_name("/devices/ib/rdsib@0:rdsib", 912 FREAD | FWRITE, kcred, &rds_transport_handle, rds_li); 913 if (ret != 0) { 914 ret = EPROTONOSUPPORT; 915 rds_transport_handle = NULL; 916 goto done; 917 } 918 919 ret = rds_transport_ops->rds_transport_open_ib(); 920 if (ret != 0) { 921 (void) ldi_close(rds_transport_handle, FNDELAY, kcred); 922 rds_transport_handle = NULL; 923 } 924 done: 925 rw_exit(&rds_transport_lock); 926 return (ret); 927 } 928 929 static struct module_info info = { 930 0, "rds", 1, INFPSZ, 65536, 1024 931 }; 932 933 static struct qinit rinit = { 934 NULL, (pfi_t)rds_rsrv, rds_open, rds_close, NULL, &info 935 }; 936 937 static struct qinit winit = { 938 (pfi_t)rds_wput, NULL, rds_open, rds_close, NULL, &info, 939 NULL, rds_wrw, NULL, STRUIOT_STANDARD 940 }; 941 942 struct streamtab rdsinfo = { 943 &rinit, &winit, NULL, NULL 944 }; 945 946 DDI_DEFINE_STREAM_OPS(rds_devops, nulldev, nulldev, rds_attach, rds_detach, 947 nulldev, rds_info, RDS_DEVMTFLAGS, &RDS_STRTAB); 948 949 /* 950 * Module linkage information for the kernel. 951 */ 952 static struct modldrv modldrv = { 953 &mod_driverops, 954 RDS_DEVDESC, 955 &rds_devops 956 }; 957 958 static struct modlinkage modlinkage = { 959 MODREV_1, 960 &modldrv, 961 NULL 962 }; 963 964 int 965 _init(void) 966 { 967 int ret; 968 969 rds_init(); 970 971 ret = mod_install(&modlinkage); 972 if (ret != 0) 973 goto done; 974 ret = ldi_ident_from_mod(&modlinkage, &rds_li); 975 if (ret != 0) 976 rds_li = NULL; 977 done: 978 return (ret); 979 } 980 981 int 982 _fini(void) 983 { 984 int ret; 985 986 ret = mod_remove(&modlinkage); 987 if (ret != 0) { 988 return (ret); 989 } 990 991 rds_fini(); 992 993 ldi_ident_release(rds_li); 994 return (0); 995 } 996 997 int 998 _info(struct modinfo *modinfop) 999 { 1000 return (mod_info(&modlinkage, modinfop)); 1001 } 1002