1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/strsubr.h> 29 #include <sys/stropts.h> 30 #include <sys/strsun.h> 31 #include <sys/strlog.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/cmn_err.h> 38 #include <sys/proc.h> 39 #include <sys/suntpi.h> 40 #include <sys/policy.h> 41 #include <sys/zone.h> 42 #include <sys/disp.h> 43 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <netinet/in.h> 47 48 #include <inet/common.h> 49 #include <netinet/ip6.h> 50 #include <inet/ip.h> 51 #include <inet/ipclassifier.h> 52 #include <inet/proto_set.h> 53 #include <inet/nd.h> 54 #include <inet/optcom.h> 55 #include <netinet/ip_mroute.h> 56 #include <sys/isa_defs.h> 57 #include <net/route.h> 58 59 #include <inet/rts_impl.h> 60 #include <inet/ip_rts.h> 61 62 /* 63 * This is a transport provider for routing sockets. Downstream messages are 64 * wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry 65 * in the ip_ioctl_ftbl callout table to pass the routing socket data into IP. 66 * Upstream messages are generated for listeners of the routing socket as well 67 * as the message sender (unless they have turned off their end using 68 * SO_USELOOPBACK or shutdown(3n)). Upstream messages may also be generated 69 * asynchronously when: 70 * 71 * Interfaces are brought up or down. 72 * Addresses are assigned to interfaces. 73 * ICMP redirects are processed and a IRE_HOST/RTF_DYNAMIC is installed. 74 * No route is found while sending a packet. 75 * When TCP requests IP to remove an IRE_CACHE of a troubled destination. 76 * 77 * Since all we do is reformat the messages between routing socket and 78 * ioctl forms, no synchronization is necessary in this module; all 79 * the dirty work is done down in ip. 80 */ 81 82 /* Default structure copied into T_INFO_ACK messages */ 83 static struct T_info_ack rts_g_t_info_ack = { 84 T_INFO_ACK, 85 T_INFINITE, /* TSDU_size. Maximum size messages. */ 86 T_INVALID, /* ETSDU_size. No expedited data. */ 87 T_INVALID, /* CDATA_size. No connect data. */ 88 T_INVALID, /* DDATA_size. No disconnect data. */ 89 0, /* ADDR_size. */ 90 0, /* OPT_size - not initialized here */ 91 64 * 1024, /* TIDU_size. rts allows maximum size messages. */ 92 T_COTS, /* SERV_type. rts supports connection oriented. */ 93 TS_UNBND, /* CURRENT_state. This is set from rts_state. */ 94 (XPG4_1) /* PROVIDER_flag */ 95 }; 96 97 /* 98 * Table of ND variables supported by rts. These are loaded into rts_g_nd 99 * in rts_open. 100 * All of these are alterable, within the min/max values given, at run time. 101 */ 102 static rtsparam_t lcl_param_arr[] = { 103 /* min max value name */ 104 { 4096, 65536, 8192, "rts_xmit_hiwat"}, 105 { 0, 65536, 1024, "rts_xmit_lowat"}, 106 { 4096, 65536, 8192, "rts_recv_hiwat"}, 107 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, 108 }; 109 #define rtss_xmit_hiwat rtss_params[0].rts_param_value 110 #define rtss_xmit_lowat rtss_params[1].rts_param_value 111 #define rtss_recv_hiwat rtss_params[2].rts_param_value 112 #define rtss_max_buf rtss_params[3].rts_param_value 113 114 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 115 int sys_error); 116 static void rts_input(void *, mblk_t *, void *); 117 static mblk_t *rts_ioctl_alloc(mblk_t *data); 118 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 119 static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt); 120 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 121 cred_t *cr); 122 static void rts_rsrv(queue_t *q); 123 static void *rts_stack_init(netstackid_t stackid, netstack_t *ns); 124 static void rts_stack_fini(netstackid_t stackid, void *arg); 125 static void rts_wput(queue_t *q, mblk_t *mp); 126 static void rts_wput_iocdata(queue_t *q, mblk_t *mp); 127 static void rts_wput_other(queue_t *q, mblk_t *mp); 128 static int rts_wrw(queue_t *q, struiod_t *dp); 129 130 static int rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, 131 cred_t *credp); 132 static conn_t *rts_open(int flag, cred_t *credp); 133 134 static int rts_stream_close(queue_t *q); 135 static int rts_close(sock_lower_handle_t proto_handle, int flags, 136 cred_t *cr); 137 138 static struct module_info rts_mod_info = { 139 129, "rts", 1, INFPSZ, 512, 128 140 }; 141 142 static struct qinit rtsrinit = { 143 NULL, (pfi_t)rts_rsrv, rts_stream_open, rts_stream_close, NULL, 144 &rts_mod_info 145 }; 146 147 static struct qinit rtswinit = { 148 (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &rts_mod_info, 149 NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD 150 }; 151 152 struct streamtab rtsinfo = { 153 &rtsrinit, &rtswinit 154 }; 155 156 /* 157 * This routine allocates the necessary 158 * message blocks for IOCTL wrapping the 159 * user data. 160 */ 161 static mblk_t * 162 rts_ioctl_alloc(mblk_t *data) 163 { 164 mblk_t *mp = NULL; 165 mblk_t *mp1 = NULL; 166 ipllc_t *ipllc; 167 struct iocblk *ioc; 168 169 mp = allocb_tmpl(sizeof (ipllc_t), data); 170 if (mp == NULL) 171 return (NULL); 172 mp1 = allocb_tmpl(sizeof (struct iocblk), data); 173 if (mp1 == NULL) { 174 freeb(mp); 175 return (NULL); 176 } 177 178 ipllc = (ipllc_t *)mp->b_rptr; 179 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST; 180 ipllc->ipllc_name_offset = 0; 181 ipllc->ipllc_name_length = 0; 182 mp->b_wptr += sizeof (ipllc_t); 183 mp->b_cont = data; 184 185 ioc = (struct iocblk *)mp1->b_rptr; 186 ioc->ioc_cmd = IP_IOCTL; 187 ioc->ioc_error = 0; 188 ioc->ioc_cr = NULL; 189 ioc->ioc_count = msgdsize(mp); 190 mp1->b_wptr += sizeof (struct iocblk); 191 mp1->b_datap->db_type = M_IOCTL; 192 mp1->b_cont = mp; 193 194 return (mp1); 195 } 196 197 /* 198 * This routine closes rts stream, by disabling 199 * put/srv routines and freeing the this module 200 * internal datastructure. 201 */ 202 static int 203 rts_common_close(queue_t *q, conn_t *connp) 204 { 205 206 ASSERT(connp != NULL && IPCL_IS_RTS(connp)); 207 208 ip_rts_unregister(connp); 209 210 ip_quiesce_conn(connp); 211 212 if (!IPCL_IS_NONSTR(connp)) { 213 qprocsoff(q); 214 215 /* 216 * Now we are truly single threaded on this stream, and can 217 * delete the things hanging off the connp, and finally the 218 * connp. 219 * We removed this connp from the fanout list, it cannot be 220 * accessed thru the fanouts, and we already waited for the 221 * conn_ref to drop to 0. We are already in close, so 222 * there cannot be any other thread from the top. qprocsoff 223 * has completed, and service has completed or won't run in 224 * future. 225 */ 226 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 227 } else { 228 ip_free_helper_stream(connp); 229 } 230 ASSERT(connp->conn_ref == 1); 231 232 233 connp->conn_ref--; 234 ipcl_conn_destroy(connp); 235 236 return (0); 237 } 238 239 static int 240 rts_stream_close(queue_t *q) 241 { 242 conn_t *connp = Q_TO_CONN(q); 243 244 (void) rts_common_close(q, connp); 245 q->q_ptr = WR(q)->q_ptr = NULL; 246 return (0); 247 } 248 249 /* 250 * This is the open routine for routing socket. It allocates 251 * rts_t structure for the stream and tells IP that it is a routing socket. 252 */ 253 /* ARGSUSED */ 254 static int 255 rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 256 { 257 conn_t *connp; 258 dev_t conn_dev; 259 rts_stack_t *rtss; 260 rts_t *rts; 261 262 /* If the stream is already open, return immediately. */ 263 if (q->q_ptr != NULL) 264 return (0); 265 266 if (sflag == MODOPEN) 267 return (EINVAL); 268 269 270 /* 271 * Since RTS is not used so heavily, allocating from the small 272 * arena should be sufficient. 273 */ 274 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 275 return (EBUSY); 276 } 277 278 connp = rts_open(flag, credp); 279 ASSERT(connp != NULL); 280 281 282 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 283 284 rts = connp->conn_rts; 285 286 rw_enter(&rts->rts_rwlock, RW_WRITER); 287 connp->conn_dev = conn_dev; 288 connp->conn_minor_arena = ip_minor_arena_sa; 289 290 /* 291 * Initialize the rts_t structure for this stream. 292 */ 293 q->q_ptr = connp; 294 WR(q)->q_ptr = connp; 295 connp->conn_rq = q; 296 connp->conn_wq = WR(q); 297 298 rtss = rts->rts_rtss; 299 q->q_hiwat = rtss->rtss_recv_hiwat; 300 WR(q)->q_hiwat = rtss->rtss_xmit_hiwat; 301 WR(q)->q_lowat = rtss->rtss_xmit_lowat; 302 303 304 305 mutex_enter(&connp->conn_lock); 306 connp->conn_state_flags &= ~CONN_INCIPIENT; 307 mutex_exit(&connp->conn_lock); 308 309 qprocson(q); 310 rw_exit(&rts->rts_rwlock); 311 /* 312 * Indicate the down IP module that this is a routing socket 313 * client by sending an RTS IOCTL without any user data. Although 314 * this is just a notification message (without any real routing 315 * request), we pass in any credential for correctness sake. 316 */ 317 ip_rts_register(connp); 318 319 return (0); 320 } 321 322 /* ARGSUSED */ 323 static conn_t * 324 rts_open(int flag, cred_t *credp) 325 { 326 netstack_t *ns; 327 rts_stack_t *rtss; 328 rts_t *rts; 329 conn_t *connp; 330 zoneid_t zoneid; 331 332 ns = netstack_find_by_cred(credp); 333 ASSERT(ns != NULL); 334 rtss = ns->netstack_rts; 335 ASSERT(rtss != NULL); 336 337 /* 338 * For exclusive stacks we set the zoneid to zero 339 * to make RTS operate as if in the global zone. 340 */ 341 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 342 zoneid = GLOBAL_ZONEID; 343 else 344 zoneid = crgetzoneid(credp); 345 346 connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns); 347 rts = connp->conn_rts; 348 349 /* 350 * ipcl_conn_create did a netstack_hold. Undo the hold that was 351 * done by netstack_find_by_cred() 352 */ 353 netstack_rele(ns); 354 355 356 rw_enter(&rts->rts_rwlock, RW_WRITER); 357 ASSERT(connp->conn_rts == rts); 358 ASSERT(rts->rts_connp == connp); 359 360 connp->conn_zoneid = zoneid; 361 connp->conn_flow_cntrld = B_FALSE; 362 363 connp->conn_ulp_labeled = is_system_labeled(); 364 365 rts->rts_rtss = rtss; 366 rts->rts_xmit_hiwat = rtss->rtss_xmit_hiwat; 367 368 connp->conn_recv = rts_input; 369 crhold(credp); 370 connp->conn_cred = credp; 371 372 /* 373 * rts sockets start out as bound and connected 374 * For streams based sockets, socket state is set to 375 * SS_ISBOUND | SS_ISCONNECTED in so_strinit. 376 */ 377 rts->rts_state = TS_DATA_XFER; 378 rw_exit(&rts->rts_rwlock); 379 380 return (connp); 381 } 382 383 /* 384 * This routine creates a T_ERROR_ACK message and passes it upstream. 385 */ 386 static void 387 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 388 { 389 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 390 qreply(q, mp); 391 } 392 393 /* 394 * This routine creates a T_OK_ACK message and passes it upstream. 395 */ 396 static void 397 rts_ok_ack(queue_t *q, mblk_t *mp) 398 { 399 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL) 400 qreply(q, mp); 401 } 402 403 /* 404 * This routine is called by rts_wput to handle T_UNBIND_REQ messages. 405 */ 406 static void 407 rts_tpi_unbind(queue_t *q, mblk_t *mp) 408 { 409 conn_t *connp = Q_TO_CONN(q); 410 rts_t *rts = connp->conn_rts; 411 412 /* If a bind has not been done, we can't unbind. */ 413 if (rts->rts_state != TS_IDLE) { 414 rts_err_ack(q, mp, TOUTSTATE, 0); 415 return; 416 } 417 rts->rts_state = TS_UNBND; 418 rts_ok_ack(q, mp); 419 } 420 421 /* 422 * This routine is called to handle each 423 * O_T_BIND_REQ/T_BIND_REQ message passed to 424 * rts_wput. Note: This routine works with both 425 * O_T_BIND_REQ and T_BIND_REQ semantics. 426 */ 427 static void 428 rts_tpi_bind(queue_t *q, mblk_t *mp) 429 { 430 conn_t *connp = Q_TO_CONN(q); 431 rts_t *rts = connp->conn_rts; 432 mblk_t *mp1; 433 struct T_bind_req *tbr; 434 435 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 436 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 437 "rts_tpi_bind: bad data, %d", rts->rts_state); 438 rts_err_ack(q, mp, TBADADDR, 0); 439 return; 440 } 441 if (rts->rts_state != TS_UNBND) { 442 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 443 "rts_tpi_bind: bad state, %d", rts->rts_state); 444 rts_err_ack(q, mp, TOUTSTATE, 0); 445 return; 446 } 447 /* 448 * Reallocate the message to make sure we have enough room for an 449 * address and the protocol type. 450 */ 451 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin_t), 1); 452 if (mp1 == NULL) { 453 rts_err_ack(q, mp, TSYSERR, ENOMEM); 454 return; 455 } 456 mp = mp1; 457 tbr = (struct T_bind_req *)mp->b_rptr; 458 if (tbr->ADDR_length != 0) { 459 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 460 "rts_tpi_bind: bad ADDR_length %d", tbr->ADDR_length); 461 rts_err_ack(q, mp, TBADADDR, 0); 462 return; 463 } 464 /* Generic request */ 465 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req); 466 tbr->ADDR_length = 0; 467 tbr->PRIM_type = T_BIND_ACK; 468 rts->rts_state = TS_IDLE; 469 qreply(q, mp); 470 } 471 472 static void 473 rts_copy_info(struct T_info_ack *tap, rts_t *rts) 474 { 475 *tap = rts_g_t_info_ack; 476 tap->CURRENT_state = rts->rts_state; 477 tap->OPT_size = rts_max_optsize; 478 } 479 480 /* 481 * This routine responds to T_CAPABILITY_REQ messages. It is called by 482 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from 483 * rts_g_t_info_ack. The current state of the stream is copied from 484 * rts_state. 485 */ 486 static void 487 rts_capability_req(queue_t *q, mblk_t *mp) 488 { 489 conn_t *connp = Q_TO_CONN(q); 490 rts_t *rts = connp->conn_rts; 491 t_uscalar_t cap_bits1; 492 struct T_capability_ack *tcap; 493 494 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 495 496 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 497 mp->b_datap->db_type, T_CAPABILITY_ACK); 498 if (mp == NULL) 499 return; 500 501 tcap = (struct T_capability_ack *)mp->b_rptr; 502 tcap->CAP_bits1 = 0; 503 504 if (cap_bits1 & TC1_INFO) { 505 rts_copy_info(&tcap->INFO_ack, rts); 506 tcap->CAP_bits1 |= TC1_INFO; 507 } 508 509 qreply(q, mp); 510 } 511 512 /* 513 * This routine responds to T_INFO_REQ messages. It is called by rts_wput. 514 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack. 515 * The current state of the stream is copied from rts_state. 516 */ 517 static void 518 rts_info_req(queue_t *q, mblk_t *mp) 519 { 520 conn_t *connp = Q_TO_CONN(q); 521 rts_t *rts = connp->conn_rts; 522 523 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, 524 T_INFO_ACK); 525 if (mp == NULL) 526 return; 527 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts); 528 qreply(q, mp); 529 } 530 531 /* 532 * This routine gets default values of certain options whose default 533 * values are maintained by protcol specific code 534 */ 535 /* ARGSUSED */ 536 int 537 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 538 { 539 /* no default value processed by protocol specific code currently */ 540 return (-1); 541 } 542 543 544 static int 545 rts_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 546 { 547 rts_t *rts = connp->conn_rts; 548 int *i1 = (int *)ptr; 549 550 ASSERT(RW_READ_HELD(&rts->rts_rwlock)); 551 552 switch (level) { 553 case SOL_SOCKET: 554 switch (name) { 555 case SO_DEBUG: 556 *i1 = rts->rts_debug; 557 break; 558 case SO_REUSEADDR: 559 *i1 = rts->rts_reuseaddr; 560 break; 561 case SO_TYPE: 562 *i1 = SOCK_RAW; 563 break; 564 /* 565 * The following three items are available here, 566 * but are only meaningful to IP. 567 */ 568 case SO_DONTROUTE: 569 *i1 = rts->rts_dontroute; 570 break; 571 case SO_USELOOPBACK: 572 *i1 = rts->rts_useloopback; 573 break; 574 case SO_BROADCAST: 575 *i1 = rts->rts_broadcast; 576 break; 577 case SO_PROTOTYPE: 578 *i1 = rts->rts_proto; 579 break; 580 /* 581 * The following two items can be manipulated, 582 * but changing them should do nothing. 583 */ 584 case SO_SNDBUF: 585 ASSERT(rts->rts_xmit_hiwat <= INT_MAX); 586 *i1 = (int)(rts->rts_xmit_hiwat); 587 break; 588 case SO_RCVBUF: 589 ASSERT(rts->rts_recv_hiwat <= INT_MAX); 590 *i1 = (int)(rts->rts_recv_hiwat); 591 break; 592 case SO_DOMAIN: 593 *i1 = PF_ROUTE; 594 break; 595 default: 596 return (-1); 597 } 598 break; 599 case SOL_ROUTE: 600 switch (name) { 601 case RT_AWARE: 602 mutex_enter(&connp->conn_lock); 603 *i1 = connp->conn_rtaware; 604 mutex_exit(&connp->conn_lock); 605 break; 606 } 607 break; 608 default: 609 return (-1); 610 } 611 return ((int)sizeof (int)); 612 } 613 614 /* ARGSUSED */ 615 static int 616 rts_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 617 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 618 void *thisdg_attrs, boolean_t checkonly) 619 { 620 int *i1 = (int *)invalp; 621 rts_t *rts = connp->conn_rts; 622 rts_stack_t *rtss = rts->rts_rtss; 623 624 ASSERT(RW_WRITE_HELD(&rts->rts_rwlock)); 625 626 /* 627 * For rts, we should have no ancillary data sent down 628 * (rts_wput doesn't handle options). 629 */ 630 ASSERT(thisdg_attrs == NULL); 631 632 /* 633 * For fixed length options, no sanity check 634 * of passed in length is done. It is assumed *_optcom_req() 635 * routines do the right thing. 636 */ 637 638 switch (level) { 639 case SOL_SOCKET: 640 switch (name) { 641 case SO_REUSEADDR: 642 if (!checkonly) { 643 rts->rts_reuseaddr = *i1 ? 1 : 0; 644 connp->conn_reuseaddr = *i1 ? 1 : 0; 645 } 646 break; /* goto sizeof (int) option return */ 647 case SO_DEBUG: 648 if (!checkonly) 649 rts->rts_debug = *i1 ? 1 : 0; 650 break; /* goto sizeof (int) option return */ 651 /* 652 * The following three items are available here, 653 * but are only meaningful to IP. 654 */ 655 case SO_DONTROUTE: 656 if (!checkonly) { 657 rts->rts_dontroute = *i1 ? 1 : 0; 658 connp->conn_dontroute = *i1 ? 1 : 0; 659 } 660 break; /* goto sizeof (int) option return */ 661 case SO_USELOOPBACK: 662 if (!checkonly) { 663 rts->rts_useloopback = *i1 ? 1 : 0; 664 connp->conn_loopback = *i1 ? 1 : 0; 665 } 666 break; /* goto sizeof (int) option return */ 667 case SO_BROADCAST: 668 if (!checkonly) { 669 rts->rts_broadcast = *i1 ? 1 : 0; 670 connp->conn_broadcast = *i1 ? 1 : 0; 671 } 672 break; /* goto sizeof (int) option return */ 673 case SO_PROTOTYPE: 674 /* 675 * Routing socket applications that call socket() with 676 * a third argument can filter which messages will be 677 * sent upstream thanks to sockfs. so_socket() sends 678 * down the SO_PROTOTYPE and rts_queue_input() 679 * implements the filtering. 680 */ 681 if (*i1 != AF_INET && *i1 != AF_INET6) 682 return (EPROTONOSUPPORT); 683 if (!checkonly) { 684 rts->rts_proto = *i1; 685 connp->conn_proto = *i1; 686 } 687 break; /* goto sizeof (int) option return */ 688 /* 689 * The following two items can be manipulated, 690 * but changing them should do nothing. 691 */ 692 case SO_SNDBUF: 693 if (*i1 > rtss->rtss_max_buf) { 694 *outlenp = 0; 695 return (ENOBUFS); 696 } 697 if (!checkonly) { 698 rts->rts_xmit_hiwat = *i1; 699 if (!IPCL_IS_NONSTR(connp)) 700 connp->conn_wq->q_hiwat = *i1; 701 } 702 break; /* goto sizeof (int) option return */ 703 case SO_RCVBUF: 704 if (*i1 > rtss->rtss_max_buf) { 705 *outlenp = 0; 706 return (ENOBUFS); 707 } 708 if (!checkonly) { 709 rts->rts_recv_hiwat = *i1; 710 rw_exit(&rts->rts_rwlock); 711 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 712 *i1); 713 rw_enter(&rts->rts_rwlock, RW_WRITER); 714 } 715 716 break; /* goto sizeof (int) option return */ 717 case SO_RCVTIMEO: 718 case SO_SNDTIMEO: 719 /* 720 * Pass these two options in order for third part 721 * protocol usage. Here just return directly. 722 */ 723 return (0); 724 default: 725 *outlenp = 0; 726 return (EINVAL); 727 } 728 break; 729 case SOL_ROUTE: 730 switch (name) { 731 case RT_AWARE: 732 if (!checkonly) { 733 mutex_enter(&connp->conn_lock); 734 connp->conn_rtaware = *i1; 735 mutex_exit(&connp->conn_lock); 736 } 737 break; /* goto sizeof (int) option return */ 738 default: 739 *outlenp = 0; 740 return (EINVAL); 741 } 742 break; 743 default: 744 *outlenp = 0; 745 return (EINVAL); 746 } 747 /* 748 * Common case of return from an option that is sizeof (int) 749 */ 750 if (invalp != outvalp) { 751 /* don't trust bcopy for identical src/dst */ 752 (void) bcopy(invalp, outvalp, inlen); 753 } 754 *outlenp = (t_uscalar_t)sizeof (int); 755 return (0); 756 } 757 758 static int 759 rts_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 760 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 761 void *thisdg_attrs, cred_t *cr) 762 { 763 boolean_t checkonly = B_FALSE; 764 765 if (optset_context) { 766 switch (optset_context) { 767 case SETFN_OPTCOM_CHECKONLY: 768 checkonly = B_TRUE; 769 /* 770 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 771 * inlen != 0 implies value supplied and 772 * we have to "pretend" to set it. 773 * inlen == 0 implies that there is no value part 774 * in T_CHECK request and just validation 775 * done elsewhere should be enough, we just return here. 776 */ 777 if (inlen == 0) { 778 *outlenp = 0; 779 return (0); 780 } 781 break; 782 case SETFN_OPTCOM_NEGOTIATE: 783 checkonly = B_FALSE; 784 break; 785 case SETFN_UD_NEGOTIATE: 786 case SETFN_CONN_NEGOTIATE: 787 checkonly = B_FALSE; 788 /* 789 * Negotiating local and "association-related" options 790 * through T_UNITDATA_REQ or T_CONN_{REQ,CON} 791 * Not allowed in this module. 792 */ 793 return (EINVAL); 794 default: 795 /* 796 * We should never get here 797 */ 798 *outlenp = 0; 799 return (EINVAL); 800 } 801 802 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 803 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 804 805 } 806 return (rts_do_opt_set(connp, level, name, inlen, invalp, outlenp, 807 outvalp, cr, thisdg_attrs, checkonly)); 808 809 } 810 811 /* 812 * This routine retrieves the current status of socket options. 813 * It returns the size of the option retrieved. 814 */ 815 int 816 rts_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 817 { 818 rts_t *rts; 819 int err; 820 821 rts = Q_TO_RTS(q); 822 rw_enter(&rts->rts_rwlock, RW_READER); 823 err = rts_opt_get(Q_TO_CONN(q), level, name, ptr); 824 rw_exit(&rts->rts_rwlock); 825 return (err); 826 } 827 828 /* 829 * This routine sets socket options. 830 */ 831 /*ARGSUSED*/ 832 int 833 rts_tpi_opt_set(queue_t *q, uint_t optset_context, int level, 834 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 835 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 836 { 837 conn_t *connp = Q_TO_CONN(q); 838 int error; 839 rts_t *rts = connp->conn_rts; 840 841 842 rw_enter(&rts->rts_rwlock, RW_WRITER); 843 error = rts_opt_set(connp, optset_context, level, name, inlen, invalp, 844 outlenp, outvalp, thisdg_attrs, cr); 845 rw_exit(&rts->rts_rwlock); 846 return (error); 847 } 848 849 /* 850 * This routine retrieves the value of an ND variable in a rtsparam_t 851 * structure. It is called through nd_getset when a user reads the 852 * variable. 853 */ 854 /* ARGSUSED */ 855 static int 856 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 857 { 858 rtsparam_t *rtspa = (rtsparam_t *)cp; 859 860 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value); 861 return (0); 862 } 863 864 /* 865 * Walk through the param array specified registering each element with the 866 * named dispatch (ND) handler. 867 */ 868 static boolean_t 869 rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt) 870 { 871 for (; cnt-- > 0; rtspa++) { 872 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { 873 if (!nd_load(ndp, rtspa->rts_param_name, 874 rts_param_get, rts_param_set, (caddr_t)rtspa)) { 875 nd_free(ndp); 876 return (B_FALSE); 877 } 878 } 879 } 880 return (B_TRUE); 881 } 882 883 /* This routine sets an ND variable in a rtsparam_t structure. */ 884 /* ARGSUSED */ 885 static int 886 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 887 { 888 ulong_t new_value; 889 rtsparam_t *rtspa = (rtsparam_t *)cp; 890 891 /* 892 * Fail the request if the new value does not lie within the 893 * required bounds. 894 */ 895 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 896 new_value < rtspa->rts_param_min || 897 new_value > rtspa->rts_param_max) { 898 return (EINVAL); 899 } 900 901 /* Set the new value */ 902 rtspa->rts_param_value = new_value; 903 return (0); 904 } 905 906 /* 907 * Empty rsrv routine which is used by rts_input to cause a wakeup 908 * of a thread in qwait. 909 */ 910 /*ARGSUSED*/ 911 static void 912 rts_rsrv(queue_t *q) 913 { 914 } 915 916 /* 917 * This routine handles synchronous messages passed downstream. It either 918 * consumes the message or passes it downstream; it never queues a 919 * a message. The data messages that go down are wrapped in an IOCTL 920 * message. 921 * 922 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that 923 * it can return an immediate error (such as ENETUNREACH when adding a route). 924 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only 925 * one M_IOCTL outstanding at any given time. 926 */ 927 static int 928 rts_wrw(queue_t *q, struiod_t *dp) 929 { 930 mblk_t *mp = dp->d_mp; 931 mblk_t *mp1; 932 int error; 933 rt_msghdr_t *rtm; 934 conn_t *connp = Q_TO_CONN(q); 935 rts_t *rts = connp->conn_rts; 936 937 while (rts->rts_flag & RTS_WRW_PENDING) { 938 if (qwait_rw(q)) { 939 rts->rts_error = EINTR; 940 goto err_ret; 941 } 942 } 943 rts->rts_flag |= RTS_WRW_PENDING; 944 945 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 946 /* 947 * Uio error of some sort, so just return the error. 948 */ 949 rts->rts_error = error; 950 goto err_ret; 951 } 952 /* 953 * Pass the mblk (chain) onto wput(). 954 */ 955 dp->d_mp = 0; 956 957 switch (mp->b_datap->db_type) { 958 case M_PROTO: 959 case M_PCPROTO: 960 /* Expedite other than T_DATA_REQ to below the switch */ 961 if (((mp->b_wptr - mp->b_rptr) != 962 sizeof (struct T_data_req)) || 963 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ)) 964 break; 965 if ((mp1 = mp->b_cont) == NULL) { 966 rts->rts_error = EINVAL; 967 freemsg(mp); 968 goto err_ret; 969 } 970 freeb(mp); 971 mp = mp1; 972 /* FALLTHRU */ 973 case M_DATA: 974 /* 975 * The semantics of the routing socket is such that the rtm_pid 976 * field is automatically filled in during requests with the 977 * current process' pid. We do this here (where we still have 978 * user context) after checking we have at least a message the 979 * size of a routing message header. 980 */ 981 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 982 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 983 rts->rts_error = EINVAL; 984 freemsg(mp); 985 goto err_ret; 986 } 987 } 988 rtm = (rt_msghdr_t *)mp->b_rptr; 989 rtm->rtm_pid = curproc->p_pid; 990 break; 991 default: 992 break; 993 } 994 rts->rts_flag |= RTS_WPUT_PENDING; 995 rts_wput(q, mp); 996 while (rts->rts_flag & RTS_WPUT_PENDING) 997 if (qwait_rw(q)) { 998 /* RTS_WPUT_PENDING will be cleared below */ 999 rts->rts_error = EINTR; 1000 break; 1001 } 1002 err_ret: 1003 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING); 1004 return (rts->rts_error); 1005 } 1006 1007 /* 1008 * This routine handles all messages passed downstream. It either 1009 * consumes the message or passes it downstream; it never queues a 1010 * a message. The data messages that go down are wrapped in an IOCTL 1011 * message. 1012 * 1013 * FIXME? Should we call IP rts_request directly? Could punt on returning 1014 * errno in the case when it defers processing due to 1015 * IPIF_CHANGING/ILL_CHANGING??? 1016 */ 1017 static void 1018 rts_wput(queue_t *q, mblk_t *mp) 1019 { 1020 uchar_t *rptr = mp->b_rptr; 1021 mblk_t *mp1; 1022 conn_t *connp = Q_TO_CONN(q); 1023 rts_t *rts = connp->conn_rts; 1024 1025 switch (mp->b_datap->db_type) { 1026 case M_DATA: 1027 break; 1028 case M_PROTO: 1029 case M_PCPROTO: 1030 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) { 1031 /* Expedite valid T_DATA_REQ to below the switch */ 1032 if (((union T_primitives *)rptr)->type == T_DATA_REQ) { 1033 mp1 = mp->b_cont; 1034 freeb(mp); 1035 if (mp1 == NULL) 1036 return; 1037 mp = mp1; 1038 break; 1039 } 1040 } 1041 /* FALLTHRU */ 1042 default: 1043 rts_wput_other(q, mp); 1044 return; 1045 } 1046 1047 1048 ASSERT(msg_getcred(mp, NULL) != NULL); 1049 1050 mp1 = rts_ioctl_alloc(mp); 1051 if (mp1 == NULL) { 1052 ASSERT(rts != NULL); 1053 freemsg(mp); 1054 if (rts->rts_flag & RTS_WPUT_PENDING) { 1055 rts->rts_error = ENOMEM; 1056 rts->rts_flag &= ~RTS_WPUT_PENDING; 1057 } 1058 return; 1059 } 1060 ip_output(connp, mp1, q, IP_WPUT); 1061 } 1062 1063 1064 /* 1065 * Handles all the control message, if it 1066 * can not understand it, it will 1067 * pass down stream. 1068 */ 1069 static void 1070 rts_wput_other(queue_t *q, mblk_t *mp) 1071 { 1072 conn_t *connp = Q_TO_CONN(q); 1073 rts_t *rts = connp->conn_rts; 1074 uchar_t *rptr = mp->b_rptr; 1075 struct iocblk *iocp; 1076 cred_t *cr; 1077 rts_stack_t *rtss; 1078 1079 rtss = rts->rts_rtss; 1080 1081 switch (mp->b_datap->db_type) { 1082 case M_PROTO: 1083 case M_PCPROTO: 1084 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 1085 /* 1086 * If the message does not contain a PRIM_type, 1087 * throw it away. 1088 */ 1089 freemsg(mp); 1090 return; 1091 } 1092 switch (((union T_primitives *)rptr)->type) { 1093 case T_BIND_REQ: 1094 case O_T_BIND_REQ: 1095 rts_tpi_bind(q, mp); 1096 return; 1097 case T_UNBIND_REQ: 1098 rts_tpi_unbind(q, mp); 1099 return; 1100 case T_CAPABILITY_REQ: 1101 rts_capability_req(q, mp); 1102 return; 1103 case T_INFO_REQ: 1104 rts_info_req(q, mp); 1105 return; 1106 case T_SVR4_OPTMGMT_REQ: 1107 case T_OPTMGMT_REQ: 1108 /* 1109 * All Solaris components should pass a db_credp 1110 * for this TPI message, hence we ASSERT. 1111 * But in case there is some other M_PROTO that looks 1112 * like a TPI message sent by some other kernel 1113 * component, we check and return an error. 1114 */ 1115 cr = msg_getcred(mp, NULL); 1116 ASSERT(cr != NULL); 1117 if (cr == NULL) { 1118 rts_err_ack(q, mp, TSYSERR, EINVAL); 1119 return; 1120 } 1121 if (((union T_primitives *)rptr)->type == 1122 T_SVR4_OPTMGMT_REQ) { 1123 (void) svr4_optcom_req(q, mp, cr, 1124 &rts_opt_obj, B_TRUE); 1125 } else { 1126 (void) tpi_optcom_req(q, mp, cr, 1127 &rts_opt_obj, B_TRUE); 1128 } 1129 return; 1130 case O_T_CONN_RES: 1131 case T_CONN_RES: 1132 case T_DISCON_REQ: 1133 /* Not supported by rts. */ 1134 rts_err_ack(q, mp, TNOTSUPPORT, 0); 1135 return; 1136 case T_DATA_REQ: 1137 case T_EXDATA_REQ: 1138 case T_ORDREL_REQ: 1139 /* Illegal for rts. */ 1140 freemsg(mp); 1141 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 1142 return; 1143 1144 default: 1145 break; 1146 } 1147 break; 1148 case M_IOCTL: 1149 iocp = (struct iocblk *)mp->b_rptr; 1150 switch (iocp->ioc_cmd) { 1151 case ND_SET: 1152 case ND_GET: 1153 if (nd_getset(q, rtss->rtss_g_nd, mp)) { 1154 qreply(q, mp); 1155 return; 1156 } 1157 break; 1158 case TI_GETPEERNAME: 1159 mi_copyin(q, mp, NULL, 1160 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 1161 return; 1162 default: 1163 break; 1164 } 1165 case M_IOCDATA: 1166 rts_wput_iocdata(q, mp); 1167 return; 1168 default: 1169 break; 1170 } 1171 ip_output(connp, mp, q, IP_WPUT); 1172 } 1173 1174 /* 1175 * Called by rts_wput_other to handle all M_IOCDATA messages. 1176 */ 1177 static void 1178 rts_wput_iocdata(queue_t *q, mblk_t *mp) 1179 { 1180 conn_t *connp = Q_TO_CONN(q); 1181 struct sockaddr *rtsaddr; 1182 mblk_t *mp1; 1183 STRUCT_HANDLE(strbuf, sb); 1184 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 1185 1186 /* Make sure it is one of ours. */ 1187 switch (iocp->ioc_cmd) { 1188 case TI_GETPEERNAME: 1189 break; 1190 default: 1191 ip_output(connp, mp, q, IP_WPUT); 1192 return; 1193 } 1194 switch (mi_copy_state(q, mp, &mp1)) { 1195 case -1: 1196 return; 1197 case MI_COPY_CASE(MI_COPY_IN, 1): 1198 break; 1199 case MI_COPY_CASE(MI_COPY_OUT, 1): 1200 /* Copy out the strbuf. */ 1201 mi_copyout(q, mp); 1202 return; 1203 case MI_COPY_CASE(MI_COPY_OUT, 2): 1204 /* All done. */ 1205 mi_copy_done(q, mp, 0); 1206 return; 1207 default: 1208 mi_copy_done(q, mp, EPROTO); 1209 return; 1210 } 1211 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 1212 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) { 1213 mi_copy_done(q, mp, EINVAL); 1214 return; 1215 } 1216 switch (iocp->ioc_cmd) { 1217 case TI_GETPEERNAME: 1218 break; 1219 default: 1220 mi_copy_done(q, mp, EPROTO); 1221 return; 1222 } 1223 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t), 1224 B_TRUE); 1225 if (mp1 == NULL) 1226 return; 1227 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 1228 rtsaddr = (struct sockaddr *)mp1->b_rptr; 1229 mp1->b_wptr = (uchar_t *)&rtsaddr[1]; 1230 bzero(rtsaddr, sizeof (struct sockaddr)); 1231 rtsaddr->sa_family = AF_ROUTE; 1232 /* Copy out the address */ 1233 mi_copyout(q, mp); 1234 } 1235 1236 /*ARGSUSED2*/ 1237 static void 1238 rts_input(void *arg1, mblk_t *mp, void *arg2) 1239 { 1240 conn_t *connp = (conn_t *)arg1; 1241 rts_t *rts = connp->conn_rts; 1242 struct iocblk *iocp; 1243 mblk_t *mp1; 1244 struct T_data_ind *tdi; 1245 int error; 1246 1247 switch (mp->b_datap->db_type) { 1248 case M_IOCACK: 1249 case M_IOCNAK: 1250 iocp = (struct iocblk *)mp->b_rptr; 1251 if (IPCL_IS_NONSTR(connp)) { 1252 ASSERT(rts->rts_flag & (RTS_REQ_PENDING)); 1253 mutex_enter(&rts->rts_send_mutex); 1254 rts->rts_flag &= ~RTS_REQ_INPROG; 1255 rts->rts_error = iocp->ioc_error; 1256 cv_signal(&rts->rts_io_cv); 1257 mutex_exit(&rts->rts_send_mutex); 1258 freemsg(mp); 1259 return; 1260 } else { 1261 if (rts->rts_flag & (RTS_WPUT_PENDING)) { 1262 rts->rts_flag &= ~RTS_WPUT_PENDING; 1263 rts->rts_error = iocp->ioc_error; 1264 /* 1265 * Tell rts_wvw/qwait that we are done. 1266 * Note: there is no qwait_wakeup() we can use. 1267 */ 1268 qenable(connp->conn_rq); 1269 freemsg(mp); 1270 return; 1271 } 1272 } 1273 break; 1274 case M_DATA: 1275 /* 1276 * Prepend T_DATA_IND to prevent the stream head from 1277 * consolidating multiple messages together. 1278 * If the allocation fails just send up the M_DATA. 1279 */ 1280 mp1 = allocb(sizeof (*tdi), BPRI_MED); 1281 if (mp1 != NULL) { 1282 mp1->b_cont = mp; 1283 mp = mp1; 1284 1285 mp->b_datap->db_type = M_PROTO; 1286 mp->b_wptr += sizeof (*tdi); 1287 tdi = (struct T_data_ind *)mp->b_rptr; 1288 tdi->PRIM_type = T_DATA_IND; 1289 tdi->MORE_flag = 0; 1290 } 1291 break; 1292 default: 1293 break; 1294 } 1295 1296 if (IPCL_IS_NONSTR(connp)) { 1297 if ((*connp->conn_upcalls->su_recv) 1298 (connp->conn_upper_handle, mp, msgdsize(mp), 0, 1299 &error, NULL) < 0) { 1300 ASSERT(error == ENOSPC); 1301 /* 1302 * Let's confirm hoding the lock that 1303 * we are out of recv space. 1304 */ 1305 mutex_enter(&rts->rts_recv_mutex); 1306 if ((*connp->conn_upcalls->su_recv) 1307 (connp->conn_upper_handle, NULL, 0, 0, 1308 &error, NULL) < 0) { 1309 ASSERT(error == ENOSPC); 1310 connp->conn_flow_cntrld = B_TRUE; 1311 } 1312 mutex_exit(&rts->rts_recv_mutex); 1313 } 1314 } else { 1315 putnext(connp->conn_rq, mp); 1316 } 1317 } 1318 1319 1320 void 1321 rts_ddi_g_init(void) 1322 { 1323 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, 1324 rts_opt_obj.odb_opt_arr_cnt); 1325 1326 /* 1327 * We want to be informed each time a stack is created or 1328 * destroyed in the kernel, so we can maintain the 1329 * set of rts_stack_t's. 1330 */ 1331 netstack_register(NS_RTS, rts_stack_init, NULL, rts_stack_fini); 1332 } 1333 1334 void 1335 rts_ddi_g_destroy(void) 1336 { 1337 netstack_unregister(NS_RTS); 1338 } 1339 1340 #define INET_NAME "ip" 1341 1342 /* 1343 * Initialize the RTS stack instance. 1344 */ 1345 /* ARGSUSED */ 1346 static void * 1347 rts_stack_init(netstackid_t stackid, netstack_t *ns) 1348 { 1349 rts_stack_t *rtss; 1350 rtsparam_t *pa; 1351 int error = 0; 1352 major_t major; 1353 1354 rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP); 1355 rtss->rtss_netstack = ns; 1356 1357 pa = (rtsparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 1358 rtss->rtss_params = pa; 1359 bcopy(lcl_param_arr, rtss->rtss_params, sizeof (lcl_param_arr)); 1360 1361 (void) rts_param_register(&rtss->rtss_g_nd, 1362 rtss->rtss_params, A_CNT(lcl_param_arr)); 1363 1364 major = mod_name_to_major(INET_NAME); 1365 error = ldi_ident_from_major(major, &rtss->rtss_ldi_ident); 1366 ASSERT(error == 0); 1367 return (rtss); 1368 } 1369 1370 /* 1371 * Free the RTS stack instance. 1372 */ 1373 /* ARGSUSED */ 1374 static void 1375 rts_stack_fini(netstackid_t stackid, void *arg) 1376 { 1377 rts_stack_t *rtss = (rts_stack_t *)arg; 1378 1379 nd_free(&rtss->rtss_g_nd); 1380 kmem_free(rtss->rtss_params, sizeof (lcl_param_arr)); 1381 rtss->rtss_params = NULL; 1382 ldi_ident_release(rtss->rtss_ldi_ident); 1383 kmem_free(rtss, sizeof (*rtss)); 1384 } 1385 1386 /* ARGSUSED */ 1387 int 1388 rts_accept(sock_lower_handle_t lproto_handle, 1389 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 1390 cred_t *cr) 1391 { 1392 return (EINVAL); 1393 } 1394 1395 /* ARGSUSED */ 1396 static int 1397 rts_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 1398 socklen_t len, cred_t *cr) 1399 { 1400 /* 1401 * rebind not allowed 1402 */ 1403 return (EINVAL); 1404 } 1405 1406 /* ARGSUSED */ 1407 int 1408 rts_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 1409 { 1410 return (EINVAL); 1411 } 1412 1413 /* ARGSUSED */ 1414 int 1415 rts_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 1416 socklen_t len, sock_connid_t *id, cred_t *cr) 1417 { 1418 /* 1419 * rts sockets start out as bound and connected 1420 */ 1421 *id = 0; 1422 return (EISCONN); 1423 } 1424 1425 /* ARGSUSED */ 1426 int 1427 rts_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 1428 socklen_t *addrlen, cred_t *cr) 1429 { 1430 conn_t *connp = (conn_t *)proto_handle; 1431 rts_t *rts = connp->conn_rts; 1432 1433 ASSERT(rts != NULL); 1434 1435 bzero(addr, sizeof (struct sockaddr)); 1436 addr->sa_family = AF_ROUTE; 1437 *addrlen = sizeof (struct sockaddr); 1438 1439 return (0); 1440 } 1441 1442 /* ARGSUSED */ 1443 int 1444 rts_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 1445 socklen_t *addrlen, cred_t *cr) 1446 { 1447 return (EOPNOTSUPP); 1448 } 1449 1450 static int 1451 rts_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 1452 void *optvalp, socklen_t *optlen, cred_t *cr) 1453 { 1454 conn_t *connp = (conn_t *)proto_handle; 1455 rts_t *rts = connp->conn_rts; 1456 int error; 1457 t_uscalar_t max_optbuf_len; 1458 void *optvalp_buf; 1459 int len; 1460 1461 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 1462 rts_opt_obj.odb_opt_des_arr, 1463 rts_opt_obj.odb_opt_arr_cnt, 1464 rts_opt_obj.odb_topmost_tpiprovider, 1465 B_FALSE, B_TRUE, cr); 1466 if (error != 0) { 1467 if (error < 0) 1468 error = proto_tlitosyserr(-error); 1469 return (error); 1470 } 1471 1472 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 1473 rw_enter(&rts->rts_rwlock, RW_READER); 1474 len = rts_opt_get(connp, level, option_name, optvalp_buf); 1475 rw_exit(&rts->rts_rwlock); 1476 1477 if (len < 0) { 1478 /* 1479 * Pass on to IP 1480 */ 1481 error = ip_get_options(connp, level, option_name, 1482 optvalp, optlen, cr); 1483 } else { 1484 /* 1485 * update optlen and copy option value 1486 */ 1487 t_uscalar_t size = MIN(len, *optlen); 1488 bcopy(optvalp_buf, optvalp, size); 1489 bcopy(&size, optlen, sizeof (size)); 1490 error = 0; 1491 } 1492 1493 kmem_free(optvalp_buf, max_optbuf_len); 1494 return (error); 1495 } 1496 1497 static int 1498 rts_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 1499 const void *optvalp, socklen_t optlen, cred_t *cr) 1500 { 1501 conn_t *connp = (conn_t *)proto_handle; 1502 rts_t *rts = connp->conn_rts; 1503 int error; 1504 1505 error = proto_opt_check(level, option_name, optlen, NULL, 1506 rts_opt_obj.odb_opt_des_arr, 1507 rts_opt_obj.odb_opt_arr_cnt, 1508 rts_opt_obj.odb_topmost_tpiprovider, 1509 B_TRUE, B_FALSE, cr); 1510 1511 if (error != 0) { 1512 if (error < 0) 1513 error = proto_tlitosyserr(-error); 1514 return (error); 1515 } 1516 1517 rw_enter(&rts->rts_rwlock, RW_WRITER); 1518 error = rts_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 1519 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 1520 NULL, cr); 1521 rw_exit(&rts->rts_rwlock); 1522 1523 ASSERT(error >= 0); 1524 1525 return (error); 1526 } 1527 1528 /* ARGSUSED */ 1529 static int 1530 rts_send(sock_lower_handle_t proto_handle, mblk_t *mp, 1531 struct nmsghdr *msg, cred_t *cr) 1532 { 1533 mblk_t *mp1; 1534 conn_t *connp = (conn_t *)proto_handle; 1535 rts_t *rts = connp->conn_rts; 1536 rt_msghdr_t *rtm; 1537 int error; 1538 1539 ASSERT(DB_TYPE(mp) == M_DATA); 1540 /* 1541 * The semantics of the routing socket is such that the rtm_pid 1542 * field is automatically filled in during requests with the 1543 * current process' pid. We do this here (where we still have 1544 * user context) after checking we have at least a message the 1545 * size of a routing message header. 1546 */ 1547 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 1548 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 1549 rts->rts_error = EINVAL; 1550 freemsg(mp); 1551 return (rts->rts_error); 1552 } 1553 } 1554 rtm = (rt_msghdr_t *)mp->b_rptr; 1555 rtm->rtm_pid = curproc->p_pid; 1556 1557 mp1 = rts_ioctl_alloc(mp); 1558 if (mp1 == NULL) { 1559 ASSERT(rts != NULL); 1560 freemsg(mp); 1561 return (ENOMEM); 1562 } 1563 1564 /* 1565 * Allow only one outstanding request(ioctl) at any given time 1566 */ 1567 mutex_enter(&rts->rts_send_mutex); 1568 while (rts->rts_flag & RTS_REQ_PENDING) { 1569 int ret; 1570 1571 ret = cv_wait_sig(&rts->rts_send_cv, &rts->rts_send_mutex); 1572 if (ret <= 0) { 1573 mutex_exit(&rts->rts_send_mutex); 1574 freemsg(mp); 1575 return (EINTR); 1576 } 1577 } 1578 1579 rts->rts_flag |= RTS_REQ_PENDING; 1580 1581 rts->rts_flag |= RTS_REQ_INPROG; 1582 1583 mutex_exit(&rts->rts_send_mutex); 1584 1585 CONN_INC_REF(connp); 1586 1587 error = ip_rts_request_common(rts->rts_connp->conn_wq, mp1, connp, cr); 1588 1589 mutex_enter(&rts->rts_send_mutex); 1590 if (error == EINPROGRESS) { 1591 ASSERT(rts->rts_flag & RTS_REQ_INPROG); 1592 if (rts->rts_flag & RTS_REQ_INPROG) { 1593 /* 1594 * Once the request has been issued we wait for 1595 * completion 1596 */ 1597 cv_wait(&rts->rts_io_cv, &rts->rts_send_mutex); 1598 error = rts->rts_error; 1599 } 1600 } 1601 1602 ASSERT((error != 0) || !(rts->rts_flag & RTS_REQ_INPROG)); 1603 ASSERT(MUTEX_HELD(&rts->rts_send_mutex)); 1604 1605 rts->rts_flag &= ~(RTS_REQ_PENDING | RTS_REQ_INPROG); 1606 cv_signal(&rts->rts_send_cv); 1607 mutex_exit(&rts->rts_send_mutex); 1608 return (error); 1609 } 1610 1611 /* ARGSUSED */ 1612 sock_lower_handle_t 1613 rts_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 1614 uint_t *smodep, int *errorp, int flags, cred_t *credp) 1615 { 1616 conn_t *connp; 1617 rts_t *rts; 1618 rts_stack_t *rtss; 1619 1620 if (family != AF_ROUTE || type != SOCK_RAW || 1621 (proto != 0 && proto != AF_INET && proto != AF_INET6)) { 1622 *errorp = EPROTONOSUPPORT; 1623 return (NULL); 1624 } 1625 1626 connp = rts_open(flags, credp); 1627 ASSERT(connp != NULL); 1628 connp->conn_flags |= IPCL_NONSTR; 1629 1630 rts = connp->conn_rts; 1631 rtss = rts->rts_rtss; 1632 1633 rts->rts_xmit_hiwat = rtss->rtss_xmit_hiwat; 1634 rts->rts_xmit_lowat = rtss->rtss_xmit_lowat; 1635 rts->rts_recv_hiwat = rtss->rtss_recv_hiwat; 1636 rts->rts_recv_lowat = rts_mod_info.mi_lowat; 1637 1638 ASSERT(rtss->rtss_ldi_ident != NULL); 1639 1640 *errorp = ip_create_helper_stream(connp, rtss->rtss_ldi_ident); 1641 if (*errorp != 0) { 1642 #ifdef DEBUG 1643 cmn_err(CE_CONT, "rts_create: create of IP helper stream" 1644 " failed\n"); 1645 #endif 1646 (void) rts_close((sock_lower_handle_t)connp, 0, credp); 1647 return (NULL); 1648 } 1649 1650 mutex_enter(&connp->conn_lock); 1651 connp->conn_state_flags &= ~CONN_INCIPIENT; 1652 mutex_exit(&connp->conn_lock); 1653 1654 *errorp = 0; 1655 *smodep = SM_ATOMIC; 1656 *sock_downcalls = &sock_rts_downcalls; 1657 return ((sock_lower_handle_t)connp); 1658 } 1659 1660 /* ARGSUSED */ 1661 void 1662 rts_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 1663 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 1664 { 1665 conn_t *connp = (conn_t *)proto_handle; 1666 rts_t *rts = connp->conn_rts; 1667 rts_stack_t *rtss = rts->rts_rtss; 1668 struct sock_proto_props sopp; 1669 1670 connp->conn_upcalls = sock_upcalls; 1671 connp->conn_upper_handle = sock_handle; 1672 1673 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 1674 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 1675 sopp.sopp_wroff = 0; 1676 sopp.sopp_rxhiwat = rtss->rtss_recv_hiwat; 1677 sopp.sopp_rxlowat = rts_mod_info.mi_lowat; 1678 sopp.sopp_maxblk = INFPSZ; 1679 sopp.sopp_maxpsz = rts_mod_info.mi_maxpsz; 1680 sopp.sopp_minpsz = (rts_mod_info.mi_minpsz == 1) ? 0 : 1681 rts_mod_info.mi_minpsz; 1682 1683 (*connp->conn_upcalls->su_set_proto_props) 1684 (connp->conn_upper_handle, &sopp); 1685 1686 /* 1687 * We treat it as already connected for routing socket. 1688 */ 1689 (*connp->conn_upcalls->su_connected) 1690 (connp->conn_upper_handle, 0, NULL, -1); 1691 1692 /* 1693 * Indicate the down IP module that this is a routing socket 1694 * client by sending an RTS IOCTL without any user data. Although 1695 * this is just a notification message (without any real routing 1696 * request), we pass in any credential for correctness sake. 1697 */ 1698 ip_rts_register(connp); 1699 } 1700 1701 /* ARGSUSED */ 1702 int 1703 rts_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 1704 { 1705 conn_t *connp = (conn_t *)proto_handle; 1706 1707 ASSERT(connp != NULL && IPCL_IS_RTS(connp)); 1708 return (rts_common_close(NULL, connp)); 1709 } 1710 1711 /* ARGSUSED */ 1712 int 1713 rts_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 1714 { 1715 conn_t *connp = (conn_t *)proto_handle; 1716 1717 /* shut down the send side */ 1718 if (how != SHUT_RD) 1719 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 1720 SOCK_OPCTL_SHUT_SEND, 0); 1721 /* shut down the recv side */ 1722 if (how != SHUT_WR) 1723 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 1724 SOCK_OPCTL_SHUT_RECV, 0); 1725 return (0); 1726 } 1727 1728 void 1729 rts_clr_flowctrl(sock_lower_handle_t proto_handle) 1730 { 1731 conn_t *connp = (conn_t *)proto_handle; 1732 rts_t *rts = connp->conn_rts; 1733 1734 mutex_enter(&rts->rts_recv_mutex); 1735 connp->conn_flow_cntrld = B_FALSE; 1736 mutex_exit(&rts->rts_recv_mutex); 1737 } 1738 1739 int 1740 rts_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 1741 int mode, int32_t *rvalp, cred_t *cr) 1742 { 1743 conn_t *connp = (conn_t *)proto_handle; 1744 int error; 1745 1746 switch (cmd) { 1747 case ND_SET: 1748 case ND_GET: 1749 case TI_GETPEERNAME: 1750 case TI_GETMYNAME: 1751 #ifdef DEUG 1752 cmn_err(CE_CONT, "rts_ioctl cmd 0x%x on non sreams" 1753 " socket", cmd); 1754 #endif 1755 error = EINVAL; 1756 break; 1757 default: 1758 /* 1759 * Pass on to IP using helper stream 1760 */ 1761 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 1762 cmd, arg, mode, cr, rvalp); 1763 break; 1764 } 1765 1766 return (error); 1767 } 1768 1769 sock_downcalls_t sock_rts_downcalls = { 1770 rts_activate, 1771 rts_accept, 1772 rts_bind, 1773 rts_listen, 1774 rts_connect, 1775 rts_getpeername, 1776 rts_getsockname, 1777 rts_getsockopt, 1778 rts_setsockopt, 1779 rts_send, 1780 NULL, 1781 NULL, 1782 NULL, 1783 rts_shutdown, 1784 rts_clr_flowctrl, 1785 rts_ioctl, 1786 rts_close 1787 }; 1788