1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/stream.h> 30 #include <sys/strsubr.h> 31 #include <sys/stropts.h> 32 #include <sys/strsun.h> 33 #include <sys/strlog.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/timod.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/cmn_err.h> 40 #include <sys/proc.h> 41 #include <sys/suntpi.h> 42 #include <sys/policy.h> 43 #include <sys/zone.h> 44 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 48 #include <inet/common.h> 49 #include <netinet/ip6.h> 50 #include <inet/ip.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/optcom.h> 54 #include <netinet/ip_mroute.h> 55 #include <sys/isa_defs.h> 56 #include <net/route.h> 57 58 /* 59 * This is a transport provider for routing sockets. Downstream messages are 60 * wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry 61 * in the ip_ioctl_ftbl callout table to pass the routing socket data into IP. 62 * Upstream messages are generated for listeners of the routing socket as well 63 * as the message sender (unless they have turned off their end using 64 * SO_USELOOPBACK or shutdown(3n)). Upstream messages may also be generated 65 * asynchronously when: 66 * 67 * Interfaces are brought up or down. 68 * Addresses are assigned to interfaces. 69 * ICMP redirects are processed and a IRE_HOST/RTF_DYNAMIC is installed. 70 * No route is found while sending a packet. 71 * When TCP requests IP to remove an IRE_CACHE of a troubled destination. 72 * 73 * Since all we do is reformat the messages between routing socket and 74 * ioctl forms, no synchronization is necessary in this module; all 75 * the dirty work is done down in ip. 76 */ 77 78 /* 79 * RTS stack instances 80 */ 81 struct rts_stack { 82 netstack_t *rtss_netstack; /* Common netstack */ 83 84 caddr_t rtss_g_nd; 85 struct rtsparam_s *rtss_params; 86 }; 87 typedef struct rts_stack rts_stack_t; 88 89 /* 90 * Object to represent database of options to search passed to 91 * {sock,tpi}optcom_req() interface routine to take care of option 92 * management and associated methods. 93 * XXX. These and other externs should really move to a rts header. 94 */ 95 extern optdb_obj_t rts_opt_obj; 96 extern uint_t rts_max_optsize; 97 98 /* Internal routing socket stream control structure, one per open stream */ 99 typedef struct rts_s { 100 cred_t *rts_credp; /* Opener's credentials */ 101 uint_t rts_state; /* Provider interface state */ 102 uint_t rts_error; /* Routing socket error code */ 103 uint_t rts_flag; /* Pending I/O state */ 104 uint_t rts_proto; /* SO_PROTOTYPE "socket" option. */ 105 uint_t rts_debug : 1, /* SO_DEBUG "socket" option. */ 106 rts_dontroute : 1, /* SO_DONTROUTE "socket" option. */ 107 rts_broadcast : 1, /* SO_BROADCAST "socket" option. */ 108 rts_reuseaddr : 1, /* SO_REUSEADDR "socket" option. */ 109 rts_useloopback : 1, /* SO_USELOOPBACK "socket" option. */ 110 rts_multicast_loop : 1, /* IP_MULTICAST_LOOP option */ 111 rts_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */ 112 113 : 0; 114 rts_stack_t *rts_rtss; 115 } rts_t; 116 117 #define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */ 118 #define RTS_WRW_PENDING 0x2 /* Routing socket write in progress */ 119 #define RTS_OPEN_PENDING 0x4 /* Routing socket open in progress */ 120 121 /* Default structure copied into T_INFO_ACK messages */ 122 static struct T_info_ack rts_g_t_info_ack = { 123 T_INFO_ACK, 124 T_INFINITE, /* TSDU_size. Maximum size messages. */ 125 T_INVALID, /* ETSDU_size. No expedited data. */ 126 T_INVALID, /* CDATA_size. No connect data. */ 127 T_INVALID, /* DDATA_size. No disconnect data. */ 128 0, /* ADDR_size. */ 129 0, /* OPT_size - not initialized here */ 130 64 * 1024, /* TIDU_size. rts allows maximum size messages. */ 131 T_COTS, /* SERV_type. rts supports connection oriented. */ 132 TS_UNBND, /* CURRENT_state. This is set from rts_state. */ 133 (XPG4_1) /* PROVIDER_flag */ 134 }; 135 136 /* Named Dispatch Parameter Management Structure */ 137 typedef struct rtsparam_s { 138 uint_t rts_param_min; 139 uint_t rts_param_max; 140 uint_t rts_param_value; 141 char *rts_param_name; 142 } rtsparam_t; 143 144 /* 145 * Table of ND variables supported by rts. These are loaded into rts_g_nd 146 * in rts_open. 147 * All of these are alterable, within the min/max values given, at run time. 148 */ 149 static rtsparam_t lcl_param_arr[] = { 150 /* min max value name */ 151 { 4096, 65536, 8192, "rts_xmit_hiwat"}, 152 { 0, 65536, 1024, "rts_xmit_lowat"}, 153 { 4096, 65536, 8192, "rts_recv_hiwat"}, 154 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, 155 }; 156 #define rtss_xmit_hiwat rtss_params[0].rts_param_value 157 #define rtss_xmit_lowat rtss_params[1].rts_param_value 158 #define rtss_recv_hiwat rtss_params[2].rts_param_value 159 #define rtss_max_buf rtss_params[3].rts_param_value 160 161 static int rts_close(queue_t *q); 162 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 163 int sys_error); 164 static mblk_t *rts_ioctl_alloc(mblk_t *data, cred_t *cr); 165 static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag, 166 cred_t *credp); 167 int rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, 168 uchar_t *ptr); 169 int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, 170 uchar_t *ptr); 171 int rts_opt_set(queue_t *q, uint_t optset_context, int level, 172 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 173 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 174 static void rts_param_cleanup(IDP *ndp); 175 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 176 static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt); 177 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 178 cred_t *cr); 179 static void rts_rput(queue_t *q, mblk_t *mp); 180 static void *rts_stack_init(netstackid_t stackid, netstack_t *ns); 181 static void rts_stack_fini(netstackid_t stackid, void *arg); 182 static void rts_wput(queue_t *q, mblk_t *mp); 183 static void rts_wput_iocdata(queue_t *q, mblk_t *mp); 184 static void rts_wput_other(queue_t *q, mblk_t *mp); 185 static int rts_wrw(queue_t *q, struiod_t *dp); 186 187 static struct module_info info = { 188 129, "rts", 1, INFPSZ, 512, 128 189 }; 190 191 static struct qinit rinit = { 192 (pfi_t)rts_rput, NULL, rts_open, rts_close, NULL, &info 193 }; 194 195 static struct qinit winit = { 196 (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &info, 197 NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD 198 }; 199 200 struct streamtab rtsinfo = { 201 &rinit, &winit 202 }; 203 204 /* 205 * This routine allocates the necessary 206 * message blocks for IOCTL wrapping the 207 * user data. 208 */ 209 static mblk_t * 210 rts_ioctl_alloc(mblk_t *data, cred_t *cr) 211 { 212 mblk_t *mp = NULL; 213 mblk_t *mp1 = NULL; 214 ipllc_t *ipllc; 215 struct iocblk *ioc; 216 217 mp = allocb_cred(sizeof (ipllc_t), cr); 218 if (mp == NULL) 219 return (NULL); 220 mp1 = allocb_cred(sizeof (struct iocblk), cr); 221 if (mp1 == NULL) { 222 freeb(mp); 223 return (NULL); 224 } 225 226 ipllc = (ipllc_t *)mp->b_rptr; 227 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST; 228 ipllc->ipllc_name_offset = 0; 229 ipllc->ipllc_name_length = 0; 230 mp->b_wptr += sizeof (ipllc_t); 231 mp->b_cont = data; 232 233 ioc = (struct iocblk *)mp1->b_rptr; 234 ioc->ioc_cmd = IP_IOCTL; 235 ioc->ioc_error = 0; 236 ioc->ioc_cr = NULL; 237 ioc->ioc_count = msgdsize(mp); 238 mp1->b_wptr += sizeof (struct iocblk); 239 mp1->b_datap->db_type = M_IOCTL; 240 mp1->b_cont = mp; 241 242 return (mp1); 243 } 244 245 /* 246 * This routine closes rts stream, by disabling 247 * put/srv routines and freeing the this module 248 * internal datastructure. 249 */ 250 static int 251 rts_close(queue_t *q) 252 { 253 rts_t *rts = (rts_t *)q->q_ptr; 254 255 qprocsoff(q); 256 257 crfree(rts->rts_credp); 258 netstack_rele(rts->rts_rtss->rtss_netstack); 259 260 mi_free(q->q_ptr); 261 return (0); 262 } 263 264 /* 265 * This is the open routine for routing socket. It allocates 266 * rts_t structure for the stream and sends an IOCTL to 267 * the down module to indicate that it is a routing socket 268 * stream. 269 */ 270 /* ARGSUSED */ 271 static int 272 rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 273 { 274 mblk_t *mp = NULL; 275 rts_t *rts; 276 netstack_t *ns; 277 rts_stack_t *rtss; 278 279 /* If the stream is already open, return immediately. */ 280 if (q->q_ptr != NULL) 281 return (0); 282 283 /* If this is not a push of rts as a module, fail. */ 284 if (sflag != MODOPEN) 285 return (EINVAL); 286 287 ns = netstack_find_by_cred(credp); 288 ASSERT(ns != NULL); 289 rtss = ns->netstack_rts; 290 ASSERT(rtss != NULL); 291 292 q->q_ptr = mi_zalloc_sleep(sizeof (rts_t)); 293 WR(q)->q_ptr = q->q_ptr; 294 rts = (rts_t *)q->q_ptr; 295 296 rts->rts_rtss = rtss; 297 298 rts->rts_credp = credp; 299 crhold(credp); 300 /* 301 * The receive hiwat is only looked at on the stream head queue. 302 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 303 */ 304 q->q_hiwat = rtss->rtss_recv_hiwat; 305 /* 306 * The transmit hiwat/lowat is only looked at on IP's queue. 307 * Store in q_hiwat/q_lowat in order to return on SO_SNDBUF/SO_SNDLOWAT 308 * getsockopts. 309 */ 310 WR(q)->q_hiwat = rtss->rtss_xmit_hiwat; 311 WR(q)->q_lowat = rtss->rtss_xmit_lowat; 312 qprocson(q); 313 /* 314 * Indicate the down IP module that this is a routing socket 315 * client by sending an RTS IOCTL without any user data. Although 316 * this is just a notification message (without any real routing 317 * request), we pass in any credential for correctness sake. 318 */ 319 mp = rts_ioctl_alloc(NULL, credp); 320 if (mp == NULL) { 321 qprocsoff(q); 322 ASSERT(q->q_ptr != NULL); 323 netstack_rele(rtss->rtss_netstack); 324 mi_free(q->q_ptr); 325 crfree(credp); 326 return (ENOMEM); 327 } 328 rts->rts_flag |= RTS_OPEN_PENDING; 329 putnext(WR(q), mp); 330 while (rts->rts_flag & RTS_OPEN_PENDING) { 331 if (!qwait_sig(q)) { 332 (void) rts_close(q); 333 return (EINTR); 334 } 335 } 336 if (rts->rts_error != 0) { 337 (void) rts_close(q); 338 return (ENOTSUP); 339 } 340 rts->rts_state = TS_UNBND; 341 return (0); 342 } 343 344 /* 345 * This routine creates a T_ERROR_ACK message and passes it upstream. 346 */ 347 static void 348 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 349 { 350 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 351 qreply(q, mp); 352 } 353 354 /* 355 * This routine creates a T_OK_ACK message and passes it upstream. 356 */ 357 static void 358 rts_ok_ack(queue_t *q, mblk_t *mp) 359 { 360 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL) 361 qreply(q, mp); 362 } 363 364 /* 365 * This routine is called by rts_wput to handle T_UNBIND_REQ messages. 366 * After some error checking, the message is passed downstream to ip. 367 */ 368 static void 369 rts_unbind(queue_t *q, mblk_t *mp) 370 { 371 rts_t *rts; 372 373 rts = (rts_t *)q->q_ptr; 374 /* If a bind has not been done, we can't unbind. */ 375 if (rts->rts_state != TS_IDLE) { 376 rts_err_ack(q, mp, TOUTSTATE, 0); 377 return; 378 } 379 rts->rts_state = TS_UNBND; 380 rts_ok_ack(q, mp); 381 } 382 383 /* 384 * This routine is called to handle each 385 * O_T_BIND_REQ/T_BIND_REQ message passed to 386 * rts_wput. Note: This routine works with both 387 * O_T_BIND_REQ and T_BIND_REQ semantics. 388 */ 389 static void 390 rts_bind(queue_t *q, mblk_t *mp) 391 { 392 mblk_t *mp1; 393 struct T_bind_req *tbr; 394 rts_t *rts; 395 396 rts = (rts_t *)q->q_ptr; 397 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 398 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 399 "rts_bind: bad data, %d", rts->rts_state); 400 rts_err_ack(q, mp, TBADADDR, 0); 401 return; 402 } 403 if (rts->rts_state != TS_UNBND) { 404 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 405 "rts_bind: bad state, %d", rts->rts_state); 406 rts_err_ack(q, mp, TOUTSTATE, 0); 407 return; 408 } 409 /* 410 * Reallocate the message to make sure we have enough room for an 411 * address and the protocol type. 412 */ 413 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin_t), 1); 414 if (mp1 == NULL) { 415 rts_err_ack(q, mp, TSYSERR, ENOMEM); 416 return; 417 } 418 mp = mp1; 419 tbr = (struct T_bind_req *)mp->b_rptr; 420 if (tbr->ADDR_length != 0) { 421 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 422 "rts_bind: bad ADDR_length %d", tbr->ADDR_length); 423 rts_err_ack(q, mp, TBADADDR, 0); 424 return; 425 } 426 /* Generic request */ 427 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req); 428 tbr->ADDR_length = 0; 429 tbr->PRIM_type = T_BIND_ACK; 430 rts->rts_state = TS_IDLE; 431 qreply(q, mp); 432 } 433 434 static void 435 rts_copy_info(struct T_info_ack *tap, rts_t *rts) 436 { 437 *tap = rts_g_t_info_ack; 438 tap->CURRENT_state = rts->rts_state; 439 tap->OPT_size = rts_max_optsize; 440 } 441 442 /* 443 * This routine responds to T_CAPABILITY_REQ messages. It is called by 444 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from 445 * rts_g_t_info_ack. The current state of the stream is copied from 446 * rts_state. 447 */ 448 static void 449 rts_capability_req(queue_t *q, mblk_t *mp) 450 { 451 rts_t *rts = (rts_t *)q->q_ptr; 452 t_uscalar_t cap_bits1; 453 struct T_capability_ack *tcap; 454 455 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 456 457 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 458 mp->b_datap->db_type, T_CAPABILITY_ACK); 459 if (mp == NULL) 460 return; 461 462 tcap = (struct T_capability_ack *)mp->b_rptr; 463 tcap->CAP_bits1 = 0; 464 465 if (cap_bits1 & TC1_INFO) { 466 rts_copy_info(&tcap->INFO_ack, rts); 467 tcap->CAP_bits1 |= TC1_INFO; 468 } 469 470 qreply(q, mp); 471 } 472 473 /* 474 * This routine responds to T_INFO_REQ messages. It is called by rts_wput. 475 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack. 476 * The current state of the stream is copied from rts_state. 477 */ 478 static void 479 rts_info_req(queue_t *q, mblk_t *mp) 480 { 481 rts_t *rts = (rts_t *)q->q_ptr; 482 483 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, 484 T_INFO_ACK); 485 if (mp == NULL) 486 return; 487 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts); 488 qreply(q, mp); 489 } 490 491 /* 492 * This routine gets default values of certain options whose default 493 * values are maintained by protcol specific code 494 */ 495 /* ARGSUSED */ 496 int 497 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 498 { 499 /* no default value processed by protocol specific code currently */ 500 return (-1); 501 } 502 503 /* 504 * This routine retrieves the current status of socket options. 505 * It returns the size of the option retrieved. 506 */ 507 int 508 rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 509 { 510 int *i1 = (int *)ptr; 511 rts_t *rts = (rts_t *)q->q_ptr; 512 513 switch (level) { 514 case SOL_SOCKET: 515 switch (name) { 516 case SO_DEBUG: 517 *i1 = rts->rts_debug; 518 break; 519 case SO_REUSEADDR: 520 *i1 = rts->rts_reuseaddr; 521 break; 522 case SO_TYPE: 523 *i1 = SOCK_RAW; 524 break; 525 526 /* 527 * The following three items are available here, 528 * but are only meaningful to IP. 529 */ 530 case SO_DONTROUTE: 531 *i1 = rts->rts_dontroute; 532 break; 533 case SO_USELOOPBACK: 534 *i1 = rts->rts_useloopback; 535 break; 536 case SO_BROADCAST: 537 *i1 = rts->rts_broadcast; 538 break; 539 case SO_PROTOTYPE: 540 *i1 = rts->rts_proto; 541 break; 542 /* 543 * The following two items can be manipulated, 544 * but changing them should do nothing. 545 */ 546 case SO_SNDBUF: 547 ASSERT(q->q_hiwat <= INT_MAX); 548 *i1 = (int)(q->q_hiwat); 549 break; 550 case SO_RCVBUF: 551 ASSERT(q->q_hiwat <= INT_MAX); 552 *i1 = (int)(RD(q)->q_hiwat); 553 break; 554 case SO_DOMAIN: 555 *i1 = PF_ROUTE; 556 break; 557 default: 558 return (-1); 559 } 560 break; 561 default: 562 return (-1); 563 } 564 return ((int)sizeof (int)); 565 } 566 567 568 /* 569 * This routine sets socket options. 570 */ 571 /*ARGSUSED*/ 572 int 573 rts_opt_set(queue_t *q, uint_t optset_context, int level, 574 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 575 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 576 { 577 int *i1 = (int *)invalp; 578 rts_t *rts = (rts_t *)q->q_ptr; 579 boolean_t checkonly; 580 rts_stack_t *rtss = rts->rts_rtss; 581 582 switch (optset_context) { 583 case SETFN_OPTCOM_CHECKONLY: 584 checkonly = B_TRUE; 585 /* 586 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 587 * inlen != 0 implies value supplied and 588 * we have to "pretend" to set it. 589 * inlen == 0 implies that there is no 590 * value part in T_CHECK request and just validation 591 * done elsewhere should be enough, we just return here. 592 */ 593 if (inlen == 0) { 594 *outlenp = 0; 595 return (0); 596 } 597 break; 598 case SETFN_OPTCOM_NEGOTIATE: 599 checkonly = B_FALSE; 600 break; 601 case SETFN_UD_NEGOTIATE: 602 case SETFN_CONN_NEGOTIATE: 603 checkonly = B_FALSE; 604 /* 605 * Negotiating local and "association-related" options 606 * through T_UNITDATA_REQ or T_CONN_{REQ,CON} 607 * Not allowed in this module. 608 */ 609 return (EINVAL); 610 default: 611 /* 612 * We should never get here 613 */ 614 *outlenp = 0; 615 return (EINVAL); 616 } 617 618 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 619 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 620 621 /* 622 * For rts, we should have no ancillary data sent down 623 * (rts_wput doesn't handle options). 624 */ 625 ASSERT(thisdg_attrs == NULL); 626 627 /* 628 * For fixed length options, no sanity check 629 * of passed in length is done. It is assumed *_optcom_req() 630 * routines do the right thing. 631 */ 632 633 switch (level) { 634 case SOL_SOCKET: 635 switch (name) { 636 case SO_REUSEADDR: 637 if (!checkonly) 638 rts->rts_reuseaddr = *i1; 639 break; /* goto sizeof (int) option return */ 640 case SO_DEBUG: 641 if (!checkonly) 642 rts->rts_debug = *i1; 643 break; /* goto sizeof (int) option return */ 644 /* 645 * The following three items are available here, 646 * but are only meaningful to IP. 647 */ 648 case SO_DONTROUTE: 649 if (!checkonly) 650 rts->rts_dontroute = *i1; 651 break; /* goto sizeof (int) option return */ 652 case SO_USELOOPBACK: 653 if (!checkonly) 654 rts->rts_useloopback = *i1; 655 break; /* goto sizeof (int) option return */ 656 case SO_BROADCAST: 657 if (!checkonly) 658 rts->rts_broadcast = *i1; 659 break; /* goto sizeof (int) option return */ 660 case SO_PROTOTYPE: 661 /* 662 * Routing socket applications that call socket() with 663 * a third argument can filter which messages will be 664 * sent upstream thanks to sockfs. so_socket() sends 665 * down the SO_PROTOTYPE and rts_queue_input() 666 * implements the filtering. 667 */ 668 if (*i1 != AF_INET && *i1 != AF_INET6) 669 return (EPROTONOSUPPORT); 670 if (!checkonly) 671 rts->rts_proto = *i1; 672 break; /* goto sizeof (int) option return */ 673 /* 674 * The following two items can be manipulated, 675 * but changing them should do nothing. 676 */ 677 case SO_SNDBUF: 678 if (*i1 > rtss->rtss_max_buf) { 679 *outlenp = 0; 680 return (ENOBUFS); 681 } 682 if (!checkonly) { 683 q->q_hiwat = *i1; 684 q->q_next->q_hiwat = *i1; 685 } 686 break; /* goto sizeof (int) option return */ 687 case SO_RCVBUF: 688 if (*i1 > rtss->rtss_max_buf) { 689 *outlenp = 0; 690 return (ENOBUFS); 691 } 692 if (!checkonly) { 693 RD(q)->q_hiwat = *i1; 694 (void) mi_set_sth_hiwat(RD(q), *i1); 695 } 696 break; /* goto sizeof (int) option return */ 697 default: 698 *outlenp = 0; 699 return (EINVAL); 700 } 701 break; 702 default: 703 *outlenp = 0; 704 return (EINVAL); 705 } 706 /* 707 * Common case of return from an option that is sizeof (int) 708 */ 709 *(int *)outvalp = *i1; 710 *outlenp = (t_uscalar_t)sizeof (int); 711 return (0); 712 } 713 714 /* 715 * This routine frees the ND table if all streams have been closed. 716 * It is called by rts_close and rts_open. 717 */ 718 static void 719 rts_param_cleanup(IDP *ndp) 720 { 721 nd_free(ndp); 722 } 723 724 /* 725 * This routine retrieves the value of an ND variable in a rtsparam_t 726 * structure. It is called through nd_getset when a user reads the 727 * variable. 728 */ 729 /* ARGSUSED */ 730 static int 731 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 732 { 733 rtsparam_t *rtspa = (rtsparam_t *)cp; 734 735 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value); 736 return (0); 737 } 738 739 /* 740 * Walk through the param array specified registering each element with the 741 * named dispatch (ND) handler. 742 */ 743 static boolean_t 744 rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt) 745 { 746 for (; cnt-- > 0; rtspa++) { 747 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { 748 if (!nd_load(ndp, rtspa->rts_param_name, 749 rts_param_get, rts_param_set, (caddr_t)rtspa)) { 750 nd_free(ndp); 751 return (B_FALSE); 752 } 753 } 754 } 755 return (B_TRUE); 756 } 757 758 /* This routine sets an ND variable in a rtsparam_t structure. */ 759 /* ARGSUSED */ 760 static int 761 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 762 { 763 ulong_t new_value; 764 rtsparam_t *rtspa = (rtsparam_t *)cp; 765 766 /* 767 * Fail the request if the new value does not lie within the 768 * required bounds. 769 */ 770 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 771 new_value < rtspa->rts_param_min || 772 new_value > rtspa->rts_param_max) { 773 return (EINVAL); 774 } 775 776 /* Set the new value */ 777 rtspa->rts_param_value = new_value; 778 return (0); 779 } 780 781 /* 782 * This routine handles synchronous messages passed downstream. It either 783 * consumes the message or passes it downstream; it never queues a 784 * a message. The data messages that go down are wrapped in an IOCTL 785 * message. 786 * 787 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that 788 * it can return an immediate error (such as ENETUNREACH when adding a route). 789 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only 790 * one M_IOCTL outstanding at any given time. 791 */ 792 static int 793 rts_wrw(queue_t *q, struiod_t *dp) 794 { 795 mblk_t *mp = dp->d_mp; 796 mblk_t *mp1; 797 int error; 798 rt_msghdr_t *rtm; 799 rts_t *rts; 800 801 rts = (rts_t *)q->q_ptr; 802 while (rts->rts_flag & RTS_WRW_PENDING) { 803 if (qwait_rw(q)) { 804 rts->rts_error = EINTR; 805 goto err_ret; 806 } 807 } 808 rts->rts_flag |= RTS_WRW_PENDING; 809 810 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 811 /* 812 * Uio error of some sort, so just return the error. 813 */ 814 rts->rts_error = error; 815 goto err_ret; 816 } 817 /* 818 * Pass the mblk (chain) onto wput(). 819 */ 820 dp->d_mp = 0; 821 822 switch (mp->b_datap->db_type) { 823 case M_PROTO: 824 case M_PCPROTO: 825 /* Expedite other than T_DATA_REQ to below the switch */ 826 if (((mp->b_wptr - mp->b_rptr) != 827 sizeof (struct T_data_req)) || 828 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ)) 829 break; 830 if ((mp1 = mp->b_cont) == NULL) { 831 rts->rts_error = EINVAL; 832 goto err_ret; 833 } 834 freeb(mp); 835 mp = mp1; 836 /* FALLTHRU */ 837 case M_DATA: 838 /* 839 * The semantics of the routing socket is such that the rtm_pid 840 * field is automatically filled in during requests with the 841 * current process' pid. We do this here (where we still have 842 * user context) after checking we have at least a message the 843 * size of a routing message header. 844 */ 845 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 846 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 847 rts->rts_error = EINVAL; 848 goto err_ret; 849 } 850 } 851 rtm = (rt_msghdr_t *)mp->b_rptr; 852 rtm->rtm_pid = curproc->p_pid; 853 break; 854 default: 855 break; 856 } 857 rts->rts_flag |= RTS_WPUT_PENDING; 858 rts_wput(q, mp); 859 while (rts->rts_flag & RTS_WPUT_PENDING) 860 if (qwait_rw(q)) { 861 /* RTS_WPUT_PENDING will be cleared below */ 862 rts->rts_error = EINTR; 863 break; 864 } 865 err_ret: 866 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING); 867 return (rts->rts_error); 868 } 869 870 /* 871 * This routine handles all messages passed downstream. It either 872 * consumes the message or passes it downstream; it never queues a 873 * a message. The data messages that go down are wrapped in an IOCTL 874 * message. 875 */ 876 static void 877 rts_wput(queue_t *q, mblk_t *mp) 878 { 879 uchar_t *rptr = mp->b_rptr; 880 mblk_t *mp1; 881 882 switch (mp->b_datap->db_type) { 883 case M_DATA: 884 break; 885 case M_PROTO: 886 case M_PCPROTO: 887 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) { 888 /* Expedite valid T_DATA_REQ to below the switch */ 889 if (((union T_primitives *)rptr)->type == T_DATA_REQ) { 890 mp1 = mp->b_cont; 891 freeb(mp); 892 if (mp1 == NULL) 893 return; 894 mp = mp1; 895 break; 896 } 897 } 898 /* FALLTHRU */ 899 default: 900 rts_wput_other(q, mp); 901 return; 902 } 903 904 905 mp1 = rts_ioctl_alloc(mp, DB_CRED(mp)); 906 if (mp1 == NULL) { 907 rts_t *rts = (rts_t *)q->q_ptr; 908 909 ASSERT(rts != NULL); 910 freemsg(mp); 911 if (rts->rts_flag & RTS_WPUT_PENDING) { 912 rts->rts_error = ENOMEM; 913 rts->rts_flag &= ~RTS_WPUT_PENDING; 914 } 915 return; 916 } 917 putnext(q, mp1); 918 } 919 920 921 /* 922 * Handles all the control message, if it 923 * can not understand it, it will 924 * pass down stream. 925 */ 926 static void 927 rts_wput_other(queue_t *q, mblk_t *mp) 928 { 929 uchar_t *rptr = mp->b_rptr; 930 rts_t *rts; 931 struct iocblk *iocp; 932 cred_t *cr; 933 rts_stack_t *rtss; 934 935 rts = (rts_t *)q->q_ptr; 936 rtss = rts->rts_rtss; 937 938 cr = DB_CREDDEF(mp, rts->rts_credp); 939 940 switch (mp->b_datap->db_type) { 941 case M_PROTO: 942 case M_PCPROTO: 943 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 944 /* 945 * If the message does not contain a PRIM_type, 946 * throw it away. 947 */ 948 freemsg(mp); 949 return; 950 } 951 switch (((union T_primitives *)rptr)->type) { 952 case T_BIND_REQ: 953 case O_T_BIND_REQ: 954 rts_bind(q, mp); 955 return; 956 case T_UNBIND_REQ: 957 rts_unbind(q, mp); 958 return; 959 case T_CAPABILITY_REQ: 960 rts_capability_req(q, mp); 961 return; 962 case T_INFO_REQ: 963 rts_info_req(q, mp); 964 return; 965 case T_SVR4_OPTMGMT_REQ: 966 (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj); 967 return; 968 case T_OPTMGMT_REQ: 969 (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj); 970 return; 971 case O_T_CONN_RES: 972 case T_CONN_RES: 973 case T_DISCON_REQ: 974 /* Not supported by rts. */ 975 rts_err_ack(q, mp, TNOTSUPPORT, 0); 976 return; 977 case T_DATA_REQ: 978 case T_EXDATA_REQ: 979 case T_ORDREL_REQ: 980 /* Illegal for rts. */ 981 freemsg(mp); 982 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 983 return; 984 default: 985 break; 986 } 987 break; 988 case M_IOCTL: 989 iocp = (struct iocblk *)mp->b_rptr; 990 switch (iocp->ioc_cmd) { 991 case ND_SET: 992 case ND_GET: 993 if (nd_getset(q, rtss->rtss_g_nd, mp)) { 994 qreply(q, mp); 995 return; 996 } 997 break; 998 case TI_GETPEERNAME: 999 mi_copyin(q, mp, NULL, 1000 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 1001 return; 1002 default: 1003 break; 1004 } 1005 case M_IOCDATA: 1006 rts_wput_iocdata(q, mp); 1007 return; 1008 default: 1009 break; 1010 } 1011 putnext(q, mp); 1012 } 1013 1014 /* 1015 * Called by rts_wput_other to handle all M_IOCDATA messages. 1016 */ 1017 static void 1018 rts_wput_iocdata(queue_t *q, mblk_t *mp) 1019 { 1020 struct sockaddr *rtsaddr; 1021 mblk_t *mp1; 1022 STRUCT_HANDLE(strbuf, sb); 1023 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 1024 1025 /* Make sure it is one of ours. */ 1026 switch (iocp->ioc_cmd) { 1027 case TI_GETPEERNAME: 1028 break; 1029 default: 1030 putnext(q, mp); 1031 return; 1032 } 1033 switch (mi_copy_state(q, mp, &mp1)) { 1034 case -1: 1035 return; 1036 case MI_COPY_CASE(MI_COPY_IN, 1): 1037 break; 1038 case MI_COPY_CASE(MI_COPY_OUT, 1): 1039 /* Copy out the strbuf. */ 1040 mi_copyout(q, mp); 1041 return; 1042 case MI_COPY_CASE(MI_COPY_OUT, 2): 1043 /* All done. */ 1044 mi_copy_done(q, mp, 0); 1045 return; 1046 default: 1047 mi_copy_done(q, mp, EPROTO); 1048 return; 1049 } 1050 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 1051 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) { 1052 mi_copy_done(q, mp, EINVAL); 1053 return; 1054 } 1055 switch (iocp->ioc_cmd) { 1056 case TI_GETPEERNAME: 1057 break; 1058 default: 1059 mi_copy_done(q, mp, EPROTO); 1060 return; 1061 } 1062 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t), 1063 B_TRUE); 1064 if (mp1 == NULL) 1065 return; 1066 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 1067 rtsaddr = (struct sockaddr *)mp1->b_rptr; 1068 mp1->b_wptr = (uchar_t *)&rtsaddr[1]; 1069 bzero(rtsaddr, sizeof (struct sockaddr)); 1070 rtsaddr->sa_family = AF_ROUTE; 1071 /* Copy out the address */ 1072 mi_copyout(q, mp); 1073 } 1074 1075 static void 1076 rts_rput(queue_t *q, mblk_t *mp) 1077 { 1078 rts_t *rts; 1079 struct iocblk *iocp; 1080 mblk_t *mp1; 1081 struct T_data_ind *tdi; 1082 1083 rts = (rts_t *)q->q_ptr; 1084 switch (mp->b_datap->db_type) { 1085 case M_IOCACK: 1086 case M_IOCNAK: 1087 iocp = (struct iocblk *)mp->b_rptr; 1088 if (rts->rts_flag & (RTS_WPUT_PENDING|RTS_OPEN_PENDING)) { 1089 if (rts->rts_flag & RTS_WPUT_PENDING) 1090 rts->rts_flag &= ~RTS_WPUT_PENDING; 1091 else 1092 rts->rts_flag &= ~RTS_OPEN_PENDING; 1093 rts->rts_error = iocp->ioc_error; 1094 freemsg(mp); 1095 return; 1096 } 1097 break; 1098 case M_DATA: 1099 /* 1100 * Prepend T_DATA_IND to prevent the stream head from 1101 * consolidating multiple messages together. 1102 * If the allocation fails just send up the M_DATA. 1103 */ 1104 mp1 = allocb(sizeof (*tdi), BPRI_MED); 1105 if (mp1 != NULL) { 1106 mp1->b_cont = mp; 1107 mp = mp1; 1108 1109 mp->b_datap->db_type = M_PROTO; 1110 mp->b_wptr += sizeof (*tdi); 1111 tdi = (struct T_data_ind *)mp->b_rptr; 1112 tdi->PRIM_type = T_DATA_IND; 1113 tdi->MORE_flag = 0; 1114 } 1115 break; 1116 default: 1117 break; 1118 } 1119 putnext(q, mp); 1120 } 1121 1122 1123 void 1124 rts_ddi_init(void) 1125 { 1126 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, 1127 rts_opt_obj.odb_opt_arr_cnt); 1128 1129 /* 1130 * We want to be informed each time a stack is created or 1131 * destroyed in the kernel, so we can maintain the 1132 * set of rts_stack_t's. 1133 */ 1134 netstack_register(NS_RTS, rts_stack_init, NULL, rts_stack_fini); 1135 } 1136 1137 void 1138 rts_ddi_destroy(void) 1139 { 1140 netstack_unregister(NS_RTS); 1141 } 1142 1143 /* 1144 * Initialize the RTS stack instance. 1145 */ 1146 /* ARGSUSED */ 1147 static void * 1148 rts_stack_init(netstackid_t stackid, netstack_t *ns) 1149 { 1150 rts_stack_t *rtss; 1151 rtsparam_t *pa; 1152 1153 rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP); 1154 rtss->rtss_netstack = ns; 1155 1156 pa = (rtsparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 1157 rtss->rtss_params = pa; 1158 bcopy(lcl_param_arr, rtss->rtss_params, sizeof (lcl_param_arr)); 1159 1160 (void) rts_param_register(&rtss->rtss_g_nd, 1161 rtss->rtss_params, A_CNT(lcl_param_arr)); 1162 return (rtss); 1163 } 1164 1165 /* 1166 * Free the RTS stack instance. 1167 */ 1168 /* ARGSUSED */ 1169 static void 1170 rts_stack_fini(netstackid_t stackid, void *arg) 1171 { 1172 rts_stack_t *rtss = (rts_stack_t *)arg; 1173 1174 rts_param_cleanup(&rtss->rtss_g_nd); 1175 kmem_free(rtss->rtss_params, sizeof (lcl_param_arr)); 1176 rtss->rtss_params = NULL; 1177 kmem_free(rtss, sizeof (*rtss)); 1178 } 1179