/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/suntpi.h>
#include <sys/policy.h>

#include <sys/socket.h>
#include <netinet/in.h>

#include <inet/common.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <netinet/ip_mroute.h>
#include <sys/isa_defs.h>
#include <net/route.h>

/*
 * This is a transport provider for routing sockets.  Downstream messages are
 * wrapped with an IP_IOCTL header, and ip_wput_ioctl calls the appropriate
 * entry in the ip_ioctl_ftbl callout table to pass the routing socket data
 * into IP.
 * Upstream messages are generated for listeners of the routing socket as well
 * as the message sender (unless they have turned off their end using
 * SO_USELOOPBACK or shutdown(3n)).  Upstream messages may also be generated
 * asynchronously when:
 *
 *	Interfaces are brought up or down.
 *	Addresses are assigned to interfaces.
 *	ICMP redirects are processed and an IRE_HOST_REDIRECT is installed.
 *	No route is found while sending a packet.
 *	When TCP requests IP to remove an IRE_CACHE of a troubled destination.
 *
 * Since all we do is reformat the messages between routing socket and
 * ioctl forms, no synchronization is necessary in this module; all
 * the dirty work is done down in ip.
 */

/*
 * Object to represent database of options to search passed to
 * {sock,tpi}optcom_req() interface routine to take care of option
 * management and associated methods.
 * XXX. These and other externs should really move to a rts header.
 */
extern optdb_obj_t	rts_opt_obj;
extern uint_t		rts_max_optsize;

/*
 * Internal routing socket stream control structure, one per open stream.
 * rts_open allocates it (zero-filled) and stores it in the q_ptr of both
 * the read and the write queue; rts_close frees it.
 */
typedef	struct rts_s {
	cred_t	*rts_credp;	/* Opener's credentials */
	uint_t	rts_state;	/* Provider interface state */
	uint_t	rts_error;	/* Routing socket error code */
	uint_t	rts_flag;	/* Pending I/O state (RTS_*_PENDING bits) */
	uint_t	rts_proto;	/* SO_PROTOTYPE "socket" option. */
	uint_t	rts_debug : 1,		/* SO_DEBUG "socket" option. */
		rts_dontroute : 1,	/* SO_DONTROUTE "socket" option. */
		rts_broadcast : 1,	/* SO_BROADCAST "socket" option. */
		rts_reuseaddr : 1,	/* SO_REUSEADDR "socket" option. */
		rts_useloopback : 1,	/* SO_USELOOPBACK "socket" option. */
		rts_multicast_loop : 1,	/* IP_MULTICAST_LOOP option */
		rts_hdrincl : 1,	/* IP_HDRINCL option + RAW and IGMP */

		: 0;
} rts_t;

/* Bits kept in rts_flag. */
#define	RTS_WPUT_PENDING	0x1	/* Waiting for write-side to complete */
#define	RTS_WRW_PENDING		0x2	/* Routing socket write in progress */
#define	RTS_OPEN_PENDING	0x4	/* Routing socket open in progress */

/*
 * Default structure copied into T_INFO_ACK messages.  rts_copy_info
 * overwrites CURRENT_state and OPT_size in each copy it hands out.
 */
static struct T_info_ack rts_g_t_info_ack = {
	T_INFO_ACK,
	T_INFINITE,	/* TSDU_size. Maximum size messages. */
	T_INVALID,	/* ETSDU_size. No expedited data. */
	T_INVALID,	/* CDATA_size. No connect data. */
	T_INVALID,	/* DDATA_size. No disconnect data. */
	0,		/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	64 * 1024,	/* TIDU_size. rts allows maximum size messages. */
	T_COTS,		/* SERV_type. rts supports connection oriented. */
	TS_UNBND,	/* CURRENT_state. This is set from rts_state. */
	(XPG4_1)	/* PROVIDER_flag */
};

/*
 * Named Dispatch Parameter Management Structure.  One entry describes a
 * single tunable: its legal range, its current value and its ND name.
 */
typedef struct rtspparam_s {
	uint_t	rts_param_min;		/* Smallest value accepted */
	uint_t	rts_param_max;		/* Largest value accepted */
	uint_t	rts_param_value;	/* Current value */
	char	*rts_param_name;	/* Name registered with ND */
} rtsparam_t;

/*
 * Table of ND variables supported by rts. These are loaded into rts_g_nd
 * in rts_open.
 * All of these are alterable, within the min/max values given, at run time.
135 */ 136 static rtsparam_t rts_param_arr[] = { 137 /* min max value name */ 138 { 4096, 65536, 8192, "rts_xmit_hiwat"}, 139 { 0, 65536, 1024, "rts_xmit_lowat"}, 140 { 4096, 65536, 8192, "rts_recv_hiwat"}, 141 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, 142 }; 143 #define rts_xmit_hiwat rts_param_arr[0].rts_param_value 144 #define rts_xmit_lowat rts_param_arr[1].rts_param_value 145 #define rts_recv_hiwat rts_param_arr[2].rts_param_value 146 #define rts_max_buf rts_param_arr[3].rts_param_value 147 148 static int rts_close(queue_t *q); 149 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 150 int sys_error); 151 static mblk_t *rts_ioctl_alloc(mblk_t *data, cred_t *cr); 152 static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp); 154 int rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, 155 uchar_t *ptr); 156 int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, 157 uchar_t *ptr); 158 int rts_opt_set(queue_t *q, uint_t optset_context, int level, 159 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 160 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 161 static void rts_param_cleanup(void); 162 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 163 static boolean_t rts_param_register(rtsparam_t *rtspa, int cnt); 164 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 165 cred_t *cr); 166 static void rts_rput(queue_t *q, mblk_t *mp); 167 static void rts_wput(queue_t *q, mblk_t *mp); 168 static void rts_wput_iocdata(queue_t *q, mblk_t *mp); 169 static void rts_wput_other(queue_t *q, mblk_t *mp); 170 static int rts_wrw(queue_t *q, struiod_t *dp); 171 172 static struct module_info info = { 173 129, "rts", 1, INFPSZ, 512, 128 174 }; 175 176 static struct qinit rinit = { 177 (pfi_t)rts_rput, NULL, rts_open, rts_close, NULL, &info 178 }; 179 180 static struct qinit winit = { 181 (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &info, 182 
NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD 183 }; 184 185 struct streamtab rtsinfo = { 186 &rinit, &winit 187 }; 188 189 static IDP rts_g_nd; /* Points to table of RTS ND variables. */ 190 uint_t rts_open_streams = 0; 191 192 /* 193 * This routine allocates the necessary 194 * message blocks for IOCTL wrapping the 195 * user data. 196 */ 197 static mblk_t * 198 rts_ioctl_alloc(mblk_t *data, cred_t *cr) 199 { 200 mblk_t *mp = NULL; 201 mblk_t *mp1 = NULL; 202 ipllc_t *ipllc; 203 struct iocblk *ioc; 204 205 mp = allocb_cred(sizeof (ipllc_t), cr); 206 if (mp == NULL) 207 return (NULL); 208 mp1 = allocb_cred(sizeof (struct iocblk), cr); 209 if (mp1 == NULL) { 210 freeb(mp); 211 return (NULL); 212 } 213 214 ipllc = (ipllc_t *)mp->b_rptr; 215 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST; 216 ipllc->ipllc_name_offset = 0; 217 ipllc->ipllc_name_length = 0; 218 mp->b_wptr += sizeof (ipllc_t); 219 mp->b_cont = data; 220 221 ioc = (struct iocblk *)mp1->b_rptr; 222 ioc->ioc_cmd = IP_IOCTL; 223 ioc->ioc_error = 0; 224 ioc->ioc_cr = NULL; 225 ioc->ioc_count = msgdsize(mp); 226 mp1->b_wptr += sizeof (struct iocblk); 227 mp1->b_datap->db_type = M_IOCTL; 228 mp1->b_cont = mp; 229 230 return (mp1); 231 } 232 233 /* 234 * This routine closes rts stream, by disabling 235 * put/srv routines and freeing the this module 236 * internal datastructure. 237 */ 238 static int 239 rts_close(queue_t *q) 240 { 241 qprocsoff(q); 242 243 crfree(((rts_t *)q->q_ptr)->rts_credp); 244 245 mi_free(q->q_ptr); 246 rts_open_streams--; 247 /* 248 * Free the ND table if this was 249 * the last stream close 250 */ 251 rts_param_cleanup(); 252 return (0); 253 } 254 255 /* 256 * This is the open routine for routing socket. It allocates 257 * rts_t structure for the stream and sends an IOCTL to 258 * the down module to indicate that it is a routing socket 259 * stream. 
260 */ 261 /* ARGSUSED */ 262 static int 263 rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 264 { 265 mblk_t *mp = NULL; 266 rts_t *rts; 267 268 /* If the stream is already open, return immediately. */ 269 if (q->q_ptr != NULL) 270 return (0); 271 272 /* If this is not a push of rts as a module, fail. */ 273 if (sflag != MODOPEN) 274 return (EINVAL); 275 276 /* If this is the first open of rts, create the ND table. */ 277 if (rts_g_nd == NULL) { 278 if (!rts_param_register(rts_param_arr, A_CNT(rts_param_arr))) 279 return (ENOMEM); 280 } 281 q->q_ptr = mi_zalloc_sleep(sizeof (rts_t)); 282 WR(q)->q_ptr = q->q_ptr; 283 rts = (rts_t *)q->q_ptr; 284 285 rts->rts_credp = credp; 286 crhold(credp); 287 /* 288 * The receive hiwat is only looked at on the stream head queue. 289 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 290 */ 291 q->q_hiwat = rts_recv_hiwat; 292 /* 293 * The transmit hiwat/lowat is only looked at on IP's queue. 294 * Store in q_hiwat/q_lowat in order to return on SO_SNDBUF/SO_SNDLOWAT 295 * getsockopts. 296 */ 297 WR(q)->q_hiwat = rts_xmit_hiwat; 298 WR(q)->q_lowat = rts_xmit_lowat; 299 qprocson(q); 300 /* 301 * Indicate the down IP module that this is a routing socket 302 * client by sending an RTS IOCTL without any user data. Although 303 * this is just a notification message (without any real routing 304 * request), we pass in any credential for correctness sake. 
305 */ 306 mp = rts_ioctl_alloc(NULL, credp); 307 if (mp == NULL) { 308 rts_param_cleanup(); 309 qprocsoff(q); 310 ASSERT(q->q_ptr != NULL); 311 mi_free(q->q_ptr); 312 crfree(credp); 313 return (ENOMEM); 314 } 315 rts_open_streams++; 316 rts->rts_flag |= RTS_OPEN_PENDING; 317 putnext(WR(q), mp); 318 while (rts->rts_flag & RTS_OPEN_PENDING) { 319 if (!qwait_sig(q)) { 320 (void) rts_close(q); 321 return (EINTR); 322 } 323 } 324 if (rts->rts_error != 0) { 325 (void) rts_close(q); 326 return (ENOTSUP); 327 } 328 rts->rts_state = TS_UNBND; 329 return (0); 330 } 331 332 /* 333 * This routine creates a T_ERROR_ACK message and passes it upstream. 334 */ 335 static void 336 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 337 { 338 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 339 qreply(q, mp); 340 } 341 342 /* 343 * This routine creates a T_OK_ACK message and passes it upstream. 344 */ 345 static void 346 rts_ok_ack(queue_t *q, mblk_t *mp) 347 { 348 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL) 349 qreply(q, mp); 350 } 351 352 /* 353 * This routine is called by rts_wput to handle T_UNBIND_REQ messages. 354 * After some error checking, the message is passed downstream to ip. 355 */ 356 static void 357 rts_unbind(queue_t *q, mblk_t *mp) 358 { 359 rts_t *rts; 360 361 rts = (rts_t *)q->q_ptr; 362 /* If a bind has not been done, we can't unbind. */ 363 if (rts->rts_state != TS_IDLE) { 364 rts_err_ack(q, mp, TOUTSTATE, 0); 365 return; 366 } 367 rts->rts_state = TS_UNBND; 368 rts_ok_ack(q, mp); 369 } 370 371 /* 372 * This routine is called to handle each 373 * O_T_BIND_REQ/T_BIND_REQ message passed to 374 * rts_wput. Note: This routine works with both 375 * O_T_BIND_REQ and T_BIND_REQ semantics. 
376 */ 377 static void 378 rts_bind(queue_t *q, mblk_t *mp) 379 { 380 mblk_t *mp1; 381 struct T_bind_req *tbr; 382 rts_t *rts; 383 384 rts = (rts_t *)q->q_ptr; 385 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 386 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 387 "rts_bind: bad data, %d", rts->rts_state); 388 rts_err_ack(q, mp, TBADADDR, 0); 389 return; 390 } 391 if (rts->rts_state != TS_UNBND) { 392 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 393 "rts_bind: bad state, %d", rts->rts_state); 394 rts_err_ack(q, mp, TOUTSTATE, 0); 395 return; 396 } 397 /* 398 * Reallocate the message to make sure we have enough room for an 399 * address and the protocol type. 400 */ 401 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin_t), 1); 402 if (mp1 == NULL) { 403 rts_err_ack(q, mp, TSYSERR, ENOMEM); 404 return; 405 } 406 mp = mp1; 407 tbr = (struct T_bind_req *)mp->b_rptr; 408 if (tbr->ADDR_length != 0) { 409 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 410 "rts_bind: bad ADDR_length %d", tbr->ADDR_length); 411 rts_err_ack(q, mp, TBADADDR, 0); 412 return; 413 } 414 /* Generic request */ 415 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req); 416 tbr->ADDR_length = 0; 417 tbr->PRIM_type = T_BIND_ACK; 418 rts->rts_state = TS_IDLE; 419 qreply(q, mp); 420 } 421 422 static void 423 rts_copy_info(struct T_info_ack *tap, rts_t *rts) 424 { 425 *tap = rts_g_t_info_ack; 426 tap->CURRENT_state = rts->rts_state; 427 tap->OPT_size = rts_max_optsize; 428 } 429 430 /* 431 * This routine responds to T_CAPABILITY_REQ messages. It is called by 432 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from 433 * rts_g_t_info_ack. The current state of the stream is copied from 434 * rts_state. 
435 */ 436 static void 437 rts_capability_req(queue_t *q, mblk_t *mp) 438 { 439 rts_t *rts = (rts_t *)q->q_ptr; 440 t_uscalar_t cap_bits1; 441 struct T_capability_ack *tcap; 442 443 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 444 445 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 446 mp->b_datap->db_type, T_CAPABILITY_ACK); 447 if (mp == NULL) 448 return; 449 450 tcap = (struct T_capability_ack *)mp->b_rptr; 451 tcap->CAP_bits1 = 0; 452 453 if (cap_bits1 & TC1_INFO) { 454 rts_copy_info(&tcap->INFO_ack, rts); 455 tcap->CAP_bits1 |= TC1_INFO; 456 } 457 458 qreply(q, mp); 459 } 460 461 /* 462 * This routine responds to T_INFO_REQ messages. It is called by rts_wput. 463 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack. 464 * The current state of the stream is copied from rts_state. 465 */ 466 static void 467 rts_info_req(queue_t *q, mblk_t *mp) 468 { 469 rts_t *rts = (rts_t *)q->q_ptr; 470 471 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, 472 T_INFO_ACK); 473 if (mp == NULL) 474 return; 475 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts); 476 qreply(q, mp); 477 } 478 479 /* 480 * This routine gets default values of certain options whose default 481 * values are maintained by protcol specific code 482 */ 483 /* ARGSUSED */ 484 int 485 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 486 { 487 /* no default value processed by protocol specific code currently */ 488 return (-1); 489 } 490 491 /* 492 * This routine retrieves the current status of socket options. 493 * It returns the size of the option retrieved. 
494 */ 495 int 496 rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 497 { 498 int *i1 = (int *)ptr; 499 rts_t *rts = (rts_t *)q->q_ptr; 500 501 switch (level) { 502 case SOL_SOCKET: 503 switch (name) { 504 case SO_DEBUG: 505 *i1 = rts->rts_debug; 506 break; 507 case SO_REUSEADDR: 508 *i1 = rts->rts_reuseaddr; 509 break; 510 case SO_TYPE: 511 *i1 = SOCK_RAW; 512 break; 513 514 /* 515 * The following three items are available here, 516 * but are only meaningful to IP. 517 */ 518 case SO_DONTROUTE: 519 *i1 = rts->rts_dontroute; 520 break; 521 case SO_USELOOPBACK: 522 *i1 = rts->rts_useloopback; 523 break; 524 case SO_BROADCAST: 525 *i1 = rts->rts_broadcast; 526 break; 527 case SO_PROTOTYPE: 528 *i1 = rts->rts_proto; 529 break; 530 /* 531 * The following two items can be manipulated, 532 * but changing them should do nothing. 533 */ 534 case SO_SNDBUF: 535 ASSERT(q->q_hiwat <= INT_MAX); 536 *i1 = (int)(q->q_hiwat); 537 break; 538 case SO_RCVBUF: 539 ASSERT(q->q_hiwat <= INT_MAX); 540 *i1 = (int)(RD(q)->q_hiwat); 541 break; 542 case SO_DOMAIN: 543 *i1 = PF_ROUTE; 544 break; 545 default: 546 return (-1); 547 } 548 break; 549 default: 550 return (-1); 551 } 552 return ((int)sizeof (int)); 553 } 554 555 556 /* 557 * This routine sets socket options. 558 */ 559 /*ARGSUSED*/ 560 int 561 rts_opt_set(queue_t *q, uint_t optset_context, int level, 562 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 563 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 564 { 565 int *i1 = (int *)invalp; 566 rts_t *rts = (rts_t *)q->q_ptr; 567 boolean_t checkonly; 568 569 switch (optset_context) { 570 case SETFN_OPTCOM_CHECKONLY: 571 checkonly = B_TRUE; 572 /* 573 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 574 * inlen != 0 implies value supplied and 575 * we have to "pretend" to set it. 576 * inlen == 0 implies that there is no 577 * value part in T_CHECK request and just validation 578 * done elsewhere should be enough, we just return here. 
579 */ 580 if (inlen == 0) { 581 *outlenp = 0; 582 return (0); 583 } 584 break; 585 case SETFN_OPTCOM_NEGOTIATE: 586 checkonly = B_FALSE; 587 break; 588 case SETFN_UD_NEGOTIATE: 589 case SETFN_CONN_NEGOTIATE: 590 checkonly = B_FALSE; 591 /* 592 * Negotiating local and "association-related" options 593 * through T_UNITDATA_REQ or T_CONN_{REQ,CON} 594 * Not allowed in this module. 595 */ 596 return (EINVAL); 597 default: 598 /* 599 * We should never get here 600 */ 601 *outlenp = 0; 602 return (EINVAL); 603 } 604 605 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 606 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 607 608 /* 609 * For rts, we should have no ancillary data sent down 610 * (rts_wput doesn't handle options). 611 */ 612 ASSERT(thisdg_attrs == NULL); 613 614 /* 615 * For fixed length options, no sanity check 616 * of passed in length is done. It is assumed *_optcom_req() 617 * routines do the right thing. 618 */ 619 620 switch (level) { 621 case SOL_SOCKET: 622 switch (name) { 623 case SO_REUSEADDR: 624 if (!checkonly) 625 rts->rts_reuseaddr = *i1; 626 break; /* goto sizeof (int) option return */ 627 case SO_DEBUG: 628 if (!checkonly) 629 rts->rts_debug = *i1; 630 break; /* goto sizeof (int) option return */ 631 /* 632 * The following three items are available here, 633 * but are only meaningful to IP. 634 */ 635 case SO_DONTROUTE: 636 if (!checkonly) 637 rts->rts_dontroute = *i1; 638 break; /* goto sizeof (int) option return */ 639 case SO_USELOOPBACK: 640 if (!checkonly) 641 rts->rts_useloopback = *i1; 642 break; /* goto sizeof (int) option return */ 643 case SO_BROADCAST: 644 if (!checkonly) 645 rts->rts_broadcast = *i1; 646 break; /* goto sizeof (int) option return */ 647 case SO_PROTOTYPE: 648 /* 649 * Routing socket applications that call socket() with 650 * a third argument can filter which messages will be 651 * sent upstream thanks to sockfs. 
so_socket() sends 652 * down the SO_PROTOTYPE and rts_queue_input() 653 * implements the filtering. 654 */ 655 if (*i1 != AF_INET && *i1 != AF_INET6) 656 return (EPROTONOSUPPORT); 657 if (!checkonly) 658 rts->rts_proto = *i1; 659 break; /* goto sizeof (int) option return */ 660 /* 661 * The following two items can be manipulated, 662 * but changing them should do nothing. 663 */ 664 case SO_SNDBUF: 665 if (*i1 > rts_max_buf) { 666 *outlenp = 0; 667 return (ENOBUFS); 668 } 669 if (!checkonly) { 670 q->q_hiwat = *i1; 671 q->q_next->q_hiwat = *i1; 672 } 673 break; /* goto sizeof (int) option return */ 674 case SO_RCVBUF: 675 if (*i1 > rts_max_buf) { 676 *outlenp = 0; 677 return (ENOBUFS); 678 } 679 if (!checkonly) { 680 RD(q)->q_hiwat = *i1; 681 (void) mi_set_sth_hiwat(RD(q), *i1); 682 } 683 break; /* goto sizeof (int) option return */ 684 default: 685 *outlenp = 0; 686 return (EINVAL); 687 } 688 break; 689 default: 690 *outlenp = 0; 691 return (EINVAL); 692 } 693 /* 694 * Common case of return from an option that is sizeof (int) 695 */ 696 *(int *)outvalp = *i1; 697 *outlenp = (t_uscalar_t)sizeof (int); 698 return (0); 699 } 700 701 /* 702 * This routine frees the ND table if all streams have been closed. 703 * It is called by rts_close and rts_open. 704 */ 705 static void 706 rts_param_cleanup(void) 707 { 708 if (!rts_open_streams) 709 nd_free(&rts_g_nd); 710 } 711 712 /* 713 * This routine retrieves the value of an ND variable in a rtsparam_t 714 * structure. It is called through nd_getset when a user reads the 715 * variable. 716 */ 717 /* ARGSUSED */ 718 static int 719 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 720 { 721 rtsparam_t *rtspa = (rtsparam_t *)cp; 722 723 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value); 724 return (0); 725 } 726 727 /* 728 * Walk through the param array specified registering each element with the 729 * named dispatch (ND) handler. 
730 */ 731 static boolean_t 732 rts_param_register(rtsparam_t *rtspa, int cnt) 733 { 734 for (; cnt-- > 0; rtspa++) { 735 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { 736 if (!nd_load(&rts_g_nd, rtspa->rts_param_name, 737 rts_param_get, rts_param_set, (caddr_t)rtspa)) { 738 nd_free(&rts_g_nd); 739 return (B_FALSE); 740 } 741 } 742 } 743 return (B_TRUE); 744 } 745 746 /* This routine sets an ND variable in a rtsparam_t structure. */ 747 /* ARGSUSED */ 748 static int 749 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 750 { 751 ulong_t new_value; 752 rtsparam_t *rtspa = (rtsparam_t *)cp; 753 754 /* 755 * Fail the request if the new value does not lie within the 756 * required bounds. 757 */ 758 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 759 new_value < rtspa->rts_param_min || 760 new_value > rtspa->rts_param_max) { 761 return (EINVAL); 762 } 763 764 /* Set the new value */ 765 rtspa->rts_param_value = new_value; 766 return (0); 767 } 768 769 /* 770 * This routine handles synchronous messages passed downstream. It either 771 * consumes the message or passes it downstream; it never queues a 772 * a message. The data messages that go down are wrapped in an IOCTL 773 * message. 774 * 775 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that 776 * it can return an immediate error (such as ENETUNREACH when adding a route). 777 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only 778 * one M_IOCTL outstanding at any given time. 
779 */ 780 static int 781 rts_wrw(queue_t *q, struiod_t *dp) 782 { 783 mblk_t *mp = dp->d_mp; 784 mblk_t *mp1; 785 int error; 786 rt_msghdr_t *rtm; 787 rts_t *rts; 788 789 rts = (rts_t *)q->q_ptr; 790 while (rts->rts_flag & RTS_WRW_PENDING) { 791 if (qwait_rw(q)) { 792 rts->rts_error = EINTR; 793 goto err_ret; 794 } 795 } 796 rts->rts_flag |= RTS_WRW_PENDING; 797 798 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 799 /* 800 * Uio error of some sort, so just return the error. 801 */ 802 rts->rts_error = error; 803 goto err_ret; 804 } 805 /* 806 * Pass the mblk (chain) onto wput(). 807 */ 808 dp->d_mp = 0; 809 810 switch (mp->b_datap->db_type) { 811 case M_PROTO: 812 case M_PCPROTO: 813 /* Expedite other than T_DATA_REQ to below the switch */ 814 if (((mp->b_wptr - mp->b_rptr) != 815 sizeof (struct T_data_req)) || 816 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ)) 817 break; 818 if ((mp1 = mp->b_cont) == NULL) { 819 rts->rts_error = EINVAL; 820 goto err_ret; 821 } 822 freeb(mp); 823 mp = mp1; 824 /* FALLTHRU */ 825 case M_DATA: 826 /* 827 * The semantics of the routing socket is such that the rtm_pid 828 * field is automatically filled in during requests with the 829 * current process' pid. We do this here (where we still have 830 * user context) after checking we have at least a message the 831 * size of a routing message header. 
832 */ 833 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 834 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 835 rts->rts_error = EINVAL; 836 goto err_ret; 837 } 838 } 839 rtm = (rt_msghdr_t *)mp->b_rptr; 840 rtm->rtm_pid = curproc->p_pid; 841 break; 842 default: 843 break; 844 } 845 rts->rts_flag |= RTS_WPUT_PENDING; 846 rts_wput(q, mp); 847 while (rts->rts_flag & RTS_WPUT_PENDING) 848 if (qwait_rw(q)) { 849 /* RTS_WPUT_PENDING will be cleared below */ 850 rts->rts_error = EINTR; 851 break; 852 } 853 err_ret: 854 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING); 855 return (rts->rts_error); 856 } 857 858 /* 859 * This routine handles all messages passed downstream. It either 860 * consumes the message or passes it downstream; it never queues a 861 * a message. The data messages that go down are wrapped in an IOCTL 862 * message. 863 */ 864 static void 865 rts_wput(queue_t *q, mblk_t *mp) 866 { 867 uchar_t *rptr = mp->b_rptr; 868 mblk_t *mp1; 869 870 switch (mp->b_datap->db_type) { 871 case M_DATA: 872 break; 873 case M_PROTO: 874 case M_PCPROTO: 875 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) { 876 /* Expedite valid T_DATA_REQ to below the switch */ 877 if (((union T_primitives *)rptr)->type == T_DATA_REQ) { 878 mp1 = mp->b_cont; 879 freeb(mp); 880 if (mp1 == NULL) 881 return; 882 mp = mp1; 883 break; 884 } 885 } 886 /* FALLTHRU */ 887 default: 888 rts_wput_other(q, mp); 889 return; 890 } 891 892 893 mp1 = rts_ioctl_alloc(mp, DB_CRED(mp)); 894 if (mp1 == NULL) { 895 rts_t *rts = (rts_t *)q->q_ptr; 896 897 ASSERT(rts != NULL); 898 freemsg(mp); 899 if (rts->rts_flag & RTS_WPUT_PENDING) { 900 rts->rts_error = ENOMEM; 901 rts->rts_flag &= ~RTS_WPUT_PENDING; 902 } 903 return; 904 } 905 putnext(q, mp1); 906 } 907 908 909 /* 910 * Handles all the control message, if it 911 * can not understand it, it will 912 * pass down stream. 
913 */ 914 static void 915 rts_wput_other(queue_t *q, mblk_t *mp) 916 { 917 uchar_t *rptr = mp->b_rptr; 918 rts_t *rts; 919 struct iocblk *iocp; 920 cred_t *cr; 921 922 rts = (rts_t *)q->q_ptr; 923 924 cr = DB_CREDDEF(mp, rts->rts_credp); 925 926 switch (mp->b_datap->db_type) { 927 case M_PROTO: 928 case M_PCPROTO: 929 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 930 /* 931 * If the message does not contain a PRIM_type, 932 * throw it away. 933 */ 934 freemsg(mp); 935 return; 936 } 937 switch (((union T_primitives *)rptr)->type) { 938 case T_BIND_REQ: 939 case O_T_BIND_REQ: 940 rts_bind(q, mp); 941 return; 942 case T_UNBIND_REQ: 943 rts_unbind(q, mp); 944 return; 945 case T_CAPABILITY_REQ: 946 rts_capability_req(q, mp); 947 return; 948 case T_INFO_REQ: 949 rts_info_req(q, mp); 950 return; 951 case T_SVR4_OPTMGMT_REQ: 952 (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj); 953 return; 954 case T_OPTMGMT_REQ: 955 (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj); 956 return; 957 case O_T_CONN_RES: 958 case T_CONN_RES: 959 case T_DISCON_REQ: 960 /* Not supported by rts. */ 961 rts_err_ack(q, mp, TNOTSUPPORT, 0); 962 return; 963 case T_DATA_REQ: 964 case T_EXDATA_REQ: 965 case T_ORDREL_REQ: 966 /* Illegal for rts. */ 967 freemsg(mp); 968 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 969 return; 970 default: 971 break; 972 } 973 break; 974 case M_IOCTL: 975 iocp = (struct iocblk *)mp->b_rptr; 976 switch (iocp->ioc_cmd) { 977 case ND_SET: 978 case ND_GET: 979 if (nd_getset(q, rts_g_nd, mp)) { 980 qreply(q, mp); 981 return; 982 } 983 break; 984 case TI_GETPEERNAME: 985 mi_copyin(q, mp, NULL, 986 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 987 return; 988 default: 989 break; 990 } 991 case M_IOCDATA: 992 rts_wput_iocdata(q, mp); 993 return; 994 default: 995 break; 996 } 997 putnext(q, mp); 998 } 999 1000 /* 1001 * Called by rts_wput_other to handle all M_IOCDATA messages. 
1002 */ 1003 static void 1004 rts_wput_iocdata(queue_t *q, mblk_t *mp) 1005 { 1006 struct sockaddr *rtsaddr; 1007 mblk_t *mp1; 1008 STRUCT_HANDLE(strbuf, sb); 1009 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 1010 1011 /* Make sure it is one of ours. */ 1012 switch (iocp->ioc_cmd) { 1013 case TI_GETPEERNAME: 1014 break; 1015 default: 1016 putnext(q, mp); 1017 return; 1018 } 1019 switch (mi_copy_state(q, mp, &mp1)) { 1020 case -1: 1021 return; 1022 case MI_COPY_CASE(MI_COPY_IN, 1): 1023 break; 1024 case MI_COPY_CASE(MI_COPY_OUT, 1): 1025 /* Copy out the strbuf. */ 1026 mi_copyout(q, mp); 1027 return; 1028 case MI_COPY_CASE(MI_COPY_OUT, 2): 1029 /* All done. */ 1030 mi_copy_done(q, mp, 0); 1031 return; 1032 default: 1033 mi_copy_done(q, mp, EPROTO); 1034 return; 1035 } 1036 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 1037 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) { 1038 mi_copy_done(q, mp, EINVAL); 1039 return; 1040 } 1041 switch (iocp->ioc_cmd) { 1042 case TI_GETPEERNAME: 1043 break; 1044 default: 1045 mi_copy_done(q, mp, EPROTO); 1046 return; 1047 } 1048 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t), 1049 B_TRUE); 1050 if (mp1 == NULL) 1051 return; 1052 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 1053 rtsaddr = (struct sockaddr *)mp1->b_rptr; 1054 mp1->b_wptr = (uchar_t *)&rtsaddr[1]; 1055 bzero(rtsaddr, sizeof (struct sockaddr)); 1056 rtsaddr->sa_family = AF_ROUTE; 1057 /* Copy out the address */ 1058 mi_copyout(q, mp); 1059 } 1060 1061 static void 1062 rts_rput(queue_t *q, mblk_t *mp) 1063 { 1064 rts_t *rts; 1065 struct iocblk *iocp; 1066 mblk_t *mp1; 1067 struct T_data_ind *tdi; 1068 1069 rts = (rts_t *)q->q_ptr; 1070 switch (mp->b_datap->db_type) { 1071 case M_IOCACK: 1072 case M_IOCNAK: 1073 iocp = (struct iocblk *)mp->b_rptr; 1074 if (rts->rts_flag & (RTS_WPUT_PENDING|RTS_OPEN_PENDING)) { 1075 if (rts->rts_flag & RTS_WPUT_PENDING) 1076 rts->rts_flag &= ~RTS_WPUT_PENDING; 1077 else 1078 rts->rts_flag &= 
~RTS_OPEN_PENDING; 1079 rts->rts_error = iocp->ioc_error; 1080 freemsg(mp); 1081 return; 1082 } 1083 break; 1084 case M_DATA: 1085 /* 1086 * Prepend T_DATA_IND to prevent the stream head from 1087 * consolidating multiple messages together. 1088 * If the allocation fails just send up the M_DATA. 1089 */ 1090 mp1 = allocb(sizeof (*tdi), BPRI_MED); 1091 if (mp1 != NULL) { 1092 mp1->b_cont = mp; 1093 mp = mp1; 1094 1095 mp->b_datap->db_type = M_PROTO; 1096 mp->b_wptr += sizeof (*tdi); 1097 tdi = (struct T_data_ind *)mp->b_rptr; 1098 tdi->PRIM_type = T_DATA_IND; 1099 tdi->MORE_flag = 0; 1100 } 1101 break; 1102 default: 1103 break; 1104 } 1105 putnext(q, mp); 1106 } 1107 1108 1109 void 1110 rts_ddi_init(void) 1111 { 1112 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, 1113 rts_opt_obj.odb_opt_arr_cnt); 1114 } 1115