1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/strsubr.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/strlog.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/cmn_err.h> 41 #include <sys/proc.h> 42 #include <sys/suntpi.h> 43 #include <sys/policy.h> 44 45 #include <sys/socket.h> 46 #include <netinet/in.h> 47 48 #include <inet/common.h> 49 #include <netinet/ip6.h> 50 #include <inet/ip.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/optcom.h> 54 #include <netinet/ip_mroute.h> 55 #include <sys/isa_defs.h> 56 #include <net/route.h> 57 58 /* 59 * This is a transport provider for routing sockets. Downstream messages are 60 * wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry 61 * in the ip_ioctl_ftbl callout table to pass the routing socket data into IP. 62 * Upstream messages are generated for listeners of the routing socket as well 63 * as the message sender (unless they have turned off their end using 64 * SO_USELOOPBACK or shutdown(3n)). Upstream messages may also be generated 65 * asynchronously when: 66 * 67 * Interfaces are brought up or down. 68 * Addresses are assigned to interfaces. 69 * ICMP redirects are processed and a IRE_HOST_REDIRECT is installed. 70 * No route is found while sending a packet. 71 * When TCP requests IP to remove an IRE_CACHE of a troubled destination. 72 * 73 * Since all we do is reformat the messages between routing socket and 74 * ioctl forms, no synchronization is necessary in this module; all 75 * the dirty work is done down in ip. 76 */ 77 78 /* 79 * Object to represent database of options to search passed to 80 * {sock,tpi}optcom_req() interface routine to take care of option 81 * management and associated methods. 82 * XXX. These and other externs should really move to a rts header. 83 */ 84 extern optdb_obj_t rts_opt_obj; 85 extern uint_t rts_max_optsize; 86 87 /* Internal routing socket stream control structure, one per open stream */ 88 typedef struct rts_s { 89 cred_t *rts_credp; /* Opener's credentials */ 90 uint_t rts_state; /* Provider interface state */ 91 uint_t rts_error; /* Routing socket error code */ 92 uint_t rts_flag; /* Pending I/O state */ 93 uint_t rts_proto; /* SO_PROTOTYPE "socket" option. */ 94 uint_t rts_debug : 1, /* SO_DEBUG "socket" option. */ 95 rts_dontroute : 1, /* SO_DONTROUTE "socket" option. */ 96 rts_broadcast : 1, /* SO_BROADCAST "socket" option. */ 97 rts_reuseaddr : 1, /* SO_REUSEADDR "socket" option. */ 98 rts_useloopback : 1, /* SO_USELOOPBACK "socket" option. */ 99 rts_multicast_loop : 1, /* IP_MULTICAST_LOOP option */ 100 rts_hdrincl : 1, /* IP_HDRINCL option + RAW and IGMP */ 101 102 : 0; 103 } rts_t; 104 105 #define RTS_WPUT_PENDING 0x1 /* Waiting for write-side to complete */ 106 #define RTS_WRW_PENDING 0x2 /* Routing socket write in progress */ 107 #define RTS_OPEN_PENDING 0x4 /* Routing socket open in progress */ 108 109 /* Default structure copied into T_INFO_ACK messages */ 110 static struct T_info_ack rts_g_t_info_ack = { 111 T_INFO_ACK, 112 T_INFINITE, /* TSDU_size. Maximum size messages. */ 113 T_INVALID, /* ETSDU_size. No expedited data. */ 114 T_INVALID, /* CDATA_size. No connect data. */ 115 T_INVALID, /* DDATA_size. No disconnect data. */ 116 0, /* ADDR_size. */ 117 0, /* OPT_size - not initialized here */ 118 64 * 1024, /* TIDU_size. rts allows maximum size messages. */ 119 T_COTS, /* SERV_type. rts supports connection oriented. */ 120 TS_UNBND, /* CURRENT_state. This is set from rts_state. */ 121 (XPG4_1) /* PROVIDER_flag */ 122 }; 123 124 /* Named Dispatch Parameter Management Structure */ 125 typedef struct rtspparam_s { 126 uint_t rts_param_min; 127 uint_t rts_param_max; 128 uint_t rts_param_value; 129 char *rts_param_name; 130 } rtsparam_t; 131 132 /* 133 * Table of ND variables supported by rts. These are loaded into rts_g_nd 134 * in rts_open. 135 * All of these are alterable, within the min/max values given, at run time. 136 */ 137 static rtsparam_t rts_param_arr[] = { 138 /* min max value name */ 139 { 4096, 65536, 8192, "rts_xmit_hiwat"}, 140 { 0, 65536, 1024, "rts_xmit_lowat"}, 141 { 4096, 65536, 8192, "rts_recv_hiwat"}, 142 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, 143 }; 144 #define rts_xmit_hiwat rts_param_arr[0].rts_param_value 145 #define rts_xmit_lowat rts_param_arr[1].rts_param_value 146 #define rts_recv_hiwat rts_param_arr[2].rts_param_value 147 #define rts_max_buf rts_param_arr[3].rts_param_value 148 149 static int rts_close(queue_t *q); 150 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 151 int sys_error); 152 static mblk_t *rts_ioctl_alloc(mblk_t *data, cred_t *cr); 153 static int rts_open(queue_t *q, dev_t *devp, int flag, int sflag, 154 cred_t *credp); 155 int rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, 156 uchar_t *ptr); 157 int rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, 158 uchar_t *ptr); 159 int rts_opt_set(queue_t *q, uint_t optset_context, int level, 160 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 161 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk); 162 static void rts_param_cleanup(void); 163 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 164 static boolean_t rts_param_register(rtsparam_t *rtspa, int cnt); 165 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 166 cred_t *cr); 167 static void rts_rput(queue_t *q, mblk_t *mp); 168 static void rts_wput(queue_t *q, mblk_t *mp); 169 static void rts_wput_iocdata(queue_t *q, mblk_t *mp); 170 static void rts_wput_other(queue_t *q, mblk_t *mp); 171 static int rts_wrw(queue_t *q, struiod_t *dp); 172 173 static struct module_info info = { 174 129, "rts", 1, INFPSZ, 512, 128 175 }; 176 177 static struct qinit rinit = { 178 (pfi_t)rts_rput, NULL, rts_open, rts_close, NULL, &info 179 }; 180 181 static struct qinit winit = { 182 (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &info, 183 NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD 184 }; 185 186 struct streamtab rtsinfo = { 187 &rinit, &winit 188 }; 189 190 static IDP rts_g_nd; /* Points to table of RTS ND variables. */ 191 uint_t rts_open_streams = 0; 192 193 /* 194 * This routine allocates the necessary 195 * message blocks for IOCTL wrapping the 196 * user data. 197 */ 198 static mblk_t * 199 rts_ioctl_alloc(mblk_t *data, cred_t *cr) 200 { 201 mblk_t *mp = NULL; 202 mblk_t *mp1 = NULL; 203 ipllc_t *ipllc; 204 struct iocblk *ioc; 205 206 mp = allocb_cred(sizeof (ipllc_t), cr); 207 if (mp == NULL) 208 return (NULL); 209 mp1 = allocb_cred(sizeof (struct iocblk), cr); 210 if (mp1 == NULL) { 211 freeb(mp); 212 return (NULL); 213 } 214 215 ipllc = (ipllc_t *)mp->b_rptr; 216 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST; 217 ipllc->ipllc_name_offset = 0; 218 ipllc->ipllc_name_length = 0; 219 mp->b_wptr += sizeof (ipllc_t); 220 mp->b_cont = data; 221 222 ioc = (struct iocblk *)mp1->b_rptr; 223 ioc->ioc_cmd = IP_IOCTL; 224 ioc->ioc_error = 0; 225 ioc->ioc_cr = NULL; 226 ioc->ioc_count = msgdsize(mp); 227 mp1->b_wptr += sizeof (struct iocblk); 228 mp1->b_datap->db_type = M_IOCTL; 229 mp1->b_cont = mp; 230 231 return (mp1); 232 } 233 234 /* 235 * This routine closes rts stream, by disabling 236 * put/srv routines and freeing the this module 237 * internal datastructure. 238 */ 239 static int 240 rts_close(queue_t *q) 241 { 242 qprocsoff(q); 243 244 crfree(((rts_t *)q->q_ptr)->rts_credp); 245 246 mi_free(q->q_ptr); 247 rts_open_streams--; 248 /* 249 * Free the ND table if this was 250 * the last stream close 251 */ 252 rts_param_cleanup(); 253 return (0); 254 } 255 256 /* 257 * This is the open routine for routing socket. It allocates 258 * rts_t structure for the stream and sends an IOCTL to 259 * the down module to indicate that it is a routing socket 260 * stream. 261 */ 262 /* ARGSUSED */ 263 static int 264 rts_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 265 { 266 mblk_t *mp = NULL; 267 rts_t *rts; 268 269 /* If the stream is already open, return immediately. */ 270 if (q->q_ptr != NULL) 271 return (0); 272 273 /* If this is not a push of rts as a module, fail. */ 274 if (sflag != MODOPEN) 275 return (EINVAL); 276 277 /* If this is the first open of rts, create the ND table. */ 278 if (rts_g_nd == NULL) { 279 if (!rts_param_register(rts_param_arr, A_CNT(rts_param_arr))) 280 return (ENOMEM); 281 } 282 q->q_ptr = mi_zalloc_sleep(sizeof (rts_t)); 283 WR(q)->q_ptr = q->q_ptr; 284 rts = (rts_t *)q->q_ptr; 285 286 rts->rts_credp = credp; 287 crhold(credp); 288 /* 289 * The receive hiwat is only looked at on the stream head queue. 290 * Store in q_hiwat in order to return on SO_RCVBUF getsockopts. 291 */ 292 q->q_hiwat = rts_recv_hiwat; 293 /* 294 * The transmit hiwat/lowat is only looked at on IP's queue. 295 * Store in q_hiwat/q_lowat in order to return on SO_SNDBUF/SO_SNDLOWAT 296 * getsockopts. 297 */ 298 WR(q)->q_hiwat = rts_xmit_hiwat; 299 WR(q)->q_lowat = rts_xmit_lowat; 300 qprocson(q); 301 /* 302 * Indicate the down IP module that this is a routing socket 303 * client by sending an RTS IOCTL without any user data. Although 304 * this is just a notification message (without any real routing 305 * request), we pass in any credential for correctness sake. 306 */ 307 mp = rts_ioctl_alloc(NULL, credp); 308 if (mp == NULL) { 309 rts_param_cleanup(); 310 qprocsoff(q); 311 ASSERT(q->q_ptr != NULL); 312 mi_free(q->q_ptr); 313 crfree(credp); 314 return (ENOMEM); 315 } 316 rts_open_streams++; 317 rts->rts_flag |= RTS_OPEN_PENDING; 318 putnext(WR(q), mp); 319 while (rts->rts_flag & RTS_OPEN_PENDING) { 320 if (!qwait_sig(q)) { 321 (void) rts_close(q); 322 return (EINTR); 323 } 324 } 325 if (rts->rts_error != 0) { 326 (void) rts_close(q); 327 return (ENOTSUP); 328 } 329 rts->rts_state = TS_UNBND; 330 return (0); 331 } 332 333 /* 334 * This routine creates a T_ERROR_ACK message and passes it upstream. 335 */ 336 static void 337 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 338 { 339 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 340 qreply(q, mp); 341 } 342 343 /* 344 * This routine creates a T_OK_ACK message and passes it upstream. 345 */ 346 static void 347 rts_ok_ack(queue_t *q, mblk_t *mp) 348 { 349 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL) 350 qreply(q, mp); 351 } 352 353 /* 354 * This routine is called by rts_wput to handle T_UNBIND_REQ messages. 355 * After some error checking, the message is passed downstream to ip. 356 */ 357 static void 358 rts_unbind(queue_t *q, mblk_t *mp) 359 { 360 rts_t *rts; 361 362 rts = (rts_t *)q->q_ptr; 363 /* If a bind has not been done, we can't unbind. */ 364 if (rts->rts_state != TS_IDLE) { 365 rts_err_ack(q, mp, TOUTSTATE, 0); 366 return; 367 } 368 rts->rts_state = TS_UNBND; 369 rts_ok_ack(q, mp); 370 } 371 372 /* 373 * This routine is called to handle each 374 * O_T_BIND_REQ/T_BIND_REQ message passed to 375 * rts_wput. Note: This routine works with both 376 * O_T_BIND_REQ and T_BIND_REQ semantics. 377 */ 378 static void 379 rts_bind(queue_t *q, mblk_t *mp) 380 { 381 mblk_t *mp1; 382 struct T_bind_req *tbr; 383 rts_t *rts; 384 385 rts = (rts_t *)q->q_ptr; 386 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 387 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 388 "rts_bind: bad data, %d", rts->rts_state); 389 rts_err_ack(q, mp, TBADADDR, 0); 390 return; 391 } 392 if (rts->rts_state != TS_UNBND) { 393 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 394 "rts_bind: bad state, %d", rts->rts_state); 395 rts_err_ack(q, mp, TOUTSTATE, 0); 396 return; 397 } 398 /* 399 * Reallocate the message to make sure we have enough room for an 400 * address and the protocol type. 401 */ 402 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin_t), 1); 403 if (mp1 == NULL) { 404 rts_err_ack(q, mp, TSYSERR, ENOMEM); 405 return; 406 } 407 mp = mp1; 408 tbr = (struct T_bind_req *)mp->b_rptr; 409 if (tbr->ADDR_length != 0) { 410 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 411 "rts_bind: bad ADDR_length %d", tbr->ADDR_length); 412 rts_err_ack(q, mp, TBADADDR, 0); 413 return; 414 } 415 /* Generic request */ 416 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req); 417 tbr->ADDR_length = 0; 418 tbr->PRIM_type = T_BIND_ACK; 419 rts->rts_state = TS_IDLE; 420 qreply(q, mp); 421 } 422 423 static void 424 rts_copy_info(struct T_info_ack *tap, rts_t *rts) 425 { 426 *tap = rts_g_t_info_ack; 427 tap->CURRENT_state = rts->rts_state; 428 tap->OPT_size = rts_max_optsize; 429 } 430 431 /* 432 * This routine responds to T_CAPABILITY_REQ messages. It is called by 433 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from 434 * rts_g_t_info_ack. The current state of the stream is copied from 435 * rts_state. 436 */ 437 static void 438 rts_capability_req(queue_t *q, mblk_t *mp) 439 { 440 rts_t *rts = (rts_t *)q->q_ptr; 441 t_uscalar_t cap_bits1; 442 struct T_capability_ack *tcap; 443 444 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 445 446 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 447 mp->b_datap->db_type, T_CAPABILITY_ACK); 448 if (mp == NULL) 449 return; 450 451 tcap = (struct T_capability_ack *)mp->b_rptr; 452 tcap->CAP_bits1 = 0; 453 454 if (cap_bits1 & TC1_INFO) { 455 rts_copy_info(&tcap->INFO_ack, rts); 456 tcap->CAP_bits1 |= TC1_INFO; 457 } 458 459 qreply(q, mp); 460 } 461 462 /* 463 * This routine responds to T_INFO_REQ messages. It is called by rts_wput. 464 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack. 465 * The current state of the stream is copied from rts_state. 466 */ 467 static void 468 rts_info_req(queue_t *q, mblk_t *mp) 469 { 470 rts_t *rts = (rts_t *)q->q_ptr; 471 472 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, 473 T_INFO_ACK); 474 if (mp == NULL) 475 return; 476 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts); 477 qreply(q, mp); 478 } 479 480 /* 481 * This routine gets default values of certain options whose default 482 * values are maintained by protcol specific code 483 */ 484 /* ARGSUSED */ 485 int 486 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 487 { 488 /* no default value processed by protocol specific code currently */ 489 return (-1); 490 } 491 492 /* 493 * This routine retrieves the current status of socket options. 494 * It returns the size of the option retrieved. 495 */ 496 int 497 rts_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 498 { 499 int *i1 = (int *)ptr; 500 rts_t *rts = (rts_t *)q->q_ptr; 501 502 switch (level) { 503 case SOL_SOCKET: 504 switch (name) { 505 case SO_DEBUG: 506 *i1 = rts->rts_debug; 507 break; 508 case SO_REUSEADDR: 509 *i1 = rts->rts_reuseaddr; 510 break; 511 case SO_TYPE: 512 *i1 = SOCK_RAW; 513 break; 514 515 /* 516 * The following three items are available here, 517 * but are only meaningful to IP. 518 */ 519 case SO_DONTROUTE: 520 *i1 = rts->rts_dontroute; 521 break; 522 case SO_USELOOPBACK: 523 *i1 = rts->rts_useloopback; 524 break; 525 case SO_BROADCAST: 526 *i1 = rts->rts_broadcast; 527 break; 528 case SO_PROTOTYPE: 529 *i1 = rts->rts_proto; 530 break; 531 /* 532 * The following two items can be manipulated, 533 * but changing them should do nothing. 534 */ 535 case SO_SNDBUF: 536 ASSERT(q->q_hiwat <= INT_MAX); 537 *i1 = (int)(q->q_hiwat); 538 break; 539 case SO_RCVBUF: 540 ASSERT(q->q_hiwat <= INT_MAX); 541 *i1 = (int)(RD(q)->q_hiwat); 542 break; 543 default: 544 return (-1); 545 } 546 break; 547 default: 548 return (-1); 549 } 550 return ((int)sizeof (int)); 551 } 552 553 554 /* 555 * This routine sets socket options. 556 */ 557 /*ARGSUSED*/ 558 int 559 rts_opt_set(queue_t *q, uint_t optset_context, int level, 560 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 561 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 562 { 563 int *i1 = (int *)invalp; 564 rts_t *rts = (rts_t *)q->q_ptr; 565 boolean_t checkonly; 566 567 switch (optset_context) { 568 case SETFN_OPTCOM_CHECKONLY: 569 checkonly = B_TRUE; 570 /* 571 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 572 * inlen != 0 implies value supplied and 573 * we have to "pretend" to set it. 574 * inlen == 0 implies that there is no 575 * value part in T_CHECK request and just validation 576 * done elsewhere should be enough, we just return here. 577 */ 578 if (inlen == 0) { 579 *outlenp = 0; 580 return (0); 581 } 582 break; 583 case SETFN_OPTCOM_NEGOTIATE: 584 checkonly = B_FALSE; 585 break; 586 case SETFN_UD_NEGOTIATE: 587 case SETFN_CONN_NEGOTIATE: 588 checkonly = B_FALSE; 589 /* 590 * Negotiating local and "association-related" options 591 * through T_UNITDATA_REQ or T_CONN_{REQ,CON} 592 * Not allowed in this module. 593 */ 594 return (EINVAL); 595 default: 596 /* 597 * We should never get here 598 */ 599 *outlenp = 0; 600 return (EINVAL); 601 } 602 603 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 604 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 605 606 /* 607 * For rts, we should have no ancillary data sent down 608 * (rts_wput doesn't handle options). 609 */ 610 ASSERT(thisdg_attrs == NULL); 611 612 /* 613 * For fixed length options, no sanity check 614 * of passed in length is done. It is assumed *_optcom_req() 615 * routines do the right thing. 616 */ 617 618 switch (level) { 619 case SOL_SOCKET: 620 switch (name) { 621 case SO_REUSEADDR: 622 if (!checkonly) 623 rts->rts_reuseaddr = *i1; 624 break; /* goto sizeof (int) option return */ 625 case SO_DEBUG: 626 if (!checkonly) 627 rts->rts_debug = *i1; 628 break; /* goto sizeof (int) option return */ 629 /* 630 * The following three items are available here, 631 * but are only meaningful to IP. 632 */ 633 case SO_DONTROUTE: 634 if (!checkonly) 635 rts->rts_dontroute = *i1; 636 break; /* goto sizeof (int) option return */ 637 case SO_USELOOPBACK: 638 if (!checkonly) 639 rts->rts_useloopback = *i1; 640 break; /* goto sizeof (int) option return */ 641 case SO_BROADCAST: 642 if (!checkonly) 643 rts->rts_broadcast = *i1; 644 break; /* goto sizeof (int) option return */ 645 case SO_PROTOTYPE: 646 /* 647 * Routing socket applications that call socket() with 648 * a third argument can filter which messages will be 649 * sent upstream thanks to sockfs. so_socket() sends 650 * down the SO_PROTOTYPE and rts_queue_input() 651 * implements the filtering. 652 */ 653 if (*i1 != AF_INET && *i1 != AF_INET6) 654 return (EPROTONOSUPPORT); 655 if (!checkonly) 656 rts->rts_proto = *i1; 657 break; /* goto sizeof (int) option return */ 658 /* 659 * The following two items can be manipulated, 660 * but changing them should do nothing. 661 */ 662 case SO_SNDBUF: 663 if (*i1 > rts_max_buf) { 664 *outlenp = 0; 665 return (ENOBUFS); 666 } 667 if (!checkonly) { 668 q->q_hiwat = *i1; 669 q->q_next->q_hiwat = *i1; 670 } 671 break; /* goto sizeof (int) option return */ 672 case SO_RCVBUF: 673 if (*i1 > rts_max_buf) { 674 *outlenp = 0; 675 return (ENOBUFS); 676 } 677 if (!checkonly) { 678 RD(q)->q_hiwat = *i1; 679 (void) mi_set_sth_hiwat(RD(q), *i1); 680 } 681 break; /* goto sizeof (int) option return */ 682 default: 683 *outlenp = 0; 684 return (EINVAL); 685 } 686 break; 687 default: 688 *outlenp = 0; 689 return (EINVAL); 690 } 691 /* 692 * Common case of return from an option that is sizeof (int) 693 */ 694 *(int *)outvalp = *i1; 695 *outlenp = (t_uscalar_t)sizeof (int); 696 return (0); 697 } 698 699 /* 700 * This routine frees the ND table if all streams have been closed. 701 * It is called by rts_close and rts_open. 702 */ 703 static void 704 rts_param_cleanup(void) 705 { 706 if (!rts_open_streams) 707 nd_free(&rts_g_nd); 708 } 709 710 /* 711 * This routine retrieves the value of an ND variable in a rtsparam_t 712 * structure. It is called through nd_getset when a user reads the 713 * variable. 714 */ 715 /* ARGSUSED */ 716 static int 717 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 718 { 719 rtsparam_t *rtspa = (rtsparam_t *)cp; 720 721 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value); 722 return (0); 723 } 724 725 /* 726 * Walk through the param array specified registering each element with the 727 * named dispatch (ND) handler. 728 */ 729 static boolean_t 730 rts_param_register(rtsparam_t *rtspa, int cnt) 731 { 732 for (; cnt-- > 0; rtspa++) { 733 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { 734 if (!nd_load(&rts_g_nd, rtspa->rts_param_name, 735 rts_param_get, rts_param_set, (caddr_t)rtspa)) { 736 nd_free(&rts_g_nd); 737 return (B_FALSE); 738 } 739 } 740 } 741 return (B_TRUE); 742 } 743 744 /* This routine sets an ND variable in a rtsparam_t structure. */ 745 /* ARGSUSED */ 746 static int 747 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 748 { 749 ulong_t new_value; 750 rtsparam_t *rtspa = (rtsparam_t *)cp; 751 752 /* 753 * Fail the request if the new value does not lie within the 754 * required bounds. 755 */ 756 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 757 new_value < rtspa->rts_param_min || 758 new_value > rtspa->rts_param_max) { 759 return (EINVAL); 760 } 761 762 /* Set the new value */ 763 rtspa->rts_param_value = new_value; 764 return (0); 765 } 766 767 /* 768 * This routine handles synchronous messages passed downstream. It either 769 * consumes the message or passes it downstream; it never queues a 770 * a message. The data messages that go down are wrapped in an IOCTL 771 * message. 772 * 773 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that 774 * it can return an immediate error (such as ENETUNREACH when adding a route). 775 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only 776 * one M_IOCTL outstanding at any given time. 777 */ 778 static int 779 rts_wrw(queue_t *q, struiod_t *dp) 780 { 781 mblk_t *mp = dp->d_mp; 782 mblk_t *mp1; 783 int error; 784 rt_msghdr_t *rtm; 785 rts_t *rts; 786 787 rts = (rts_t *)q->q_ptr; 788 while (rts->rts_flag & RTS_WRW_PENDING) { 789 if (qwait_rw(q)) { 790 rts->rts_error = EINTR; 791 goto err_ret; 792 } 793 } 794 rts->rts_flag |= RTS_WRW_PENDING; 795 796 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 797 /* 798 * Uio error of some sort, so just return the error. 799 */ 800 rts->rts_error = error; 801 goto err_ret; 802 } 803 /* 804 * Pass the mblk (chain) onto wput(). 805 */ 806 dp->d_mp = 0; 807 808 switch (mp->b_datap->db_type) { 809 case M_PROTO: 810 case M_PCPROTO: 811 /* Expedite other than T_DATA_REQ to below the switch */ 812 if (((mp->b_wptr - mp->b_rptr) != 813 sizeof (struct T_data_req)) || 814 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ)) 815 break; 816 if ((mp1 = mp->b_cont) == NULL) { 817 rts->rts_error = EINVAL; 818 goto err_ret; 819 } 820 freeb(mp); 821 mp = mp1; 822 /* FALLTHRU */ 823 case M_DATA: 824 /* 825 * The semantics of the routing socket is such that the rtm_pid 826 * field is automatically filled in during requests with the 827 * current process' pid. We do this here (where we still have 828 * user context) after checking we have at least a message the 829 * size of a routing message header. 830 */ 831 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 832 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 833 rts->rts_error = EINVAL; 834 goto err_ret; 835 } 836 } 837 rtm = (rt_msghdr_t *)mp->b_rptr; 838 rtm->rtm_pid = curproc->p_pid; 839 break; 840 default: 841 break; 842 } 843 rts->rts_flag |= RTS_WPUT_PENDING; 844 rts_wput(q, mp); 845 while (rts->rts_flag & RTS_WPUT_PENDING) 846 if (qwait_rw(q)) { 847 /* RTS_WPUT_PENDING will be cleared below */ 848 rts->rts_error = EINTR; 849 break; 850 } 851 err_ret: 852 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING); 853 return (rts->rts_error); 854 } 855 856 /* 857 * This routine handles all messages passed downstream. It either 858 * consumes the message or passes it downstream; it never queues a 859 * a message. The data messages that go down are wrapped in an IOCTL 860 * message. 861 */ 862 static void 863 rts_wput(queue_t *q, mblk_t *mp) 864 { 865 uchar_t *rptr = mp->b_rptr; 866 mblk_t *mp1; 867 868 switch (mp->b_datap->db_type) { 869 case M_DATA: 870 break; 871 case M_PROTO: 872 case M_PCPROTO: 873 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) { 874 /* Expedite valid T_DATA_REQ to below the switch */ 875 if (((union T_primitives *)rptr)->type == T_DATA_REQ) { 876 mp1 = mp->b_cont; 877 freeb(mp); 878 if (mp1 == NULL) 879 return; 880 mp = mp1; 881 break; 882 } 883 } 884 /* FALLTHRU */ 885 default: 886 rts_wput_other(q, mp); 887 return; 888 } 889 890 891 mp1 = rts_ioctl_alloc(mp, DB_CRED(mp)); 892 if (mp1 == NULL) { 893 rts_t *rts = (rts_t *)q->q_ptr; 894 895 ASSERT(rts != NULL); 896 freemsg(mp); 897 if (rts->rts_flag & RTS_WPUT_PENDING) { 898 rts->rts_error = ENOMEM; 899 rts->rts_flag &= ~RTS_WPUT_PENDING; 900 } 901 return; 902 } 903 putnext(q, mp1); 904 } 905 906 907 /* 908 * Handles all the control message, if it 909 * can not understand it, it will 910 * pass down stream. 911 */ 912 static void 913 rts_wput_other(queue_t *q, mblk_t *mp) 914 { 915 uchar_t *rptr = mp->b_rptr; 916 rts_t *rts; 917 struct iocblk *iocp; 918 cred_t *cr; 919 920 rts = (rts_t *)q->q_ptr; 921 922 cr = DB_CREDDEF(mp, rts->rts_credp); 923 924 switch (mp->b_datap->db_type) { 925 case M_PROTO: 926 case M_PCPROTO: 927 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 928 /* 929 * If the message does not contain a PRIM_type, 930 * throw it away. 931 */ 932 freemsg(mp); 933 return; 934 } 935 switch (((union T_primitives *)rptr)->type) { 936 case T_BIND_REQ: 937 case O_T_BIND_REQ: 938 rts_bind(q, mp); 939 return; 940 case T_UNBIND_REQ: 941 rts_unbind(q, mp); 942 return; 943 case T_CAPABILITY_REQ: 944 rts_capability_req(q, mp); 945 return; 946 case T_INFO_REQ: 947 rts_info_req(q, mp); 948 return; 949 case T_SVR4_OPTMGMT_REQ: 950 (void) svr4_optcom_req(q, mp, cr, &rts_opt_obj); 951 return; 952 case T_OPTMGMT_REQ: 953 (void) tpi_optcom_req(q, mp, cr, &rts_opt_obj); 954 return; 955 case O_T_CONN_RES: 956 case T_CONN_RES: 957 case T_DISCON_REQ: 958 /* Not supported by rts. */ 959 rts_err_ack(q, mp, TNOTSUPPORT, 0); 960 return; 961 case T_DATA_REQ: 962 case T_EXDATA_REQ: 963 case T_ORDREL_REQ: 964 /* Illegal for rts. */ 965 freemsg(mp); 966 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 967 return; 968 default: 969 break; 970 } 971 break; 972 case M_IOCTL: 973 iocp = (struct iocblk *)mp->b_rptr; 974 switch (iocp->ioc_cmd) { 975 case ND_SET: 976 case ND_GET: 977 if (nd_getset(q, rts_g_nd, mp)) { 978 qreply(q, mp); 979 return; 980 } 981 break; 982 case TI_GETPEERNAME: 983 mi_copyin(q, mp, NULL, 984 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 985 return; 986 default: 987 break; 988 } 989 case M_IOCDATA: 990 rts_wput_iocdata(q, mp); 991 return; 992 default: 993 break; 994 } 995 putnext(q, mp); 996 } 997 998 /* 999 * Called by rts_wput_other to handle all M_IOCDATA messages. 1000 */ 1001 static void 1002 rts_wput_iocdata(queue_t *q, mblk_t *mp) 1003 { 1004 struct sockaddr *rtsaddr; 1005 mblk_t *mp1; 1006 STRUCT_HANDLE(strbuf, sb); 1007 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 1008 1009 /* Make sure it is one of ours. */ 1010 switch (iocp->ioc_cmd) { 1011 case TI_GETPEERNAME: 1012 break; 1013 default: 1014 putnext(q, mp); 1015 return; 1016 } 1017 switch (mi_copy_state(q, mp, &mp1)) { 1018 case -1: 1019 return; 1020 case MI_COPY_CASE(MI_COPY_IN, 1): 1021 break; 1022 case MI_COPY_CASE(MI_COPY_OUT, 1): 1023 /* Copy out the strbuf. */ 1024 mi_copyout(q, mp); 1025 return; 1026 case MI_COPY_CASE(MI_COPY_OUT, 2): 1027 /* All done. */ 1028 mi_copy_done(q, mp, 0); 1029 return; 1030 default: 1031 mi_copy_done(q, mp, EPROTO); 1032 return; 1033 } 1034 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 1035 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) { 1036 mi_copy_done(q, mp, EINVAL); 1037 return; 1038 } 1039 switch (iocp->ioc_cmd) { 1040 case TI_GETPEERNAME: 1041 break; 1042 default: 1043 mi_copy_done(q, mp, EPROTO); 1044 return; 1045 } 1046 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t), 1047 B_TRUE); 1048 if (mp1 == NULL) 1049 return; 1050 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 1051 rtsaddr = (struct sockaddr *)mp1->b_rptr; 1052 mp1->b_wptr = (uchar_t *)&rtsaddr[1]; 1053 bzero(rtsaddr, sizeof (struct sockaddr)); 1054 rtsaddr->sa_family = AF_ROUTE; 1055 /* Copy out the address */ 1056 mi_copyout(q, mp); 1057 } 1058 1059 static void 1060 rts_rput(queue_t *q, mblk_t *mp) 1061 { 1062 rts_t *rts; 1063 struct iocblk *iocp; 1064 mblk_t *mp1; 1065 struct T_data_ind *tdi; 1066 1067 rts = (rts_t *)q->q_ptr; 1068 switch (mp->b_datap->db_type) { 1069 case M_IOCACK: 1070 case M_IOCNAK: 1071 iocp = (struct iocblk *)mp->b_rptr; 1072 if (rts->rts_flag & (RTS_WPUT_PENDING|RTS_OPEN_PENDING)) { 1073 if (rts->rts_flag & RTS_WPUT_PENDING) 1074 rts->rts_flag &= ~RTS_WPUT_PENDING; 1075 else 1076 rts->rts_flag &= ~RTS_OPEN_PENDING; 1077 rts->rts_error = iocp->ioc_error; 1078 freemsg(mp); 1079 return; 1080 } 1081 break; 1082 case M_DATA: 1083 /* 1084 * Prepend T_DATA_IND to prevent the stream head from 1085 * consolidating multiple messages together. 1086 * If the allocation fails just send up the M_DATA. 1087 */ 1088 mp1 = allocb(sizeof (*tdi), BPRI_MED); 1089 if (mp1 != NULL) { 1090 mp1->b_cont = mp; 1091 mp = mp1; 1092 1093 mp->b_datap->db_type = M_PROTO; 1094 mp->b_wptr += sizeof (*tdi); 1095 tdi = (struct T_data_ind *)mp->b_rptr; 1096 tdi->PRIM_type = T_DATA_IND; 1097 tdi->MORE_flag = 0; 1098 } 1099 break; 1100 default: 1101 break; 1102 } 1103 putnext(q, mp); 1104 } 1105 1106 1107 void 1108 rts_ddi_init(void) 1109 { 1110 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, 1111 rts_opt_obj.odb_opt_arr_cnt); 1112 } 1113