1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2018 Toomas Soome <tsoome@me.com> 25 */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/strsubr.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/strlog.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/timod.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/proc.h> 40 #include <sys/suntpi.h> 41 #include <sys/policy.h> 42 #include <sys/zone.h> 43 #include <sys/disp.h> 44 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <netinet/in.h> 48 49 #include <inet/common.h> 50 #include <netinet/ip6.h> 51 #include <inet/ip.h> 52 #include <inet/ipclassifier.h> 53 #include <inet/proto_set.h> 54 #include <inet/nd.h> 55 #include <inet/optcom.h> 56 #include <netinet/ip_mroute.h> 57 #include <sys/isa_defs.h> 58 #include <net/route.h> 59 60 #include <inet/rts_impl.h> 61 #include <inet/ip_rts.h> 62 63 /* 64 * This is a transport provider for routing sockets. Downstream messages are 65 * wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry 66 * in the ip_ioctl_ftbl callout table to pass the routing socket data into IP. 67 * Upstream messages are generated for listeners of the routing socket as well 68 * as the message sender (unless they have turned off their end using 69 * SO_USELOOPBACK or shutdown(3n)). Upstream messages may also be generated 70 * asynchronously when: 71 * 72 * Interfaces are brought up or down. 73 * Addresses are assigned to interfaces. 74 * ICMP redirects are processed and a IRE_HOST/RTF_DYNAMIC is installed. 75 * No route is found while sending a packet. 76 * 77 * Since all we do is reformat the messages between routing socket and 78 * ioctl forms, no synchronization is necessary in this module; all 79 * the dirty work is done down in ip. 80 */ 81 82 /* Default structure copied into T_INFO_ACK messages */ 83 static struct T_info_ack rts_g_t_info_ack = { 84 T_INFO_ACK, 85 T_INFINITE, /* TSDU_size. Maximum size messages. */ 86 T_INVALID, /* ETSDU_size. No expedited data. */ 87 T_INVALID, /* CDATA_size. No connect data. */ 88 T_INVALID, /* DDATA_size. No disconnect data. */ 89 0, /* ADDR_size. */ 90 0, /* OPT_size - not initialized here */ 91 64 * 1024, /* TIDU_size. rts allows maximum size messages. */ 92 T_COTS, /* SERV_type. rts supports connection oriented. */ 93 TS_UNBND, /* CURRENT_state. This is set from rts_state. */ 94 (XPG4_1) /* PROVIDER_flag */ 95 }; 96 97 /* 98 * Table of ND variables supported by rts. These are loaded into rts_g_nd 99 * in rts_open. 100 * All of these are alterable, within the min/max values given, at run time. 101 */ 102 static rtsparam_t lcl_param_arr[] = { 103 /* min max value name */ 104 { 4096, 65536, 8192, "rts_xmit_hiwat"}, 105 { 0, 65536, 1024, "rts_xmit_lowat"}, 106 { 4096, 65536, 8192, "rts_recv_hiwat"}, 107 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, 108 }; 109 #define rtss_xmit_hiwat rtss_params[0].rts_param_value 110 #define rtss_xmit_lowat rtss_params[1].rts_param_value 111 #define rtss_recv_hiwat rtss_params[2].rts_param_value 112 #define rtss_max_buf rtss_params[3].rts_param_value 113 114 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 115 int sys_error); 116 static void rts_input(void *, mblk_t *, void *, ip_recv_attr_t *); 117 static void rts_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 118 static mblk_t *rts_ioctl_alloc(mblk_t *data); 119 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 120 static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt); 121 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 122 cred_t *cr); 123 static int rts_rsrv(queue_t *q); 124 static void *rts_stack_init(netstackid_t stackid, netstack_t *ns); 125 static void rts_stack_fini(netstackid_t stackid, void *arg); 126 static int rts_wput(queue_t *q, mblk_t *mp); 127 static void rts_wput_iocdata(queue_t *q, mblk_t *mp); 128 static void rts_wput_other(queue_t *q, mblk_t *mp); 129 static int rts_wrw(queue_t *q, struiod_t *dp); 130 131 static int rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, 132 cred_t *credp); 133 static conn_t *rts_open(int flag, cred_t *credp); 134 135 static int rts_stream_close(queue_t *, int, cred_t *); 136 static int rts_close(sock_lower_handle_t proto_handle, int flags, 137 cred_t *cr); 138 139 static struct module_info rts_mod_info = { 140 129, "rts", 1, INFPSZ, 512, 128 141 }; 142 143 static struct qinit rtsrinit = { 144 NULL, rts_rsrv, rts_stream_open, rts_stream_close, NULL, 145 &rts_mod_info 146 }; 147 148 static struct qinit rtswinit = { 149 rts_wput, NULL, NULL, NULL, NULL, &rts_mod_info, 150 NULL, rts_wrw, NULL, STRUIOT_STANDARD 151 }; 152 153 struct streamtab rtsinfo = { 154 &rtsrinit, &rtswinit 155 }; 156 157 /* 158 * This routine allocates the necessary 159 * message blocks for IOCTL wrapping the 160 * user data. 161 */ 162 static mblk_t * 163 rts_ioctl_alloc(mblk_t *data) 164 { 165 mblk_t *mp = NULL; 166 mblk_t *mp1 = NULL; 167 ipllc_t *ipllc; 168 struct iocblk *ioc; 169 170 mp = allocb_tmpl(sizeof (ipllc_t), data); 171 if (mp == NULL) 172 return (NULL); 173 mp1 = allocb_tmpl(sizeof (struct iocblk), data); 174 if (mp1 == NULL) { 175 freeb(mp); 176 return (NULL); 177 } 178 179 ipllc = (ipllc_t *)mp->b_rptr; 180 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST; 181 ipllc->ipllc_name_offset = 0; 182 ipllc->ipllc_name_length = 0; 183 mp->b_wptr += sizeof (ipllc_t); 184 mp->b_cont = data; 185 186 ioc = (struct iocblk *)mp1->b_rptr; 187 ioc->ioc_cmd = IP_IOCTL; 188 ioc->ioc_error = 0; 189 ioc->ioc_cr = NULL; 190 ioc->ioc_count = msgdsize(mp); 191 mp1->b_wptr += sizeof (struct iocblk); 192 mp1->b_datap->db_type = M_IOCTL; 193 mp1->b_cont = mp; 194 195 return (mp1); 196 } 197 198 /* 199 * This routine closes rts stream, by disabling 200 * put/srv routines and freeing the this module 201 * internal datastructure. 202 */ 203 static int 204 rts_common_close(queue_t *q, conn_t *connp) 205 { 206 207 ASSERT(connp != NULL && IPCL_IS_RTS(connp)); 208 209 ip_rts_unregister(connp); 210 211 ip_quiesce_conn(connp); 212 213 if (!IPCL_IS_NONSTR(connp)) { 214 qprocsoff(q); 215 } 216 217 /* 218 * Now we are truly single threaded on this stream, and can 219 * delete the things hanging off the connp, and finally the connp. 220 * We removed this connp from the fanout list, it cannot be 221 * accessed thru the fanouts, and we already waited for the 222 * conn_ref to drop to 0. We are already in close, so 223 * there cannot be any other thread from the top. qprocsoff 224 * has completed, and service has completed or won't run in 225 * future. 226 */ 227 ASSERT(connp->conn_ref == 1); 228 229 if (!IPCL_IS_NONSTR(connp)) { 230 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 231 } else { 232 ip_free_helper_stream(connp); 233 } 234 235 connp->conn_ref--; 236 ipcl_conn_destroy(connp); 237 return (0); 238 } 239 240 /* ARGSUSED */ 241 static int 242 rts_stream_close(queue_t *q, int flags __unused, cred_t *credp __unused) 243 { 244 conn_t *connp = Q_TO_CONN(q); 245 246 (void) rts_common_close(q, connp); 247 q->q_ptr = WR(q)->q_ptr = NULL; 248 return (0); 249 } 250 251 /* 252 * This is the open routine for routing socket. It allocates 253 * rts_t structure for the stream and tells IP that it is a routing socket. 254 */ 255 /* ARGSUSED */ 256 static int 257 rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 258 { 259 conn_t *connp; 260 dev_t conn_dev; 261 rts_t *rts; 262 263 /* If the stream is already open, return immediately. */ 264 if (q->q_ptr != NULL) 265 return (0); 266 267 if (sflag == MODOPEN) 268 return (EINVAL); 269 270 /* 271 * Since RTS is not used so heavily, allocating from the small 272 * arena should be sufficient. 273 */ 274 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 275 return (EBUSY); 276 } 277 278 connp = rts_open(flag, credp); 279 ASSERT(connp != NULL); 280 281 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 282 283 rts = connp->conn_rts; 284 rw_enter(&rts->rts_rwlock, RW_WRITER); 285 connp->conn_dev = conn_dev; 286 connp->conn_minor_arena = ip_minor_arena_sa; 287 288 q->q_ptr = connp; 289 WR(q)->q_ptr = connp; 290 connp->conn_rq = q; 291 connp->conn_wq = WR(q); 292 293 WR(q)->q_hiwat = connp->conn_sndbuf; 294 WR(q)->q_lowat = connp->conn_sndlowat; 295 296 mutex_enter(&connp->conn_lock); 297 connp->conn_state_flags &= ~CONN_INCIPIENT; 298 mutex_exit(&connp->conn_lock); 299 rw_exit(&rts->rts_rwlock); 300 301 /* Indicate to IP that this is a routing socket client */ 302 ip_rts_register(connp); 303 304 qprocson(q); 305 306 return (0); 307 } 308 309 /* ARGSUSED */ 310 static conn_t * 311 rts_open(int flag, cred_t *credp) 312 { 313 netstack_t *ns; 314 rts_stack_t *rtss; 315 rts_t *rts; 316 conn_t *connp; 317 zoneid_t zoneid; 318 319 ns = netstack_find_by_cred(credp); 320 ASSERT(ns != NULL); 321 rtss = ns->netstack_rts; 322 ASSERT(rtss != NULL); 323 324 /* 325 * For exclusive stacks we set the zoneid to zero 326 * to make RTS operate as if in the global zone. 327 */ 328 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 329 zoneid = GLOBAL_ZONEID; 330 else 331 zoneid = crgetzoneid(credp); 332 333 connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns); 334 rts = connp->conn_rts; 335 336 /* 337 * ipcl_conn_create did a netstack_hold. Undo the hold that was 338 * done by netstack_find_by_cred() 339 */ 340 netstack_rele(ns); 341 342 rw_enter(&rts->rts_rwlock, RW_WRITER); 343 ASSERT(connp->conn_rts == rts); 344 ASSERT(rts->rts_connp == connp); 345 346 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 347 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 348 connp->conn_ixa->ixa_zoneid = zoneid; 349 connp->conn_zoneid = zoneid; 350 connp->conn_flow_cntrld = B_FALSE; 351 352 rts->rts_rtss = rtss; 353 354 connp->conn_rcvbuf = rtss->rtss_recv_hiwat; 355 connp->conn_sndbuf = rtss->rtss_xmit_hiwat; 356 connp->conn_sndlowat = rtss->rtss_xmit_lowat; 357 connp->conn_rcvlowat = rts_mod_info.mi_lowat; 358 359 connp->conn_family = PF_ROUTE; 360 connp->conn_so_type = SOCK_RAW; 361 /* SO_PROTOTYPE is always sent down by sockfs setting conn_proto */ 362 363 connp->conn_recv = rts_input; 364 connp->conn_recvicmp = rts_icmp_input; 365 366 crhold(credp); 367 connp->conn_cred = credp; 368 connp->conn_cpid = curproc->p_pid; 369 /* Cache things in ixa without an extra refhold */ 370 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 371 connp->conn_ixa->ixa_cred = connp->conn_cred; 372 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 373 if (is_system_labeled()) 374 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 375 376 /* 377 * rts sockets start out as bound and connected 378 * For streams based sockets, socket state is set to 379 * SS_ISBOUND | SS_ISCONNECTED in so_strinit. 380 */ 381 rts->rts_state = TS_DATA_XFER; 382 rw_exit(&rts->rts_rwlock); 383 384 return (connp); 385 } 386 387 /* 388 * This routine creates a T_ERROR_ACK message and passes it upstream. 389 */ 390 static void 391 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 392 { 393 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 394 qreply(q, mp); 395 } 396 397 /* 398 * This routine creates a T_OK_ACK message and passes it upstream. 399 */ 400 static void 401 rts_ok_ack(queue_t *q, mblk_t *mp) 402 { 403 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL) 404 qreply(q, mp); 405 } 406 407 /* 408 * This routine is called by rts_wput to handle T_UNBIND_REQ messages. 409 */ 410 static void 411 rts_tpi_unbind(queue_t *q, mblk_t *mp) 412 { 413 conn_t *connp = Q_TO_CONN(q); 414 rts_t *rts = connp->conn_rts; 415 416 /* If a bind has not been done, we can't unbind. */ 417 if (rts->rts_state != TS_IDLE) { 418 rts_err_ack(q, mp, TOUTSTATE, 0); 419 return; 420 } 421 rts->rts_state = TS_UNBND; 422 rts_ok_ack(q, mp); 423 } 424 425 /* 426 * This routine is called to handle each 427 * O_T_BIND_REQ/T_BIND_REQ message passed to 428 * rts_wput. Note: This routine works with both 429 * O_T_BIND_REQ and T_BIND_REQ semantics. 430 */ 431 static void 432 rts_tpi_bind(queue_t *q, mblk_t *mp) 433 { 434 conn_t *connp = Q_TO_CONN(q); 435 rts_t *rts = connp->conn_rts; 436 struct T_bind_req *tbr; 437 438 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 439 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 440 "rts_tpi_bind: bad data, %d", rts->rts_state); 441 rts_err_ack(q, mp, TBADADDR, 0); 442 return; 443 } 444 if (rts->rts_state != TS_UNBND) { 445 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 446 "rts_tpi_bind: bad state, %d", rts->rts_state); 447 rts_err_ack(q, mp, TOUTSTATE, 0); 448 return; 449 } 450 tbr = (struct T_bind_req *)mp->b_rptr; 451 if (tbr->ADDR_length != 0) { 452 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 453 "rts_tpi_bind: bad ADDR_length %d", tbr->ADDR_length); 454 rts_err_ack(q, mp, TBADADDR, 0); 455 return; 456 } 457 /* Generic request */ 458 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req); 459 tbr->ADDR_length = 0; 460 tbr->PRIM_type = T_BIND_ACK; 461 mp->b_datap->db_type = M_PCPROTO; 462 rts->rts_state = TS_IDLE; 463 qreply(q, mp); 464 } 465 466 static void 467 rts_copy_info(struct T_info_ack *tap, rts_t *rts) 468 { 469 *tap = rts_g_t_info_ack; 470 tap->CURRENT_state = rts->rts_state; 471 tap->OPT_size = rts_max_optsize; 472 } 473 474 /* 475 * This routine responds to T_CAPABILITY_REQ messages. It is called by 476 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from 477 * rts_g_t_info_ack. The current state of the stream is copied from 478 * rts_state. 479 */ 480 static void 481 rts_capability_req(queue_t *q, mblk_t *mp) 482 { 483 conn_t *connp = Q_TO_CONN(q); 484 rts_t *rts = connp->conn_rts; 485 t_uscalar_t cap_bits1; 486 struct T_capability_ack *tcap; 487 488 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 489 490 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 491 mp->b_datap->db_type, T_CAPABILITY_ACK); 492 if (mp == NULL) 493 return; 494 495 tcap = (struct T_capability_ack *)mp->b_rptr; 496 tcap->CAP_bits1 = 0; 497 498 if (cap_bits1 & TC1_INFO) { 499 rts_copy_info(&tcap->INFO_ack, rts); 500 tcap->CAP_bits1 |= TC1_INFO; 501 } 502 503 qreply(q, mp); 504 } 505 506 /* 507 * This routine responds to T_INFO_REQ messages. It is called by rts_wput. 508 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack. 509 * The current state of the stream is copied from rts_state. 510 */ 511 static void 512 rts_info_req(queue_t *q, mblk_t *mp) 513 { 514 conn_t *connp = Q_TO_CONN(q); 515 rts_t *rts = connp->conn_rts; 516 517 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, 518 T_INFO_ACK); 519 if (mp == NULL) 520 return; 521 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts); 522 qreply(q, mp); 523 } 524 525 /* 526 * This routine gets default values of certain options whose default 527 * values are maintained by protcol specific code 528 */ 529 /* ARGSUSED */ 530 int 531 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 532 { 533 /* no default value processed by protocol specific code currently */ 534 return (-1); 535 } 536 537 538 static int 539 rts_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 540 { 541 rts_t *rts = connp->conn_rts; 542 conn_opt_arg_t coas; 543 int retval; 544 545 ASSERT(RW_READ_HELD(&rts->rts_rwlock)); 546 547 switch (level) { 548 /* do this in conn_opt_get? */ 549 case SOL_ROUTE: 550 switch (name) { 551 case RT_AWARE: 552 mutex_enter(&connp->conn_lock); 553 *(int *)ptr = connp->conn_rtaware; 554 mutex_exit(&connp->conn_lock); 555 return (0); 556 } 557 break; 558 } 559 coas.coa_connp = connp; 560 coas.coa_ixa = connp->conn_ixa; 561 coas.coa_ipp = &connp->conn_xmit_ipp; 562 mutex_enter(&connp->conn_lock); 563 retval = conn_opt_get(&coas, level, name, ptr); 564 mutex_exit(&connp->conn_lock); 565 return (retval); 566 } 567 568 /* ARGSUSED */ 569 static int 570 rts_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 571 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 572 void *thisdg_attrs, boolean_t checkonly) 573 { 574 int *i1 = (int *)invalp; 575 rts_t *rts = connp->conn_rts; 576 rts_stack_t *rtss = rts->rts_rtss; 577 int error; 578 conn_opt_arg_t coas; 579 580 coas.coa_connp = connp; 581 coas.coa_ixa = connp->conn_ixa; 582 coas.coa_ipp = &connp->conn_xmit_ipp; 583 584 ASSERT(RW_WRITE_HELD(&rts->rts_rwlock)); 585 586 /* 587 * For rts, we should have no ancillary data sent down 588 * (rts_wput doesn't handle options). 589 */ 590 ASSERT(thisdg_attrs == NULL); 591 592 /* 593 * For fixed length options, no sanity check 594 * of passed in length is done. It is assumed *_optcom_req() 595 * routines do the right thing. 596 */ 597 598 switch (level) { 599 case SOL_SOCKET: 600 switch (name) { 601 case SO_PROTOTYPE: 602 /* 603 * Routing socket applications that call socket() with 604 * a third argument can filter which messages will be 605 * sent upstream thanks to sockfs. so_socket() sends 606 * down the SO_PROTOTYPE and rts_queue_input() 607 * implements the filtering. 608 */ 609 if (*i1 != AF_INET && *i1 != AF_INET6) { 610 *outlenp = 0; 611 return (EPROTONOSUPPORT); 612 } 613 if (!checkonly) 614 connp->conn_proto = *i1; 615 *outlenp = inlen; 616 return (0); 617 618 /* 619 * The following two items can be manipulated, 620 * but changing them should do nothing. 621 */ 622 case SO_SNDBUF: 623 if (*i1 > rtss->rtss_max_buf) { 624 *outlenp = 0; 625 return (ENOBUFS); 626 } 627 break; /* goto sizeof (int) option return */ 628 case SO_RCVBUF: 629 if (*i1 > rtss->rtss_max_buf) { 630 *outlenp = 0; 631 return (ENOBUFS); 632 } 633 break; /* goto sizeof (int) option return */ 634 } 635 break; 636 case SOL_ROUTE: 637 switch (name) { 638 case RT_AWARE: 639 if (!checkonly) { 640 mutex_enter(&connp->conn_lock); 641 connp->conn_rtaware = *i1; 642 mutex_exit(&connp->conn_lock); 643 } 644 *outlenp = inlen; 645 return (0); 646 } 647 break; 648 } 649 /* Serialized setsockopt since we are D_MTQPAIR */ 650 error = conn_opt_set(&coas, level, name, inlen, invalp, 651 checkonly, cr); 652 if (error != 0) { 653 *outlenp = 0; 654 return (error); 655 } 656 /* 657 * Common case of return from an option that is sizeof (int) 658 */ 659 if (invalp != outvalp) { 660 /* don't trust bcopy for identical src/dst */ 661 (void) bcopy(invalp, outvalp, inlen); 662 } 663 *outlenp = (t_uscalar_t)sizeof (int); 664 return (0); 665 } 666 667 static int 668 rts_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 669 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 670 void *thisdg_attrs, cred_t *cr) 671 { 672 boolean_t checkonly = B_FALSE; 673 674 if (optset_context) { 675 switch (optset_context) { 676 case SETFN_OPTCOM_CHECKONLY: 677 checkonly = B_TRUE; 678 /* 679 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 680 * inlen != 0 implies value supplied and 681 * we have to "pretend" to set it. 682 * inlen == 0 implies that there is no value part 683 * in T_CHECK request and just validation 684 * done elsewhere should be enough, we just return here. 685 */ 686 if (inlen == 0) { 687 *outlenp = 0; 688 return (0); 689 } 690 break; 691 case SETFN_OPTCOM_NEGOTIATE: 692 checkonly = B_FALSE; 693 break; 694 case SETFN_UD_NEGOTIATE: 695 case SETFN_CONN_NEGOTIATE: 696 checkonly = B_FALSE; 697 /* 698 * Negotiating local and "association-related" options 699 * through T_UNITDATA_REQ or T_CONN_{REQ,CON} 700 * Not allowed in this module. 701 */ 702 return (EINVAL); 703 default: 704 /* 705 * We should never get here 706 */ 707 *outlenp = 0; 708 return (EINVAL); 709 } 710 711 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 712 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 713 714 } 715 return (rts_do_opt_set(connp, level, name, inlen, invalp, outlenp, 716 outvalp, cr, thisdg_attrs, checkonly)); 717 718 } 719 720 /* 721 * This routine retrieves the current status of socket options. 722 * It returns the size of the option retrieved. 723 */ 724 int 725 rts_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 726 { 727 rts_t *rts; 728 int err; 729 730 rts = Q_TO_RTS(q); 731 rw_enter(&rts->rts_rwlock, RW_READER); 732 err = rts_opt_get(Q_TO_CONN(q), level, name, ptr); 733 rw_exit(&rts->rts_rwlock); 734 return (err); 735 } 736 737 /* 738 * This routine sets socket options. 739 */ 740 /*ARGSUSED*/ 741 int 742 rts_tpi_opt_set(queue_t *q, uint_t optset_context, int level, 743 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 744 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 745 { 746 conn_t *connp = Q_TO_CONN(q); 747 int error; 748 rts_t *rts = connp->conn_rts; 749 750 751 rw_enter(&rts->rts_rwlock, RW_WRITER); 752 error = rts_opt_set(connp, optset_context, level, name, inlen, invalp, 753 outlenp, outvalp, thisdg_attrs, cr); 754 rw_exit(&rts->rts_rwlock); 755 return (error); 756 } 757 758 /* 759 * This routine retrieves the value of an ND variable in a rtsparam_t 760 * structure. It is called through nd_getset when a user reads the 761 * variable. 762 */ 763 /* ARGSUSED */ 764 static int 765 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 766 { 767 rtsparam_t *rtspa = (rtsparam_t *)cp; 768 769 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value); 770 return (0); 771 } 772 773 /* 774 * Walk through the param array specified registering each element with the 775 * named dispatch (ND) handler. 776 */ 777 static boolean_t 778 rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt) 779 { 780 for (; cnt-- > 0; rtspa++) { 781 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { 782 if (!nd_load(ndp, rtspa->rts_param_name, 783 rts_param_get, rts_param_set, (caddr_t)rtspa)) { 784 nd_free(ndp); 785 return (B_FALSE); 786 } 787 } 788 } 789 return (B_TRUE); 790 } 791 792 /* This routine sets an ND variable in a rtsparam_t structure. */ 793 /* ARGSUSED */ 794 static int 795 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 796 { 797 ulong_t new_value; 798 rtsparam_t *rtspa = (rtsparam_t *)cp; 799 800 /* 801 * Fail the request if the new value does not lie within the 802 * required bounds. 803 */ 804 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 805 new_value < rtspa->rts_param_min || 806 new_value > rtspa->rts_param_max) { 807 return (EINVAL); 808 } 809 810 /* Set the new value */ 811 rtspa->rts_param_value = new_value; 812 return (0); 813 } 814 815 /* 816 * Empty rsrv routine which is used by rts_input to cause a wakeup 817 * of a thread in qwait. 818 */ 819 /*ARGSUSED*/ 820 static int 821 rts_rsrv(queue_t *q) 822 { 823 return (0); 824 } 825 826 /* 827 * This routine handles synchronous messages passed downstream. It either 828 * consumes the message or passes it downstream; it never queues a 829 * a message. The data messages that go down are wrapped in an IOCTL 830 * message. 831 * 832 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that 833 * it can return an immediate error (such as ENETUNREACH when adding a route). 834 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only 835 * one M_IOCTL outstanding at any given time. 836 */ 837 static int 838 rts_wrw(queue_t *q, struiod_t *dp) 839 { 840 mblk_t *mp = dp->d_mp; 841 mblk_t *mp1; 842 int error; 843 rt_msghdr_t *rtm; 844 conn_t *connp = Q_TO_CONN(q); 845 rts_t *rts = connp->conn_rts; 846 847 while (rts->rts_flag & RTS_WRW_PENDING) { 848 if (qwait_rw(q)) { 849 rts->rts_error = EINTR; 850 goto err_ret; 851 } 852 } 853 rts->rts_flag |= RTS_WRW_PENDING; 854 855 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 856 /* 857 * Uio error of some sort, so just return the error. 858 */ 859 rts->rts_error = error; 860 goto err_ret; 861 } 862 /* 863 * Pass the mblk (chain) onto wput(). 864 */ 865 dp->d_mp = 0; 866 867 switch (mp->b_datap->db_type) { 868 case M_PROTO: 869 case M_PCPROTO: 870 /* Expedite other than T_DATA_REQ to below the switch */ 871 if (((mp->b_wptr - mp->b_rptr) != 872 sizeof (struct T_data_req)) || 873 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ)) 874 break; 875 if ((mp1 = mp->b_cont) == NULL) { 876 rts->rts_error = EINVAL; 877 freemsg(mp); 878 goto err_ret; 879 } 880 freeb(mp); 881 mp = mp1; 882 /* FALLTHRU */ 883 case M_DATA: 884 /* 885 * The semantics of the routing socket is such that the rtm_pid 886 * field is automatically filled in during requests with the 887 * current process' pid. We do this here (where we still have 888 * user context) after checking we have at least a message the 889 * size of a routing message header. 890 */ 891 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 892 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 893 rts->rts_error = EINVAL; 894 freemsg(mp); 895 goto err_ret; 896 } 897 } 898 rtm = (rt_msghdr_t *)mp->b_rptr; 899 rtm->rtm_pid = curproc->p_pid; 900 break; 901 default: 902 break; 903 } 904 rts->rts_flag |= RTS_WPUT_PENDING; 905 rts_wput(q, mp); 906 while (rts->rts_flag & RTS_WPUT_PENDING) 907 if (qwait_rw(q)) { 908 /* RTS_WPUT_PENDING will be cleared below */ 909 rts->rts_error = EINTR; 910 break; 911 } 912 err_ret: 913 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING); 914 return (rts->rts_error); 915 } 916 917 /* 918 * This routine handles all messages passed downstream. It either 919 * consumes the message or passes it downstream; it never queues a 920 * a message. The data messages that go down are wrapped in an IOCTL 921 * message. 922 */ 923 static int 924 rts_wput(queue_t *q, mblk_t *mp) 925 { 926 uchar_t *rptr = mp->b_rptr; 927 mblk_t *mp1; 928 conn_t *connp = Q_TO_CONN(q); 929 rts_t *rts = connp->conn_rts; 930 931 switch (mp->b_datap->db_type) { 932 case M_DATA: 933 break; 934 case M_PROTO: 935 case M_PCPROTO: 936 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) { 937 /* Expedite valid T_DATA_REQ to below the switch */ 938 if (((union T_primitives *)rptr)->type == T_DATA_REQ) { 939 mp1 = mp->b_cont; 940 freeb(mp); 941 if (mp1 == NULL) 942 return (0); 943 mp = mp1; 944 break; 945 } 946 } 947 /* FALLTHRU */ 948 default: 949 rts_wput_other(q, mp); 950 return (0); 951 } 952 953 954 ASSERT(msg_getcred(mp, NULL) != NULL); 955 956 mp1 = rts_ioctl_alloc(mp); 957 if (mp1 == NULL) { 958 ASSERT(rts != NULL); 959 freemsg(mp); 960 if (rts->rts_flag & RTS_WPUT_PENDING) { 961 rts->rts_error = ENOMEM; 962 rts->rts_flag &= ~RTS_WPUT_PENDING; 963 } 964 return (0); 965 } 966 ip_wput_nondata(q, mp1); 967 return (0); 968 } 969 970 971 /* 972 * Handles all the control message, if it 973 * can not understand it, it will 974 * pass down stream. 975 */ 976 static void 977 rts_wput_other(queue_t *q, mblk_t *mp) 978 { 979 conn_t *connp = Q_TO_CONN(q); 980 rts_t *rts = connp->conn_rts; 981 uchar_t *rptr = mp->b_rptr; 982 struct iocblk *iocp; 983 cred_t *cr; 984 rts_stack_t *rtss; 985 986 rtss = rts->rts_rtss; 987 988 switch (mp->b_datap->db_type) { 989 case M_PROTO: 990 case M_PCPROTO: 991 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 992 /* 993 * If the message does not contain a PRIM_type, 994 * throw it away. 995 */ 996 freemsg(mp); 997 return; 998 } 999 switch (((union T_primitives *)rptr)->type) { 1000 case T_BIND_REQ: 1001 case O_T_BIND_REQ: 1002 rts_tpi_bind(q, mp); 1003 return; 1004 case T_UNBIND_REQ: 1005 rts_tpi_unbind(q, mp); 1006 return; 1007 case T_CAPABILITY_REQ: 1008 rts_capability_req(q, mp); 1009 return; 1010 case T_INFO_REQ: 1011 rts_info_req(q, mp); 1012 return; 1013 case T_SVR4_OPTMGMT_REQ: 1014 case T_OPTMGMT_REQ: 1015 /* 1016 * All Solaris components should pass a db_credp 1017 * for this TPI message, hence we ASSERT. 1018 * But in case there is some other M_PROTO that looks 1019 * like a TPI message sent by some other kernel 1020 * component, we check and return an error. 1021 */ 1022 cr = msg_getcred(mp, NULL); 1023 ASSERT(cr != NULL); 1024 if (cr == NULL) { 1025 rts_err_ack(q, mp, TSYSERR, EINVAL); 1026 return; 1027 } 1028 if (((union T_primitives *)rptr)->type == 1029 T_SVR4_OPTMGMT_REQ) { 1030 svr4_optcom_req(q, mp, cr, &rts_opt_obj); 1031 } else { 1032 tpi_optcom_req(q, mp, cr, &rts_opt_obj); 1033 } 1034 return; 1035 case O_T_CONN_RES: 1036 case T_CONN_RES: 1037 case T_DISCON_REQ: 1038 /* Not supported by rts. */ 1039 rts_err_ack(q, mp, TNOTSUPPORT, 0); 1040 return; 1041 case T_DATA_REQ: 1042 case T_EXDATA_REQ: 1043 case T_ORDREL_REQ: 1044 /* Illegal for rts. */ 1045 freemsg(mp); 1046 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 1047 return; 1048 1049 default: 1050 break; 1051 } 1052 break; 1053 case M_IOCTL: 1054 iocp = (struct iocblk *)mp->b_rptr; 1055 switch (iocp->ioc_cmd) { 1056 case ND_SET: 1057 case ND_GET: 1058 if (nd_getset(q, rtss->rtss_g_nd, mp)) { 1059 qreply(q, mp); 1060 return; 1061 } 1062 break; 1063 case TI_GETPEERNAME: 1064 mi_copyin(q, mp, NULL, 1065 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 1066 return; 1067 default: 1068 break; 1069 } 1070 break; 1071 case M_IOCDATA: 1072 rts_wput_iocdata(q, mp); 1073 return; 1074 default: 1075 break; 1076 } 1077 ip_wput_nondata(q, mp); 1078 } 1079 1080 /* 1081 * Called by rts_wput_other to handle all M_IOCDATA messages. 1082 */ 1083 static void 1084 rts_wput_iocdata(queue_t *q, mblk_t *mp) 1085 { 1086 struct sockaddr *rtsaddr; 1087 mblk_t *mp1; 1088 STRUCT_HANDLE(strbuf, sb); 1089 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 1090 1091 /* Make sure it is one of ours. */ 1092 switch (iocp->ioc_cmd) { 1093 case TI_GETPEERNAME: 1094 break; 1095 default: 1096 ip_wput_nondata(q, mp); 1097 return; 1098 } 1099 switch (mi_copy_state(q, mp, &mp1)) { 1100 case -1: 1101 return; 1102 case MI_COPY_CASE(MI_COPY_IN, 1): 1103 break; 1104 case MI_COPY_CASE(MI_COPY_OUT, 1): 1105 /* Copy out the strbuf. */ 1106 mi_copyout(q, mp); 1107 return; 1108 case MI_COPY_CASE(MI_COPY_OUT, 2): 1109 /* All done. */ 1110 mi_copy_done(q, mp, 0); 1111 return; 1112 default: 1113 mi_copy_done(q, mp, EPROTO); 1114 return; 1115 } 1116 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 1117 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) { 1118 mi_copy_done(q, mp, EINVAL); 1119 return; 1120 } 1121 switch (iocp->ioc_cmd) { 1122 case TI_GETPEERNAME: 1123 break; 1124 default: 1125 mi_copy_done(q, mp, EPROTO); 1126 return; 1127 } 1128 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t), 1129 B_TRUE); 1130 if (mp1 == NULL) 1131 return; 1132 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 1133 rtsaddr = (struct sockaddr *)mp1->b_rptr; 1134 mp1->b_wptr = (uchar_t *)&rtsaddr[1]; 1135 bzero(rtsaddr, sizeof (struct sockaddr)); 1136 rtsaddr->sa_family = AF_ROUTE; 1137 /* Copy out the address */ 1138 mi_copyout(q, mp); 1139 } 1140 1141 /* 1142 * IP passes up a NULL ira. 1143 */ 1144 /*ARGSUSED2*/ 1145 static void 1146 rts_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1147 { 1148 conn_t *connp = (conn_t *)arg1; 1149 rts_t *rts = connp->conn_rts; 1150 struct iocblk *iocp; 1151 mblk_t *mp1; 1152 struct T_data_ind *tdi; 1153 int error; 1154 1155 switch (mp->b_datap->db_type) { 1156 case M_IOCACK: 1157 case M_IOCNAK: 1158 iocp = (struct iocblk *)mp->b_rptr; 1159 ASSERT(!IPCL_IS_NONSTR(connp)); 1160 if (rts->rts_flag & (RTS_WPUT_PENDING)) { 1161 rts->rts_flag &= ~RTS_WPUT_PENDING; 1162 rts->rts_error = iocp->ioc_error; 1163 /* 1164 * Tell rts_wvw/qwait that we are done. 1165 * Note: there is no qwait_wakeup() we can use. 1166 */ 1167 qenable(connp->conn_rq); 1168 freemsg(mp); 1169 return; 1170 } 1171 break; 1172 case M_DATA: 1173 /* 1174 * Prepend T_DATA_IND to prevent the stream head from 1175 * consolidating multiple messages together. 1176 * If the allocation fails just send up the M_DATA. 1177 */ 1178 mp1 = allocb(sizeof (*tdi), BPRI_MED); 1179 if (mp1 != NULL) { 1180 mp1->b_cont = mp; 1181 mp = mp1; 1182 1183 mp->b_datap->db_type = M_PROTO; 1184 mp->b_wptr += sizeof (*tdi); 1185 tdi = (struct T_data_ind *)mp->b_rptr; 1186 tdi->PRIM_type = T_DATA_IND; 1187 tdi->MORE_flag = 0; 1188 } 1189 break; 1190 default: 1191 break; 1192 } 1193 1194 if (IPCL_IS_NONSTR(connp)) { 1195 if ((*connp->conn_upcalls->su_recv) 1196 (connp->conn_upper_handle, mp, msgdsize(mp), 0, 1197 &error, NULL) < 0) { 1198 ASSERT(error == ENOSPC); 1199 /* 1200 * Let's confirm hoding the lock that 1201 * we are out of recv space. 1202 */ 1203 mutex_enter(&rts->rts_recv_mutex); 1204 if ((*connp->conn_upcalls->su_recv) 1205 (connp->conn_upper_handle, NULL, 0, 0, 1206 &error, NULL) < 0) { 1207 ASSERT(error == ENOSPC); 1208 connp->conn_flow_cntrld = B_TRUE; 1209 } 1210 mutex_exit(&rts->rts_recv_mutex); 1211 } 1212 } else { 1213 putnext(connp->conn_rq, mp); 1214 } 1215 } 1216 1217 /*ARGSUSED*/ 1218 static void 1219 rts_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1220 { 1221 freemsg(mp); 1222 } 1223 1224 void 1225 rts_ddi_g_init(void) 1226 { 1227 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, 1228 rts_opt_obj.odb_opt_arr_cnt); 1229 1230 /* 1231 * We want to be informed each time a stack is created or 1232 * destroyed in the kernel, so we can maintain the 1233 * set of rts_stack_t's. 1234 */ 1235 netstack_register(NS_RTS, rts_stack_init, NULL, rts_stack_fini); 1236 } 1237 1238 void 1239 rts_ddi_g_destroy(void) 1240 { 1241 netstack_unregister(NS_RTS); 1242 } 1243 1244 #define INET_NAME "ip" 1245 1246 /* 1247 * Initialize the RTS stack instance. 1248 */ 1249 /* ARGSUSED */ 1250 static void * 1251 rts_stack_init(netstackid_t stackid, netstack_t *ns) 1252 { 1253 rts_stack_t *rtss; 1254 rtsparam_t *pa; 1255 int error = 0; 1256 major_t major; 1257 1258 rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP); 1259 rtss->rtss_netstack = ns; 1260 1261 pa = (rtsparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 1262 rtss->rtss_params = pa; 1263 bcopy(lcl_param_arr, rtss->rtss_params, sizeof (lcl_param_arr)); 1264 1265 (void) rts_param_register(&rtss->rtss_g_nd, 1266 rtss->rtss_params, A_CNT(lcl_param_arr)); 1267 1268 major = mod_name_to_major(INET_NAME); 1269 error = ldi_ident_from_major(major, &rtss->rtss_ldi_ident); 1270 ASSERT(error == 0); 1271 return (rtss); 1272 } 1273 1274 /* 1275 * Free the RTS stack instance. 1276 */ 1277 /* ARGSUSED */ 1278 static void 1279 rts_stack_fini(netstackid_t stackid, void *arg) 1280 { 1281 rts_stack_t *rtss = (rts_stack_t *)arg; 1282 1283 nd_free(&rtss->rtss_g_nd); 1284 kmem_free(rtss->rtss_params, sizeof (lcl_param_arr)); 1285 rtss->rtss_params = NULL; 1286 ldi_ident_release(rtss->rtss_ldi_ident); 1287 kmem_free(rtss, sizeof (*rtss)); 1288 } 1289 1290 /* ARGSUSED */ 1291 int 1292 rts_accept(sock_lower_handle_t lproto_handle, 1293 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 1294 cred_t *cr) 1295 { 1296 return (EINVAL); 1297 } 1298 1299 /* ARGSUSED */ 1300 static int 1301 rts_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 1302 socklen_t len, cred_t *cr) 1303 { 1304 /* 1305 * rebind not allowed 1306 */ 1307 return (EINVAL); 1308 } 1309 1310 /* ARGSUSED */ 1311 int 1312 rts_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 1313 { 1314 return (EINVAL); 1315 } 1316 1317 /* ARGSUSED */ 1318 int 1319 rts_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 1320 socklen_t len, sock_connid_t *id, cred_t *cr) 1321 { 1322 /* 1323 * rts sockets start out as bound and connected 1324 */ 1325 *id = 0; 1326 return (EISCONN); 1327 } 1328 1329 /* ARGSUSED */ 1330 int 1331 rts_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 1332 socklen_t *addrlen, cred_t *cr) 1333 { 1334 bzero(addr, sizeof (struct sockaddr)); 1335 addr->sa_family = AF_ROUTE; 1336 *addrlen = sizeof (struct sockaddr); 1337 1338 return (0); 1339 } 1340 1341 /* ARGSUSED */ 1342 int 1343 rts_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 1344 socklen_t *addrlen, cred_t *cr) 1345 { 1346 bzero(addr, sizeof (struct sockaddr)); 1347 addr->sa_family = AF_ROUTE; 1348 *addrlen = sizeof (struct sockaddr); 1349 1350 return (0); 1351 } 1352 1353 static int 1354 rts_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 1355 void *optvalp, socklen_t *optlen, cred_t *cr) 1356 { 1357 conn_t *connp = (conn_t *)proto_handle; 1358 rts_t *rts = connp->conn_rts; 1359 int error; 1360 t_uscalar_t max_optbuf_len; 1361 void *optvalp_buf; 1362 int len; 1363 1364 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 1365 rts_opt_obj.odb_opt_des_arr, 1366 rts_opt_obj.odb_opt_arr_cnt, 1367 B_FALSE, B_TRUE, cr); 1368 if (error != 0) { 1369 if (error < 0) 1370 error = proto_tlitosyserr(-error); 1371 return (error); 1372 } 1373 1374 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 1375 rw_enter(&rts->rts_rwlock, RW_READER); 1376 len = rts_opt_get(connp, level, option_name, optvalp_buf); 1377 rw_exit(&rts->rts_rwlock); 1378 if (len == -1) { 1379 kmem_free(optvalp_buf, max_optbuf_len); 1380 return (EINVAL); 1381 } 1382 1383 /* 1384 * update optlen and copy option value 1385 */ 1386 t_uscalar_t size = MIN(len, *optlen); 1387 1388 bcopy(optvalp_buf, optvalp, size); 1389 bcopy(&size, optlen, sizeof (size)); 1390 kmem_free(optvalp_buf, max_optbuf_len); 1391 return (0); 1392 } 1393 1394 static int 1395 rts_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 1396 const void *optvalp, socklen_t optlen, cred_t *cr) 1397 { 1398 conn_t *connp = (conn_t *)proto_handle; 1399 rts_t *rts = connp->conn_rts; 1400 int error; 1401 1402 error = proto_opt_check(level, option_name, optlen, NULL, 1403 rts_opt_obj.odb_opt_des_arr, 1404 rts_opt_obj.odb_opt_arr_cnt, 1405 B_TRUE, B_FALSE, cr); 1406 1407 if (error != 0) { 1408 if (error < 0) 1409 error = proto_tlitosyserr(-error); 1410 return (error); 1411 } 1412 1413 rw_enter(&rts->rts_rwlock, RW_WRITER); 1414 error = rts_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 1415 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 1416 NULL, cr); 1417 rw_exit(&rts->rts_rwlock); 1418 1419 ASSERT(error >= 0); 1420 1421 return (error); 1422 } 1423 1424 /* ARGSUSED */ 1425 static int 1426 rts_send(sock_lower_handle_t proto_handle, mblk_t *mp, 1427 struct nmsghdr *msg, cred_t *cr) 1428 { 1429 conn_t *connp = (conn_t *)proto_handle; 1430 rt_msghdr_t *rtm; 1431 int error; 1432 1433 ASSERT(DB_TYPE(mp) == M_DATA); 1434 /* 1435 * The semantics of the routing socket is such that the rtm_pid 1436 * field is automatically filled in during requests with the 1437 * current process' pid. We do this here (where we still have 1438 * user context) after checking we have at least a message the 1439 * size of a routing message header. 1440 */ 1441 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 1442 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 1443 freemsg(mp); 1444 return (EINVAL); 1445 } 1446 } 1447 rtm = (rt_msghdr_t *)mp->b_rptr; 1448 rtm->rtm_pid = curproc->p_pid; 1449 1450 /* 1451 * We are not constrained by the ioctl interface and 1452 * ip_rts_request_common processing requests synchronously hence 1453 * we can send them down concurrently. 1454 */ 1455 error = ip_rts_request_common(mp, connp, cr); 1456 return (error); 1457 } 1458 1459 /* ARGSUSED */ 1460 sock_lower_handle_t 1461 rts_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 1462 uint_t *smodep, int *errorp, int flags, cred_t *credp) 1463 { 1464 conn_t *connp; 1465 1466 if (family != AF_ROUTE || type != SOCK_RAW || 1467 (proto != 0 && proto != AF_INET && proto != AF_INET6)) { 1468 *errorp = EPROTONOSUPPORT; 1469 return (NULL); 1470 } 1471 1472 connp = rts_open(flags, credp); 1473 ASSERT(connp != NULL); 1474 connp->conn_flags |= IPCL_NONSTR; 1475 1476 connp->conn_proto = proto; 1477 1478 mutex_enter(&connp->conn_lock); 1479 connp->conn_state_flags &= ~CONN_INCIPIENT; 1480 mutex_exit(&connp->conn_lock); 1481 1482 *errorp = 0; 1483 *smodep = SM_ATOMIC; 1484 *sock_downcalls = &sock_rts_downcalls; 1485 return ((sock_lower_handle_t)connp); 1486 } 1487 1488 /* ARGSUSED */ 1489 void 1490 rts_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 1491 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 1492 { 1493 conn_t *connp = (conn_t *)proto_handle; 1494 struct sock_proto_props sopp; 1495 1496 connp->conn_upcalls = sock_upcalls; 1497 connp->conn_upper_handle = sock_handle; 1498 1499 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 1500 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 1501 sopp.sopp_wroff = 0; 1502 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 1503 sopp.sopp_rxlowat = connp->conn_rcvlowat; 1504 sopp.sopp_maxblk = INFPSZ; 1505 sopp.sopp_maxpsz = rts_mod_info.mi_maxpsz; 1506 sopp.sopp_minpsz = (rts_mod_info.mi_minpsz == 1) ? 0 : 1507 rts_mod_info.mi_minpsz; 1508 1509 (*connp->conn_upcalls->su_set_proto_props) 1510 (connp->conn_upper_handle, &sopp); 1511 1512 /* 1513 * We treat it as already connected for routing socket. 1514 */ 1515 (*connp->conn_upcalls->su_connected) 1516 (connp->conn_upper_handle, 0, NULL, -1); 1517 1518 /* Indicate to IP that this is a routing socket client */ 1519 ip_rts_register(connp); 1520 } 1521 1522 /* ARGSUSED */ 1523 int 1524 rts_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 1525 { 1526 conn_t *connp = (conn_t *)proto_handle; 1527 1528 ASSERT(connp != NULL && IPCL_IS_RTS(connp)); 1529 return (rts_common_close(NULL, connp)); 1530 } 1531 1532 /* ARGSUSED */ 1533 int 1534 rts_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 1535 { 1536 conn_t *connp = (conn_t *)proto_handle; 1537 1538 /* shut down the send side */ 1539 if (how != SHUT_RD) 1540 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 1541 SOCK_OPCTL_SHUT_SEND, 0); 1542 /* shut down the recv side */ 1543 if (how != SHUT_WR) 1544 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 1545 SOCK_OPCTL_SHUT_RECV, 0); 1546 return (0); 1547 } 1548 1549 void 1550 rts_clr_flowctrl(sock_lower_handle_t proto_handle) 1551 { 1552 conn_t *connp = (conn_t *)proto_handle; 1553 rts_t *rts = connp->conn_rts; 1554 1555 mutex_enter(&rts->rts_recv_mutex); 1556 connp->conn_flow_cntrld = B_FALSE; 1557 mutex_exit(&rts->rts_recv_mutex); 1558 } 1559 1560 int 1561 rts_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 1562 int mode, int32_t *rvalp, cred_t *cr) 1563 { 1564 conn_t *connp = (conn_t *)proto_handle; 1565 int error; 1566 1567 /* 1568 * If we don't have a helper stream then create one. 1569 * ip_create_helper_stream takes care of locking the conn_t, 1570 * so this check for NULL is just a performance optimization. 1571 */ 1572 if (connp->conn_helper_info == NULL) { 1573 rts_stack_t *rtss = connp->conn_rts->rts_rtss; 1574 1575 ASSERT(rtss->rtss_ldi_ident != NULL); 1576 1577 /* 1578 * Create a helper stream for non-STREAMS socket. 1579 */ 1580 error = ip_create_helper_stream(connp, rtss->rtss_ldi_ident); 1581 if (error != 0) { 1582 ip0dbg(("rts_ioctl: create of IP helper stream " 1583 "failed %d\n", error)); 1584 return (error); 1585 } 1586 } 1587 1588 switch (cmd) { 1589 case ND_SET: 1590 case ND_GET: 1591 case TI_GETPEERNAME: 1592 case TI_GETMYNAME: 1593 #ifdef DEUG 1594 cmn_err(CE_CONT, "rts_ioctl cmd 0x%x on non sreams" 1595 " socket", cmd); 1596 #endif 1597 error = EINVAL; 1598 break; 1599 default: 1600 /* 1601 * Pass on to IP using helper stream 1602 */ 1603 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 1604 cmd, arg, mode, cr, rvalp); 1605 break; 1606 } 1607 1608 return (error); 1609 } 1610 1611 sock_downcalls_t sock_rts_downcalls = { 1612 rts_activate, 1613 rts_accept, 1614 rts_bind, 1615 rts_listen, 1616 rts_connect, 1617 rts_getpeername, 1618 rts_getsockname, 1619 rts_getsockopt, 1620 rts_setsockopt, 1621 rts_send, 1622 NULL, 1623 NULL, 1624 NULL, 1625 rts_shutdown, 1626 rts_clr_flowctrl, 1627 rts_ioctl, 1628 rts_close 1629 }; 1630