1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2018 Toomas Soome <tsoome@me.com> 25 */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/strsubr.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/strlog.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/timod.h> 36 #include <sys/ddi.h> 37 #include <sys/sunddi.h> 38 #include <sys/cmn_err.h> 39 #include <sys/proc.h> 40 #include <sys/suntpi.h> 41 #include <sys/policy.h> 42 #include <sys/zone.h> 43 #include <sys/disp.h> 44 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <netinet/in.h> 48 49 #include <inet/common.h> 50 #include <netinet/ip6.h> 51 #include <inet/ip.h> 52 #include <inet/ipclassifier.h> 53 #include <inet/proto_set.h> 54 #include <inet/nd.h> 55 #include <inet/optcom.h> 56 #include <netinet/ip_mroute.h> 57 #include <sys/isa_defs.h> 58 #include <net/route.h> 59 60 #include <inet/rts_impl.h> 61 #include <inet/ip_rts.h> 62 63 /* 64 * This is a transport provider for routing sockets. Downstream messages are 65 * wrapped with a IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry 66 * in the ip_ioctl_ftbl callout table to pass the routing socket data into IP. 67 * Upstream messages are generated for listeners of the routing socket as well 68 * as the message sender (unless they have turned off their end using 69 * SO_USELOOPBACK or shutdown(3n)). Upstream messages may also be generated 70 * asynchronously when: 71 * 72 * Interfaces are brought up or down. 73 * Addresses are assigned to interfaces. 74 * ICMP redirects are processed and a IRE_HOST/RTF_DYNAMIC is installed. 75 * No route is found while sending a packet. 76 * 77 * Since all we do is reformat the messages between routing socket and 78 * ioctl forms, no synchronization is necessary in this module; all 79 * the dirty work is done down in ip. 80 */ 81 82 /* Default structure copied into T_INFO_ACK messages */ 83 static struct T_info_ack rts_g_t_info_ack = { 84 T_INFO_ACK, 85 T_INFINITE, /* TSDU_size. Maximum size messages. */ 86 T_INVALID, /* ETSDU_size. No expedited data. */ 87 T_INVALID, /* CDATA_size. No connect data. */ 88 T_INVALID, /* DDATA_size. No disconnect data. */ 89 0, /* ADDR_size. */ 90 0, /* OPT_size - not initialized here */ 91 64 * 1024, /* TIDU_size. rts allows maximum size messages. */ 92 T_COTS, /* SERV_type. rts supports connection oriented. */ 93 TS_UNBND, /* CURRENT_state. This is set from rts_state. */ 94 (XPG4_1) /* PROVIDER_flag */ 95 }; 96 97 /* 98 * Table of ND variables supported by rts. These are loaded into rts_g_nd 99 * in rts_open. 100 * All of these are alterable, within the min/max values given, at run time. 101 */ 102 static rtsparam_t lcl_param_arr[] = { 103 /* min max value name */ 104 { 4096, 65536, 8192, "rts_xmit_hiwat"}, 105 { 0, 65536, 1024, "rts_xmit_lowat"}, 106 { 4096, 65536, 8192, "rts_recv_hiwat"}, 107 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"}, 108 }; 109 #define rtss_xmit_hiwat rtss_params[0].rts_param_value 110 #define rtss_xmit_lowat rtss_params[1].rts_param_value 111 #define rtss_recv_hiwat rtss_params[2].rts_param_value 112 #define rtss_max_buf rtss_params[3].rts_param_value 113 114 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 115 int sys_error); 116 static void rts_input(void *, mblk_t *, void *, ip_recv_attr_t *); 117 static void rts_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 118 static mblk_t *rts_ioctl_alloc(mblk_t *data); 119 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 120 static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt); 121 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 122 cred_t *cr); 123 static void rts_rsrv(queue_t *q); 124 static void *rts_stack_init(netstackid_t stackid, netstack_t *ns); 125 static void rts_stack_fini(netstackid_t stackid, void *arg); 126 static void rts_wput(queue_t *q, mblk_t *mp); 127 static void rts_wput_iocdata(queue_t *q, mblk_t *mp); 128 static void rts_wput_other(queue_t *q, mblk_t *mp); 129 static int rts_wrw(queue_t *q, struiod_t *dp); 130 131 static int rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, 132 cred_t *credp); 133 static conn_t *rts_open(int flag, cred_t *credp); 134 135 static int rts_stream_close(queue_t *, int, cred_t *); 136 static int rts_close(sock_lower_handle_t proto_handle, int flags, 137 cred_t *cr); 138 139 static struct module_info rts_mod_info = { 140 129, "rts", 1, INFPSZ, 512, 128 141 }; 142 143 static struct qinit rtsrinit = { 144 NULL, (pfi_t)rts_rsrv, rts_stream_open, rts_stream_close, NULL, 145 &rts_mod_info 146 }; 147 148 static struct qinit rtswinit = { 149 (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &rts_mod_info, 150 NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD 151 }; 152 153 struct streamtab rtsinfo = { 154 &rtsrinit, &rtswinit 155 }; 156 157 /* 158 * This routine allocates the necessary 159 * message blocks for IOCTL wrapping the 160 * user data. 161 */ 162 static mblk_t * 163 rts_ioctl_alloc(mblk_t *data) 164 { 165 mblk_t *mp = NULL; 166 mblk_t *mp1 = NULL; 167 ipllc_t *ipllc; 168 struct iocblk *ioc; 169 170 mp = allocb_tmpl(sizeof (ipllc_t), data); 171 if (mp == NULL) 172 return (NULL); 173 mp1 = allocb_tmpl(sizeof (struct iocblk), data); 174 if (mp1 == NULL) { 175 freeb(mp); 176 return (NULL); 177 } 178 179 ipllc = (ipllc_t *)mp->b_rptr; 180 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST; 181 ipllc->ipllc_name_offset = 0; 182 ipllc->ipllc_name_length = 0; 183 mp->b_wptr += sizeof (ipllc_t); 184 mp->b_cont = data; 185 186 ioc = (struct iocblk *)mp1->b_rptr; 187 ioc->ioc_cmd = IP_IOCTL; 188 ioc->ioc_error = 0; 189 ioc->ioc_cr = NULL; 190 ioc->ioc_count = msgdsize(mp); 191 mp1->b_wptr += sizeof (struct iocblk); 192 mp1->b_datap->db_type = M_IOCTL; 193 mp1->b_cont = mp; 194 195 return (mp1); 196 } 197 198 /* 199 * This routine closes rts stream, by disabling 200 * put/srv routines and freeing the this module 201 * internal datastructure. 202 */ 203 static int 204 rts_common_close(queue_t *q, conn_t *connp) 205 { 206 207 ASSERT(connp != NULL && IPCL_IS_RTS(connp)); 208 209 ip_rts_unregister(connp); 210 211 ip_quiesce_conn(connp); 212 213 if (!IPCL_IS_NONSTR(connp)) { 214 qprocsoff(q); 215 } 216 217 /* 218 * Now we are truly single threaded on this stream, and can 219 * delete the things hanging off the connp, and finally the connp. 220 * We removed this connp from the fanout list, it cannot be 221 * accessed thru the fanouts, and we already waited for the 222 * conn_ref to drop to 0. We are already in close, so 223 * there cannot be any other thread from the top. qprocsoff 224 * has completed, and service has completed or won't run in 225 * future. 226 */ 227 ASSERT(connp->conn_ref == 1); 228 229 if (!IPCL_IS_NONSTR(connp)) { 230 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 231 } else { 232 ip_free_helper_stream(connp); 233 } 234 235 connp->conn_ref--; 236 ipcl_conn_destroy(connp); 237 return (0); 238 } 239 240 /* ARGSUSED */ 241 static int 242 rts_stream_close(queue_t *q, int flags __unused, cred_t *credp __unused) 243 { 244 conn_t *connp = Q_TO_CONN(q); 245 246 (void) rts_common_close(q, connp); 247 q->q_ptr = WR(q)->q_ptr = NULL; 248 return (0); 249 } 250 251 /* 252 * This is the open routine for routing socket. It allocates 253 * rts_t structure for the stream and tells IP that it is a routing socket. 254 */ 255 /* ARGSUSED */ 256 static int 257 rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 258 { 259 conn_t *connp; 260 dev_t conn_dev; 261 rts_t *rts; 262 263 /* If the stream is already open, return immediately. */ 264 if (q->q_ptr != NULL) 265 return (0); 266 267 if (sflag == MODOPEN) 268 return (EINVAL); 269 270 /* 271 * Since RTS is not used so heavily, allocating from the small 272 * arena should be sufficient. 273 */ 274 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 275 return (EBUSY); 276 } 277 278 connp = rts_open(flag, credp); 279 ASSERT(connp != NULL); 280 281 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 282 283 rts = connp->conn_rts; 284 rw_enter(&rts->rts_rwlock, RW_WRITER); 285 connp->conn_dev = conn_dev; 286 connp->conn_minor_arena = ip_minor_arena_sa; 287 288 q->q_ptr = connp; 289 WR(q)->q_ptr = connp; 290 connp->conn_rq = q; 291 connp->conn_wq = WR(q); 292 293 WR(q)->q_hiwat = connp->conn_sndbuf; 294 WR(q)->q_lowat = connp->conn_sndlowat; 295 296 mutex_enter(&connp->conn_lock); 297 connp->conn_state_flags &= ~CONN_INCIPIENT; 298 mutex_exit(&connp->conn_lock); 299 rw_exit(&rts->rts_rwlock); 300 301 /* Indicate to IP that this is a routing socket client */ 302 ip_rts_register(connp); 303 304 qprocson(q); 305 306 return (0); 307 } 308 309 /* ARGSUSED */ 310 static conn_t * 311 rts_open(int flag, cred_t *credp) 312 { 313 netstack_t *ns; 314 rts_stack_t *rtss; 315 rts_t *rts; 316 conn_t *connp; 317 zoneid_t zoneid; 318 319 ns = netstack_find_by_cred(credp); 320 ASSERT(ns != NULL); 321 rtss = ns->netstack_rts; 322 ASSERT(rtss != NULL); 323 324 /* 325 * For exclusive stacks we set the zoneid to zero 326 * to make RTS operate as if in the global zone. 327 */ 328 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 329 zoneid = GLOBAL_ZONEID; 330 else 331 zoneid = crgetzoneid(credp); 332 333 connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns); 334 rts = connp->conn_rts; 335 336 /* 337 * ipcl_conn_create did a netstack_hold. Undo the hold that was 338 * done by netstack_find_by_cred() 339 */ 340 netstack_rele(ns); 341 342 rw_enter(&rts->rts_rwlock, RW_WRITER); 343 ASSERT(connp->conn_rts == rts); 344 ASSERT(rts->rts_connp == connp); 345 346 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 347 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 348 connp->conn_ixa->ixa_zoneid = zoneid; 349 connp->conn_zoneid = zoneid; 350 connp->conn_flow_cntrld = B_FALSE; 351 352 rts->rts_rtss = rtss; 353 354 connp->conn_rcvbuf = rtss->rtss_recv_hiwat; 355 connp->conn_sndbuf = rtss->rtss_xmit_hiwat; 356 connp->conn_sndlowat = rtss->rtss_xmit_lowat; 357 connp->conn_rcvlowat = rts_mod_info.mi_lowat; 358 359 connp->conn_family = PF_ROUTE; 360 connp->conn_so_type = SOCK_RAW; 361 /* SO_PROTOTYPE is always sent down by sockfs setting conn_proto */ 362 363 connp->conn_recv = rts_input; 364 connp->conn_recvicmp = rts_icmp_input; 365 366 crhold(credp); 367 connp->conn_cred = credp; 368 connp->conn_cpid = curproc->p_pid; 369 /* Cache things in ixa without an extra refhold */ 370 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 371 connp->conn_ixa->ixa_cred = connp->conn_cred; 372 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 373 if (is_system_labeled()) 374 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 375 376 /* 377 * rts sockets start out as bound and connected 378 * For streams based sockets, socket state is set to 379 * SS_ISBOUND | SS_ISCONNECTED in so_strinit. 380 */ 381 rts->rts_state = TS_DATA_XFER; 382 rw_exit(&rts->rts_rwlock); 383 384 return (connp); 385 } 386 387 /* 388 * This routine creates a T_ERROR_ACK message and passes it upstream. 389 */ 390 static void 391 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 392 { 393 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 394 qreply(q, mp); 395 } 396 397 /* 398 * This routine creates a T_OK_ACK message and passes it upstream. 399 */ 400 static void 401 rts_ok_ack(queue_t *q, mblk_t *mp) 402 { 403 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL) 404 qreply(q, mp); 405 } 406 407 /* 408 * This routine is called by rts_wput to handle T_UNBIND_REQ messages. 409 */ 410 static void 411 rts_tpi_unbind(queue_t *q, mblk_t *mp) 412 { 413 conn_t *connp = Q_TO_CONN(q); 414 rts_t *rts = connp->conn_rts; 415 416 /* If a bind has not been done, we can't unbind. */ 417 if (rts->rts_state != TS_IDLE) { 418 rts_err_ack(q, mp, TOUTSTATE, 0); 419 return; 420 } 421 rts->rts_state = TS_UNBND; 422 rts_ok_ack(q, mp); 423 } 424 425 /* 426 * This routine is called to handle each 427 * O_T_BIND_REQ/T_BIND_REQ message passed to 428 * rts_wput. Note: This routine works with both 429 * O_T_BIND_REQ and T_BIND_REQ semantics. 430 */ 431 static void 432 rts_tpi_bind(queue_t *q, mblk_t *mp) 433 { 434 conn_t *connp = Q_TO_CONN(q); 435 rts_t *rts = connp->conn_rts; 436 struct T_bind_req *tbr; 437 438 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 439 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 440 "rts_tpi_bind: bad data, %d", rts->rts_state); 441 rts_err_ack(q, mp, TBADADDR, 0); 442 return; 443 } 444 if (rts->rts_state != TS_UNBND) { 445 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 446 "rts_tpi_bind: bad state, %d", rts->rts_state); 447 rts_err_ack(q, mp, TOUTSTATE, 0); 448 return; 449 } 450 tbr = (struct T_bind_req *)mp->b_rptr; 451 if (tbr->ADDR_length != 0) { 452 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 453 "rts_tpi_bind: bad ADDR_length %d", tbr->ADDR_length); 454 rts_err_ack(q, mp, TBADADDR, 0); 455 return; 456 } 457 /* Generic request */ 458 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req); 459 tbr->ADDR_length = 0; 460 tbr->PRIM_type = T_BIND_ACK; 461 mp->b_datap->db_type = M_PCPROTO; 462 rts->rts_state = TS_IDLE; 463 qreply(q, mp); 464 } 465 466 static void 467 rts_copy_info(struct T_info_ack *tap, rts_t *rts) 468 { 469 *tap = rts_g_t_info_ack; 470 tap->CURRENT_state = rts->rts_state; 471 tap->OPT_size = rts_max_optsize; 472 } 473 474 /* 475 * This routine responds to T_CAPABILITY_REQ messages. It is called by 476 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from 477 * rts_g_t_info_ack. The current state of the stream is copied from 478 * rts_state. 479 */ 480 static void 481 rts_capability_req(queue_t *q, mblk_t *mp) 482 { 483 conn_t *connp = Q_TO_CONN(q); 484 rts_t *rts = connp->conn_rts; 485 t_uscalar_t cap_bits1; 486 struct T_capability_ack *tcap; 487 488 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 489 490 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 491 mp->b_datap->db_type, T_CAPABILITY_ACK); 492 if (mp == NULL) 493 return; 494 495 tcap = (struct T_capability_ack *)mp->b_rptr; 496 tcap->CAP_bits1 = 0; 497 498 if (cap_bits1 & TC1_INFO) { 499 rts_copy_info(&tcap->INFO_ack, rts); 500 tcap->CAP_bits1 |= TC1_INFO; 501 } 502 503 qreply(q, mp); 504 } 505 506 /* 507 * This routine responds to T_INFO_REQ messages. It is called by rts_wput. 508 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack. 509 * The current state of the stream is copied from rts_state. 510 */ 511 static void 512 rts_info_req(queue_t *q, mblk_t *mp) 513 { 514 conn_t *connp = Q_TO_CONN(q); 515 rts_t *rts = connp->conn_rts; 516 517 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO, 518 T_INFO_ACK); 519 if (mp == NULL) 520 return; 521 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts); 522 qreply(q, mp); 523 } 524 525 /* 526 * This routine gets default values of certain options whose default 527 * values are maintained by protcol specific code 528 */ 529 /* ARGSUSED */ 530 int 531 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 532 { 533 /* no default value processed by protocol specific code currently */ 534 return (-1); 535 } 536 537 538 static int 539 rts_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 540 { 541 rts_t *rts = connp->conn_rts; 542 conn_opt_arg_t coas; 543 int retval; 544 545 ASSERT(RW_READ_HELD(&rts->rts_rwlock)); 546 547 switch (level) { 548 /* do this in conn_opt_get? */ 549 case SOL_ROUTE: 550 switch (name) { 551 case RT_AWARE: 552 mutex_enter(&connp->conn_lock); 553 *(int *)ptr = connp->conn_rtaware; 554 mutex_exit(&connp->conn_lock); 555 return (0); 556 } 557 break; 558 } 559 coas.coa_connp = connp; 560 coas.coa_ixa = connp->conn_ixa; 561 coas.coa_ipp = &connp->conn_xmit_ipp; 562 mutex_enter(&connp->conn_lock); 563 retval = conn_opt_get(&coas, level, name, ptr); 564 mutex_exit(&connp->conn_lock); 565 return (retval); 566 } 567 568 /* ARGSUSED */ 569 static int 570 rts_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 571 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 572 void *thisdg_attrs, boolean_t checkonly) 573 { 574 int *i1 = (int *)invalp; 575 rts_t *rts = connp->conn_rts; 576 rts_stack_t *rtss = rts->rts_rtss; 577 int error; 578 conn_opt_arg_t coas; 579 580 coas.coa_connp = connp; 581 coas.coa_ixa = connp->conn_ixa; 582 coas.coa_ipp = &connp->conn_xmit_ipp; 583 584 ASSERT(RW_WRITE_HELD(&rts->rts_rwlock)); 585 586 /* 587 * For rts, we should have no ancillary data sent down 588 * (rts_wput doesn't handle options). 589 */ 590 ASSERT(thisdg_attrs == NULL); 591 592 /* 593 * For fixed length options, no sanity check 594 * of passed in length is done. It is assumed *_optcom_req() 595 * routines do the right thing. 596 */ 597 598 switch (level) { 599 case SOL_SOCKET: 600 switch (name) { 601 case SO_PROTOTYPE: 602 /* 603 * Routing socket applications that call socket() with 604 * a third argument can filter which messages will be 605 * sent upstream thanks to sockfs. so_socket() sends 606 * down the SO_PROTOTYPE and rts_queue_input() 607 * implements the filtering. 608 */ 609 if (*i1 != AF_INET && *i1 != AF_INET6) { 610 *outlenp = 0; 611 return (EPROTONOSUPPORT); 612 } 613 if (!checkonly) 614 connp->conn_proto = *i1; 615 *outlenp = inlen; 616 return (0); 617 618 /* 619 * The following two items can be manipulated, 620 * but changing them should do nothing. 621 */ 622 case SO_SNDBUF: 623 if (*i1 > rtss->rtss_max_buf) { 624 *outlenp = 0; 625 return (ENOBUFS); 626 } 627 break; /* goto sizeof (int) option return */ 628 case SO_RCVBUF: 629 if (*i1 > rtss->rtss_max_buf) { 630 *outlenp = 0; 631 return (ENOBUFS); 632 } 633 break; /* goto sizeof (int) option return */ 634 } 635 break; 636 case SOL_ROUTE: 637 switch (name) { 638 case RT_AWARE: 639 if (!checkonly) { 640 mutex_enter(&connp->conn_lock); 641 connp->conn_rtaware = *i1; 642 mutex_exit(&connp->conn_lock); 643 } 644 *outlenp = inlen; 645 return (0); 646 } 647 break; 648 } 649 /* Serialized setsockopt since we are D_MTQPAIR */ 650 error = conn_opt_set(&coas, level, name, inlen, invalp, 651 checkonly, cr); 652 if (error != 0) { 653 *outlenp = 0; 654 return (error); 655 } 656 /* 657 * Common case of return from an option that is sizeof (int) 658 */ 659 if (invalp != outvalp) { 660 /* don't trust bcopy for identical src/dst */ 661 (void) bcopy(invalp, outvalp, inlen); 662 } 663 *outlenp = (t_uscalar_t)sizeof (int); 664 return (0); 665 } 666 667 static int 668 rts_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 669 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 670 void *thisdg_attrs, cred_t *cr) 671 { 672 boolean_t checkonly = B_FALSE; 673 674 if (optset_context) { 675 switch (optset_context) { 676 case SETFN_OPTCOM_CHECKONLY: 677 checkonly = B_TRUE; 678 /* 679 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 680 * inlen != 0 implies value supplied and 681 * we have to "pretend" to set it. 682 * inlen == 0 implies that there is no value part 683 * in T_CHECK request and just validation 684 * done elsewhere should be enough, we just return here. 685 */ 686 if (inlen == 0) { 687 *outlenp = 0; 688 return (0); 689 } 690 break; 691 case SETFN_OPTCOM_NEGOTIATE: 692 checkonly = B_FALSE; 693 break; 694 case SETFN_UD_NEGOTIATE: 695 case SETFN_CONN_NEGOTIATE: 696 checkonly = B_FALSE; 697 /* 698 * Negotiating local and "association-related" options 699 * through T_UNITDATA_REQ or T_CONN_{REQ,CON} 700 * Not allowed in this module. 701 */ 702 return (EINVAL); 703 default: 704 /* 705 * We should never get here 706 */ 707 *outlenp = 0; 708 return (EINVAL); 709 } 710 711 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 712 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 713 714 } 715 return (rts_do_opt_set(connp, level, name, inlen, invalp, outlenp, 716 outvalp, cr, thisdg_attrs, checkonly)); 717 718 } 719 720 /* 721 * This routine retrieves the current status of socket options. 722 * It returns the size of the option retrieved. 723 */ 724 int 725 rts_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 726 { 727 rts_t *rts; 728 int err; 729 730 rts = Q_TO_RTS(q); 731 rw_enter(&rts->rts_rwlock, RW_READER); 732 err = rts_opt_get(Q_TO_CONN(q), level, name, ptr); 733 rw_exit(&rts->rts_rwlock); 734 return (err); 735 } 736 737 /* 738 * This routine sets socket options. 739 */ 740 /*ARGSUSED*/ 741 int 742 rts_tpi_opt_set(queue_t *q, uint_t optset_context, int level, 743 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 744 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 745 { 746 conn_t *connp = Q_TO_CONN(q); 747 int error; 748 rts_t *rts = connp->conn_rts; 749 750 751 rw_enter(&rts->rts_rwlock, RW_WRITER); 752 error = rts_opt_set(connp, optset_context, level, name, inlen, invalp, 753 outlenp, outvalp, thisdg_attrs, cr); 754 rw_exit(&rts->rts_rwlock); 755 return (error); 756 } 757 758 /* 759 * This routine retrieves the value of an ND variable in a rtsparam_t 760 * structure. It is called through nd_getset when a user reads the 761 * variable. 762 */ 763 /* ARGSUSED */ 764 static int 765 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 766 { 767 rtsparam_t *rtspa = (rtsparam_t *)cp; 768 769 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value); 770 return (0); 771 } 772 773 /* 774 * Walk through the param array specified registering each element with the 775 * named dispatch (ND) handler. 776 */ 777 static boolean_t 778 rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt) 779 { 780 for (; cnt-- > 0; rtspa++) { 781 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) { 782 if (!nd_load(ndp, rtspa->rts_param_name, 783 rts_param_get, rts_param_set, (caddr_t)rtspa)) { 784 nd_free(ndp); 785 return (B_FALSE); 786 } 787 } 788 } 789 return (B_TRUE); 790 } 791 792 /* This routine sets an ND variable in a rtsparam_t structure. */ 793 /* ARGSUSED */ 794 static int 795 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 796 { 797 ulong_t new_value; 798 rtsparam_t *rtspa = (rtsparam_t *)cp; 799 800 /* 801 * Fail the request if the new value does not lie within the 802 * required bounds. 803 */ 804 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 || 805 new_value < rtspa->rts_param_min || 806 new_value > rtspa->rts_param_max) { 807 return (EINVAL); 808 } 809 810 /* Set the new value */ 811 rtspa->rts_param_value = new_value; 812 return (0); 813 } 814 815 /* 816 * Empty rsrv routine which is used by rts_input to cause a wakeup 817 * of a thread in qwait. 818 */ 819 /*ARGSUSED*/ 820 static void 821 rts_rsrv(queue_t *q) 822 { 823 } 824 825 /* 826 * This routine handles synchronous messages passed downstream. It either 827 * consumes the message or passes it downstream; it never queues a 828 * a message. The data messages that go down are wrapped in an IOCTL 829 * message. 830 * 831 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that 832 * it can return an immediate error (such as ENETUNREACH when adding a route). 833 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only 834 * one M_IOCTL outstanding at any given time. 835 */ 836 static int 837 rts_wrw(queue_t *q, struiod_t *dp) 838 { 839 mblk_t *mp = dp->d_mp; 840 mblk_t *mp1; 841 int error; 842 rt_msghdr_t *rtm; 843 conn_t *connp = Q_TO_CONN(q); 844 rts_t *rts = connp->conn_rts; 845 846 while (rts->rts_flag & RTS_WRW_PENDING) { 847 if (qwait_rw(q)) { 848 rts->rts_error = EINTR; 849 goto err_ret; 850 } 851 } 852 rts->rts_flag |= RTS_WRW_PENDING; 853 854 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) { 855 /* 856 * Uio error of some sort, so just return the error. 857 */ 858 rts->rts_error = error; 859 goto err_ret; 860 } 861 /* 862 * Pass the mblk (chain) onto wput(). 863 */ 864 dp->d_mp = 0; 865 866 switch (mp->b_datap->db_type) { 867 case M_PROTO: 868 case M_PCPROTO: 869 /* Expedite other than T_DATA_REQ to below the switch */ 870 if (((mp->b_wptr - mp->b_rptr) != 871 sizeof (struct T_data_req)) || 872 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ)) 873 break; 874 if ((mp1 = mp->b_cont) == NULL) { 875 rts->rts_error = EINVAL; 876 freemsg(mp); 877 goto err_ret; 878 } 879 freeb(mp); 880 mp = mp1; 881 /* FALLTHRU */ 882 case M_DATA: 883 /* 884 * The semantics of the routing socket is such that the rtm_pid 885 * field is automatically filled in during requests with the 886 * current process' pid. We do this here (where we still have 887 * user context) after checking we have at least a message the 888 * size of a routing message header. 889 */ 890 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 891 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 892 rts->rts_error = EINVAL; 893 freemsg(mp); 894 goto err_ret; 895 } 896 } 897 rtm = (rt_msghdr_t *)mp->b_rptr; 898 rtm->rtm_pid = curproc->p_pid; 899 break; 900 default: 901 break; 902 } 903 rts->rts_flag |= RTS_WPUT_PENDING; 904 rts_wput(q, mp); 905 while (rts->rts_flag & RTS_WPUT_PENDING) 906 if (qwait_rw(q)) { 907 /* RTS_WPUT_PENDING will be cleared below */ 908 rts->rts_error = EINTR; 909 break; 910 } 911 err_ret: 912 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING); 913 return (rts->rts_error); 914 } 915 916 /* 917 * This routine handles all messages passed downstream. It either 918 * consumes the message or passes it downstream; it never queues a 919 * a message. The data messages that go down are wrapped in an IOCTL 920 * message. 921 */ 922 static void 923 rts_wput(queue_t *q, mblk_t *mp) 924 { 925 uchar_t *rptr = mp->b_rptr; 926 mblk_t *mp1; 927 conn_t *connp = Q_TO_CONN(q); 928 rts_t *rts = connp->conn_rts; 929 930 switch (mp->b_datap->db_type) { 931 case M_DATA: 932 break; 933 case M_PROTO: 934 case M_PCPROTO: 935 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) { 936 /* Expedite valid T_DATA_REQ to below the switch */ 937 if (((union T_primitives *)rptr)->type == T_DATA_REQ) { 938 mp1 = mp->b_cont; 939 freeb(mp); 940 if (mp1 == NULL) 941 return; 942 mp = mp1; 943 break; 944 } 945 } 946 /* FALLTHRU */ 947 default: 948 rts_wput_other(q, mp); 949 return; 950 } 951 952 953 ASSERT(msg_getcred(mp, NULL) != NULL); 954 955 mp1 = rts_ioctl_alloc(mp); 956 if (mp1 == NULL) { 957 ASSERT(rts != NULL); 958 freemsg(mp); 959 if (rts->rts_flag & RTS_WPUT_PENDING) { 960 rts->rts_error = ENOMEM; 961 rts->rts_flag &= ~RTS_WPUT_PENDING; 962 } 963 return; 964 } 965 ip_wput_nondata(q, mp1); 966 } 967 968 969 /* 970 * Handles all the control message, if it 971 * can not understand it, it will 972 * pass down stream. 973 */ 974 static void 975 rts_wput_other(queue_t *q, mblk_t *mp) 976 { 977 conn_t *connp = Q_TO_CONN(q); 978 rts_t *rts = connp->conn_rts; 979 uchar_t *rptr = mp->b_rptr; 980 struct iocblk *iocp; 981 cred_t *cr; 982 rts_stack_t *rtss; 983 984 rtss = rts->rts_rtss; 985 986 switch (mp->b_datap->db_type) { 987 case M_PROTO: 988 case M_PCPROTO: 989 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) { 990 /* 991 * If the message does not contain a PRIM_type, 992 * throw it away. 993 */ 994 freemsg(mp); 995 return; 996 } 997 switch (((union T_primitives *)rptr)->type) { 998 case T_BIND_REQ: 999 case O_T_BIND_REQ: 1000 rts_tpi_bind(q, mp); 1001 return; 1002 case T_UNBIND_REQ: 1003 rts_tpi_unbind(q, mp); 1004 return; 1005 case T_CAPABILITY_REQ: 1006 rts_capability_req(q, mp); 1007 return; 1008 case T_INFO_REQ: 1009 rts_info_req(q, mp); 1010 return; 1011 case T_SVR4_OPTMGMT_REQ: 1012 case T_OPTMGMT_REQ: 1013 /* 1014 * All Solaris components should pass a db_credp 1015 * for this TPI message, hence we ASSERT. 1016 * But in case there is some other M_PROTO that looks 1017 * like a TPI message sent by some other kernel 1018 * component, we check and return an error. 1019 */ 1020 cr = msg_getcred(mp, NULL); 1021 ASSERT(cr != NULL); 1022 if (cr == NULL) { 1023 rts_err_ack(q, mp, TSYSERR, EINVAL); 1024 return; 1025 } 1026 if (((union T_primitives *)rptr)->type == 1027 T_SVR4_OPTMGMT_REQ) { 1028 svr4_optcom_req(q, mp, cr, &rts_opt_obj); 1029 } else { 1030 tpi_optcom_req(q, mp, cr, &rts_opt_obj); 1031 } 1032 return; 1033 case O_T_CONN_RES: 1034 case T_CONN_RES: 1035 case T_DISCON_REQ: 1036 /* Not supported by rts. */ 1037 rts_err_ack(q, mp, TNOTSUPPORT, 0); 1038 return; 1039 case T_DATA_REQ: 1040 case T_EXDATA_REQ: 1041 case T_ORDREL_REQ: 1042 /* Illegal for rts. */ 1043 freemsg(mp); 1044 (void) putnextctl1(RD(q), M_ERROR, EPROTO); 1045 return; 1046 1047 default: 1048 break; 1049 } 1050 break; 1051 case M_IOCTL: 1052 iocp = (struct iocblk *)mp->b_rptr; 1053 switch (iocp->ioc_cmd) { 1054 case ND_SET: 1055 case ND_GET: 1056 if (nd_getset(q, rtss->rtss_g_nd, mp)) { 1057 qreply(q, mp); 1058 return; 1059 } 1060 break; 1061 case TI_GETPEERNAME: 1062 mi_copyin(q, mp, NULL, 1063 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 1064 return; 1065 default: 1066 break; 1067 } 1068 break; 1069 case M_IOCDATA: 1070 rts_wput_iocdata(q, mp); 1071 return; 1072 default: 1073 break; 1074 } 1075 ip_wput_nondata(q, mp); 1076 } 1077 1078 /* 1079 * Called by rts_wput_other to handle all M_IOCDATA messages. 1080 */ 1081 static void 1082 rts_wput_iocdata(queue_t *q, mblk_t *mp) 1083 { 1084 struct sockaddr *rtsaddr; 1085 mblk_t *mp1; 1086 STRUCT_HANDLE(strbuf, sb); 1087 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 1088 1089 /* Make sure it is one of ours. */ 1090 switch (iocp->ioc_cmd) { 1091 case TI_GETPEERNAME: 1092 break; 1093 default: 1094 ip_wput_nondata(q, mp); 1095 return; 1096 } 1097 switch (mi_copy_state(q, mp, &mp1)) { 1098 case -1: 1099 return; 1100 case MI_COPY_CASE(MI_COPY_IN, 1): 1101 break; 1102 case MI_COPY_CASE(MI_COPY_OUT, 1): 1103 /* Copy out the strbuf. */ 1104 mi_copyout(q, mp); 1105 return; 1106 case MI_COPY_CASE(MI_COPY_OUT, 2): 1107 /* All done. */ 1108 mi_copy_done(q, mp, 0); 1109 return; 1110 default: 1111 mi_copy_done(q, mp, EPROTO); 1112 return; 1113 } 1114 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 1115 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) { 1116 mi_copy_done(q, mp, EINVAL); 1117 return; 1118 } 1119 switch (iocp->ioc_cmd) { 1120 case TI_GETPEERNAME: 1121 break; 1122 default: 1123 mi_copy_done(q, mp, EPROTO); 1124 return; 1125 } 1126 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t), 1127 B_TRUE); 1128 if (mp1 == NULL) 1129 return; 1130 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 1131 rtsaddr = (struct sockaddr *)mp1->b_rptr; 1132 mp1->b_wptr = (uchar_t *)&rtsaddr[1]; 1133 bzero(rtsaddr, sizeof (struct sockaddr)); 1134 rtsaddr->sa_family = AF_ROUTE; 1135 /* Copy out the address */ 1136 mi_copyout(q, mp); 1137 } 1138 1139 /* 1140 * IP passes up a NULL ira. 1141 */ 1142 /*ARGSUSED2*/ 1143 static void 1144 rts_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1145 { 1146 conn_t *connp = (conn_t *)arg1; 1147 rts_t *rts = connp->conn_rts; 1148 struct iocblk *iocp; 1149 mblk_t *mp1; 1150 struct T_data_ind *tdi; 1151 int error; 1152 1153 switch (mp->b_datap->db_type) { 1154 case M_IOCACK: 1155 case M_IOCNAK: 1156 iocp = (struct iocblk *)mp->b_rptr; 1157 ASSERT(!IPCL_IS_NONSTR(connp)); 1158 if (rts->rts_flag & (RTS_WPUT_PENDING)) { 1159 rts->rts_flag &= ~RTS_WPUT_PENDING; 1160 rts->rts_error = iocp->ioc_error; 1161 /* 1162 * Tell rts_wvw/qwait that we are done. 1163 * Note: there is no qwait_wakeup() we can use. 1164 */ 1165 qenable(connp->conn_rq); 1166 freemsg(mp); 1167 return; 1168 } 1169 break; 1170 case M_DATA: 1171 /* 1172 * Prepend T_DATA_IND to prevent the stream head from 1173 * consolidating multiple messages together. 1174 * If the allocation fails just send up the M_DATA. 1175 */ 1176 mp1 = allocb(sizeof (*tdi), BPRI_MED); 1177 if (mp1 != NULL) { 1178 mp1->b_cont = mp; 1179 mp = mp1; 1180 1181 mp->b_datap->db_type = M_PROTO; 1182 mp->b_wptr += sizeof (*tdi); 1183 tdi = (struct T_data_ind *)mp->b_rptr; 1184 tdi->PRIM_type = T_DATA_IND; 1185 tdi->MORE_flag = 0; 1186 } 1187 break; 1188 default: 1189 break; 1190 } 1191 1192 if (IPCL_IS_NONSTR(connp)) { 1193 if ((*connp->conn_upcalls->su_recv) 1194 (connp->conn_upper_handle, mp, msgdsize(mp), 0, 1195 &error, NULL) < 0) { 1196 ASSERT(error == ENOSPC); 1197 /* 1198 * Let's confirm hoding the lock that 1199 * we are out of recv space. 1200 */ 1201 mutex_enter(&rts->rts_recv_mutex); 1202 if ((*connp->conn_upcalls->su_recv) 1203 (connp->conn_upper_handle, NULL, 0, 0, 1204 &error, NULL) < 0) { 1205 ASSERT(error == ENOSPC); 1206 connp->conn_flow_cntrld = B_TRUE; 1207 } 1208 mutex_exit(&rts->rts_recv_mutex); 1209 } 1210 } else { 1211 putnext(connp->conn_rq, mp); 1212 } 1213 } 1214 1215 /*ARGSUSED*/ 1216 static void 1217 rts_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1218 { 1219 freemsg(mp); 1220 } 1221 1222 void 1223 rts_ddi_g_init(void) 1224 { 1225 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr, 1226 rts_opt_obj.odb_opt_arr_cnt); 1227 1228 /* 1229 * We want to be informed each time a stack is created or 1230 * destroyed in the kernel, so we can maintain the 1231 * set of rts_stack_t's. 1232 */ 1233 netstack_register(NS_RTS, rts_stack_init, NULL, rts_stack_fini); 1234 } 1235 1236 void 1237 rts_ddi_g_destroy(void) 1238 { 1239 netstack_unregister(NS_RTS); 1240 } 1241 1242 #define INET_NAME "ip" 1243 1244 /* 1245 * Initialize the RTS stack instance. 1246 */ 1247 /* ARGSUSED */ 1248 static void * 1249 rts_stack_init(netstackid_t stackid, netstack_t *ns) 1250 { 1251 rts_stack_t *rtss; 1252 rtsparam_t *pa; 1253 int error = 0; 1254 major_t major; 1255 1256 rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP); 1257 rtss->rtss_netstack = ns; 1258 1259 pa = (rtsparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP); 1260 rtss->rtss_params = pa; 1261 bcopy(lcl_param_arr, rtss->rtss_params, sizeof (lcl_param_arr)); 1262 1263 (void) rts_param_register(&rtss->rtss_g_nd, 1264 rtss->rtss_params, A_CNT(lcl_param_arr)); 1265 1266 major = mod_name_to_major(INET_NAME); 1267 error = ldi_ident_from_major(major, &rtss->rtss_ldi_ident); 1268 ASSERT(error == 0); 1269 return (rtss); 1270 } 1271 1272 /* 1273 * Free the RTS stack instance. 1274 */ 1275 /* ARGSUSED */ 1276 static void 1277 rts_stack_fini(netstackid_t stackid, void *arg) 1278 { 1279 rts_stack_t *rtss = (rts_stack_t *)arg; 1280 1281 nd_free(&rtss->rtss_g_nd); 1282 kmem_free(rtss->rtss_params, sizeof (lcl_param_arr)); 1283 rtss->rtss_params = NULL; 1284 ldi_ident_release(rtss->rtss_ldi_ident); 1285 kmem_free(rtss, sizeof (*rtss)); 1286 } 1287 1288 /* ARGSUSED */ 1289 int 1290 rts_accept(sock_lower_handle_t lproto_handle, 1291 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 1292 cred_t *cr) 1293 { 1294 return (EINVAL); 1295 } 1296 1297 /* ARGSUSED */ 1298 static int 1299 rts_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 1300 socklen_t len, cred_t *cr) 1301 { 1302 /* 1303 * rebind not allowed 1304 */ 1305 return (EINVAL); 1306 } 1307 1308 /* ARGSUSED */ 1309 int 1310 rts_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 1311 { 1312 return (EINVAL); 1313 } 1314 1315 /* ARGSUSED */ 1316 int 1317 rts_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 1318 socklen_t len, sock_connid_t *id, cred_t *cr) 1319 { 1320 /* 1321 * rts sockets start out as bound and connected 1322 */ 1323 *id = 0; 1324 return (EISCONN); 1325 } 1326 1327 /* ARGSUSED */ 1328 int 1329 rts_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 1330 socklen_t *addrlen, cred_t *cr) 1331 { 1332 bzero(addr, sizeof (struct sockaddr)); 1333 addr->sa_family = AF_ROUTE; 1334 *addrlen = sizeof (struct sockaddr); 1335 1336 return (0); 1337 } 1338 1339 /* ARGSUSED */ 1340 int 1341 rts_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 1342 socklen_t *addrlen, cred_t *cr) 1343 { 1344 bzero(addr, sizeof (struct sockaddr)); 1345 addr->sa_family = AF_ROUTE; 1346 *addrlen = sizeof (struct sockaddr); 1347 1348 return (0); 1349 } 1350 1351 static int 1352 rts_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 1353 void *optvalp, socklen_t *optlen, cred_t *cr) 1354 { 1355 conn_t *connp = (conn_t *)proto_handle; 1356 rts_t *rts = connp->conn_rts; 1357 int error; 1358 t_uscalar_t max_optbuf_len; 1359 void *optvalp_buf; 1360 int len; 1361 1362 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 1363 rts_opt_obj.odb_opt_des_arr, 1364 rts_opt_obj.odb_opt_arr_cnt, 1365 B_FALSE, B_TRUE, cr); 1366 if (error != 0) { 1367 if (error < 0) 1368 error = proto_tlitosyserr(-error); 1369 return (error); 1370 } 1371 1372 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 1373 rw_enter(&rts->rts_rwlock, RW_READER); 1374 len = rts_opt_get(connp, level, option_name, optvalp_buf); 1375 rw_exit(&rts->rts_rwlock); 1376 if (len == -1) { 1377 kmem_free(optvalp_buf, max_optbuf_len); 1378 return (EINVAL); 1379 } 1380 1381 /* 1382 * update optlen and copy option value 1383 */ 1384 t_uscalar_t size = MIN(len, *optlen); 1385 1386 bcopy(optvalp_buf, optvalp, size); 1387 bcopy(&size, optlen, sizeof (size)); 1388 kmem_free(optvalp_buf, max_optbuf_len); 1389 return (0); 1390 } 1391 1392 static int 1393 rts_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 1394 const void *optvalp, socklen_t optlen, cred_t *cr) 1395 { 1396 conn_t *connp = (conn_t *)proto_handle; 1397 rts_t *rts = connp->conn_rts; 1398 int error; 1399 1400 error = proto_opt_check(level, option_name, optlen, NULL, 1401 rts_opt_obj.odb_opt_des_arr, 1402 rts_opt_obj.odb_opt_arr_cnt, 1403 B_TRUE, B_FALSE, cr); 1404 1405 if (error != 0) { 1406 if (error < 0) 1407 error = proto_tlitosyserr(-error); 1408 return (error); 1409 } 1410 1411 rw_enter(&rts->rts_rwlock, RW_WRITER); 1412 error = rts_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 1413 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 1414 NULL, cr); 1415 rw_exit(&rts->rts_rwlock); 1416 1417 ASSERT(error >= 0); 1418 1419 return (error); 1420 } 1421 1422 /* ARGSUSED */ 1423 static int 1424 rts_send(sock_lower_handle_t proto_handle, mblk_t *mp, 1425 struct nmsghdr *msg, cred_t *cr) 1426 { 1427 conn_t *connp = (conn_t *)proto_handle; 1428 rt_msghdr_t *rtm; 1429 int error; 1430 1431 ASSERT(DB_TYPE(mp) == M_DATA); 1432 /* 1433 * The semantics of the routing socket is such that the rtm_pid 1434 * field is automatically filled in during requests with the 1435 * current process' pid. We do this here (where we still have 1436 * user context) after checking we have at least a message the 1437 * size of a routing message header. 1438 */ 1439 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) { 1440 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) { 1441 freemsg(mp); 1442 return (EINVAL); 1443 } 1444 } 1445 rtm = (rt_msghdr_t *)mp->b_rptr; 1446 rtm->rtm_pid = curproc->p_pid; 1447 1448 /* 1449 * We are not constrained by the ioctl interface and 1450 * ip_rts_request_common processing requests synchronously hence 1451 * we can send them down concurrently. 1452 */ 1453 error = ip_rts_request_common(mp, connp, cr); 1454 return (error); 1455 } 1456 1457 /* ARGSUSED */ 1458 sock_lower_handle_t 1459 rts_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 1460 uint_t *smodep, int *errorp, int flags, cred_t *credp) 1461 { 1462 conn_t *connp; 1463 1464 if (family != AF_ROUTE || type != SOCK_RAW || 1465 (proto != 0 && proto != AF_INET && proto != AF_INET6)) { 1466 *errorp = EPROTONOSUPPORT; 1467 return (NULL); 1468 } 1469 1470 connp = rts_open(flags, credp); 1471 ASSERT(connp != NULL); 1472 connp->conn_flags |= IPCL_NONSTR; 1473 1474 connp->conn_proto = proto; 1475 1476 mutex_enter(&connp->conn_lock); 1477 connp->conn_state_flags &= ~CONN_INCIPIENT; 1478 mutex_exit(&connp->conn_lock); 1479 1480 *errorp = 0; 1481 *smodep = SM_ATOMIC; 1482 *sock_downcalls = &sock_rts_downcalls; 1483 return ((sock_lower_handle_t)connp); 1484 } 1485 1486 /* ARGSUSED */ 1487 void 1488 rts_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 1489 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 1490 { 1491 conn_t *connp = (conn_t *)proto_handle; 1492 struct sock_proto_props sopp; 1493 1494 connp->conn_upcalls = sock_upcalls; 1495 connp->conn_upper_handle = sock_handle; 1496 1497 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 1498 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 1499 sopp.sopp_wroff = 0; 1500 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 1501 sopp.sopp_rxlowat = connp->conn_rcvlowat; 1502 sopp.sopp_maxblk = INFPSZ; 1503 sopp.sopp_maxpsz = rts_mod_info.mi_maxpsz; 1504 sopp.sopp_minpsz = (rts_mod_info.mi_minpsz == 1) ? 0 : 1505 rts_mod_info.mi_minpsz; 1506 1507 (*connp->conn_upcalls->su_set_proto_props) 1508 (connp->conn_upper_handle, &sopp); 1509 1510 /* 1511 * We treat it as already connected for routing socket. 1512 */ 1513 (*connp->conn_upcalls->su_connected) 1514 (connp->conn_upper_handle, 0, NULL, -1); 1515 1516 /* Indicate to IP that this is a routing socket client */ 1517 ip_rts_register(connp); 1518 } 1519 1520 /* ARGSUSED */ 1521 int 1522 rts_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 1523 { 1524 conn_t *connp = (conn_t *)proto_handle; 1525 1526 ASSERT(connp != NULL && IPCL_IS_RTS(connp)); 1527 return (rts_common_close(NULL, connp)); 1528 } 1529 1530 /* ARGSUSED */ 1531 int 1532 rts_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 1533 { 1534 conn_t *connp = (conn_t *)proto_handle; 1535 1536 /* shut down the send side */ 1537 if (how != SHUT_RD) 1538 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 1539 SOCK_OPCTL_SHUT_SEND, 0); 1540 /* shut down the recv side */ 1541 if (how != SHUT_WR) 1542 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 1543 SOCK_OPCTL_SHUT_RECV, 0); 1544 return (0); 1545 } 1546 1547 void 1548 rts_clr_flowctrl(sock_lower_handle_t proto_handle) 1549 { 1550 conn_t *connp = (conn_t *)proto_handle; 1551 rts_t *rts = connp->conn_rts; 1552 1553 mutex_enter(&rts->rts_recv_mutex); 1554 connp->conn_flow_cntrld = B_FALSE; 1555 mutex_exit(&rts->rts_recv_mutex); 1556 } 1557 1558 int 1559 rts_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 1560 int mode, int32_t *rvalp, cred_t *cr) 1561 { 1562 conn_t *connp = (conn_t *)proto_handle; 1563 int error; 1564 1565 /* 1566 * If we don't have a helper stream then create one. 1567 * ip_create_helper_stream takes care of locking the conn_t, 1568 * so this check for NULL is just a performance optimization. 1569 */ 1570 if (connp->conn_helper_info == NULL) { 1571 rts_stack_t *rtss = connp->conn_rts->rts_rtss; 1572 1573 ASSERT(rtss->rtss_ldi_ident != NULL); 1574 1575 /* 1576 * Create a helper stream for non-STREAMS socket. 1577 */ 1578 error = ip_create_helper_stream(connp, rtss->rtss_ldi_ident); 1579 if (error != 0) { 1580 ip0dbg(("rts_ioctl: create of IP helper stream " 1581 "failed %d\n", error)); 1582 return (error); 1583 } 1584 } 1585 1586 switch (cmd) { 1587 case ND_SET: 1588 case ND_GET: 1589 case TI_GETPEERNAME: 1590 case TI_GETMYNAME: 1591 #ifdef DEUG 1592 cmn_err(CE_CONT, "rts_ioctl cmd 0x%x on non sreams" 1593 " socket", cmd); 1594 #endif 1595 error = EINVAL; 1596 break; 1597 default: 1598 /* 1599 * Pass on to IP using helper stream 1600 */ 1601 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 1602 cmd, arg, mode, cr, rvalp); 1603 break; 1604 } 1605 1606 return (error); 1607 } 1608 1609 sock_downcalls_t sock_rts_downcalls = { 1610 rts_activate, 1611 rts_accept, 1612 rts_bind, 1613 rts_listen, 1614 rts_connect, 1615 rts_getpeername, 1616 rts_getsockname, 1617 rts_getsockopt, 1618 rts_setsockopt, 1619 rts_send, 1620 NULL, 1621 NULL, 1622 NULL, 1623 rts_shutdown, 1624 rts_clr_flowctrl, 1625 rts_ioctl, 1626 rts_close 1627 }; 1628