1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 1995 Søren Schmidt 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include "opt_inet6.h" 30 31 #include <sys/param.h> 32 #include <sys/capsicum.h> 33 #include <sys/filedesc.h> 34 #include <sys/limits.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/proc.h> 38 #include <sys/protosw.h> 39 #include <sys/socket.h> 40 #include <sys/socketvar.h> 41 #include <sys/syscallsubr.h> 42 #include <sys/sysproto.h> 43 #include <sys/vnode.h> 44 #include <sys/un.h> 45 #include <sys/unistd.h> 46 47 #include <security/audit/audit.h> 48 49 #include <net/if.h> 50 #include <net/vnet.h> 51 #include <netinet/in.h> 52 #include <netinet/ip.h> 53 #include <netinet/tcp.h> 54 #ifdef INET6 55 #include <netinet/ip6.h> 56 #include <netinet6/ip6_var.h> 57 #endif 58 59 #ifdef COMPAT_LINUX32 60 #include <compat/freebsd32/freebsd32_util.h> 61 #include <machine/../linux32/linux.h> 62 #include <machine/../linux32/linux32_proto.h> 63 #else 64 #include <machine/../linux/linux.h> 65 #include <machine/../linux/linux_proto.h> 66 #endif 67 #include <compat/linux/linux_common.h> 68 #include <compat/linux/linux_emul.h> 69 #include <compat/linux/linux_file.h> 70 #include <compat/linux/linux_mib.h> 71 #include <compat/linux/linux_socket.h> 72 #include <compat/linux/linux_time.h> 73 #include <compat/linux/linux_util.h> 74 75 _Static_assert(offsetof(struct l_ifreq, ifr_ifru) == 76 offsetof(struct ifreq, ifr_ifru), 77 "Linux ifreq members names should be equal to FreeeBSD"); 78 _Static_assert(offsetof(struct l_ifreq, ifr_index) == 79 offsetof(struct ifreq, ifr_index), 80 "Linux ifreq members names should be equal to FreeeBSD"); 81 _Static_assert(offsetof(struct l_ifreq, ifr_name) == 82 offsetof(struct ifreq, ifr_name), 83 "Linux ifreq members names should be equal to FreeeBSD"); 84 85 #define SECURITY_CONTEXT_STRING "unconfined" 86 87 static int linux_sendmsg_common(struct thread *, l_int, struct l_msghdr *, 88 l_uint); 89 static int linux_recvmsg_common(struct thread *, l_int, struct l_msghdr *, 90 l_uint, struct msghdr *); 91 static int linux_set_socket_flags(int, int *); 92 93 #define SOL_NETLINK 270 94 95 static int 96 linux_to_bsd_sockopt_level(int level) 97 { 98 99 if (level == LINUX_SOL_SOCKET) 100 return (SOL_SOCKET); 101 /* Remaining values are RFC-defined protocol numbers. */ 102 return (level); 103 } 104 105 static int 106 bsd_to_linux_sockopt_level(int level) 107 { 108 109 if (level == SOL_SOCKET) 110 return (LINUX_SOL_SOCKET); 111 return (level); 112 } 113 114 static int 115 linux_to_bsd_ip_sockopt(int opt) 116 { 117 118 switch (opt) { 119 /* known and translated sockopts */ 120 case LINUX_IP_TOS: 121 return (IP_TOS); 122 case LINUX_IP_TTL: 123 return (IP_TTL); 124 case LINUX_IP_HDRINCL: 125 return (IP_HDRINCL); 126 case LINUX_IP_OPTIONS: 127 return (IP_OPTIONS); 128 case LINUX_IP_RECVOPTS: 129 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_RECVOPTS"); 130 return (IP_RECVOPTS); 131 case LINUX_IP_RETOPTS: 132 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_REETOPTS"); 133 return (IP_RETOPTS); 134 case LINUX_IP_RECVTTL: 135 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_RECVTTL"); 136 return (IP_RECVTTL); 137 case LINUX_IP_RECVTOS: 138 return (IP_RECVTOS); 139 case LINUX_IP_FREEBIND: 140 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_FREEBIND"); 141 return (IP_BINDANY); 142 case LINUX_IP_IPSEC_POLICY: 143 /* we have this option, but not documented in ip(4) manpage */ 144 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_IPSEC_POLICY"); 145 return (IP_IPSEC_POLICY); 146 case LINUX_IP_MINTTL: 147 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MINTTL"); 148 return (IP_MINTTL); 149 case LINUX_IP_MULTICAST_IF: 150 return (IP_MULTICAST_IF); 151 case LINUX_IP_MULTICAST_TTL: 152 return (IP_MULTICAST_TTL); 153 case LINUX_IP_MULTICAST_LOOP: 154 return (IP_MULTICAST_LOOP); 155 case LINUX_IP_ADD_MEMBERSHIP: 156 return (IP_ADD_MEMBERSHIP); 157 case LINUX_IP_DROP_MEMBERSHIP: 158 return (IP_DROP_MEMBERSHIP); 159 case LINUX_IP_UNBLOCK_SOURCE: 160 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_UNBLOCK_SOURCE"); 161 return (IP_UNBLOCK_SOURCE); 162 case LINUX_IP_BLOCK_SOURCE: 163 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_BLOCK_SOURCE"); 164 return (IP_BLOCK_SOURCE); 165 case LINUX_IP_ADD_SOURCE_MEMBERSHIP: 166 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_ADD_SOURCE_MEMBERSHIP"); 167 return (IP_ADD_SOURCE_MEMBERSHIP); 168 case LINUX_IP_DROP_SOURCE_MEMBERSHIP: 169 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_DROP_SOURCE_MEMBERSHIP"); 170 return (IP_DROP_SOURCE_MEMBERSHIP); 171 case LINUX_MCAST_JOIN_GROUP: 172 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_JOIN_GROUP"); 173 return (MCAST_JOIN_GROUP); 174 case LINUX_MCAST_LEAVE_GROUP: 175 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_LEAVE_GROUP"); 176 return (MCAST_LEAVE_GROUP); 177 case LINUX_MCAST_JOIN_SOURCE_GROUP: 178 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_JOIN_SOURCE_GROUP"); 179 return (MCAST_JOIN_SOURCE_GROUP); 180 case LINUX_MCAST_LEAVE_SOURCE_GROUP: 181 LINUX_RATELIMIT_MSG_NOTTESTED("IPv4 socket option IP_MCAST_LEAVE_SOURCE_GROUP"); 182 return (MCAST_LEAVE_SOURCE_GROUP); 183 case LINUX_IP_RECVORIGDSTADDR: 184 return (IP_RECVORIGDSTADDR); 185 186 /* known but not implemented sockopts */ 187 case LINUX_IP_ROUTER_ALERT: 188 LINUX_RATELIMIT_MSG_OPT1( 189 "unsupported IPv4 socket option IP_ROUTER_ALERT (%d), you can not do user-space routing from linux programs", 190 opt); 191 return (-2); 192 case LINUX_IP_PKTINFO: 193 LINUX_RATELIMIT_MSG_OPT1( 194 "unsupported IPv4 socket option IP_PKTINFO (%d), you can not get extended packet info for datagram sockets in linux programs", 195 opt); 196 return (-2); 197 case LINUX_IP_PKTOPTIONS: 198 LINUX_RATELIMIT_MSG_OPT1( 199 "unsupported IPv4 socket option IP_PKTOPTIONS (%d)", 200 opt); 201 return (-2); 202 case LINUX_IP_MTU_DISCOVER: 203 LINUX_RATELIMIT_MSG_OPT1( 204 "unsupported IPv4 socket option IP_MTU_DISCOVER (%d), your linux program can not control path-MTU discovery", 205 opt); 206 return (-2); 207 case LINUX_IP_RECVERR: 208 /* needed by steam */ 209 LINUX_RATELIMIT_MSG_OPT1( 210 "unsupported IPv4 socket option IP_RECVERR (%d), you can not get extended reliability info in linux programs", 211 opt); 212 return (-2); 213 case LINUX_IP_MTU: 214 LINUX_RATELIMIT_MSG_OPT1( 215 "unsupported IPv4 socket option IP_MTU (%d), your linux program can not control the MTU on this socket", 216 opt); 217 return (-2); 218 case LINUX_IP_XFRM_POLICY: 219 LINUX_RATELIMIT_MSG_OPT1( 220 "unsupported IPv4 socket option IP_XFRM_POLICY (%d)", 221 opt); 222 return (-2); 223 case LINUX_IP_PASSSEC: 224 /* needed by steam */ 225 LINUX_RATELIMIT_MSG_OPT1( 226 "unsupported IPv4 socket option IP_PASSSEC (%d), you can not get IPSEC related credential information associated with this socket in linux programs -- if you do not use IPSEC, you can ignore this", 227 opt); 228 return (-2); 229 case LINUX_IP_TRANSPARENT: 230 /* IP_BINDANY or more? */ 231 LINUX_RATELIMIT_MSG_OPT1( 232 "unsupported IPv4 socket option IP_TRANSPARENT (%d), you can not enable transparent proxying in linux programs -- note, IP_FREEBIND is supported, no idea if the FreeBSD IP_BINDANY is equivalent to the Linux IP_TRANSPARENT or not, any info is welcome", 233 opt); 234 return (-2); 235 case LINUX_IP_NODEFRAG: 236 LINUX_RATELIMIT_MSG_OPT1( 237 "unsupported IPv4 socket option IP_NODEFRAG (%d)", 238 opt); 239 return (-2); 240 case LINUX_IP_CHECKSUM: 241 LINUX_RATELIMIT_MSG_OPT1( 242 "unsupported IPv4 socket option IP_CHECKSUM (%d)", 243 opt); 244 return (-2); 245 case LINUX_IP_BIND_ADDRESS_NO_PORT: 246 LINUX_RATELIMIT_MSG_OPT1( 247 "unsupported IPv4 socket option IP_BIND_ADDRESS_NO_PORT (%d)", 248 opt); 249 return (-2); 250 case LINUX_IP_RECVFRAGSIZE: 251 LINUX_RATELIMIT_MSG_OPT1( 252 "unsupported IPv4 socket option IP_RECVFRAGSIZE (%d)", 253 opt); 254 return (-2); 255 case LINUX_MCAST_MSFILTER: 256 LINUX_RATELIMIT_MSG_OPT1( 257 "unsupported IPv4 socket option IP_MCAST_MSFILTER (%d)", 258 opt); 259 return (-2); 260 case LINUX_IP_MULTICAST_ALL: 261 LINUX_RATELIMIT_MSG_OPT1( 262 "unsupported IPv4 socket option IP_MULTICAST_ALL (%d), your linux program will not see all multicast groups joined by the entire system, only those the program joined itself on this socket", 263 opt); 264 return (-2); 265 case LINUX_IP_UNICAST_IF: 266 LINUX_RATELIMIT_MSG_OPT1( 267 "unsupported IPv4 socket option IP_UNICAST_IF (%d)", 268 opt); 269 return (-2); 270 271 /* unknown sockopts */ 272 default: 273 return (-1); 274 } 275 } 276 277 static int 278 linux_to_bsd_ip6_sockopt(int opt) 279 { 280 281 switch (opt) { 282 /* known and translated sockopts */ 283 case LINUX_IPV6_2292PKTINFO: 284 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292PKTINFO"); 285 return (IPV6_2292PKTINFO); 286 case LINUX_IPV6_2292HOPOPTS: 287 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292HOPOPTS"); 288 return (IPV6_2292HOPOPTS); 289 case LINUX_IPV6_2292DSTOPTS: 290 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292DSTOPTS"); 291 return (IPV6_2292DSTOPTS); 292 case LINUX_IPV6_2292RTHDR: 293 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292RTHDR"); 294 return (IPV6_2292RTHDR); 295 case LINUX_IPV6_2292PKTOPTIONS: 296 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292PKTOPTIONS"); 297 return (IPV6_2292PKTOPTIONS); 298 case LINUX_IPV6_CHECKSUM: 299 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_CHECKSUM"); 300 return (IPV6_CHECKSUM); 301 case LINUX_IPV6_2292HOPLIMIT: 302 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_2292HOPLIMIT"); 303 return (IPV6_2292HOPLIMIT); 304 case LINUX_IPV6_NEXTHOP: 305 return (IPV6_NEXTHOP); 306 case LINUX_IPV6_UNICAST_HOPS: 307 return (IPV6_UNICAST_HOPS); 308 case LINUX_IPV6_MULTICAST_IF: 309 return (IPV6_MULTICAST_IF); 310 case LINUX_IPV6_MULTICAST_HOPS: 311 return (IPV6_MULTICAST_HOPS); 312 case LINUX_IPV6_MULTICAST_LOOP: 313 return (IPV6_MULTICAST_LOOP); 314 case LINUX_IPV6_ADD_MEMBERSHIP: 315 return (IPV6_JOIN_GROUP); 316 case LINUX_IPV6_DROP_MEMBERSHIP: 317 return (IPV6_LEAVE_GROUP); 318 case LINUX_IPV6_V6ONLY: 319 return (IPV6_V6ONLY); 320 case LINUX_IPV6_IPSEC_POLICY: 321 /* we have this option, but not documented in ip6(4) manpage */ 322 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_IPSEC_POLICY"); 323 return (IPV6_IPSEC_POLICY); 324 case LINUX_MCAST_JOIN_GROUP: 325 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_JOIN_GROUP"); 326 return (IPV6_JOIN_GROUP); 327 case LINUX_MCAST_LEAVE_GROUP: 328 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_LEAVE_GROUP"); 329 return (IPV6_LEAVE_GROUP); 330 case LINUX_IPV6_RECVPKTINFO: 331 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVPKTINFO"); 332 return (IPV6_RECVPKTINFO); 333 case LINUX_IPV6_PKTINFO: 334 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_PKTINFO"); 335 return (IPV6_PKTINFO); 336 case LINUX_IPV6_RECVHOPLIMIT: 337 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVHOPLIMIT"); 338 return (IPV6_RECVHOPLIMIT); 339 case LINUX_IPV6_HOPLIMIT: 340 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_HOPLIMIT"); 341 return (IPV6_HOPLIMIT); 342 case LINUX_IPV6_RECVHOPOPTS: 343 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVHOPOPTS"); 344 return (IPV6_RECVHOPOPTS); 345 case LINUX_IPV6_HOPOPTS: 346 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_HOPOPTS"); 347 return (IPV6_HOPOPTS); 348 case LINUX_IPV6_RTHDRDSTOPTS: 349 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RTHDRDSTOPTS"); 350 return (IPV6_RTHDRDSTOPTS); 351 case LINUX_IPV6_RECVRTHDR: 352 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVRTHDR"); 353 return (IPV6_RECVRTHDR); 354 case LINUX_IPV6_RTHDR: 355 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RTHDR"); 356 return (IPV6_RTHDR); 357 case LINUX_IPV6_RECVDSTOPTS: 358 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVDSTOPTS"); 359 return (IPV6_RECVDSTOPTS); 360 case LINUX_IPV6_DSTOPTS: 361 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_DSTOPTS"); 362 return (IPV6_DSTOPTS); 363 case LINUX_IPV6_RECVPATHMTU: 364 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_RECVPATHMTU"); 365 return (IPV6_RECVPATHMTU); 366 case LINUX_IPV6_PATHMTU: 367 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_PATHMTU"); 368 return (IPV6_PATHMTU); 369 case LINUX_IPV6_DONTFRAG: 370 return (IPV6_DONTFRAG); 371 case LINUX_IPV6_AUTOFLOWLABEL: 372 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_AUTOFLOWLABEL"); 373 return (IPV6_AUTOFLOWLABEL); 374 case LINUX_IPV6_ORIGDSTADDR: 375 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_ORIGDSTADDR"); 376 return (IPV6_ORIGDSTADDR); 377 case LINUX_IPV6_FREEBIND: 378 LINUX_RATELIMIT_MSG_NOTTESTED("IPv6 socket option IPV6_FREEBIND"); 379 return (IPV6_BINDANY); 380 381 /* known but not implemented sockopts */ 382 case LINUX_IPV6_ADDRFORM: 383 LINUX_RATELIMIT_MSG_OPT1( 384 "unsupported IPv6 socket option IPV6_ADDRFORM (%d), you linux program can not convert the socket to IPv4", 385 opt); 386 return (-2); 387 case LINUX_IPV6_AUTHHDR: 388 LINUX_RATELIMIT_MSG_OPT1( 389 "unsupported IPv6 socket option IPV6_AUTHHDR (%d), your linux program can not get the authentication header info of IPv6 packets", 390 opt); 391 return (-2); 392 case LINUX_IPV6_FLOWINFO: 393 LINUX_RATELIMIT_MSG_OPT1( 394 "unsupported IPv6 socket option IPV6_FLOWINFO (%d), your linux program can not get the flowid of IPv6 packets", 395 opt); 396 return (-2); 397 case LINUX_IPV6_ROUTER_ALERT: 398 LINUX_RATELIMIT_MSG_OPT1( 399 "unsupported IPv6 socket option IPV6_ROUTER_ALERT (%d), you can not do user-space routing from linux programs", 400 opt); 401 return (-2); 402 case LINUX_IPV6_MTU_DISCOVER: 403 LINUX_RATELIMIT_MSG_OPT1( 404 "unsupported IPv6 socket option IPV6_MTU_DISCOVER (%d), your linux program can not control path-MTU discovery", 405 opt); 406 return (-2); 407 case LINUX_IPV6_MTU: 408 LINUX_RATELIMIT_MSG_OPT1( 409 "unsupported IPv6 socket option IPV6_MTU (%d), your linux program can not control the MTU on this socket", 410 opt); 411 return (-2); 412 case LINUX_IPV6_JOIN_ANYCAST: 413 LINUX_RATELIMIT_MSG_OPT1( 414 "unsupported IPv6 socket option IPV6_JOIN_ANYCAST (%d)", 415 opt); 416 return (-2); 417 case LINUX_IPV6_LEAVE_ANYCAST: 418 LINUX_RATELIMIT_MSG_OPT1( 419 "unsupported IPv6 socket option IPV6_LEAVE_ANYCAST (%d)", 420 opt); 421 return (-2); 422 case LINUX_IPV6_MULTICAST_ALL: 423 LINUX_RATELIMIT_MSG_OPT1( 424 "unsupported IPv6 socket option IPV6_MULTICAST_ALL (%d)", 425 opt); 426 return (-2); 427 case LINUX_IPV6_ROUTER_ALERT_ISOLATE: 428 LINUX_RATELIMIT_MSG_OPT1( 429 "unsupported IPv6 socket option IPV6_ROUTER_ALERT_ISOLATE (%d)", 430 opt); 431 return (-2); 432 case LINUX_IPV6_FLOWLABEL_MGR: 433 LINUX_RATELIMIT_MSG_OPT1( 434 "unsupported IPv6 socket option IPV6_FLOWLABEL_MGR (%d)", 435 opt); 436 return (-2); 437 case LINUX_IPV6_FLOWINFO_SEND: 438 LINUX_RATELIMIT_MSG_OPT1( 439 "unsupported IPv6 socket option IPV6_FLOWINFO_SEND (%d)", 440 opt); 441 return (-2); 442 case LINUX_IPV6_XFRM_POLICY: 443 LINUX_RATELIMIT_MSG_OPT1( 444 "unsupported IPv6 socket option IPV6_XFRM_POLICY (%d)", 445 opt); 446 return (-2); 447 case LINUX_IPV6_HDRINCL: 448 LINUX_RATELIMIT_MSG_OPT1( 449 "unsupported IPv6 socket option IPV6_HDRINCL (%d)", 450 opt); 451 return (-2); 452 case LINUX_MCAST_BLOCK_SOURCE: 453 LINUX_RATELIMIT_MSG_OPT1( 454 "unsupported IPv6 socket option MCAST_BLOCK_SOURCE (%d), your linux program may see more multicast stuff than it wants", 455 opt); 456 return (-2); 457 case LINUX_MCAST_UNBLOCK_SOURCE: 458 LINUX_RATELIMIT_MSG_OPT1( 459 "unsupported IPv6 socket option MCAST_UNBLOCK_SOURCE (%d), your linux program may not see all the multicast stuff it wants", 460 opt); 461 return (-2); 462 case LINUX_MCAST_JOIN_SOURCE_GROUP: 463 LINUX_RATELIMIT_MSG_OPT1( 464 "unsupported IPv6 socket option MCAST_JOIN_SOURCE_GROUP (%d), your linux program is not able to join a multicast source group", 465 opt); 466 return (-2); 467 case LINUX_MCAST_LEAVE_SOURCE_GROUP: 468 LINUX_RATELIMIT_MSG_OPT1( 469 "unsupported IPv6 socket option MCAST_LEAVE_SOURCE_GROUP (%d), your linux program is not able to leave a multicast source group -- but it was also not able to join one, so no issue", 470 opt); 471 return (-2); 472 case LINUX_MCAST_MSFILTER: 473 LINUX_RATELIMIT_MSG_OPT1( 474 "unsupported IPv6 socket option MCAST_MSFILTER (%d), your linux program can not manipulate the multicast filter, it may see more multicast data than it wants to see", 475 opt); 476 return (-2); 477 case LINUX_IPV6_ADDR_PREFERENCES: 478 LINUX_RATELIMIT_MSG_OPT1( 479 "unsupported IPv6 socket option IPV6_ADDR_PREFERENCES (%d)", 480 opt); 481 return (-2); 482 case LINUX_IPV6_MINHOPCOUNT: 483 LINUX_RATELIMIT_MSG_OPT1( 484 "unsupported IPv6 socket option IPV6_MINHOPCOUNT (%d)", 485 opt); 486 return (-2); 487 case LINUX_IPV6_TRANSPARENT: 488 /* IP_BINDANY or more? */ 489 LINUX_RATELIMIT_MSG_OPT1( 490 "unsupported IPv6 socket option IPV6_TRANSPARENT (%d), you can not enable transparent proxying in linux programs -- note, IP_FREEBIND is supported, no idea if the FreeBSD IP_BINDANY is equivalent to the Linux IP_TRANSPARENT or not, any info is welcome", 491 opt); 492 return (-2); 493 case LINUX_IPV6_UNICAST_IF: 494 LINUX_RATELIMIT_MSG_OPT1( 495 "unsupported IPv6 socket option IPV6_UNICAST_IF (%d)", 496 opt); 497 return (-2); 498 case LINUX_IPV6_RECVFRAGSIZE: 499 LINUX_RATELIMIT_MSG_OPT1( 500 "unsupported IPv6 socket option IPV6_RECVFRAGSIZE (%d)", 501 opt); 502 return (-2); 503 case LINUX_IPV6_RECVERR: 504 LINUX_RATELIMIT_MSG_OPT1( 505 "unsupported IPv6 socket option IPV6_RECVERR (%d), you can not get extended reliability info in linux programs", 506 opt); 507 return (-2); 508 509 /* unknown sockopts */ 510 default: 511 return (-1); 512 } 513 } 514 515 static int 516 linux_to_bsd_so_sockopt(int opt) 517 { 518 519 switch (opt) { 520 case LINUX_SO_DEBUG: 521 return (SO_DEBUG); 522 case LINUX_SO_REUSEADDR: 523 return (SO_REUSEADDR); 524 case LINUX_SO_TYPE: 525 return (SO_TYPE); 526 case LINUX_SO_ERROR: 527 return (SO_ERROR); 528 case LINUX_SO_DONTROUTE: 529 return (SO_DONTROUTE); 530 case LINUX_SO_BROADCAST: 531 return (SO_BROADCAST); 532 case LINUX_SO_SNDBUF: 533 case LINUX_SO_SNDBUFFORCE: 534 return (SO_SNDBUF); 535 case LINUX_SO_RCVBUF: 536 case LINUX_SO_RCVBUFFORCE: 537 return (SO_RCVBUF); 538 case LINUX_SO_KEEPALIVE: 539 return (SO_KEEPALIVE); 540 case LINUX_SO_OOBINLINE: 541 return (SO_OOBINLINE); 542 case LINUX_SO_LINGER: 543 return (SO_LINGER); 544 case LINUX_SO_REUSEPORT: 545 return (SO_REUSEPORT_LB); 546 case LINUX_SO_PASSCRED: 547 return (LOCAL_CREDS_PERSISTENT); 548 case LINUX_SO_PEERCRED: 549 return (LOCAL_PEERCRED); 550 case LINUX_SO_RCVLOWAT: 551 return (SO_RCVLOWAT); 552 case LINUX_SO_SNDLOWAT: 553 return (SO_SNDLOWAT); 554 case LINUX_SO_RCVTIMEO: 555 return (SO_RCVTIMEO); 556 case LINUX_SO_SNDTIMEO: 557 return (SO_SNDTIMEO); 558 case LINUX_SO_TIMESTAMPO: 559 case LINUX_SO_TIMESTAMPN: 560 return (SO_TIMESTAMP); 561 case LINUX_SO_TIMESTAMPNSO: 562 case LINUX_SO_TIMESTAMPNSN: 563 return (SO_BINTIME); 564 case LINUX_SO_ACCEPTCONN: 565 return (SO_ACCEPTCONN); 566 case LINUX_SO_PROTOCOL: 567 return (SO_PROTOCOL); 568 case LINUX_SO_DOMAIN: 569 return (SO_DOMAIN); 570 } 571 return (-1); 572 } 573 574 static int 575 linux_to_bsd_tcp_sockopt(int opt) 576 { 577 578 switch (opt) { 579 case LINUX_TCP_NODELAY: 580 return (TCP_NODELAY); 581 case LINUX_TCP_MAXSEG: 582 return (TCP_MAXSEG); 583 case LINUX_TCP_CORK: 584 return (TCP_NOPUSH); 585 case LINUX_TCP_KEEPIDLE: 586 return (TCP_KEEPIDLE); 587 case LINUX_TCP_KEEPINTVL: 588 return (TCP_KEEPINTVL); 589 case LINUX_TCP_KEEPCNT: 590 return (TCP_KEEPCNT); 591 case LINUX_TCP_INFO: 592 LINUX_RATELIMIT_MSG_OPT1( 593 "unsupported TCP socket option TCP_INFO (%d)", opt); 594 return (-2); 595 case LINUX_TCP_MD5SIG: 596 return (TCP_MD5SIG); 597 } 598 return (-1); 599 } 600 601 static int 602 linux_to_bsd_msg_flags(int flags) 603 { 604 int ret_flags = 0; 605 606 if (flags & LINUX_MSG_OOB) 607 ret_flags |= MSG_OOB; 608 if (flags & LINUX_MSG_PEEK) 609 ret_flags |= MSG_PEEK; 610 if (flags & LINUX_MSG_DONTROUTE) 611 ret_flags |= MSG_DONTROUTE; 612 if (flags & LINUX_MSG_CTRUNC) 613 ret_flags |= MSG_CTRUNC; 614 if (flags & LINUX_MSG_TRUNC) 615 ret_flags |= MSG_TRUNC; 616 if (flags & LINUX_MSG_DONTWAIT) 617 ret_flags |= MSG_DONTWAIT; 618 if (flags & LINUX_MSG_EOR) 619 ret_flags |= MSG_EOR; 620 if (flags & LINUX_MSG_WAITALL) 621 ret_flags |= MSG_WAITALL; 622 if (flags & LINUX_MSG_NOSIGNAL) 623 ret_flags |= MSG_NOSIGNAL; 624 if (flags & LINUX_MSG_PROXY) 625 LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_PROXY (%d) not handled", 626 LINUX_MSG_PROXY); 627 if (flags & LINUX_MSG_FIN) 628 LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_FIN (%d) not handled", 629 LINUX_MSG_FIN); 630 if (flags & LINUX_MSG_SYN) 631 LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_SYN (%d) not handled", 632 LINUX_MSG_SYN); 633 if (flags & LINUX_MSG_CONFIRM) 634 LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_CONFIRM (%d) not handled", 635 LINUX_MSG_CONFIRM); 636 if (flags & LINUX_MSG_RST) 637 LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_RST (%d) not handled", 638 LINUX_MSG_RST); 639 if (flags & LINUX_MSG_ERRQUEUE) 640 LINUX_RATELIMIT_MSG_OPT1("socket message flag MSG_ERRQUEUE (%d) not handled", 641 LINUX_MSG_ERRQUEUE); 642 return (ret_flags); 643 } 644 645 static int 646 linux_to_bsd_cmsg_type(int cmsg_type) 647 { 648 649 switch (cmsg_type) { 650 case LINUX_SCM_RIGHTS: 651 return (SCM_RIGHTS); 652 case LINUX_SCM_CREDENTIALS: 653 return (SCM_CREDS); 654 } 655 return (-1); 656 } 657 658 static int 659 bsd_to_linux_ip_cmsg_type(int cmsg_type) 660 { 661 662 switch (cmsg_type) { 663 case IP_RECVORIGDSTADDR: 664 return (LINUX_IP_RECVORIGDSTADDR); 665 case IP_RECVTOS: 666 return (LINUX_IP_TOS); 667 } 668 return (-1); 669 } 670 671 static int 672 bsd_to_linux_cmsg_type(struct proc *p, int cmsg_type, int cmsg_level) 673 { 674 struct linux_pemuldata *pem; 675 676 if (cmsg_level == IPPROTO_IP) 677 return (bsd_to_linux_ip_cmsg_type(cmsg_type)); 678 if (cmsg_level != SOL_SOCKET) 679 return (-1); 680 681 pem = pem_find(p); 682 683 switch (cmsg_type) { 684 case SCM_RIGHTS: 685 return (LINUX_SCM_RIGHTS); 686 case SCM_CREDS: 687 return (LINUX_SCM_CREDENTIALS); 688 case SCM_CREDS2: 689 return (LINUX_SCM_CREDENTIALS); 690 case SCM_TIMESTAMP: 691 return (pem->so_timestamp); 692 case SCM_BINTIME: 693 return (pem->so_timestampns); 694 } 695 return (-1); 696 } 697 698 static int 699 linux_to_bsd_msghdr(struct msghdr *bhdr, const struct l_msghdr *lhdr) 700 { 701 if (lhdr->msg_controllen > INT_MAX) 702 return (ENOBUFS); 703 704 bhdr->msg_name = PTRIN(lhdr->msg_name); 705 bhdr->msg_namelen = lhdr->msg_namelen; 706 bhdr->msg_iov = PTRIN(lhdr->msg_iov); 707 bhdr->msg_iovlen = lhdr->msg_iovlen; 708 bhdr->msg_control = PTRIN(lhdr->msg_control); 709 710 /* 711 * msg_controllen is skipped since BSD and LINUX control messages 712 * are potentially different sizes (e.g. the cred structure used 713 * by SCM_CREDS is different between the two operating system). 714 * 715 * The caller can set it (if necessary) after converting all the 716 * control messages. 717 */ 718 719 bhdr->msg_flags = linux_to_bsd_msg_flags(lhdr->msg_flags); 720 return (0); 721 } 722 723 static int 724 bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr) 725 { 726 lhdr->msg_name = PTROUT(bhdr->msg_name); 727 lhdr->msg_namelen = bhdr->msg_namelen; 728 lhdr->msg_iov = PTROUT(bhdr->msg_iov); 729 lhdr->msg_iovlen = bhdr->msg_iovlen; 730 lhdr->msg_control = PTROUT(bhdr->msg_control); 731 732 /* 733 * msg_controllen is skipped since BSD and LINUX control messages 734 * are potentially different sizes (e.g. the cred structure used 735 * by SCM_CREDS is different between the two operating system). 736 * 737 * The caller can set it (if necessary) after converting all the 738 * control messages. 739 */ 740 741 /* msg_flags skipped */ 742 return (0); 743 } 744 745 static int 746 linux_set_socket_flags(int lflags, int *flags) 747 { 748 749 if (lflags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) 750 return (EINVAL); 751 if (lflags & LINUX_SOCK_NONBLOCK) 752 *flags |= SOCK_NONBLOCK; 753 if (lflags & LINUX_SOCK_CLOEXEC) 754 *flags |= SOCK_CLOEXEC; 755 return (0); 756 } 757 758 static int 759 linux_copyout_sockaddr(const struct sockaddr *sa, void *uaddr, size_t len) 760 { 761 struct l_sockaddr *lsa; 762 int error; 763 764 error = bsd_to_linux_sockaddr(sa, &lsa, len); 765 if (error != 0) 766 return (error); 767 768 error = copyout(lsa, uaddr, len); 769 free(lsa, M_LINUX); 770 771 return (error); 772 } 773 774 static int 775 linux_sendit(struct thread *td, int s, struct msghdr *mp, int flags, 776 struct mbuf *control, enum uio_seg segflg) 777 { 778 struct sockaddr *to; 779 int error, len; 780 781 if (mp->msg_name != NULL) { 782 len = mp->msg_namelen; 783 error = linux_to_bsd_sockaddr(mp->msg_name, &to, &len); 784 if (error != 0) 785 return (error); 786 mp->msg_name = to; 787 } else 788 to = NULL; 789 790 error = kern_sendit(td, s, mp, linux_to_bsd_msg_flags(flags), control, 791 segflg); 792 793 if (to) 794 free(to, M_SONAME); 795 return (error); 796 } 797 798 /* Return 0 if IP_HDRINCL is set for the given socket. */ 799 static int 800 linux_check_hdrincl(struct thread *td, int s) 801 { 802 int error, optval; 803 socklen_t size_val; 804 805 size_val = sizeof(optval); 806 error = kern_getsockopt(td, s, IPPROTO_IP, IP_HDRINCL, 807 &optval, UIO_SYSSPACE, &size_val); 808 if (error != 0) 809 return (error); 810 811 return (optval == 0); 812 } 813 814 /* 815 * Updated sendto() when IP_HDRINCL is set: 816 * tweak endian-dependent fields in the IP packet. 817 */ 818 static int 819 linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args) 820 { 821 /* 822 * linux_ip_copysize defines how many bytes we should copy 823 * from the beginning of the IP packet before we customize it for BSD. 824 * It should include all the fields we modify (ip_len and ip_off). 825 */ 826 #define linux_ip_copysize 8 827 828 struct ip *packet; 829 struct msghdr msg; 830 struct iovec aiov[1]; 831 int error; 832 833 /* Check that the packet isn't too big or too small. */ 834 if (linux_args->len < linux_ip_copysize || 835 linux_args->len > IP_MAXPACKET) 836 return (EINVAL); 837 838 packet = (struct ip *)malloc(linux_args->len, M_LINUX, M_WAITOK); 839 840 /* Make kernel copy of the packet to be sent */ 841 if ((error = copyin(PTRIN(linux_args->msg), packet, 842 linux_args->len))) 843 goto goout; 844 845 /* Convert fields from Linux to BSD raw IP socket format */ 846 packet->ip_len = linux_args->len; 847 packet->ip_off = ntohs(packet->ip_off); 848 849 /* Prepare the msghdr and iovec structures describing the new packet */ 850 msg.msg_name = PTRIN(linux_args->to); 851 msg.msg_namelen = linux_args->tolen; 852 msg.msg_iov = aiov; 853 msg.msg_iovlen = 1; 854 msg.msg_control = NULL; 855 msg.msg_flags = 0; 856 aiov[0].iov_base = (char *)packet; 857 aiov[0].iov_len = linux_args->len; 858 error = linux_sendit(td, linux_args->s, &msg, linux_args->flags, 859 NULL, UIO_SYSSPACE); 860 goout: 861 free(packet, M_LINUX); 862 return (error); 863 } 864 865 static const char *linux_netlink_names[] = { 866 [LINUX_NETLINK_ROUTE] = "ROUTE", 867 [LINUX_NETLINK_SOCK_DIAG] = "SOCK_DIAG", 868 [LINUX_NETLINK_NFLOG] = "NFLOG", 869 [LINUX_NETLINK_SELINUX] = "SELINUX", 870 [LINUX_NETLINK_AUDIT] = "AUDIT", 871 [LINUX_NETLINK_FIB_LOOKUP] = "FIB_LOOKUP", 872 [LINUX_NETLINK_NETFILTER] = "NETFILTER", 873 [LINUX_NETLINK_KOBJECT_UEVENT] = "KOBJECT_UEVENT", 874 }; 875 876 int 877 linux_socket(struct thread *td, struct linux_socket_args *args) 878 { 879 int retval_socket, type; 880 sa_family_t domain; 881 882 type = args->type & LINUX_SOCK_TYPE_MASK; 883 if (type < 0 || type > LINUX_SOCK_MAX) 884 return (EINVAL); 885 retval_socket = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, 886 &type); 887 if (retval_socket != 0) 888 return (retval_socket); 889 domain = linux_to_bsd_domain(args->domain); 890 if (domain == AF_UNKNOWN) { 891 /* Mask off SOCK_NONBLOCK / CLOEXEC for error messages. */ 892 type = args->type & LINUX_SOCK_TYPE_MASK; 893 if (args->domain == LINUX_AF_NETLINK && 894 args->protocol == LINUX_NETLINK_AUDIT) { 895 ; /* Do nothing, quietly. */ 896 } else if (args->domain == LINUX_AF_NETLINK) { 897 const char *nl_name; 898 899 if (args->protocol >= 0 && 900 args->protocol < nitems(linux_netlink_names)) 901 nl_name = linux_netlink_names[args->protocol]; 902 else 903 nl_name = NULL; 904 if (nl_name != NULL) 905 linux_msg(curthread, 906 "unsupported socket(AF_NETLINK, %d, " 907 "NETLINK_%s)", type, nl_name); 908 else 909 linux_msg(curthread, 910 "unsupported socket(AF_NETLINK, %d, %d)", 911 type, args->protocol); 912 } else { 913 linux_msg(curthread, "unsupported socket domain %d, " 914 "type %d, protocol %d", args->domain, type, 915 args->protocol); 916 } 917 return (EAFNOSUPPORT); 918 } 919 920 retval_socket = kern_socket(td, domain, type, args->protocol); 921 if (retval_socket) 922 return (retval_socket); 923 924 if (type == SOCK_RAW 925 && (args->protocol == IPPROTO_RAW || args->protocol == 0) 926 && domain == PF_INET) { 927 /* It's a raw IP socket: set the IP_HDRINCL option. */ 928 int hdrincl; 929 930 hdrincl = 1; 931 /* We ignore any error returned by kern_setsockopt() */ 932 kern_setsockopt(td, td->td_retval[0], IPPROTO_IP, IP_HDRINCL, 933 &hdrincl, UIO_SYSSPACE, sizeof(hdrincl)); 934 } 935 #ifdef INET6 936 /* 937 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by default 938 * and some apps depend on this. So, set V6ONLY to 0 for Linux apps. 939 * For simplicity we do this unconditionally of the net.inet6.ip6.v6only 940 * sysctl value. 941 */ 942 if (domain == PF_INET6) { 943 int v6only; 944 945 v6only = 0; 946 /* We ignore any error returned by setsockopt() */ 947 kern_setsockopt(td, td->td_retval[0], IPPROTO_IPV6, IPV6_V6ONLY, 948 &v6only, UIO_SYSSPACE, sizeof(v6only)); 949 } 950 #endif 951 952 return (retval_socket); 953 } 954 955 int 956 linux_bind(struct thread *td, struct linux_bind_args *args) 957 { 958 struct sockaddr *sa; 959 int error; 960 961 error = linux_to_bsd_sockaddr(PTRIN(args->name), &sa, 962 &args->namelen); 963 if (error != 0) 964 return (error); 965 966 error = kern_bindat(td, AT_FDCWD, args->s, sa); 967 free(sa, M_SONAME); 968 969 /* XXX */ 970 if (error == EADDRNOTAVAIL && args->namelen != sizeof(struct sockaddr_in)) 971 return (EINVAL); 972 return (error); 973 } 974 975 int 976 linux_connect(struct thread *td, struct linux_connect_args *args) 977 { 978 struct socket *so; 979 struct sockaddr *sa; 980 struct file *fp; 981 int error; 982 983 error = linux_to_bsd_sockaddr(PTRIN(args->name), &sa, 984 &args->namelen); 985 if (error != 0) 986 return (error); 987 988 error = kern_connectat(td, AT_FDCWD, args->s, sa); 989 free(sa, M_SONAME); 990 if (error != EISCONN) 991 return (error); 992 993 /* 994 * Linux doesn't return EISCONN the first time it occurs, 995 * when on a non-blocking socket. Instead it returns the 996 * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD. 997 */ 998 error = getsock(td, args->s, &cap_connect_rights, &fp); 999 if (error != 0) 1000 return (error); 1001 1002 error = EISCONN; 1003 so = fp->f_data; 1004 if (atomic_load_int(&fp->f_flag) & FNONBLOCK) { 1005 SOCK_LOCK(so); 1006 if (so->so_emuldata == 0) 1007 error = so->so_error; 1008 so->so_emuldata = (void *)1; 1009 SOCK_UNLOCK(so); 1010 } 1011 fdrop(fp, td); 1012 1013 return (error); 1014 } 1015 1016 int 1017 linux_listen(struct thread *td, struct linux_listen_args *args) 1018 { 1019 1020 return (kern_listen(td, args->s, args->backlog)); 1021 } 1022 1023 static int 1024 linux_accept_common(struct thread *td, int s, l_uintptr_t addr, 1025 l_uintptr_t namelen, int flags) 1026 { 1027 struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; 1028 struct file *fp, *fp1; 1029 struct socket *so; 1030 socklen_t len; 1031 int bflags, error, error1; 1032 1033 bflags = 0; 1034 fp = NULL; 1035 1036 error = linux_set_socket_flags(flags, &bflags); 1037 if (error != 0) 1038 return (error); 1039 1040 if (PTRIN(addr) != NULL) { 1041 error = copyin(PTRIN(namelen), &len, sizeof(len)); 1042 if (error != 0) 1043 return (error); 1044 if (len < 0) 1045 return (EINVAL); 1046 } else 1047 len = 0; 1048 1049 error = kern_accept4(td, s, (struct sockaddr *)&ss, bflags, &fp); 1050 1051 /* 1052 * Translate errno values into ones used by Linux. 1053 */ 1054 if (error != 0) { 1055 /* 1056 * XXX. This is wrong, different sockaddr structures 1057 * have different sizes. 1058 */ 1059 switch (error) { 1060 case EFAULT: 1061 if (namelen != sizeof(struct sockaddr_in)) 1062 error = EINVAL; 1063 break; 1064 case EINVAL: 1065 error1 = getsock(td, s, &cap_accept_rights, &fp1); 1066 if (error1 != 0) { 1067 error = error1; 1068 break; 1069 } 1070 so = fp1->f_data; 1071 if (so->so_type == SOCK_DGRAM) 1072 error = EOPNOTSUPP; 1073 fdrop(fp1, td); 1074 break; 1075 } 1076 return (error); 1077 } 1078 1079 if (PTRIN(addr) != NULL) { 1080 len = min(ss.ss_len, len); 1081 error = linux_copyout_sockaddr((struct sockaddr *)&ss, 1082 PTRIN(addr), len); 1083 if (error == 0) { 1084 len = ss.ss_len; 1085 error = copyout(&len, PTRIN(namelen), sizeof(len)); 1086 } 1087 if (error != 0) { 1088 fdclose(td, fp, td->td_retval[0]); 1089 td->td_retval[0] = 0; 1090 } 1091 } 1092 if (fp != NULL) 1093 fdrop(fp, td); 1094 return (error); 1095 } 1096 1097 int 1098 linux_accept(struct thread *td, struct linux_accept_args *args) 1099 { 1100 1101 return (linux_accept_common(td, args->s, args->addr, 1102 args->namelen, 0)); 1103 } 1104 1105 int 1106 linux_accept4(struct thread *td, struct linux_accept4_args *args) 1107 { 1108 1109 return (linux_accept_common(td, args->s, args->addr, 1110 args->namelen, args->flags)); 1111 } 1112 1113 int 1114 linux_getsockname(struct thread *td, struct linux_getsockname_args *args) 1115 { 1116 struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; 1117 socklen_t len; 1118 int error; 1119 1120 error = copyin(PTRIN(args->namelen), &len, sizeof(len)); 1121 if (error != 0) 1122 return (error); 1123 1124 error = kern_getsockname(td, args->s, (struct sockaddr *)&ss); 1125 if (error != 0) 1126 return (error); 1127 1128 len = min(ss.ss_len, len); 1129 error = linux_copyout_sockaddr((struct sockaddr *)&ss, 1130 PTRIN(args->addr), len); 1131 if (error == 0) { 1132 len = ss.ss_len; 1133 error = copyout(&len, PTRIN(args->namelen), sizeof(len)); 1134 } 1135 return (error); 1136 } 1137 1138 int 1139 linux_getpeername(struct thread *td, struct linux_getpeername_args *args) 1140 { 1141 struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; 1142 socklen_t len; 1143 int error; 1144 1145 error = copyin(PTRIN(args->namelen), &len, sizeof(len)); 1146 if (error != 0) 1147 return (error); 1148 1149 error = kern_getpeername(td, args->s, (struct sockaddr *)&ss); 1150 if (error != 0) 1151 return (error); 1152 1153 len = min(ss.ss_len, len); 1154 error = linux_copyout_sockaddr((struct sockaddr *)&ss, 1155 PTRIN(args->addr), len); 1156 if (error == 0) { 1157 len = ss.ss_len; 1158 error = copyout(&len, PTRIN(args->namelen), sizeof(len)); 1159 } 1160 return (error); 1161 } 1162 1163 int 1164 linux_socketpair(struct thread *td, struct linux_socketpair_args *args) 1165 { 1166 int domain, error, sv[2], type; 1167 1168 domain = linux_to_bsd_domain(args->domain); 1169 if (domain != PF_LOCAL) 1170 return (EAFNOSUPPORT); 1171 type = args->type & LINUX_SOCK_TYPE_MASK; 1172 if (type < 0 || type > LINUX_SOCK_MAX) 1173 return (EINVAL); 1174 error = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, 1175 &type); 1176 if (error != 0) 1177 return (error); 1178 if (args->protocol != 0 && args->protocol != PF_UNIX) { 1179 /* 1180 * Use of PF_UNIX as protocol argument is not right, 1181 * but Linux does it. 1182 * Do not map PF_UNIX as its Linux value is identical 1183 * to FreeBSD one. 1184 */ 1185 return (EPROTONOSUPPORT); 1186 } 1187 error = kern_socketpair(td, domain, type, 0, sv); 1188 if (error != 0) 1189 return (error); 1190 error = copyout(sv, PTRIN(args->rsv), 2 * sizeof(int)); 1191 if (error != 0) { 1192 (void)kern_close(td, sv[0]); 1193 (void)kern_close(td, sv[1]); 1194 } 1195 return (error); 1196 } 1197 1198 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1199 struct linux_send_args { 1200 register_t s; 1201 register_t msg; 1202 register_t len; 1203 register_t flags; 1204 }; 1205 1206 static int 1207 linux_send(struct thread *td, struct linux_send_args *args) 1208 { 1209 struct sendto_args /* { 1210 int s; 1211 caddr_t buf; 1212 int len; 1213 int flags; 1214 caddr_t to; 1215 int tolen; 1216 } */ bsd_args; 1217 struct file *fp; 1218 int error; 1219 1220 bsd_args.s = args->s; 1221 bsd_args.buf = (caddr_t)PTRIN(args->msg); 1222 bsd_args.len = args->len; 1223 bsd_args.flags = linux_to_bsd_msg_flags(args->flags); 1224 bsd_args.to = NULL; 1225 bsd_args.tolen = 0; 1226 error = sys_sendto(td, &bsd_args); 1227 if (error == ENOTCONN) { 1228 /* 1229 * Linux doesn't return ENOTCONN for non-blocking sockets. 1230 * Instead it returns the EAGAIN. 1231 */ 1232 error = getsock(td, args->s, &cap_send_rights, &fp); 1233 if (error == 0) { 1234 if (atomic_load_int(&fp->f_flag) & FNONBLOCK) 1235 error = EAGAIN; 1236 fdrop(fp, td); 1237 } 1238 } 1239 return (error); 1240 } 1241 1242 struct linux_recv_args { 1243 register_t s; 1244 register_t msg; 1245 register_t len; 1246 register_t flags; 1247 }; 1248 1249 static int 1250 linux_recv(struct thread *td, struct linux_recv_args *args) 1251 { 1252 struct recvfrom_args /* { 1253 int s; 1254 caddr_t buf; 1255 int len; 1256 int flags; 1257 struct sockaddr *from; 1258 socklen_t fromlenaddr; 1259 } */ bsd_args; 1260 1261 bsd_args.s = args->s; 1262 bsd_args.buf = (caddr_t)PTRIN(args->msg); 1263 bsd_args.len = args->len; 1264 bsd_args.flags = linux_to_bsd_msg_flags(args->flags); 1265 bsd_args.from = NULL; 1266 bsd_args.fromlenaddr = 0; 1267 return (sys_recvfrom(td, &bsd_args)); 1268 } 1269 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1270 1271 int 1272 linux_sendto(struct thread *td, struct linux_sendto_args *args) 1273 { 1274 struct msghdr msg; 1275 struct iovec aiov; 1276 struct socket *so; 1277 struct file *fp; 1278 int error; 1279 1280 if (linux_check_hdrincl(td, args->s) == 0) 1281 /* IP_HDRINCL set, tweak the packet before sending */ 1282 return (linux_sendto_hdrincl(td, args)); 1283 1284 bzero(&msg, sizeof(msg)); 1285 error = getsock(td, args->s, &cap_send_connect_rights, &fp); 1286 if (error != 0) 1287 return (error); 1288 so = fp->f_data; 1289 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) { 1290 msg.msg_name = PTRIN(args->to); 1291 msg.msg_namelen = args->tolen; 1292 } 1293 msg.msg_iov = &aiov; 1294 msg.msg_iovlen = 1; 1295 aiov.iov_base = PTRIN(args->msg); 1296 aiov.iov_len = args->len; 1297 fdrop(fp, td); 1298 return (linux_sendit(td, args->s, &msg, args->flags, NULL, 1299 UIO_USERSPACE)); 1300 } 1301 1302 int 1303 linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args) 1304 { 1305 struct sockaddr *sa; 1306 struct msghdr msg; 1307 struct iovec aiov; 1308 int error, fromlen; 1309 1310 if (PTRIN(args->fromlen) != NULL) { 1311 error = copyin(PTRIN(args->fromlen), &fromlen, 1312 sizeof(fromlen)); 1313 if (error != 0) 1314 return (error); 1315 if (fromlen < 0) 1316 return (EINVAL); 1317 fromlen = min(fromlen, SOCK_MAXADDRLEN); 1318 sa = malloc(fromlen, M_SONAME, M_WAITOK); 1319 } else { 1320 fromlen = 0; 1321 sa = NULL; 1322 } 1323 1324 msg.msg_name = sa; 1325 msg.msg_namelen = fromlen; 1326 msg.msg_iov = &aiov; 1327 msg.msg_iovlen = 1; 1328 aiov.iov_base = PTRIN(args->buf); 1329 aiov.iov_len = args->len; 1330 msg.msg_control = 0; 1331 msg.msg_flags = linux_to_bsd_msg_flags(args->flags); 1332 1333 error = kern_recvit(td, args->s, &msg, UIO_SYSSPACE, NULL); 1334 if (error != 0) 1335 goto out; 1336 1337 /* 1338 * XXX. Seems that FreeBSD is different from Linux here. Linux 1339 * fill source address if underlying protocol provides it, while 1340 * FreeBSD fill it if underlying protocol is not connection-oriented. 1341 * So, kern_recvit() set msg.msg_namelen to 0 if protocol pr_flags 1342 * does not contains PR_ADDR flag. 1343 */ 1344 if (PTRIN(args->from) != NULL && msg.msg_namelen != 0) 1345 error = linux_copyout_sockaddr(sa, PTRIN(args->from), 1346 msg.msg_namelen); 1347 1348 if (error == 0 && PTRIN(args->fromlen) != NULL) 1349 error = copyout(&msg.msg_namelen, PTRIN(args->fromlen), 1350 sizeof(msg.msg_namelen)); 1351 out: 1352 free(sa, M_SONAME); 1353 return (error); 1354 } 1355 1356 static int 1357 linux_sendmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, 1358 l_uint flags) 1359 { 1360 struct sockaddr_storage ss = { .ss_len = sizeof(ss) }; 1361 struct cmsghdr *cmsg; 1362 struct mbuf *control; 1363 struct msghdr msg; 1364 struct l_cmsghdr linux_cmsg; 1365 struct l_cmsghdr *ptr_cmsg; 1366 struct l_msghdr linux_msghdr; 1367 struct iovec *iov; 1368 socklen_t datalen; 1369 struct socket *so; 1370 sa_family_t sa_family; 1371 struct file *fp; 1372 void *data; 1373 l_size_t len; 1374 l_size_t clen; 1375 int error; 1376 1377 error = copyin(msghdr, &linux_msghdr, sizeof(linux_msghdr)); 1378 if (error != 0) 1379 return (error); 1380 1381 /* 1382 * Some Linux applications (ping) define a non-NULL control data 1383 * pointer, but a msg_controllen of 0, which is not allowed in the 1384 * FreeBSD system call interface. NULL the msg_control pointer in 1385 * order to handle this case. This should be checked, but allows the 1386 * Linux ping to work. 1387 */ 1388 if (PTRIN(linux_msghdr.msg_control) != NULL && 1389 linux_msghdr.msg_controllen == 0) 1390 linux_msghdr.msg_control = PTROUT(NULL); 1391 1392 error = linux_to_bsd_msghdr(&msg, &linux_msghdr); 1393 if (error != 0) 1394 return (error); 1395 1396 #ifdef COMPAT_LINUX32 1397 error = freebsd32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen, 1398 &iov, EMSGSIZE); 1399 #else 1400 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1401 #endif 1402 if (error != 0) 1403 return (error); 1404 1405 control = NULL; 1406 1407 error = kern_getsockname(td, s, (struct sockaddr *)&ss); 1408 if (error != 0) 1409 goto bad; 1410 sa_family = ss.ss_family; 1411 1412 if (flags & LINUX_MSG_OOB) { 1413 error = EOPNOTSUPP; 1414 if (sa_family == AF_UNIX) 1415 goto bad; 1416 1417 error = getsock(td, s, &cap_send_rights, &fp); 1418 if (error != 0) 1419 goto bad; 1420 so = fp->f_data; 1421 if (so->so_type != SOCK_STREAM) 1422 error = EOPNOTSUPP; 1423 fdrop(fp, td); 1424 if (error != 0) 1425 goto bad; 1426 } 1427 1428 if (linux_msghdr.msg_controllen >= sizeof(struct l_cmsghdr)) { 1429 error = ENOBUFS; 1430 control = m_get(M_WAITOK, MT_CONTROL); 1431 MCLGET(control, M_WAITOK); 1432 data = mtod(control, void *); 1433 datalen = 0; 1434 1435 ptr_cmsg = PTRIN(linux_msghdr.msg_control); 1436 clen = linux_msghdr.msg_controllen; 1437 do { 1438 error = copyin(ptr_cmsg, &linux_cmsg, 1439 sizeof(struct l_cmsghdr)); 1440 if (error != 0) 1441 goto bad; 1442 1443 error = EINVAL; 1444 if (linux_cmsg.cmsg_len < sizeof(struct l_cmsghdr) || 1445 linux_cmsg.cmsg_len > clen) 1446 goto bad; 1447 1448 if (datalen + CMSG_HDRSZ > MCLBYTES) 1449 goto bad; 1450 1451 /* 1452 * Now we support only SCM_RIGHTS and SCM_CRED, 1453 * so return EINVAL in any other cmsg_type 1454 */ 1455 cmsg = data; 1456 cmsg->cmsg_type = 1457 linux_to_bsd_cmsg_type(linux_cmsg.cmsg_type); 1458 cmsg->cmsg_level = 1459 linux_to_bsd_sockopt_level(linux_cmsg.cmsg_level); 1460 if (cmsg->cmsg_type == -1 1461 || cmsg->cmsg_level != SOL_SOCKET) { 1462 linux_msg(curthread, 1463 "unsupported sendmsg cmsg level %d type %d", 1464 linux_cmsg.cmsg_level, linux_cmsg.cmsg_type); 1465 goto bad; 1466 } 1467 1468 /* 1469 * Some applications (e.g. pulseaudio) attempt to 1470 * send ancillary data even if the underlying protocol 1471 * doesn't support it which is not allowed in the 1472 * FreeBSD system call interface. 1473 */ 1474 if (sa_family != AF_UNIX) 1475 goto next; 1476 1477 if (cmsg->cmsg_type == SCM_CREDS) { 1478 len = sizeof(struct cmsgcred); 1479 if (datalen + CMSG_SPACE(len) > MCLBYTES) 1480 goto bad; 1481 1482 /* 1483 * The lower levels will fill in the structure 1484 */ 1485 memset(CMSG_DATA(data), 0, len); 1486 } else { 1487 len = linux_cmsg.cmsg_len - L_CMSG_HDRSZ; 1488 if (datalen + CMSG_SPACE(len) < datalen || 1489 datalen + CMSG_SPACE(len) > MCLBYTES) 1490 goto bad; 1491 1492 error = copyin(LINUX_CMSG_DATA(ptr_cmsg), 1493 CMSG_DATA(data), len); 1494 if (error != 0) 1495 goto bad; 1496 } 1497 1498 cmsg->cmsg_len = CMSG_LEN(len); 1499 data = (char *)data + CMSG_SPACE(len); 1500 datalen += CMSG_SPACE(len); 1501 1502 next: 1503 if (clen <= LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len)) 1504 break; 1505 1506 clen -= LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len); 1507 ptr_cmsg = (struct l_cmsghdr *)((char *)ptr_cmsg + 1508 LINUX_CMSG_ALIGN(linux_cmsg.cmsg_len)); 1509 } while(clen >= sizeof(struct l_cmsghdr)); 1510 1511 control->m_len = datalen; 1512 if (datalen == 0) { 1513 m_freem(control); 1514 control = NULL; 1515 } 1516 } 1517 1518 msg.msg_iov = iov; 1519 msg.msg_flags = 0; 1520 error = linux_sendit(td, s, &msg, flags, control, UIO_USERSPACE); 1521 control = NULL; 1522 1523 bad: 1524 m_freem(control); 1525 free(iov, M_IOV); 1526 return (error); 1527 } 1528 1529 int 1530 linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) 1531 { 1532 1533 return (linux_sendmsg_common(td, args->s, PTRIN(args->msg), 1534 args->flags)); 1535 } 1536 1537 int 1538 linux_sendmmsg(struct thread *td, struct linux_sendmmsg_args *args) 1539 { 1540 struct l_mmsghdr *msg; 1541 l_uint retval; 1542 int error, datagrams; 1543 1544 if (args->vlen > UIO_MAXIOV) 1545 args->vlen = UIO_MAXIOV; 1546 1547 msg = PTRIN(args->msg); 1548 datagrams = 0; 1549 while (datagrams < args->vlen) { 1550 error = linux_sendmsg_common(td, args->s, &msg->msg_hdr, 1551 args->flags); 1552 if (error != 0) 1553 break; 1554 1555 retval = td->td_retval[0]; 1556 error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); 1557 if (error != 0) 1558 break; 1559 ++msg; 1560 ++datagrams; 1561 } 1562 if (error == 0) 1563 td->td_retval[0] = datagrams; 1564 return (error); 1565 } 1566 1567 static int 1568 recvmsg_scm_rights(struct thread *td, l_uint flags, socklen_t *datalen, 1569 void **data, void **udata) 1570 { 1571 int i, fd, fds, *fdp; 1572 1573 if (flags & LINUX_MSG_CMSG_CLOEXEC) { 1574 fds = *datalen / sizeof(int); 1575 fdp = *data; 1576 for (i = 0; i < fds; i++) { 1577 fd = *fdp++; 1578 (void)kern_fcntl(td, fd, F_SETFD, FD_CLOEXEC); 1579 } 1580 } 1581 return (0); 1582 } 1583 1584 1585 static int 1586 recvmsg_scm_creds(socklen_t *datalen, void **data, void **udata) 1587 { 1588 struct cmsgcred *cmcred; 1589 struct l_ucred lu; 1590 1591 cmcred = *data; 1592 lu.pid = cmcred->cmcred_pid; 1593 lu.uid = cmcred->cmcred_uid; 1594 lu.gid = cmcred->cmcred_gid; 1595 memmove(*data, &lu, sizeof(lu)); 1596 *datalen = sizeof(lu); 1597 return (0); 1598 } 1599 _Static_assert(sizeof(struct cmsgcred) >= sizeof(struct l_ucred), 1600 "scm_creds sizeof l_ucred"); 1601 1602 static int 1603 recvmsg_scm_creds2(socklen_t *datalen, void **data, void **udata) 1604 { 1605 struct sockcred2 *scred; 1606 struct l_ucred lu; 1607 1608 scred = *data; 1609 lu.pid = scred->sc_pid; 1610 lu.uid = scred->sc_uid; 1611 lu.gid = scred->sc_gid; 1612 memmove(*data, &lu, sizeof(lu)); 1613 *datalen = sizeof(lu); 1614 return (0); 1615 } 1616 _Static_assert(sizeof(struct sockcred2) >= sizeof(struct l_ucred), 1617 "scm_creds2 sizeof l_ucred"); 1618 1619 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1620 static int 1621 recvmsg_scm_timestamp(l_int msg_type, socklen_t *datalen, void **data, 1622 void **udata) 1623 { 1624 l_sock_timeval ltv64; 1625 l_timeval ltv; 1626 struct timeval *tv; 1627 socklen_t len; 1628 void *buf; 1629 1630 if (*datalen != sizeof(struct timeval)) 1631 return (EMSGSIZE); 1632 1633 tv = *data; 1634 #if defined(COMPAT_LINUX32) 1635 if (msg_type == LINUX_SCM_TIMESTAMPO && 1636 (tv->tv_sec > INT_MAX || tv->tv_sec < INT_MIN)) 1637 return (EOVERFLOW); 1638 #endif 1639 if (msg_type == LINUX_SCM_TIMESTAMPN) 1640 len = sizeof(ltv64); 1641 else 1642 len = sizeof(ltv); 1643 1644 buf = malloc(len, M_LINUX, M_WAITOK); 1645 if (msg_type == LINUX_SCM_TIMESTAMPN) { 1646 ltv64.tv_sec = tv->tv_sec; 1647 ltv64.tv_usec = tv->tv_usec; 1648 memmove(buf, <v64, len); 1649 } else { 1650 ltv.tv_sec = tv->tv_sec; 1651 ltv.tv_usec = tv->tv_usec; 1652 memmove(buf, <v, len); 1653 } 1654 *data = *udata = buf; 1655 *datalen = len; 1656 return (0); 1657 } 1658 #else 1659 _Static_assert(sizeof(struct timeval) == sizeof(l_timeval), 1660 "scm_timestamp sizeof l_timeval"); 1661 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1662 1663 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1664 static int 1665 recvmsg_scm_timestampns(l_int msg_type, socklen_t *datalen, void **data, 1666 void **udata) 1667 { 1668 struct l_timespec64 ts64; 1669 struct l_timespec ts32; 1670 struct timespec ts; 1671 socklen_t len; 1672 void *buf; 1673 1674 if (msg_type == LINUX_SCM_TIMESTAMPNSO) 1675 len = sizeof(ts32); 1676 else 1677 len = sizeof(ts64); 1678 1679 buf = malloc(len, M_LINUX, M_WAITOK); 1680 bintime2timespec(*data, &ts); 1681 if (msg_type == LINUX_SCM_TIMESTAMPNSO) { 1682 ts32.tv_sec = ts.tv_sec; 1683 ts32.tv_nsec = ts.tv_nsec; 1684 memmove(buf, &ts32, len); 1685 } else { 1686 ts64.tv_sec = ts.tv_sec; 1687 ts64.tv_nsec = ts.tv_nsec; 1688 memmove(buf, &ts64, len); 1689 } 1690 *data = *udata = buf; 1691 *datalen = len; 1692 return (0); 1693 } 1694 #else 1695 static int 1696 recvmsg_scm_timestampns(l_int msg_type, socklen_t *datalen, void **data, 1697 void **udata) 1698 { 1699 struct timespec ts; 1700 1701 bintime2timespec(*data, &ts); 1702 memmove(*data, &ts, sizeof(struct timespec)); 1703 *datalen = sizeof(struct timespec); 1704 return (0); 1705 } 1706 _Static_assert(sizeof(struct bintime) >= sizeof(struct timespec), 1707 "scm_timestampns sizeof timespec"); 1708 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1709 1710 static int 1711 recvmsg_scm_sol_socket(struct thread *td, l_int msg_type, l_int lmsg_type, 1712 l_uint flags, socklen_t *datalen, void **data, void **udata) 1713 { 1714 int error; 1715 1716 error = 0; 1717 switch (msg_type) { 1718 case SCM_RIGHTS: 1719 error = recvmsg_scm_rights(td, flags, datalen, 1720 data, udata); 1721 break; 1722 case SCM_CREDS: 1723 error = recvmsg_scm_creds(datalen, data, udata); 1724 break; 1725 case SCM_CREDS2: 1726 error = recvmsg_scm_creds2(datalen, data, udata); 1727 break; 1728 case SCM_TIMESTAMP: 1729 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1730 error = recvmsg_scm_timestamp(lmsg_type, datalen, 1731 data, udata); 1732 #endif 1733 break; 1734 case SCM_BINTIME: 1735 error = recvmsg_scm_timestampns(lmsg_type, datalen, 1736 data, udata); 1737 break; 1738 } 1739 1740 return (error); 1741 } 1742 1743 static int 1744 recvmsg_scm_ip_origdstaddr(socklen_t *datalen, void **data, void **udata) 1745 { 1746 struct l_sockaddr *lsa; 1747 int error; 1748 1749 error = bsd_to_linux_sockaddr(*data, &lsa, *datalen); 1750 if (error == 0) { 1751 *data = *udata = lsa; 1752 *datalen = sizeof(*lsa); 1753 } 1754 return (error); 1755 } 1756 1757 static int 1758 recvmsg_scm_ipproto_ip(l_int msg_type, l_int lmsg_type, socklen_t *datalen, 1759 void **data, void **udata) 1760 { 1761 int error; 1762 1763 error = 0; 1764 switch (msg_type) { 1765 case IP_ORIGDSTADDR: 1766 error = recvmsg_scm_ip_origdstaddr(datalen, data, 1767 udata); 1768 break; 1769 } 1770 1771 return (error); 1772 } 1773 1774 static int 1775 linux_recvmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, 1776 l_uint flags, struct msghdr *msg) 1777 { 1778 struct proc *p = td->td_proc; 1779 struct cmsghdr *cm; 1780 struct l_cmsghdr *lcm = NULL; 1781 socklen_t datalen, maxlen, outlen; 1782 struct l_msghdr l_msghdr; 1783 struct iovec *iov, *uiov; 1784 struct mbuf *m, *control = NULL; 1785 struct mbuf **controlp; 1786 struct sockaddr *sa; 1787 caddr_t outbuf; 1788 void *data, *udata; 1789 int error, skiped; 1790 1791 error = copyin(msghdr, &l_msghdr, sizeof(l_msghdr)); 1792 if (error != 0) 1793 return (error); 1794 1795 /* 1796 * Pass user-supplied recvmsg() flags in msg_flags field, 1797 * following sys_recvmsg() convention. 1798 */ 1799 l_msghdr.msg_flags = flags; 1800 1801 error = linux_to_bsd_msghdr(msg, &l_msghdr); 1802 if (error != 0) 1803 return (error); 1804 1805 #ifdef COMPAT_LINUX32 1806 error = freebsd32_copyiniov(PTRIN(msg->msg_iov), msg->msg_iovlen, 1807 &iov, EMSGSIZE); 1808 #else 1809 error = copyiniov(msg->msg_iov, msg->msg_iovlen, &iov, EMSGSIZE); 1810 #endif 1811 if (error != 0) 1812 return (error); 1813 1814 if (msg->msg_name != NULL && msg->msg_namelen > 0) { 1815 msg->msg_namelen = min(msg->msg_namelen, SOCK_MAXADDRLEN); 1816 sa = malloc(msg->msg_namelen, M_SONAME, M_WAITOK); 1817 msg->msg_name = sa; 1818 } else { 1819 sa = NULL; 1820 msg->msg_name = NULL; 1821 } 1822 1823 uiov = msg->msg_iov; 1824 msg->msg_iov = iov; 1825 controlp = (msg->msg_control != NULL) ? &control : NULL; 1826 error = kern_recvit(td, s, msg, UIO_SYSSPACE, controlp); 1827 msg->msg_iov = uiov; 1828 if (error != 0) 1829 goto bad; 1830 1831 /* 1832 * Note that kern_recvit() updates msg->msg_namelen. 1833 */ 1834 if (msg->msg_name != NULL && msg->msg_namelen > 0) { 1835 msg->msg_name = PTRIN(l_msghdr.msg_name); 1836 error = linux_copyout_sockaddr(sa, msg->msg_name, 1837 msg->msg_namelen); 1838 if (error != 0) 1839 goto bad; 1840 } 1841 1842 error = bsd_to_linux_msghdr(msg, &l_msghdr); 1843 if (error != 0) 1844 goto bad; 1845 1846 skiped = outlen = 0; 1847 maxlen = l_msghdr.msg_controllen; 1848 if (control == NULL) 1849 goto out; 1850 1851 lcm = malloc(L_CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO); 1852 msg->msg_control = mtod(control, struct cmsghdr *); 1853 msg->msg_controllen = control->m_len; 1854 outbuf = PTRIN(l_msghdr.msg_control); 1855 for (m = control; m != NULL; m = m->m_next) { 1856 cm = mtod(m, struct cmsghdr *); 1857 lcm->cmsg_type = bsd_to_linux_cmsg_type(p, cm->cmsg_type, 1858 cm->cmsg_level); 1859 lcm->cmsg_level = bsd_to_linux_sockopt_level(cm->cmsg_level); 1860 1861 if (lcm->cmsg_type == -1 || 1862 lcm->cmsg_level == -1) { 1863 LINUX_RATELIMIT_MSG_OPT2( 1864 "unsupported recvmsg cmsg level %d type %d", 1865 cm->cmsg_level, cm->cmsg_type); 1866 /* Skip unsupported messages */ 1867 skiped++; 1868 continue; 1869 } 1870 data = CMSG_DATA(cm); 1871 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1872 udata = NULL; 1873 error = 0; 1874 1875 switch (cm->cmsg_level) { 1876 case IPPROTO_IP: 1877 error = recvmsg_scm_ipproto_ip(cm->cmsg_type, 1878 lcm->cmsg_type, &datalen, &data, &udata); 1879 break; 1880 case SOL_SOCKET: 1881 error = recvmsg_scm_sol_socket(td, cm->cmsg_type, 1882 lcm->cmsg_type, flags, &datalen, &data, &udata); 1883 break; 1884 } 1885 1886 /* The recvmsg_scm_ is responsible to free udata on error. */ 1887 if (error != 0) 1888 goto bad; 1889 1890 if (outlen + LINUX_CMSG_LEN(datalen) > maxlen) { 1891 if (outlen == 0) { 1892 error = EMSGSIZE; 1893 goto err; 1894 } else { 1895 l_msghdr.msg_flags |= LINUX_MSG_CTRUNC; 1896 m_dispose_extcontrolm(control); 1897 free(udata, M_LINUX); 1898 goto out; 1899 } 1900 } 1901 1902 lcm->cmsg_len = LINUX_CMSG_LEN(datalen); 1903 error = copyout(lcm, outbuf, L_CMSG_HDRSZ); 1904 if (error == 0) { 1905 error = copyout(data, LINUX_CMSG_DATA(outbuf), datalen); 1906 if (error == 0) { 1907 outbuf += LINUX_CMSG_SPACE(datalen); 1908 outlen += LINUX_CMSG_SPACE(datalen); 1909 } 1910 } 1911 err: 1912 free(udata, M_LINUX); 1913 if (error != 0) 1914 goto bad; 1915 } 1916 if (outlen == 0 && skiped > 0) { 1917 error = EINVAL; 1918 goto bad; 1919 } 1920 1921 out: 1922 l_msghdr.msg_controllen = outlen; 1923 error = copyout(&l_msghdr, msghdr, sizeof(l_msghdr)); 1924 1925 bad: 1926 if (control != NULL) { 1927 if (error != 0) 1928 m_dispose_extcontrolm(control); 1929 m_freem(control); 1930 } 1931 free(iov, M_IOV); 1932 free(lcm, M_LINUX); 1933 free(sa, M_SONAME); 1934 1935 return (error); 1936 } 1937 1938 int 1939 linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) 1940 { 1941 struct msghdr bsd_msg; 1942 struct file *fp; 1943 int error; 1944 1945 error = getsock(td, args->s, &cap_recv_rights, &fp); 1946 if (error != 0) 1947 return (error); 1948 fdrop(fp, td); 1949 return (linux_recvmsg_common(td, args->s, PTRIN(args->msg), 1950 args->flags, &bsd_msg)); 1951 } 1952 1953 static int 1954 linux_recvmmsg_common(struct thread *td, l_int s, struct l_mmsghdr *msg, 1955 l_uint vlen, l_uint flags, struct timespec *tts) 1956 { 1957 struct msghdr bsd_msg; 1958 struct timespec ts; 1959 struct file *fp; 1960 l_uint retval; 1961 int error, datagrams; 1962 1963 error = getsock(td, s, &cap_recv_rights, &fp); 1964 if (error != 0) 1965 return (error); 1966 datagrams = 0; 1967 while (datagrams < vlen) { 1968 error = linux_recvmsg_common(td, s, &msg->msg_hdr, 1969 flags & ~LINUX_MSG_WAITFORONE, &bsd_msg); 1970 if (error != 0) 1971 break; 1972 1973 retval = td->td_retval[0]; 1974 error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); 1975 if (error != 0) 1976 break; 1977 ++msg; 1978 ++datagrams; 1979 1980 /* 1981 * MSG_WAITFORONE turns on MSG_DONTWAIT after one packet. 1982 */ 1983 if (flags & LINUX_MSG_WAITFORONE) 1984 flags |= LINUX_MSG_DONTWAIT; 1985 1986 /* 1987 * See BUGS section of recvmmsg(2). 1988 */ 1989 if (tts) { 1990 getnanotime(&ts); 1991 timespecsub(&ts, tts, &ts); 1992 if (!timespecisset(&ts) || ts.tv_sec > 0) 1993 break; 1994 } 1995 /* Out of band data, return right away. */ 1996 if (bsd_msg.msg_flags & MSG_OOB) 1997 break; 1998 } 1999 if (error == 0) 2000 td->td_retval[0] = datagrams; 2001 fdrop(fp, td); 2002 return (error); 2003 } 2004 2005 int 2006 linux_recvmmsg(struct thread *td, struct linux_recvmmsg_args *args) 2007 { 2008 struct timespec ts, tts, *ptts; 2009 int error; 2010 2011 if (args->timeout) { 2012 error = linux_get_timespec(&ts, args->timeout); 2013 if (error != 0) 2014 return (error); 2015 getnanotime(&tts); 2016 timespecadd(&tts, &ts, &tts); 2017 ptts = &tts; 2018 } 2019 else ptts = NULL; 2020 2021 return (linux_recvmmsg_common(td, args->s, PTRIN(args->msg), 2022 args->vlen, args->flags, ptts)); 2023 } 2024 2025 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2026 int 2027 linux_recvmmsg_time64(struct thread *td, struct linux_recvmmsg_time64_args *args) 2028 { 2029 struct timespec ts, tts, *ptts; 2030 int error; 2031 2032 if (args->timeout) { 2033 error = linux_get_timespec64(&ts, args->timeout); 2034 if (error != 0) 2035 return (error); 2036 getnanotime(&tts); 2037 timespecadd(&tts, &ts, &tts); 2038 ptts = &tts; 2039 } 2040 else ptts = NULL; 2041 2042 return (linux_recvmmsg_common(td, args->s, PTRIN(args->msg), 2043 args->vlen, args->flags, ptts)); 2044 } 2045 #endif 2046 2047 int 2048 linux_shutdown(struct thread *td, struct linux_shutdown_args *args) 2049 { 2050 2051 return (kern_shutdown(td, args->s, args->how)); 2052 } 2053 2054 int 2055 linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) 2056 { 2057 struct proc *p = td->td_proc; 2058 struct linux_pemuldata *pem; 2059 l_timeval linux_tv; 2060 struct sockaddr *sa; 2061 struct timeval tv; 2062 socklen_t len; 2063 int error, level, name, val; 2064 2065 level = linux_to_bsd_sockopt_level(args->level); 2066 switch (level) { 2067 case SOL_SOCKET: 2068 name = linux_to_bsd_so_sockopt(args->optname); 2069 switch (name) { 2070 case LOCAL_CREDS_PERSISTENT: 2071 level = SOL_LOCAL; 2072 break; 2073 case SO_RCVTIMEO: 2074 /* FALLTHROUGH */ 2075 case SO_SNDTIMEO: 2076 error = copyin(PTRIN(args->optval), &linux_tv, 2077 sizeof(linux_tv)); 2078 if (error != 0) 2079 return (error); 2080 tv.tv_sec = linux_tv.tv_sec; 2081 tv.tv_usec = linux_tv.tv_usec; 2082 return (kern_setsockopt(td, args->s, level, 2083 name, &tv, UIO_SYSSPACE, sizeof(tv))); 2084 /* NOTREACHED */ 2085 case SO_TIMESTAMP: 2086 /* overwrite SO_BINTIME */ 2087 val = 0; 2088 error = kern_setsockopt(td, args->s, level, 2089 SO_BINTIME, &val, UIO_SYSSPACE, sizeof(val)); 2090 if (error != 0) 2091 return (error); 2092 pem = pem_find(p); 2093 pem->so_timestamp = args->optname; 2094 break; 2095 case SO_BINTIME: 2096 /* overwrite SO_TIMESTAMP */ 2097 val = 0; 2098 error = kern_setsockopt(td, args->s, level, 2099 SO_TIMESTAMP, &val, UIO_SYSSPACE, sizeof(val)); 2100 if (error != 0) 2101 return (error); 2102 pem = pem_find(p); 2103 pem->so_timestampns = args->optname; 2104 break; 2105 default: 2106 break; 2107 } 2108 break; 2109 case IPPROTO_IP: 2110 if (args->optname == LINUX_IP_RECVERR && 2111 linux_ignore_ip_recverr) { 2112 /* 2113 * XXX: This is a hack to unbreak DNS resolution 2114 * with glibc 2.30 and above. 2115 */ 2116 return (0); 2117 } 2118 name = linux_to_bsd_ip_sockopt(args->optname); 2119 break; 2120 case IPPROTO_IPV6: 2121 if (args->optname == LINUX_IPV6_RECVERR && 2122 linux_ignore_ip_recverr) { 2123 /* 2124 * XXX: This is a hack to unbreak DNS resolution 2125 * with glibc 2.30 and above. 2126 */ 2127 return (0); 2128 } 2129 name = linux_to_bsd_ip6_sockopt(args->optname); 2130 break; 2131 case IPPROTO_TCP: 2132 name = linux_to_bsd_tcp_sockopt(args->optname); 2133 break; 2134 case SOL_NETLINK: 2135 name = args->optname; 2136 break; 2137 default: 2138 name = -1; 2139 break; 2140 } 2141 if (name < 0) { 2142 if (name == -1) 2143 linux_msg(curthread, 2144 "unsupported setsockopt level %d optname %d", 2145 args->level, args->optname); 2146 return (ENOPROTOOPT); 2147 } 2148 2149 if (name == IPV6_NEXTHOP) { 2150 len = args->optlen; 2151 error = linux_to_bsd_sockaddr(PTRIN(args->optval), &sa, &len); 2152 if (error != 0) 2153 return (error); 2154 2155 error = kern_setsockopt(td, args->s, level, 2156 name, sa, UIO_SYSSPACE, len); 2157 free(sa, M_SONAME); 2158 } else { 2159 error = kern_setsockopt(td, args->s, level, 2160 name, PTRIN(args->optval), UIO_USERSPACE, args->optlen); 2161 } 2162 2163 return (error); 2164 } 2165 2166 static int 2167 linux_sockopt_copyout(struct thread *td, void *val, socklen_t len, 2168 struct linux_getsockopt_args *args) 2169 { 2170 int error; 2171 2172 error = copyout(val, PTRIN(args->optval), len); 2173 if (error == 0) 2174 error = copyout(&len, PTRIN(args->optlen), sizeof(len)); 2175 return (error); 2176 } 2177 2178 static int 2179 linux_getsockopt_so_peergroups(struct thread *td, 2180 struct linux_getsockopt_args *args) 2181 { 2182 struct xucred xu; 2183 socklen_t xulen, len; 2184 int error, i; 2185 2186 xulen = sizeof(xu); 2187 error = kern_getsockopt(td, args->s, 0, 2188 LOCAL_PEERCRED, &xu, UIO_SYSSPACE, &xulen); 2189 if (error != 0) 2190 return (error); 2191 2192 len = xu.cr_ngroups * sizeof(l_gid_t); 2193 if (args->optlen < len) { 2194 error = copyout(&len, PTRIN(args->optlen), sizeof(len)); 2195 if (error == 0) 2196 error = ERANGE; 2197 return (error); 2198 } 2199 2200 /* 2201 * "- 1" to skip the primary group. 2202 */ 2203 for (i = 0; i < xu.cr_ngroups - 1; i++) { 2204 error = copyout(xu.cr_groups + i + 1, 2205 (void *)(args->optval + i * sizeof(l_gid_t)), 2206 sizeof(l_gid_t)); 2207 if (error != 0) 2208 return (error); 2209 } 2210 2211 error = copyout(&len, PTRIN(args->optlen), sizeof(len)); 2212 return (error); 2213 } 2214 2215 static int 2216 linux_getsockopt_so_peersec(struct thread *td, 2217 struct linux_getsockopt_args *args) 2218 { 2219 socklen_t len; 2220 int error; 2221 2222 len = sizeof(SECURITY_CONTEXT_STRING); 2223 if (args->optlen < len) { 2224 error = copyout(&len, PTRIN(args->optlen), sizeof(len)); 2225 if (error == 0) 2226 error = ERANGE; 2227 return (error); 2228 } 2229 2230 return (linux_sockopt_copyout(td, SECURITY_CONTEXT_STRING, 2231 len, args)); 2232 } 2233 2234 static int 2235 linux_getsockopt_so_linger(struct thread *td, 2236 struct linux_getsockopt_args *args) 2237 { 2238 struct linger ling; 2239 socklen_t len; 2240 int error; 2241 2242 len = sizeof(ling); 2243 error = kern_getsockopt(td, args->s, SOL_SOCKET, 2244 SO_LINGER, &ling, UIO_SYSSPACE, &len); 2245 if (error != 0) 2246 return (error); 2247 ling.l_onoff = ((ling.l_onoff & SO_LINGER) != 0); 2248 return (linux_sockopt_copyout(td, &ling, len, args)); 2249 } 2250 2251 int 2252 linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args) 2253 { 2254 l_timeval linux_tv; 2255 struct timeval tv; 2256 socklen_t tv_len, xulen, len; 2257 struct sockaddr *sa; 2258 struct xucred xu; 2259 struct l_ucred lxu; 2260 int error, level, name, newval; 2261 2262 level = linux_to_bsd_sockopt_level(args->level); 2263 switch (level) { 2264 case SOL_SOCKET: 2265 switch (args->optname) { 2266 case LINUX_SO_PEERGROUPS: 2267 return (linux_getsockopt_so_peergroups(td, args)); 2268 case LINUX_SO_PEERSEC: 2269 return (linux_getsockopt_so_peersec(td, args)); 2270 default: 2271 break; 2272 } 2273 2274 name = linux_to_bsd_so_sockopt(args->optname); 2275 switch (name) { 2276 case LOCAL_CREDS_PERSISTENT: 2277 level = SOL_LOCAL; 2278 break; 2279 case SO_RCVTIMEO: 2280 /* FALLTHROUGH */ 2281 case SO_SNDTIMEO: 2282 tv_len = sizeof(tv); 2283 error = kern_getsockopt(td, args->s, level, 2284 name, &tv, UIO_SYSSPACE, &tv_len); 2285 if (error != 0) 2286 return (error); 2287 linux_tv.tv_sec = tv.tv_sec; 2288 linux_tv.tv_usec = tv.tv_usec; 2289 return (linux_sockopt_copyout(td, &linux_tv, 2290 sizeof(linux_tv), args)); 2291 /* NOTREACHED */ 2292 case LOCAL_PEERCRED: 2293 if (args->optlen < sizeof(lxu)) 2294 return (EINVAL); 2295 /* 2296 * LOCAL_PEERCRED is not served at the SOL_SOCKET level, 2297 * but by the Unix socket's level 0. 2298 */ 2299 level = 0; 2300 xulen = sizeof(xu); 2301 error = kern_getsockopt(td, args->s, level, 2302 name, &xu, UIO_SYSSPACE, &xulen); 2303 if (error != 0) 2304 return (error); 2305 lxu.pid = xu.cr_pid; 2306 lxu.uid = xu.cr_uid; 2307 lxu.gid = xu.cr_gid; 2308 return (linux_sockopt_copyout(td, &lxu, 2309 sizeof(lxu), args)); 2310 /* NOTREACHED */ 2311 case SO_ERROR: 2312 len = sizeof(newval); 2313 error = kern_getsockopt(td, args->s, level, 2314 name, &newval, UIO_SYSSPACE, &len); 2315 if (error != 0) 2316 return (error); 2317 newval = -bsd_to_linux_errno(newval); 2318 return (linux_sockopt_copyout(td, &newval, 2319 len, args)); 2320 /* NOTREACHED */ 2321 case SO_DOMAIN: 2322 len = sizeof(newval); 2323 error = kern_getsockopt(td, args->s, level, 2324 name, &newval, UIO_SYSSPACE, &len); 2325 if (error != 0) 2326 return (error); 2327 newval = bsd_to_linux_domain((sa_family_t)newval); 2328 if (newval == AF_UNKNOWN) 2329 return (ENOPROTOOPT); 2330 return (linux_sockopt_copyout(td, &newval, 2331 len, args)); 2332 /* NOTREACHED */ 2333 case SO_LINGER: 2334 return (linux_getsockopt_so_linger(td, args)); 2335 /* NOTREACHED */ 2336 default: 2337 break; 2338 } 2339 break; 2340 case IPPROTO_IP: 2341 name = linux_to_bsd_ip_sockopt(args->optname); 2342 break; 2343 case IPPROTO_IPV6: 2344 name = linux_to_bsd_ip6_sockopt(args->optname); 2345 break; 2346 case IPPROTO_TCP: 2347 name = linux_to_bsd_tcp_sockopt(args->optname); 2348 break; 2349 default: 2350 name = -1; 2351 break; 2352 } 2353 if (name < 0) { 2354 if (name == -1) 2355 linux_msg(curthread, 2356 "unsupported getsockopt level %d optname %d", 2357 args->level, args->optname); 2358 return (EINVAL); 2359 } 2360 2361 if (name == IPV6_NEXTHOP) { 2362 error = copyin(PTRIN(args->optlen), &len, sizeof(len)); 2363 if (error != 0) 2364 return (error); 2365 sa = malloc(len, M_SONAME, M_WAITOK); 2366 2367 error = kern_getsockopt(td, args->s, level, 2368 name, sa, UIO_SYSSPACE, &len); 2369 if (error != 0) 2370 goto out; 2371 2372 error = linux_copyout_sockaddr(sa, PTRIN(args->optval), len); 2373 if (error == 0) 2374 error = copyout(&len, PTRIN(args->optlen), 2375 sizeof(len)); 2376 out: 2377 free(sa, M_SONAME); 2378 } else { 2379 if (args->optval) { 2380 error = copyin(PTRIN(args->optlen), &len, sizeof(len)); 2381 if (error != 0) 2382 return (error); 2383 } 2384 error = kern_getsockopt(td, args->s, level, 2385 name, PTRIN(args->optval), UIO_USERSPACE, &len); 2386 if (error == 0) 2387 error = copyout(&len, PTRIN(args->optlen), 2388 sizeof(len)); 2389 } 2390 2391 return (error); 2392 } 2393 2394 /* 2395 * Based on sendfile_getsock from kern_sendfile.c 2396 * Determines whether an fd is a stream socket that can be used 2397 * with FreeBSD sendfile. 2398 */ 2399 static bool 2400 is_sendfile(struct file *fp, struct file *ofp) 2401 { 2402 struct socket *so; 2403 2404 /* 2405 * FreeBSD sendfile() system call sends a regular file or 2406 * shared memory object out a stream socket. 2407 */ 2408 if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) || 2409 (fp->f_type == DTYPE_VNODE && 2410 (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) 2411 return (false); 2412 /* 2413 * The socket must be a stream socket and connected. 2414 */ 2415 if (ofp->f_type != DTYPE_SOCKET) 2416 return (false); 2417 so = ofp->f_data; 2418 if (so->so_type != SOCK_STREAM) 2419 return (false); 2420 /* 2421 * SCTP one-to-one style sockets currently don't work with 2422 * sendfile(). 2423 */ 2424 if (so->so_proto->pr_protocol == IPPROTO_SCTP) 2425 return (false); 2426 return (!SOLISTENING(so)); 2427 } 2428 2429 static bool 2430 is_regular_file(struct file *fp) 2431 { 2432 2433 return (fp->f_type == DTYPE_VNODE && fp->f_vnode != NULL && 2434 fp->f_vnode->v_type == VREG); 2435 } 2436 2437 static int 2438 sendfile_fallback(struct thread *td, struct file *fp, l_int out, 2439 off_t *offset, l_size_t count, off_t *sbytes) 2440 { 2441 off_t current_offset, out_offset, to_send; 2442 l_size_t bytes_sent, n_read; 2443 struct file *ofp; 2444 struct iovec aiov; 2445 struct uio auio; 2446 bool seekable; 2447 size_t bufsz; 2448 void *buf; 2449 int flags, error; 2450 2451 if (offset == NULL) { 2452 if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0) 2453 return (error); 2454 current_offset = td->td_uretoff.tdu_off; 2455 } else { 2456 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) 2457 return (ESPIPE); 2458 current_offset = *offset; 2459 } 2460 error = fget_write(td, out, &cap_pwrite_rights, &ofp); 2461 if (error != 0) 2462 return (error); 2463 seekable = (ofp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0; 2464 if (seekable) { 2465 if ((error = fo_seek(ofp, 0, SEEK_CUR, td)) != 0) 2466 goto drop; 2467 out_offset = td->td_uretoff.tdu_off; 2468 } else 2469 out_offset = 0; 2470 2471 flags = FOF_OFFSET | FOF_NOUPDATE; 2472 bufsz = min(count, maxphys); 2473 buf = malloc(bufsz, M_LINUX, M_WAITOK); 2474 bytes_sent = 0; 2475 while (bytes_sent < count) { 2476 to_send = min(count - bytes_sent, bufsz); 2477 aiov.iov_base = buf; 2478 aiov.iov_len = bufsz; 2479 auio.uio_iov = &aiov; 2480 auio.uio_iovcnt = 1; 2481 auio.uio_segflg = UIO_SYSSPACE; 2482 auio.uio_td = td; 2483 auio.uio_rw = UIO_READ; 2484 auio.uio_offset = current_offset; 2485 auio.uio_resid = to_send; 2486 error = fo_read(fp, &auio, fp->f_cred, flags, td); 2487 if (error != 0) 2488 break; 2489 n_read = to_send - auio.uio_resid; 2490 if (n_read == 0) 2491 break; 2492 aiov.iov_base = buf; 2493 aiov.iov_len = bufsz; 2494 auio.uio_iov = &aiov; 2495 auio.uio_iovcnt = 1; 2496 auio.uio_segflg = UIO_SYSSPACE; 2497 auio.uio_td = td; 2498 auio.uio_rw = UIO_WRITE; 2499 auio.uio_offset = (seekable) ? out_offset : 0; 2500 auio.uio_resid = n_read; 2501 error = fo_write(ofp, &auio, ofp->f_cred, flags, td); 2502 if (error != 0) 2503 break; 2504 bytes_sent += n_read; 2505 current_offset += n_read; 2506 out_offset += n_read; 2507 } 2508 free(buf, M_LINUX); 2509 2510 if (error == 0) { 2511 *sbytes = bytes_sent; 2512 if (offset != NULL) 2513 *offset = current_offset; 2514 else 2515 error = fo_seek(fp, current_offset, SEEK_SET, td); 2516 } 2517 if (error == 0 && seekable) 2518 error = fo_seek(ofp, out_offset, SEEK_SET, td); 2519 2520 drop: 2521 fdrop(ofp, td); 2522 return (error); 2523 } 2524 2525 static int 2526 sendfile_sendfile(struct thread *td, struct file *fp, l_int out, 2527 off_t *offset, l_size_t count, off_t *sbytes) 2528 { 2529 off_t current_offset; 2530 int error; 2531 2532 if (offset == NULL) { 2533 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) 2534 return (ESPIPE); 2535 if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0) 2536 return (error); 2537 current_offset = td->td_uretoff.tdu_off; 2538 } else 2539 current_offset = *offset; 2540 error = fo_sendfile(fp, out, NULL, NULL, current_offset, count, 2541 sbytes, 0, td); 2542 if (error == EAGAIN && *sbytes > 0) { 2543 /* 2544 * The socket is non-blocking and we didn't finish sending. 2545 * Squash the error, since that's what Linux does. 2546 */ 2547 error = 0; 2548 } 2549 if (error == 0) { 2550 current_offset += *sbytes; 2551 if (offset != NULL) 2552 *offset = current_offset; 2553 else 2554 error = fo_seek(fp, current_offset, SEEK_SET, td); 2555 } 2556 return (error); 2557 } 2558 2559 static int 2560 linux_sendfile_common(struct thread *td, l_int out, l_int in, 2561 off_t *offset, l_size_t count) 2562 { 2563 struct file *fp, *ofp; 2564 off_t sbytes; 2565 int error; 2566 2567 /* Linux cannot have 0 count. */ 2568 if (count <= 0 || (offset != NULL && *offset < 0)) 2569 return (EINVAL); 2570 2571 AUDIT_ARG_FD(in); 2572 error = fget_read(td, in, &cap_pread_rights, &fp); 2573 if (error != 0) 2574 return (error); 2575 if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) || 2576 (fp->f_type == DTYPE_VNODE && 2577 (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) { 2578 error = EINVAL; 2579 goto drop; 2580 } 2581 error = fget_unlocked(td, out, &cap_no_rights, &ofp); 2582 if (error != 0) 2583 goto drop; 2584 2585 if (is_regular_file(fp) && is_regular_file(ofp)) { 2586 error = kern_copy_file_range(td, in, offset, out, NULL, count, 2587 0); 2588 } else { 2589 sbytes = 0; 2590 if (is_sendfile(fp, ofp)) 2591 error = sendfile_sendfile(td, fp, out, offset, count, 2592 &sbytes); 2593 else 2594 error = sendfile_fallback(td, fp, out, offset, count, 2595 &sbytes); 2596 if (error == ENOBUFS && (ofp->f_flag & FNONBLOCK) != 0) 2597 error = EAGAIN; 2598 if (error == 0) 2599 td->td_retval[0] = sbytes; 2600 } 2601 fdrop(ofp, td); 2602 2603 drop: 2604 fdrop(fp, td); 2605 return (error); 2606 } 2607 2608 int 2609 linux_sendfile(struct thread *td, struct linux_sendfile_args *arg) 2610 { 2611 /* 2612 * Differences between FreeBSD and Linux sendfile: 2613 * - Linux doesn't send anything when count is 0 (FreeBSD uses 0 to 2614 * mean send the whole file). 2615 * - Linux can send to any fd whereas FreeBSD only supports sockets. 2616 * We therefore use FreeBSD sendfile where possible for performance, 2617 * but fall back on a manual copy (sendfile_fallback). 2618 * - Linux doesn't have an equivalent for FreeBSD's flags and sf_hdtr. 2619 * - Linux takes an offset pointer and updates it to the read location. 2620 * FreeBSD takes in an offset and a 'bytes read' parameter which is 2621 * only filled if it isn't NULL. We use this parameter to update the 2622 * offset pointer if it exists. 2623 * - Linux sendfile returns bytes read on success while FreeBSD 2624 * returns 0. We use the 'bytes read' parameter to get this value. 2625 */ 2626 2627 off_t offset64; 2628 l_off_t offset; 2629 int error; 2630 2631 if (arg->offset != NULL) { 2632 error = copyin(arg->offset, &offset, sizeof(offset)); 2633 if (error != 0) 2634 return (error); 2635 offset64 = offset; 2636 } 2637 2638 error = linux_sendfile_common(td, arg->out, arg->in, 2639 arg->offset != NULL ? &offset64 : NULL, arg->count); 2640 2641 if (error == 0 && arg->offset != NULL) { 2642 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2643 if (offset64 > INT32_MAX) 2644 return (EOVERFLOW); 2645 #endif 2646 offset = (l_off_t)offset64; 2647 error = copyout(&offset, arg->offset, sizeof(offset)); 2648 } 2649 2650 return (error); 2651 } 2652 2653 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2654 int 2655 linux_sendfile64(struct thread *td, struct linux_sendfile64_args *arg) 2656 { 2657 off_t offset; 2658 int error; 2659 2660 if (arg->offset != NULL) { 2661 error = copyin(arg->offset, &offset, sizeof(offset)); 2662 if (error != 0) 2663 return (error); 2664 } 2665 2666 error = linux_sendfile_common(td, arg->out, arg->in, 2667 arg->offset != NULL ? &offset : NULL, arg->count); 2668 2669 if (error == 0 && arg->offset != NULL) 2670 error = copyout(&offset, arg->offset, sizeof(offset)); 2671 2672 return (error); 2673 } 2674 2675 /* Argument list sizes for linux_socketcall */ 2676 static const unsigned char lxs_args_cnt[] = { 2677 0 /* unused*/, 3 /* socket */, 2678 3 /* bind */, 3 /* connect */, 2679 2 /* listen */, 3 /* accept */, 2680 3 /* getsockname */, 3 /* getpeername */, 2681 4 /* socketpair */, 4 /* send */, 2682 4 /* recv */, 6 /* sendto */, 2683 6 /* recvfrom */, 2 /* shutdown */, 2684 5 /* setsockopt */, 5 /* getsockopt */, 2685 3 /* sendmsg */, 3 /* recvmsg */, 2686 4 /* accept4 */, 5 /* recvmmsg */, 2687 4 /* sendmmsg */, 4 /* sendfile */ 2688 }; 2689 #define LINUX_ARGS_CNT (nitems(lxs_args_cnt) - 1) 2690 #define LINUX_ARG_SIZE(x) (lxs_args_cnt[x] * sizeof(l_ulong)) 2691 2692 int 2693 linux_socketcall(struct thread *td, struct linux_socketcall_args *args) 2694 { 2695 l_ulong a[6]; 2696 #if defined(__amd64__) && defined(COMPAT_LINUX32) 2697 register_t l_args[6]; 2698 #endif 2699 void *arg; 2700 int error; 2701 2702 if (args->what < LINUX_SOCKET || args->what > LINUX_ARGS_CNT) 2703 return (EINVAL); 2704 error = copyin(PTRIN(args->args), a, LINUX_ARG_SIZE(args->what)); 2705 if (error != 0) 2706 return (error); 2707 2708 #if defined(__amd64__) && defined(COMPAT_LINUX32) 2709 for (int i = 0; i < lxs_args_cnt[args->what]; ++i) 2710 l_args[i] = a[i]; 2711 arg = l_args; 2712 #else 2713 arg = a; 2714 #endif 2715 switch (args->what) { 2716 case LINUX_SOCKET: 2717 return (linux_socket(td, arg)); 2718 case LINUX_BIND: 2719 return (linux_bind(td, arg)); 2720 case LINUX_CONNECT: 2721 return (linux_connect(td, arg)); 2722 case LINUX_LISTEN: 2723 return (linux_listen(td, arg)); 2724 case LINUX_ACCEPT: 2725 return (linux_accept(td, arg)); 2726 case LINUX_GETSOCKNAME: 2727 return (linux_getsockname(td, arg)); 2728 case LINUX_GETPEERNAME: 2729 return (linux_getpeername(td, arg)); 2730 case LINUX_SOCKETPAIR: 2731 return (linux_socketpair(td, arg)); 2732 case LINUX_SEND: 2733 return (linux_send(td, arg)); 2734 case LINUX_RECV: 2735 return (linux_recv(td, arg)); 2736 case LINUX_SENDTO: 2737 return (linux_sendto(td, arg)); 2738 case LINUX_RECVFROM: 2739 return (linux_recvfrom(td, arg)); 2740 case LINUX_SHUTDOWN: 2741 return (linux_shutdown(td, arg)); 2742 case LINUX_SETSOCKOPT: 2743 return (linux_setsockopt(td, arg)); 2744 case LINUX_GETSOCKOPT: 2745 return (linux_getsockopt(td, arg)); 2746 case LINUX_SENDMSG: 2747 return (linux_sendmsg(td, arg)); 2748 case LINUX_RECVMSG: 2749 return (linux_recvmsg(td, arg)); 2750 case LINUX_ACCEPT4: 2751 return (linux_accept4(td, arg)); 2752 case LINUX_RECVMMSG: 2753 return (linux_recvmmsg(td, arg)); 2754 case LINUX_SENDMMSG: 2755 return (linux_sendmmsg(td, arg)); 2756 case LINUX_SENDFILE: 2757 return (linux_sendfile(td, arg)); 2758 } 2759 2760 linux_msg(td, "socket type %d not implemented", args->what); 2761 return (ENOSYS); 2762 } 2763 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2764