1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021 Ng Peng Nam Sean 5 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * This file contains socket and protocol bindings for netlink. 31 */ 32 33 #include "opt_netlink.h" 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/malloc.h> 37 #include <sys/lock.h> 38 #include <sys/rmlock.h> 39 #include <sys/domain.h> 40 #include <sys/jail.h> 41 #include <sys/mbuf.h> 42 #include <sys/osd.h> 43 #include <sys/protosw.h> 44 #include <sys/proc.h> 45 #include <sys/ck.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sysent.h> 49 #include <sys/syslog.h> 50 #include <sys/priv.h> /* priv_check */ 51 52 #include <netlink/netlink.h> 53 #include <netlink/netlink_ctl.h> 54 #include <netlink/netlink_var.h> 55 56 #define DEBUG_MOD_NAME nl_domain 57 #define DEBUG_MAX_LEVEL LOG_DEBUG3 58 #include <netlink/netlink_debug.h> 59 _DECLARE_DEBUG(LOG_INFO); 60 61 _Static_assert((NLP_MAX_GROUPS % 64) == 0, 62 "NLP_MAX_GROUPS has to be multiple of 64"); 63 _Static_assert(NLP_MAX_GROUPS >= 64, 64 "NLP_MAX_GROUPS has to be at least 64"); 65 66 #define NLCTL_TRACKER struct rm_priotracker nl_tracker 67 #define NLCTL_RLOCK(_ctl) rm_rlock(&((_ctl)->ctl_lock), &nl_tracker) 68 #define NLCTL_RUNLOCK(_ctl) rm_runlock(&((_ctl)->ctl_lock), &nl_tracker) 69 70 #define NLCTL_WLOCK(_ctl) rm_wlock(&((_ctl)->ctl_lock)) 71 #define NLCTL_WUNLOCK(_ctl) rm_wunlock(&((_ctl)->ctl_lock)) 72 73 static u_long nl_sendspace = NLSNDQ; 74 SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0, 75 "Default netlink socket send space"); 76 77 static u_long nl_recvspace = NLSNDQ; 78 SYSCTL_ULONG(_net_netlink, OID_AUTO, recvspace, CTLFLAG_RW, &nl_recvspace, 0, 79 "Default netlink socket receive space"); 80 81 extern u_long sb_max_adj; 82 static u_long nl_maxsockbuf = 512 * 1024 * 1024; /* 512M, XXX: init based on physmem */ 83 static int sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS); 84 SYSCTL_OID(_net_netlink, OID_AUTO, nl_maxsockbuf, 85 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &nl_maxsockbuf, 0, 86 sysctl_handle_nl_maxsockbuf, "LU", 87 "Maximum Netlink socket buffer size"); 88 89 90 static unsigned int osd_slot_id = 0; 91 92 void 93 nl_osd_register(void) 94 { 95 osd_slot_id = osd_register(OSD_THREAD, NULL, NULL); 96 } 97 98 void 99 nl_osd_unregister(void) 100 { 101 osd_deregister(OSD_THREAD, osd_slot_id); 102 } 103 104 struct nlpcb * 105 _nl_get_thread_nlp(struct thread *td) 106 { 107 return (osd_get(OSD_THREAD, &td->td_osd, osd_slot_id)); 108 } 109 110 void 111 nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp) 112 { 113 NLP_LOG(LOG_DEBUG2, nlp, "Set thread %p nlp to %p (slot %u)", td, nlp, osd_slot_id); 114 if (osd_set(OSD_THREAD, &td->td_osd, osd_slot_id, nlp) == 0) 115 return; 116 /* Failed, need to realloc */ 117 void **rsv = osd_reserve(osd_slot_id); 118 osd_set_reserved(OSD_THREAD, &td->td_osd, osd_slot_id, rsv, nlp); 119 } 120 121 /* 122 * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx. 123 * Returns nlpcb pointer if present else NULL 124 */ 125 static struct nlpcb * 126 nl_port_lookup(uint32_t port_id) 127 { 128 struct nlpcb *nlp; 129 130 CK_LIST_FOREACH(nlp, &V_nl_ctl->ctl_port_head, nl_port_next) { 131 if (nlp->nl_port == port_id) 132 return (nlp); 133 } 134 return (NULL); 135 } 136 137 static void 138 nl_add_group_locked(struct nlpcb *nlp, unsigned int group_id) 139 { 140 MPASS(group_id <= NLP_MAX_GROUPS); 141 --group_id; 142 143 /* TODO: add family handler callback */ 144 if (!nlp_unconstrained_vnet(nlp)) 145 return; 146 147 nlp->nl_groups[group_id / 64] |= (uint64_t)1 << (group_id % 64); 148 } 149 150 static void 151 nl_del_group_locked(struct nlpcb *nlp, unsigned int group_id) 152 { 153 MPASS(group_id <= NLP_MAX_GROUPS); 154 --group_id; 155 156 nlp->nl_groups[group_id / 64] &= ~((uint64_t)1 << (group_id % 64)); 157 } 158 159 static bool 160 nl_isset_group_locked(struct nlpcb *nlp, unsigned int group_id) 161 { 162 MPASS(group_id <= NLP_MAX_GROUPS); 163 --group_id; 164 165 return (nlp->nl_groups[group_id / 64] & ((uint64_t)1 << (group_id % 64))); 166 } 167 168 static uint32_t 169 nl_get_groups_compat(struct nlpcb *nlp) 170 { 171 uint32_t groups_mask = 0; 172 173 for (int i = 0; i < 32; i++) { 174 if (nl_isset_group_locked(nlp, i + 1)) 175 groups_mask |= (1 << i); 176 } 177 178 return (groups_mask); 179 } 180 181 static void 182 nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages, 183 int io_flags) 184 { 185 if (__predict_false(nlp->nl_flags & NLF_MSG_INFO)) 186 nl_add_msg_info(m); 187 nl_send_one(m, nlp, num_messages, io_flags); 188 } 189 190 /* 191 * Broadcasts message @m to the protocol @proto group specified by @group_id 192 */ 193 void 194 nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id) 195 { 196 struct nlpcb *nlp_last = NULL; 197 struct nlpcb *nlp; 198 NLCTL_TRACKER; 199 200 IF_DEBUG_LEVEL(LOG_DEBUG2) { 201 struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); 202 NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d", 203 m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id); 204 } 205 206 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 207 if (__predict_false(ctl == NULL)) { 208 /* 209 * Can be the case when notification is sent within VNET 210 * which doesn't have any netlink sockets. 211 */ 212 m_freem(m); 213 return; 214 } 215 216 NLCTL_RLOCK(ctl); 217 218 int io_flags = NL_IOF_UNTRANSLATED; 219 220 CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) { 221 if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) { 222 if (nlp_last != NULL) { 223 struct mbuf *m_copy; 224 m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); 225 if (m_copy != NULL) 226 nl_send_one_group(m_copy, nlp_last, 227 num_messages, io_flags); 228 else { 229 NLP_LOCK(nlp_last); 230 if (nlp_last->nl_socket != NULL) 231 sorwakeup(nlp_last->nl_socket); 232 NLP_UNLOCK(nlp_last); 233 } 234 } 235 nlp_last = nlp; 236 } 237 } 238 if (nlp_last != NULL) 239 nl_send_one_group(m, nlp_last, num_messages, io_flags); 240 else 241 m_freem(m); 242 243 NLCTL_RUNLOCK(ctl); 244 } 245 246 bool 247 nl_has_listeners(int netlink_family, uint32_t groups_mask) 248 { 249 return (V_nl_ctl != NULL); 250 } 251 252 static uint32_t 253 nl_find_port(void) 254 { 255 /* 256 * app can open multiple netlink sockets. 257 * Start with current pid, if already taken, 258 * try random numbers in 65k..256k+65k space, 259 * avoiding clash with pids. 260 */ 261 if (nl_port_lookup(curproc->p_pid) == NULL) 262 return (curproc->p_pid); 263 for (int i = 0; i < 16; i++) { 264 uint32_t nl_port = (arc4random() % 65536) + 65536 * 4; 265 if (nl_port_lookup(nl_port) == 0) 266 return (nl_port); 267 NL_LOG(LOG_DEBUG3, "tried %u\n", nl_port); 268 } 269 return (curproc->p_pid); 270 } 271 272 static int 273 nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl) 274 { 275 if (nlp->nl_bound) { 276 if (nlp->nl_port != snl->nl_pid) { 277 NL_LOG(LOG_DEBUG, 278 "bind() failed: program pid %d " 279 "is different from provided pid %d", 280 nlp->nl_port, snl->nl_pid); 281 return (EINVAL); // XXX: better error 282 } 283 } else { 284 if (snl->nl_pid == 0) 285 snl->nl_pid = nl_find_port(); 286 if (nl_port_lookup(snl->nl_pid) != NULL) 287 return (EADDRINUSE); 288 nlp->nl_port = snl->nl_pid; 289 nlp->nl_bound = true; 290 CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_port_head, nlp, nl_port_next); 291 } 292 for (int i = 0; i < 32; i++) { 293 if (snl->nl_groups & ((uint32_t)1 << i)) 294 nl_add_group_locked(nlp, i + 1); 295 else 296 nl_del_group_locked(nlp, i + 1); 297 } 298 299 return (0); 300 } 301 302 static int 303 nl_pru_attach(struct socket *so, int proto, struct thread *td) 304 { 305 struct nlpcb *nlp; 306 int error; 307 308 if (__predict_false(netlink_unloading != 0)) 309 return (EAFNOSUPPORT); 310 311 error = nl_verify_proto(proto); 312 if (error != 0) 313 return (error); 314 315 bool is_linux = SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX; 316 NL_LOG(LOG_DEBUG2, "socket %p, %sPID %d: attaching socket to %s", 317 so, is_linux ? "(linux) " : "", curproc->p_pid, 318 nl_get_proto_name(proto)); 319 320 /* Create per-VNET state on first socket init */ 321 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 322 if (ctl == NULL) 323 ctl = vnet_nl_ctl_init(); 324 KASSERT(V_nl_ctl != NULL, ("nl_attach: vnet_sock_init() failed")); 325 326 MPASS(sotonlpcb(so) == NULL); 327 328 nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO); 329 error = soreserve(so, nl_sendspace, nl_recvspace); 330 if (error != 0) { 331 free(nlp, M_PCB); 332 return (error); 333 } 334 so->so_pcb = nlp; 335 nlp->nl_socket = so; 336 /* Copy so_cred to avoid having socket_var.h in every header */ 337 nlp->nl_cred = so->so_cred; 338 nlp->nl_proto = proto; 339 nlp->nl_process_id = curproc->p_pid; 340 nlp->nl_linux = is_linux; 341 nlp->nl_active = true; 342 nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred); 343 nlp->nl_need_thread_setup = true; 344 NLP_LOCK_INIT(nlp); 345 refcount_init(&nlp->nl_refcount, 1); 346 nl_init_io(nlp); 347 348 nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK, 349 taskqueue_thread_enqueue, &nlp->nl_taskqueue); 350 TASK_INIT(&nlp->nl_task, 0, nl_taskqueue_handler, nlp); 351 taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT, 352 "netlink_socket (PID %u)", nlp->nl_process_id); 353 354 NLCTL_WLOCK(ctl); 355 /* XXX: check ctl is still alive */ 356 CK_LIST_INSERT_HEAD(&ctl->ctl_pcb_head, nlp, nl_next); 357 NLCTL_WUNLOCK(ctl); 358 359 soisconnected(so); 360 361 return (0); 362 } 363 364 static void 365 nl_pru_abort(struct socket *so) 366 { 367 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 368 MPASS(sotonlpcb(so) != NULL); 369 soisdisconnected(so); 370 } 371 372 static int 373 nl_pru_bind(struct socket *so, struct sockaddr *sa, struct thread *td) 374 { 375 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 376 struct nlpcb *nlp = sotonlpcb(so); 377 struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; 378 int error; 379 380 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 381 if (snl->nl_len != sizeof(*snl)) { 382 NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); 383 return (EINVAL); 384 } 385 386 387 NLCTL_WLOCK(ctl); 388 NLP_LOCK(nlp); 389 error = nl_bind_locked(nlp, snl); 390 NLP_UNLOCK(nlp); 391 NLCTL_WUNLOCK(ctl); 392 NL_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so, 393 snl->nl_pid, snl->nl_groups, error); 394 395 return (error); 396 } 397 398 399 static int 400 nl_assign_port(struct nlpcb *nlp, uint32_t port_id) 401 { 402 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 403 struct sockaddr_nl snl = { 404 .nl_pid = port_id, 405 }; 406 int error; 407 408 NLCTL_WLOCK(ctl); 409 NLP_LOCK(nlp); 410 snl.nl_groups = nl_get_groups_compat(nlp); 411 error = nl_bind_locked(nlp, &snl); 412 NLP_UNLOCK(nlp); 413 NLCTL_WUNLOCK(ctl); 414 415 NL_LOG(LOG_DEBUG3, "socket %p, port assign: %d, error: %d", nlp->nl_socket, port_id, error); 416 return (error); 417 } 418 419 /* 420 * nl_autobind_port binds a unused portid to @nlp 421 * @nlp: pcb data for the netlink socket 422 * @candidate_id: first id to consider 423 */ 424 static int 425 nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id) 426 { 427 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 428 uint32_t port_id = candidate_id; 429 NLCTL_TRACKER; 430 bool exist; 431 int error = EADDRINUSE; 432 433 for (int i = 0; i < 10; i++) { 434 NL_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d", nlp->nl_socket, port_id); 435 NLCTL_RLOCK(ctl); 436 exist = nl_port_lookup(port_id) != 0; 437 NLCTL_RUNLOCK(ctl); 438 if (!exist) { 439 error = nl_assign_port(nlp, port_id); 440 if (error != EADDRINUSE) 441 break; 442 } 443 port_id++; 444 } 445 NL_LOG(LOG_DEBUG3, "socket %p, autobind to %d, error: %d", nlp->nl_socket, port_id, error); 446 return (error); 447 } 448 449 static int 450 nl_pru_connect(struct socket *so, struct sockaddr *sa, struct thread *td) 451 { 452 struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; 453 struct nlpcb *nlp; 454 455 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 456 if (snl->nl_len != sizeof(*snl)) { 457 NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); 458 return (EINVAL); 459 } 460 461 nlp = sotonlpcb(so); 462 if (!nlp->nl_bound) { 463 int error = nl_autobind_port(nlp, td->td_proc->p_pid); 464 if (error != 0) { 465 NL_LOG(LOG_DEBUG, "socket %p, nl_autobind() failed: %d", so, error); 466 return (error); 467 } 468 } 469 /* XXX: Handle socket flags & multicast */ 470 soisconnected(so); 471 472 NL_LOG(LOG_DEBUG2, "socket %p, connect to %u", so, snl->nl_pid); 473 474 return (0); 475 } 476 477 static void 478 destroy_nlpcb(struct nlpcb *nlp) 479 { 480 NLP_LOCK(nlp); 481 nl_free_io(nlp); 482 NLP_LOCK_DESTROY(nlp); 483 free(nlp, M_PCB); 484 } 485 486 static void 487 destroy_nlpcb_epoch(epoch_context_t ctx) 488 { 489 struct nlpcb *nlp; 490 491 nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx); 492 493 destroy_nlpcb(nlp); 494 } 495 496 497 static void 498 nl_pru_detach(struct socket *so) 499 { 500 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 501 MPASS(sotonlpcb(so) != NULL); 502 struct nlpcb *nlp; 503 504 NL_LOG(LOG_DEBUG2, "detaching socket %p, PID %d", so, curproc->p_pid); 505 nlp = sotonlpcb(so); 506 507 /* Mark as inactive so no new work can be enqueued */ 508 NLP_LOCK(nlp); 509 bool was_bound = nlp->nl_bound; 510 nlp->nl_active = false; 511 NLP_UNLOCK(nlp); 512 513 /* Wait till all scheduled work has been completed */ 514 taskqueue_drain_all(nlp->nl_taskqueue); 515 taskqueue_free(nlp->nl_taskqueue); 516 517 NLCTL_WLOCK(ctl); 518 NLP_LOCK(nlp); 519 if (was_bound) { 520 CK_LIST_REMOVE(nlp, nl_port_next); 521 NL_LOG(LOG_DEBUG3, "socket %p, unlinking bound pid %u", so, nlp->nl_port); 522 } 523 CK_LIST_REMOVE(nlp, nl_next); 524 nlp->nl_socket = NULL; 525 NLP_UNLOCK(nlp); 526 NLCTL_WUNLOCK(ctl); 527 528 so->so_pcb = NULL; 529 530 NL_LOG(LOG_DEBUG3, "socket %p, detached", so); 531 532 /* XXX: is delayed free needed? */ 533 NET_EPOCH_CALL(destroy_nlpcb_epoch, &nlp->nl_epoch_ctx); 534 } 535 536 static int 537 nl_pru_disconnect(struct socket *so) 538 { 539 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 540 MPASS(sotonlpcb(so) != NULL); 541 return (ENOTCONN); 542 } 543 544 static int 545 nl_pru_peeraddr(struct socket *so, struct sockaddr **sa) 546 { 547 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 548 MPASS(sotonlpcb(so) != NULL); 549 return (ENOTCONN); 550 } 551 552 static int 553 nl_pru_shutdown(struct socket *so) 554 { 555 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 556 MPASS(sotonlpcb(so) != NULL); 557 socantsendmore(so); 558 return (0); 559 } 560 561 static int 562 nl_pru_sockaddr(struct socket *so, struct sockaddr **sa) 563 { 564 struct sockaddr_nl *snl; 565 566 snl = malloc(sizeof(struct sockaddr_nl), M_SONAME, M_WAITOK | M_ZERO); 567 /* TODO: set other fields */ 568 snl->nl_len = sizeof(struct sockaddr_nl); 569 snl->nl_family = AF_NETLINK; 570 snl->nl_pid = sotonlpcb(so)->nl_port; 571 *sa = (struct sockaddr *)snl; 572 return (0); 573 } 574 575 static void 576 nl_pru_close(struct socket *so) 577 { 578 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 579 MPASS(sotonlpcb(so) != NULL); 580 soisdisconnected(so); 581 } 582 583 static int 584 nl_pru_output(struct mbuf *m, struct socket *so, ...) 585 { 586 587 if (__predict_false(m == NULL || 588 ((m->m_len < sizeof(struct nlmsghdr)) && 589 (m = m_pullup(m, sizeof(struct nlmsghdr))) == NULL))) 590 return (ENOBUFS); 591 MPASS((m->m_flags & M_PKTHDR) != 0); 592 593 NL_LOG(LOG_DEBUG3, "sending message to kernel async processing"); 594 nl_receive_async(m, so); 595 return (0); 596 } 597 598 599 static int 600 nl_pru_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *sa, 601 struct mbuf *control, struct thread *td) 602 { 603 NL_LOG(LOG_DEBUG2, "sending message to kernel"); 604 605 if (__predict_false(control != NULL)) { 606 if (control->m_len) { 607 m_freem(control); 608 return (EINVAL); 609 } 610 m_freem(control); 611 } 612 613 return (nl_pru_output(m, so)); 614 } 615 616 static int 617 nl_pru_rcvd(struct socket *so, int flags) 618 { 619 NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); 620 MPASS(sotonlpcb(so) != NULL); 621 622 nl_on_transmit(sotonlpcb(so)); 623 624 return (0); 625 } 626 627 static int 628 nl_getoptflag(int sopt_name) 629 { 630 switch (sopt_name) { 631 case NETLINK_CAP_ACK: 632 return (NLF_CAP_ACK); 633 case NETLINK_EXT_ACK: 634 return (NLF_EXT_ACK); 635 case NETLINK_GET_STRICT_CHK: 636 return (NLF_STRICT); 637 case NETLINK_MSG_INFO: 638 return (NLF_MSG_INFO); 639 } 640 641 return (0); 642 } 643 644 static int 645 nl_ctloutput(struct socket *so, struct sockopt *sopt) 646 { 647 struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); 648 struct nlpcb *nlp = sotonlpcb(so); 649 uint32_t flag; 650 int optval, error = 0; 651 NLCTL_TRACKER; 652 653 NL_LOG(LOG_DEBUG2, "%ssockopt(%p, %d)", (sopt->sopt_dir) ? "set" : "get", 654 so, sopt->sopt_name); 655 656 switch (sopt->sopt_dir) { 657 case SOPT_SET: 658 switch (sopt->sopt_name) { 659 case NETLINK_ADD_MEMBERSHIP: 660 case NETLINK_DROP_MEMBERSHIP: 661 error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); 662 if (error != 0) 663 break; 664 if (optval <= 0 || optval >= NLP_MAX_GROUPS) { 665 error = ERANGE; 666 break; 667 } 668 NL_LOG(LOG_DEBUG2, "ADD/DEL group %d", (uint32_t)optval); 669 670 NLCTL_WLOCK(ctl); 671 if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP) 672 nl_add_group_locked(nlp, optval); 673 else 674 nl_del_group_locked(nlp, optval); 675 NLCTL_WUNLOCK(ctl); 676 break; 677 case NETLINK_CAP_ACK: 678 case NETLINK_EXT_ACK: 679 case NETLINK_GET_STRICT_CHK: 680 case NETLINK_MSG_INFO: 681 error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); 682 if (error != 0) 683 break; 684 685 flag = nl_getoptflag(sopt->sopt_name); 686 687 if ((flag == NLF_MSG_INFO) && nlp->nl_linux) { 688 error = EINVAL; 689 break; 690 } 691 692 NLCTL_WLOCK(ctl); 693 if (optval != 0) 694 nlp->nl_flags |= flag; 695 else 696 nlp->nl_flags &= ~flag; 697 NLCTL_WUNLOCK(ctl); 698 break; 699 default: 700 error = ENOPROTOOPT; 701 } 702 break; 703 case SOPT_GET: 704 switch (sopt->sopt_name) { 705 case NETLINK_LIST_MEMBERSHIPS: 706 NLCTL_RLOCK(ctl); 707 optval = nl_get_groups_compat(nlp); 708 NLCTL_RUNLOCK(ctl); 709 error = sooptcopyout(sopt, &optval, sizeof(optval)); 710 break; 711 case NETLINK_CAP_ACK: 712 case NETLINK_EXT_ACK: 713 case NETLINK_GET_STRICT_CHK: 714 case NETLINK_MSG_INFO: 715 NLCTL_RLOCK(ctl); 716 optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0; 717 NLCTL_RUNLOCK(ctl); 718 error = sooptcopyout(sopt, &optval, sizeof(optval)); 719 break; 720 default: 721 error = ENOPROTOOPT; 722 } 723 break; 724 default: 725 error = ENOPROTOOPT; 726 } 727 728 return (error); 729 } 730 731 static int 732 sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS) 733 { 734 int error = 0; 735 u_long tmp_maxsockbuf = nl_maxsockbuf; 736 737 error = sysctl_handle_long(oidp, &tmp_maxsockbuf, arg2, req); 738 if (error || !req->newptr) 739 return (error); 740 if (tmp_maxsockbuf < MSIZE + MCLBYTES) 741 return (EINVAL); 742 nl_maxsockbuf = tmp_maxsockbuf; 743 744 return (0); 745 } 746 747 static int 748 nl_setsbopt(struct socket *so, struct sockopt *sopt) 749 { 750 int error, optval; 751 bool result; 752 753 if (sopt->sopt_name != SO_RCVBUF) 754 return (sbsetopt(so, sopt)); 755 756 /* Allow to override max buffer size in certain conditions */ 757 758 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 759 if (error != 0) 760 return (error); 761 NL_LOG(LOG_DEBUG2, "socket %p, PID %d, SO_RCVBUF=%d", so, curproc->p_pid, optval); 762 if (optval > sb_max_adj) { 763 if (priv_check(curthread, PRIV_NET_ROUTE) != 0) 764 return (EPERM); 765 } 766 767 SOCK_RECVBUF_LOCK(so); 768 result = sbreserve_locked_limit(so, SO_RCV, optval, nl_maxsockbuf, curthread); 769 SOCK_RECVBUF_UNLOCK(so); 770 771 return (result ? 0 : ENOBUFS); 772 } 773 774 #define NETLINK_PROTOSW \ 775 .pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD, \ 776 .pr_ctloutput = nl_ctloutput, \ 777 .pr_setsbopt = nl_setsbopt, \ 778 .pr_abort = nl_pru_abort, \ 779 .pr_attach = nl_pru_attach, \ 780 .pr_bind = nl_pru_bind, \ 781 .pr_connect = nl_pru_connect, \ 782 .pr_detach = nl_pru_detach, \ 783 .pr_disconnect = nl_pru_disconnect, \ 784 .pr_peeraddr = nl_pru_peeraddr, \ 785 .pr_send = nl_pru_send, \ 786 .pr_rcvd = nl_pru_rcvd, \ 787 .pr_shutdown = nl_pru_shutdown, \ 788 .pr_sockaddr = nl_pru_sockaddr, \ 789 .pr_close = nl_pru_close 790 791 static struct protosw netlink_raw_sw = { 792 .pr_type = SOCK_RAW, 793 NETLINK_PROTOSW 794 }; 795 796 static struct protosw netlink_dgram_sw = { 797 .pr_type = SOCK_DGRAM, 798 NETLINK_PROTOSW 799 }; 800 801 static struct domain netlinkdomain = { 802 .dom_family = PF_NETLINK, 803 .dom_name = "netlink", 804 .dom_flags = DOMF_UNLOADABLE, 805 .dom_nprotosw = 2, 806 .dom_protosw = { &netlink_raw_sw, &netlink_dgram_sw }, 807 }; 808 809 DOMAIN_SET(netlink); 810