/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2021 Ng Peng Nam Sean
 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org>
 * Copyright (c) 2023 Gleb Smirnoff <glebius@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
297e5bf684SAlexander V. Chernikov
/*
 * This file contains socket and protocol bindings for netlink.
 */
337e5bf684SAlexander V. Chernikov
347e5bf684SAlexander V. Chernikov #include <sys/param.h>
35fc083c3eSJung-uk Kim #include <sys/kernel.h>
367e5bf684SAlexander V. Chernikov #include <sys/malloc.h>
377e5bf684SAlexander V. Chernikov #include <sys/lock.h>
387e5bf684SAlexander V. Chernikov #include <sys/rmlock.h>
397e5bf684SAlexander V. Chernikov #include <sys/domain.h>
4004f75b98SAlexander V. Chernikov #include <sys/jail.h>
417e5bf684SAlexander V. Chernikov #include <sys/mbuf.h>
4230d7e724SAlexander V. Chernikov #include <sys/osd.h>
437e5bf684SAlexander V. Chernikov #include <sys/protosw.h>
448d9f3e05SAlexander V. Chernikov #include <sys/proc.h>
457e5bf684SAlexander V. Chernikov #include <sys/ck.h>
467e5bf684SAlexander V. Chernikov #include <sys/socket.h>
477e5bf684SAlexander V. Chernikov #include <sys/socketvar.h>
487e5bf684SAlexander V. Chernikov #include <sys/sysent.h>
497e5bf684SAlexander V. Chernikov #include <sys/syslog.h>
500fda4ffdSGleb Smirnoff #include <sys/priv.h>
51660bd40aSGleb Smirnoff #include <sys/uio.h>
527e5bf684SAlexander V. Chernikov
537e5bf684SAlexander V. Chernikov #include <netlink/netlink.h>
547e5bf684SAlexander V. Chernikov #include <netlink/netlink_ctl.h>
557e5bf684SAlexander V. Chernikov #include <netlink/netlink_var.h>
567e5bf684SAlexander V. Chernikov
577e5bf684SAlexander V. Chernikov #define DEBUG_MOD_NAME nl_domain
587e5bf684SAlexander V. Chernikov #define DEBUG_MAX_LEVEL LOG_DEBUG3
597e5bf684SAlexander V. Chernikov #include <netlink/netlink_debug.h>
60fa554de7SKristof Provost _DECLARE_DEBUG(LOG_INFO);
617e5bf684SAlexander V. Chernikov
624dfd380eSAlexander V. Chernikov _Static_assert((NLP_MAX_GROUPS % 64) == 0,
634dfd380eSAlexander V. Chernikov "NLP_MAX_GROUPS has to be multiple of 64");
644dfd380eSAlexander V. Chernikov _Static_assert(NLP_MAX_GROUPS >= 64,
654dfd380eSAlexander V. Chernikov "NLP_MAX_GROUPS has to be at least 64");
667e5bf684SAlexander V. Chernikov
677e5bf684SAlexander V. Chernikov #define NLCTL_TRACKER struct rm_priotracker nl_tracker
68841dcdcdSGleb Smirnoff #define NLCTL_RLOCK() rm_rlock(&V_nl_ctl.ctl_lock, &nl_tracker)
69841dcdcdSGleb Smirnoff #define NLCTL_RUNLOCK() rm_runlock(&V_nl_ctl.ctl_lock, &nl_tracker)
70841dcdcdSGleb Smirnoff #define NLCTL_LOCK_ASSERT() rm_assert(&V_nl_ctl.ctl_lock, RA_LOCKED)
717e5bf684SAlexander V. Chernikov
72841dcdcdSGleb Smirnoff #define NLCTL_WLOCK() rm_wlock(&V_nl_ctl.ctl_lock)
73841dcdcdSGleb Smirnoff #define NLCTL_WUNLOCK() rm_wunlock(&V_nl_ctl.ctl_lock)
74841dcdcdSGleb Smirnoff #define NLCTL_WLOCK_ASSERT() rm_assert(&V_nl_ctl.ctl_lock, RA_WLOCKED)
757e5bf684SAlexander V. Chernikov
767e5bf684SAlexander V. Chernikov static u_long nl_sendspace = NLSNDQ;
777e5bf684SAlexander V. Chernikov SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0,
787e5bf684SAlexander V. Chernikov "Default netlink socket send space");
797e5bf684SAlexander V. Chernikov
807e5bf684SAlexander V. Chernikov static u_long nl_recvspace = NLSNDQ;
817e5bf684SAlexander V. Chernikov SYSCTL_ULONG(_net_netlink, OID_AUTO, recvspace, CTLFLAG_RW, &nl_recvspace, 0,
827e5bf684SAlexander V. Chernikov "Default netlink socket receive space");
837e5bf684SAlexander V. Chernikov
847e5bf684SAlexander V. Chernikov extern u_long sb_max_adj;
857e5bf684SAlexander V. Chernikov static u_long nl_maxsockbuf = 512 * 1024 * 1024; /* 512M, XXX: init based on physmem */
8628a5d88fSAlexander V. Chernikov static int sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS);
8728a5d88fSAlexander V. Chernikov SYSCTL_OID(_net_netlink, OID_AUTO, nl_maxsockbuf,
8828a5d88fSAlexander V. Chernikov CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &nl_maxsockbuf, 0,
8928a5d88fSAlexander V. Chernikov sysctl_handle_nl_maxsockbuf, "LU",
9028a5d88fSAlexander V. Chernikov "Maximum Netlink socket buffer size");
917e5bf684SAlexander V. Chernikov
9230d7e724SAlexander V. Chernikov
9330d7e724SAlexander V. Chernikov static unsigned int osd_slot_id = 0;
9430d7e724SAlexander V. Chernikov
9530d7e724SAlexander V. Chernikov void
nl_osd_register(void)9630d7e724SAlexander V. Chernikov nl_osd_register(void)
9730d7e724SAlexander V. Chernikov {
9830d7e724SAlexander V. Chernikov osd_slot_id = osd_register(OSD_THREAD, NULL, NULL);
9930d7e724SAlexander V. Chernikov }
10030d7e724SAlexander V. Chernikov
10130d7e724SAlexander V. Chernikov void
nl_osd_unregister(void)10230d7e724SAlexander V. Chernikov nl_osd_unregister(void)
10330d7e724SAlexander V. Chernikov {
10430d7e724SAlexander V. Chernikov osd_deregister(OSD_THREAD, osd_slot_id);
10530d7e724SAlexander V. Chernikov }
10630d7e724SAlexander V. Chernikov
10730d7e724SAlexander V. Chernikov struct nlpcb *
_nl_get_thread_nlp(struct thread * td)10830d7e724SAlexander V. Chernikov _nl_get_thread_nlp(struct thread *td)
10930d7e724SAlexander V. Chernikov {
11030d7e724SAlexander V. Chernikov return (osd_get(OSD_THREAD, &td->td_osd, osd_slot_id));
11130d7e724SAlexander V. Chernikov }
11230d7e724SAlexander V. Chernikov
11330d7e724SAlexander V. Chernikov void
nl_set_thread_nlp(struct thread * td,struct nlpcb * nlp)11430d7e724SAlexander V. Chernikov nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp)
11530d7e724SAlexander V. Chernikov {
11630d7e724SAlexander V. Chernikov NLP_LOG(LOG_DEBUG2, nlp, "Set thread %p nlp to %p (slot %u)", td, nlp, osd_slot_id);
11730d7e724SAlexander V. Chernikov if (osd_set(OSD_THREAD, &td->td_osd, osd_slot_id, nlp) == 0)
11830d7e724SAlexander V. Chernikov return;
11930d7e724SAlexander V. Chernikov /* Failed, need to realloc */
12030d7e724SAlexander V. Chernikov void **rsv = osd_reserve(osd_slot_id);
12130d7e724SAlexander V. Chernikov osd_set_reserved(OSD_THREAD, &td->td_osd, osd_slot_id, rsv, nlp);
12230d7e724SAlexander V. Chernikov }
12330d7e724SAlexander V. Chernikov
1247e5bf684SAlexander V. Chernikov /*
1257e5bf684SAlexander V. Chernikov * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx.
1267e5bf684SAlexander V. Chernikov * Returns nlpcb pointer if present else NULL
1277e5bf684SAlexander V. Chernikov */
1287e5bf684SAlexander V. Chernikov static struct nlpcb *
nl_port_lookup(uint32_t port_id)1297e5bf684SAlexander V. Chernikov nl_port_lookup(uint32_t port_id)
1307e5bf684SAlexander V. Chernikov {
1317e5bf684SAlexander V. Chernikov struct nlpcb *nlp;
1327e5bf684SAlexander V. Chernikov
133841dcdcdSGleb Smirnoff CK_LIST_FOREACH(nlp, &V_nl_ctl.ctl_port_head, nl_port_next) {
1347e5bf684SAlexander V. Chernikov if (nlp->nl_port == port_id)
1357e5bf684SAlexander V. Chernikov return (nlp);
1367e5bf684SAlexander V. Chernikov }
1377e5bf684SAlexander V. Chernikov return (NULL);
1387e5bf684SAlexander V. Chernikov }
1397e5bf684SAlexander V. Chernikov
1407e5bf684SAlexander V. Chernikov static void
nlp_join_group(struct nlpcb * nlp,unsigned int group_id)141164dec88SGleb Smirnoff nlp_join_group(struct nlpcb *nlp, unsigned int group_id)
1427e5bf684SAlexander V. Chernikov {
1436ed34869SGleb Smirnoff MPASS(group_id < NLP_MAX_GROUPS);
144164dec88SGleb Smirnoff NLCTL_WLOCK_ASSERT();
1454dfd380eSAlexander V. Chernikov
14604f75b98SAlexander V. Chernikov /* TODO: add family handler callback */
14704f75b98SAlexander V. Chernikov if (!nlp_unconstrained_vnet(nlp))
14804f75b98SAlexander V. Chernikov return;
14904f75b98SAlexander V. Chernikov
150edf5608bSGleb Smirnoff BIT_SET(NLP_MAX_GROUPS, group_id, &nlp->nl_groups);
1514dfd380eSAlexander V. Chernikov }
1524dfd380eSAlexander V. Chernikov
1534dfd380eSAlexander V. Chernikov static void
nlp_leave_group(struct nlpcb * nlp,unsigned int group_id)154164dec88SGleb Smirnoff nlp_leave_group(struct nlpcb *nlp, unsigned int group_id)
1554dfd380eSAlexander V. Chernikov {
1566ed34869SGleb Smirnoff MPASS(group_id < NLP_MAX_GROUPS);
157164dec88SGleb Smirnoff NLCTL_WLOCK_ASSERT();
1584dfd380eSAlexander V. Chernikov
159edf5608bSGleb Smirnoff BIT_CLR(NLP_MAX_GROUPS, group_id, &nlp->nl_groups);
1604dfd380eSAlexander V. Chernikov }
1614dfd380eSAlexander V. Chernikov
1624dfd380eSAlexander V. Chernikov static bool
nlp_memberof_group(struct nlpcb * nlp,unsigned int group_id)163164dec88SGleb Smirnoff nlp_memberof_group(struct nlpcb *nlp, unsigned int group_id)
1644dfd380eSAlexander V. Chernikov {
1656ed34869SGleb Smirnoff MPASS(group_id < NLP_MAX_GROUPS);
166164dec88SGleb Smirnoff NLCTL_LOCK_ASSERT();
1674dfd380eSAlexander V. Chernikov
168edf5608bSGleb Smirnoff return (BIT_ISSET(NLP_MAX_GROUPS, group_id, &nlp->nl_groups));
1694dfd380eSAlexander V. Chernikov }
1704dfd380eSAlexander V. Chernikov
/*
 * Builds the 32-bit group mask used by struct sockaddr_nl from @nlp's
 * group set: bit N of the result is set when the socket is a member of
 * group N + 1 (groups 1..32).  Requires the control lock to be held.
 */
static uint32_t
nlp_get_groups_compat(struct nlpcb *nlp)
{
	uint32_t groups_mask = 0;

	NLCTL_LOCK_ASSERT();

	for (unsigned int bit = 0; bit < 32; bit++) {
		/*
		 * Shift an unsigned 32-bit one: shifting a signed int
		 * into bit 31 is undefined behaviour.
		 */
		if (nlp_memberof_group(nlp, bit + 1))
			groups_mask |= (uint32_t)1 << bit;
	}

	return (groups_mask);
}
1857e5bf684SAlexander V. Chernikov
18617083b94SGleb Smirnoff static struct nl_buf *
nl_buf_copy(struct nl_buf * nb)18717083b94SGleb Smirnoff nl_buf_copy(struct nl_buf *nb)
18817083b94SGleb Smirnoff {
18917083b94SGleb Smirnoff struct nl_buf *copy;
19017083b94SGleb Smirnoff
19117083b94SGleb Smirnoff copy = nl_buf_alloc(nb->buflen, M_NOWAIT);
19217083b94SGleb Smirnoff if (__predict_false(copy == NULL))
19317083b94SGleb Smirnoff return (NULL);
19417083b94SGleb Smirnoff memcpy(copy, nb, sizeof(*nb) + nb->buflen);
19517083b94SGleb Smirnoff
19617083b94SGleb Smirnoff return (copy);
19730d7e724SAlexander V. Chernikov }
19830d7e724SAlexander V. Chernikov
1997e5bf684SAlexander V. Chernikov /*
20017083b94SGleb Smirnoff * Broadcasts in the writer's buffer.
2017e5bf684SAlexander V. Chernikov */
20217083b94SGleb Smirnoff bool
nl_send_group(struct nl_writer * nw)20317083b94SGleb Smirnoff nl_send_group(struct nl_writer *nw)
2047e5bf684SAlexander V. Chernikov {
20517083b94SGleb Smirnoff struct nl_buf *nb = nw->buf;
2067e5bf684SAlexander V. Chernikov struct nlpcb *nlp_last = NULL;
2077e5bf684SAlexander V. Chernikov struct nlpcb *nlp;
2087e5bf684SAlexander V. Chernikov NLCTL_TRACKER;
2097e5bf684SAlexander V. Chernikov
2107e5bf684SAlexander V. Chernikov IF_DEBUG_LEVEL(LOG_DEBUG2) {
21117083b94SGleb Smirnoff struct nlmsghdr *hdr = (struct nlmsghdr *)nb->data;
21217083b94SGleb Smirnoff NL_LOG(LOG_DEBUG2, "MCAST len %u msg type %d len %u to group %d/%d",
21317083b94SGleb Smirnoff nb->datalen, hdr->nlmsg_type, hdr->nlmsg_len,
21417083b94SGleb Smirnoff nw->group.proto, nw->group.id);
2157e5bf684SAlexander V. Chernikov }
2167e5bf684SAlexander V. Chernikov
21717083b94SGleb Smirnoff nw->buf = NULL;
21817083b94SGleb Smirnoff
219841dcdcdSGleb Smirnoff NLCTL_RLOCK();
220841dcdcdSGleb Smirnoff CK_LIST_FOREACH(nlp, &V_nl_ctl.ctl_pcb_head, nl_next) {
2210fda4ffdSGleb Smirnoff if ((nw->group.priv == 0 || priv_check_cred(
2220fda4ffdSGleb Smirnoff nlp->nl_socket->so_cred, nw->group.priv) == 0) &&
2230fda4ffdSGleb Smirnoff nlp->nl_proto == nw->group.proto &&
224164dec88SGleb Smirnoff nlp_memberof_group(nlp, nw->group.id)) {
2257e5bf684SAlexander V. Chernikov if (nlp_last != NULL) {
22617083b94SGleb Smirnoff struct nl_buf *copy;
22717083b94SGleb Smirnoff
22817083b94SGleb Smirnoff copy = nl_buf_copy(nb);
22917083b94SGleb Smirnoff if (copy != NULL) {
230ff5ad900SGleb Smirnoff nw->buf = copy;
23109fa78d4SGleb Smirnoff (void)nl_send(nw, nlp_last);
23217083b94SGleb Smirnoff } else {
2337e5bf684SAlexander V. Chernikov NLP_LOCK(nlp_last);
2347e5bf684SAlexander V. Chernikov if (nlp_last->nl_socket != NULL)
2357e5bf684SAlexander V. Chernikov sorwakeup(nlp_last->nl_socket);
2367e5bf684SAlexander V. Chernikov NLP_UNLOCK(nlp_last);
2377e5bf684SAlexander V. Chernikov }
2387e5bf684SAlexander V. Chernikov }
2397e5bf684SAlexander V. Chernikov nlp_last = nlp;
2407e5bf684SAlexander V. Chernikov }
2417e5bf684SAlexander V. Chernikov }
242ff5ad900SGleb Smirnoff if (nlp_last != NULL) {
243ff5ad900SGleb Smirnoff nw->buf = nb;
24409fa78d4SGleb Smirnoff (void)nl_send(nw, nlp_last);
245ff5ad900SGleb Smirnoff } else
24617083b94SGleb Smirnoff nl_buf_free(nb);
2477e5bf684SAlexander V. Chernikov
248841dcdcdSGleb Smirnoff NLCTL_RUNLOCK();
24917083b94SGleb Smirnoff
25017083b94SGleb Smirnoff return (true);
2517e5bf684SAlexander V. Chernikov }
2527e5bf684SAlexander V. Chernikov
25306cf3651SGleb Smirnoff void
nl_clear_group(u_int group)25406cf3651SGleb Smirnoff nl_clear_group(u_int group)
25506cf3651SGleb Smirnoff {
25606cf3651SGleb Smirnoff struct nlpcb *nlp;
25706cf3651SGleb Smirnoff
25806cf3651SGleb Smirnoff NLCTL_WLOCK();
25906cf3651SGleb Smirnoff CK_LIST_FOREACH(nlp, &V_nl_ctl.ctl_pcb_head, nl_next)
26006cf3651SGleb Smirnoff if (nlp_memberof_group(nlp, group))
26106cf3651SGleb Smirnoff nlp_leave_group(nlp, group);
26206cf3651SGleb Smirnoff NLCTL_WUNLOCK();
26306cf3651SGleb Smirnoff }
26406cf3651SGleb Smirnoff
2657e5bf684SAlexander V. Chernikov static uint32_t
nl_find_port(void)26643d0c2ddSEd Maste nl_find_port(void)
26743d0c2ddSEd Maste {
2687e5bf684SAlexander V. Chernikov /*
2697e5bf684SAlexander V. Chernikov * app can open multiple netlink sockets.
2707e5bf684SAlexander V. Chernikov * Start with current pid, if already taken,
2717e5bf684SAlexander V. Chernikov * try random numbers in 65k..256k+65k space,
2727e5bf684SAlexander V. Chernikov * avoiding clash with pids.
2737e5bf684SAlexander V. Chernikov */
2747e5bf684SAlexander V. Chernikov if (nl_port_lookup(curproc->p_pid) == NULL)
2757e5bf684SAlexander V. Chernikov return (curproc->p_pid);
2767e5bf684SAlexander V. Chernikov for (int i = 0; i < 16; i++) {
2777e5bf684SAlexander V. Chernikov uint32_t nl_port = (arc4random() % 65536) + 65536 * 4;
2787e5bf684SAlexander V. Chernikov if (nl_port_lookup(nl_port) == 0)
2797e5bf684SAlexander V. Chernikov return (nl_port);
2807e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "tried %u\n", nl_port);
2817e5bf684SAlexander V. Chernikov }
2827e5bf684SAlexander V. Chernikov return (curproc->p_pid);
2837e5bf684SAlexander V. Chernikov }
2847e5bf684SAlexander V. Chernikov
2857e5bf684SAlexander V. Chernikov static int
nl_bind_locked(struct nlpcb * nlp,struct sockaddr_nl * snl)2867e5bf684SAlexander V. Chernikov nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl)
2877e5bf684SAlexander V. Chernikov {
2887e5bf684SAlexander V. Chernikov if (nlp->nl_bound) {
2897e5bf684SAlexander V. Chernikov if (nlp->nl_port != snl->nl_pid) {
2907e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG,
2917e5bf684SAlexander V. Chernikov "bind() failed: program pid %d "
2927e5bf684SAlexander V. Chernikov "is different from provided pid %d",
2937e5bf684SAlexander V. Chernikov nlp->nl_port, snl->nl_pid);
2947e5bf684SAlexander V. Chernikov return (EINVAL); // XXX: better error
2957e5bf684SAlexander V. Chernikov }
2967e5bf684SAlexander V. Chernikov } else {
2977e5bf684SAlexander V. Chernikov if (snl->nl_pid == 0)
2987e5bf684SAlexander V. Chernikov snl->nl_pid = nl_find_port();
2997e5bf684SAlexander V. Chernikov if (nl_port_lookup(snl->nl_pid) != NULL)
3007e5bf684SAlexander V. Chernikov return (EADDRINUSE);
3017e5bf684SAlexander V. Chernikov nlp->nl_port = snl->nl_pid;
3027e5bf684SAlexander V. Chernikov nlp->nl_bound = true;
303841dcdcdSGleb Smirnoff CK_LIST_INSERT_HEAD(&V_nl_ctl.ctl_port_head, nlp, nl_port_next);
3047e5bf684SAlexander V. Chernikov }
3054dfd380eSAlexander V. Chernikov for (int i = 0; i < 32; i++) {
3064dfd380eSAlexander V. Chernikov if (snl->nl_groups & ((uint32_t)1 << i))
307164dec88SGleb Smirnoff nlp_join_group(nlp, i + 1);
3084dfd380eSAlexander V. Chernikov else
309164dec88SGleb Smirnoff nlp_leave_group(nlp, i + 1);
3104dfd380eSAlexander V. Chernikov }
3117e5bf684SAlexander V. Chernikov
3127e5bf684SAlexander V. Chernikov return (0);
3137e5bf684SAlexander V. Chernikov }
3147e5bf684SAlexander V. Chernikov
3157e5bf684SAlexander V. Chernikov static int
nl_attach(struct socket * so,int proto,struct thread * td)31656304579SGleb Smirnoff nl_attach(struct socket *so, int proto, struct thread *td)
3177e5bf684SAlexander V. Chernikov {
3187e5bf684SAlexander V. Chernikov struct nlpcb *nlp;
3197e5bf684SAlexander V. Chernikov int error;
3207e5bf684SAlexander V. Chernikov
3217e5bf684SAlexander V. Chernikov if (__predict_false(netlink_unloading != 0))
3227e5bf684SAlexander V. Chernikov return (EAFNOSUPPORT);
3237e5bf684SAlexander V. Chernikov
3247e5bf684SAlexander V. Chernikov error = nl_verify_proto(proto);
3257e5bf684SAlexander V. Chernikov if (error != 0)
3267e5bf684SAlexander V. Chernikov return (error);
3277e5bf684SAlexander V. Chernikov
3287e5bf684SAlexander V. Chernikov bool is_linux = SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX;
3297e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "socket %p, %sPID %d: attaching socket to %s",
3307e5bf684SAlexander V. Chernikov so, is_linux ? "(linux) " : "", curproc->p_pid,
3317e5bf684SAlexander V. Chernikov nl_get_proto_name(proto));
3327e5bf684SAlexander V. Chernikov
3337e5bf684SAlexander V. Chernikov nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO);
3347e5bf684SAlexander V. Chernikov error = soreserve(so, nl_sendspace, nl_recvspace);
3357e5bf684SAlexander V. Chernikov if (error != 0) {
3367e5bf684SAlexander V. Chernikov free(nlp, M_PCB);
3377e5bf684SAlexander V. Chernikov return (error);
3387e5bf684SAlexander V. Chernikov }
33917083b94SGleb Smirnoff TAILQ_INIT(&so->so_rcv.nl_queue);
340660bd40aSGleb Smirnoff TAILQ_INIT(&so->so_snd.nl_queue);
3417e5bf684SAlexander V. Chernikov so->so_pcb = nlp;
3427e5bf684SAlexander V. Chernikov nlp->nl_socket = so;
3437e5bf684SAlexander V. Chernikov nlp->nl_proto = proto;
3447e5bf684SAlexander V. Chernikov nlp->nl_process_id = curproc->p_pid;
3457e5bf684SAlexander V. Chernikov nlp->nl_linux = is_linux;
34604f75b98SAlexander V. Chernikov nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred);
34730d7e724SAlexander V. Chernikov nlp->nl_need_thread_setup = true;
3487e5bf684SAlexander V. Chernikov NLP_LOCK_INIT(nlp);
3497e5bf684SAlexander V. Chernikov refcount_init(&nlp->nl_refcount, 1);
3507e5bf684SAlexander V. Chernikov
3517e5bf684SAlexander V. Chernikov nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK,
3527e5bf684SAlexander V. Chernikov taskqueue_thread_enqueue, &nlp->nl_taskqueue);
3537e5bf684SAlexander V. Chernikov TASK_INIT(&nlp->nl_task, 0, nl_taskqueue_handler, nlp);
3547e5bf684SAlexander V. Chernikov taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT,
3557e5bf684SAlexander V. Chernikov "netlink_socket (PID %u)", nlp->nl_process_id);
3567e5bf684SAlexander V. Chernikov
357841dcdcdSGleb Smirnoff NLCTL_WLOCK();
358841dcdcdSGleb Smirnoff CK_LIST_INSERT_HEAD(&V_nl_ctl.ctl_pcb_head, nlp, nl_next);
359841dcdcdSGleb Smirnoff NLCTL_WUNLOCK();
3607e5bf684SAlexander V. Chernikov
3617e5bf684SAlexander V. Chernikov soisconnected(so);
3627e5bf684SAlexander V. Chernikov
3637e5bf684SAlexander V. Chernikov return (0);
3647e5bf684SAlexander V. Chernikov }
3657e5bf684SAlexander V. Chernikov
3667e5bf684SAlexander V. Chernikov static int
nl_bind(struct socket * so,struct sockaddr * sa,struct thread * td)36756304579SGleb Smirnoff nl_bind(struct socket *so, struct sockaddr *sa, struct thread *td)
3687e5bf684SAlexander V. Chernikov {
3697e5bf684SAlexander V. Chernikov struct nlpcb *nlp = sotonlpcb(so);
3707e5bf684SAlexander V. Chernikov struct sockaddr_nl *snl = (struct sockaddr_nl *)sa;
3717e5bf684SAlexander V. Chernikov int error;
3727e5bf684SAlexander V. Chernikov
3737e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
3747e5bf684SAlexander V. Chernikov if (snl->nl_len != sizeof(*snl)) {
3757e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so);
3767e5bf684SAlexander V. Chernikov return (EINVAL);
3777e5bf684SAlexander V. Chernikov }
3787e5bf684SAlexander V. Chernikov
3797e5bf684SAlexander V. Chernikov
380841dcdcdSGleb Smirnoff NLCTL_WLOCK();
3817e5bf684SAlexander V. Chernikov NLP_LOCK(nlp);
3827e5bf684SAlexander V. Chernikov error = nl_bind_locked(nlp, snl);
3837e5bf684SAlexander V. Chernikov NLP_UNLOCK(nlp);
384841dcdcdSGleb Smirnoff NLCTL_WUNLOCK();
3857e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so,
3867e5bf684SAlexander V. Chernikov snl->nl_pid, snl->nl_groups, error);
3877e5bf684SAlexander V. Chernikov
3887e5bf684SAlexander V. Chernikov return (error);
3897e5bf684SAlexander V. Chernikov }
3907e5bf684SAlexander V. Chernikov
3917e5bf684SAlexander V. Chernikov
3927e5bf684SAlexander V. Chernikov static int
nl_assign_port(struct nlpcb * nlp,uint32_t port_id)3937e5bf684SAlexander V. Chernikov nl_assign_port(struct nlpcb *nlp, uint32_t port_id)
3947e5bf684SAlexander V. Chernikov {
3957e5bf684SAlexander V. Chernikov struct sockaddr_nl snl = {
3967e5bf684SAlexander V. Chernikov .nl_pid = port_id,
3977e5bf684SAlexander V. Chernikov };
3987e5bf684SAlexander V. Chernikov int error;
3997e5bf684SAlexander V. Chernikov
400841dcdcdSGleb Smirnoff NLCTL_WLOCK();
4017e5bf684SAlexander V. Chernikov NLP_LOCK(nlp);
402164dec88SGleb Smirnoff snl.nl_groups = nlp_get_groups_compat(nlp);
4037e5bf684SAlexander V. Chernikov error = nl_bind_locked(nlp, &snl);
4047e5bf684SAlexander V. Chernikov NLP_UNLOCK(nlp);
405841dcdcdSGleb Smirnoff NLCTL_WUNLOCK();
4067e5bf684SAlexander V. Chernikov
4077e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, port assign: %d, error: %d", nlp->nl_socket, port_id, error);
4087e5bf684SAlexander V. Chernikov return (error);
4097e5bf684SAlexander V. Chernikov }
4107e5bf684SAlexander V. Chernikov
4117e5bf684SAlexander V. Chernikov /*
4127e5bf684SAlexander V. Chernikov * nl_autobind_port binds a unused portid to @nlp
4137e5bf684SAlexander V. Chernikov * @nlp: pcb data for the netlink socket
4147e5bf684SAlexander V. Chernikov * @candidate_id: first id to consider
4157e5bf684SAlexander V. Chernikov */
4167e5bf684SAlexander V. Chernikov static int
nl_autobind_port(struct nlpcb * nlp,uint32_t candidate_id)4177e5bf684SAlexander V. Chernikov nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id)
4187e5bf684SAlexander V. Chernikov {
4197e5bf684SAlexander V. Chernikov uint32_t port_id = candidate_id;
4207e5bf684SAlexander V. Chernikov NLCTL_TRACKER;
4217e5bf684SAlexander V. Chernikov bool exist;
4224404e840SAlexander V. Chernikov int error = EADDRINUSE;
4237e5bf684SAlexander V. Chernikov
4247e5bf684SAlexander V. Chernikov for (int i = 0; i < 10; i++) {
4257e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d", nlp->nl_socket, port_id);
426841dcdcdSGleb Smirnoff NLCTL_RLOCK();
4277e5bf684SAlexander V. Chernikov exist = nl_port_lookup(port_id) != 0;
428841dcdcdSGleb Smirnoff NLCTL_RUNLOCK();
4297e5bf684SAlexander V. Chernikov if (!exist) {
4307e5bf684SAlexander V. Chernikov error = nl_assign_port(nlp, port_id);
4317e5bf684SAlexander V. Chernikov if (error != EADDRINUSE)
4327e5bf684SAlexander V. Chernikov break;
4337e5bf684SAlexander V. Chernikov }
4347e5bf684SAlexander V. Chernikov port_id++;
4357e5bf684SAlexander V. Chernikov }
4367e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, autobind to %d, error: %d", nlp->nl_socket, port_id, error);
4377e5bf684SAlexander V. Chernikov return (error);
4387e5bf684SAlexander V. Chernikov }
4397e5bf684SAlexander V. Chernikov
4407e5bf684SAlexander V. Chernikov static int
nl_connect(struct socket * so,struct sockaddr * sa,struct thread * td)44156304579SGleb Smirnoff nl_connect(struct socket *so, struct sockaddr *sa, struct thread *td)
4427e5bf684SAlexander V. Chernikov {
4437e5bf684SAlexander V. Chernikov struct sockaddr_nl *snl = (struct sockaddr_nl *)sa;
4447e5bf684SAlexander V. Chernikov struct nlpcb *nlp;
4457e5bf684SAlexander V. Chernikov
4467e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
4477e5bf684SAlexander V. Chernikov if (snl->nl_len != sizeof(*snl)) {
4487e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so);
4497e5bf684SAlexander V. Chernikov return (EINVAL);
4507e5bf684SAlexander V. Chernikov }
4517e5bf684SAlexander V. Chernikov
4527e5bf684SAlexander V. Chernikov nlp = sotonlpcb(so);
4537e5bf684SAlexander V. Chernikov if (!nlp->nl_bound) {
4547e5bf684SAlexander V. Chernikov int error = nl_autobind_port(nlp, td->td_proc->p_pid);
4557e5bf684SAlexander V. Chernikov if (error != 0) {
4567e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG, "socket %p, nl_autobind() failed: %d", so, error);
4577e5bf684SAlexander V. Chernikov return (error);
4587e5bf684SAlexander V. Chernikov }
4597e5bf684SAlexander V. Chernikov }
4607e5bf684SAlexander V. Chernikov /* XXX: Handle socket flags & multicast */
4617e5bf684SAlexander V. Chernikov soisconnected(so);
4627e5bf684SAlexander V. Chernikov
4637e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "socket %p, connect to %u", so, snl->nl_pid);
4647e5bf684SAlexander V. Chernikov
4657e5bf684SAlexander V. Chernikov return (0);
4667e5bf684SAlexander V. Chernikov }
4677e5bf684SAlexander V. Chernikov
4687e5bf684SAlexander V. Chernikov static void
destroy_nlpcb_epoch(epoch_context_t ctx)4697e5bf684SAlexander V. Chernikov destroy_nlpcb_epoch(epoch_context_t ctx)
4707e5bf684SAlexander V. Chernikov {
4717e5bf684SAlexander V. Chernikov struct nlpcb *nlp;
4727e5bf684SAlexander V. Chernikov
4737e5bf684SAlexander V. Chernikov nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx);
4747e5bf684SAlexander V. Chernikov
47517083b94SGleb Smirnoff NLP_LOCK_DESTROY(nlp);
47617083b94SGleb Smirnoff free(nlp, M_PCB);
4777e5bf684SAlexander V. Chernikov }
4787e5bf684SAlexander V. Chernikov
4797e5bf684SAlexander V. Chernikov static void
nl_close(struct socket * so)48097958f5dSGleb Smirnoff nl_close(struct socket *so)
4817e5bf684SAlexander V. Chernikov {
4827e5bf684SAlexander V. Chernikov MPASS(sotonlpcb(so) != NULL);
4837e5bf684SAlexander V. Chernikov struct nlpcb *nlp;
484660bd40aSGleb Smirnoff struct nl_buf *nb;
4857e5bf684SAlexander V. Chernikov
4867e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "detaching socket %p, PID %d", so, curproc->p_pid);
4877e5bf684SAlexander V. Chernikov nlp = sotonlpcb(so);
4887e5bf684SAlexander V. Chernikov
4897e5bf684SAlexander V. Chernikov /* Mark as inactive so no new work can be enqueued */
4907e5bf684SAlexander V. Chernikov NLP_LOCK(nlp);
4917e5bf684SAlexander V. Chernikov bool was_bound = nlp->nl_bound;
4927e5bf684SAlexander V. Chernikov NLP_UNLOCK(nlp);
4937e5bf684SAlexander V. Chernikov
4947e5bf684SAlexander V. Chernikov /* Wait till all scheduled work has been completed */
4957e5bf684SAlexander V. Chernikov taskqueue_drain_all(nlp->nl_taskqueue);
4967e5bf684SAlexander V. Chernikov taskqueue_free(nlp->nl_taskqueue);
4977e5bf684SAlexander V. Chernikov
498841dcdcdSGleb Smirnoff NLCTL_WLOCK();
4997e5bf684SAlexander V. Chernikov NLP_LOCK(nlp);
5007e5bf684SAlexander V. Chernikov if (was_bound) {
5017e5bf684SAlexander V. Chernikov CK_LIST_REMOVE(nlp, nl_port_next);
5027e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, unlinking bound pid %u", so, nlp->nl_port);
5037e5bf684SAlexander V. Chernikov }
5047e5bf684SAlexander V. Chernikov CK_LIST_REMOVE(nlp, nl_next);
5057e5bf684SAlexander V. Chernikov nlp->nl_socket = NULL;
5067e5bf684SAlexander V. Chernikov NLP_UNLOCK(nlp);
507841dcdcdSGleb Smirnoff NLCTL_WUNLOCK();
5087e5bf684SAlexander V. Chernikov
5097e5bf684SAlexander V. Chernikov so->so_pcb = NULL;
5107e5bf684SAlexander V. Chernikov
511660bd40aSGleb Smirnoff while ((nb = TAILQ_FIRST(&so->so_snd.nl_queue)) != NULL) {
512660bd40aSGleb Smirnoff TAILQ_REMOVE(&so->so_snd.nl_queue, nb, tailq);
51317083b94SGleb Smirnoff nl_buf_free(nb);
514660bd40aSGleb Smirnoff }
51517083b94SGleb Smirnoff while ((nb = TAILQ_FIRST(&so->so_rcv.nl_queue)) != NULL) {
51617083b94SGleb Smirnoff TAILQ_REMOVE(&so->so_rcv.nl_queue, nb, tailq);
51717083b94SGleb Smirnoff nl_buf_free(nb);
51817083b94SGleb Smirnoff }
519660bd40aSGleb Smirnoff
5207e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, detached", so);
5217e5bf684SAlexander V. Chernikov
5227e5bf684SAlexander V. Chernikov /* XXX: is delayed free needed? */
523ab591c87SZhenlei Huang NET_EPOCH_CALL(destroy_nlpcb_epoch, &nlp->nl_epoch_ctx);
5247e5bf684SAlexander V. Chernikov }
5257e5bf684SAlexander V. Chernikov
5267e5bf684SAlexander V. Chernikov static int
nl_disconnect(struct socket * so)52756304579SGleb Smirnoff nl_disconnect(struct socket *so)
5287e5bf684SAlexander V. Chernikov {
5297e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
5307e5bf684SAlexander V. Chernikov MPASS(sotonlpcb(so) != NULL);
5317e5bf684SAlexander V. Chernikov return (ENOTCONN);
5327e5bf684SAlexander V. Chernikov }
5337e5bf684SAlexander V. Chernikov
5347e5bf684SAlexander V. Chernikov static int
nl_sockaddr(struct socket * so,struct sockaddr * sa)5350fac350cSGleb Smirnoff nl_sockaddr(struct socket *so, struct sockaddr *sa)
5367e5bf684SAlexander V. Chernikov {
5377e5bf684SAlexander V. Chernikov
5380fac350cSGleb Smirnoff *(struct sockaddr_nl *)sa = (struct sockaddr_nl ){
5397e5bf684SAlexander V. Chernikov /* TODO: set other fields */
5400fac350cSGleb Smirnoff .nl_len = sizeof(struct sockaddr_nl),
5410fac350cSGleb Smirnoff .nl_family = AF_NETLINK,
5420fac350cSGleb Smirnoff .nl_pid = sotonlpcb(so)->nl_port,
5430fac350cSGleb Smirnoff };
5440fac350cSGleb Smirnoff
5457e5bf684SAlexander V. Chernikov return (0);
5467e5bf684SAlexander V. Chernikov }
5477e5bf684SAlexander V. Chernikov
5487e5bf684SAlexander V. Chernikov static int
nl_sosend(struct socket * so,struct sockaddr * addr,struct uio * uio,struct mbuf * m,struct mbuf * control,int flags,struct thread * td)549660bd40aSGleb Smirnoff nl_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
550660bd40aSGleb Smirnoff struct mbuf *m, struct mbuf *control, int flags, struct thread *td)
5517e5bf684SAlexander V. Chernikov {
552660bd40aSGleb Smirnoff struct nlpcb *nlp = sotonlpcb(so);
553660bd40aSGleb Smirnoff struct sockbuf *sb = &so->so_snd;
554660bd40aSGleb Smirnoff struct nl_buf *nb;
555f1c6edbaSGleb Smirnoff size_t len;
556660bd40aSGleb Smirnoff int error;
5577e5bf684SAlexander V. Chernikov
558660bd40aSGleb Smirnoff MPASS(m == NULL && uio != NULL);
5597e5bf684SAlexander V. Chernikov
5607e5bf684SAlexander V. Chernikov if (__predict_false(control != NULL)) {
5617e5bf684SAlexander V. Chernikov m_freem(control);
5627e5bf684SAlexander V. Chernikov return (EINVAL);
5637e5bf684SAlexander V. Chernikov }
564660bd40aSGleb Smirnoff
565660bd40aSGleb Smirnoff if (__predict_false(flags & MSG_OOB)) /* XXXGL: or just ignore? */
566660bd40aSGleb Smirnoff return (EOPNOTSUPP);
567660bd40aSGleb Smirnoff
568660bd40aSGleb Smirnoff if (__predict_false(uio->uio_resid < sizeof(struct nlmsghdr)))
569660bd40aSGleb Smirnoff return (ENOBUFS); /* XXXGL: any better error? */
570660bd40aSGleb Smirnoff
571660bd40aSGleb Smirnoff error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
572660bd40aSGleb Smirnoff if (error)
573660bd40aSGleb Smirnoff return (error);
574660bd40aSGleb Smirnoff
575660bd40aSGleb Smirnoff len = roundup2(uio->uio_resid, 8) + SCRATCH_BUFFER_SIZE;
576660bd40aSGleb Smirnoff if (nlp->nl_linux)
577660bd40aSGleb Smirnoff len += roundup2(uio->uio_resid, 8);
57817083b94SGleb Smirnoff nb = nl_buf_alloc(len, M_WAITOK);
579660bd40aSGleb Smirnoff nb->datalen = uio->uio_resid;
580660bd40aSGleb Smirnoff error = uiomove(&nb->data[0], uio->uio_resid, uio);
581660bd40aSGleb Smirnoff if (__predict_false(error))
582660bd40aSGleb Smirnoff goto out;
583660bd40aSGleb Smirnoff
584*ea03febeSGleb Smirnoff NL_LOG(LOG_DEBUG2, "sending message to kernel %u bytes", nb->datalen);
585*ea03febeSGleb Smirnoff
586660bd40aSGleb Smirnoff SOCK_SENDBUF_LOCK(so);
587660bd40aSGleb Smirnoff restart:
588660bd40aSGleb Smirnoff if (sb->sb_hiwat - sb->sb_ccc >= nb->datalen) {
589660bd40aSGleb Smirnoff TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq);
590660bd40aSGleb Smirnoff sb->sb_acc += nb->datalen;
591660bd40aSGleb Smirnoff sb->sb_ccc += nb->datalen;
592660bd40aSGleb Smirnoff nb = NULL;
593660bd40aSGleb Smirnoff } else if ((so->so_state & SS_NBIO) ||
594660bd40aSGleb Smirnoff (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
595660bd40aSGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
596660bd40aSGleb Smirnoff error = EWOULDBLOCK;
597660bd40aSGleb Smirnoff goto out;
598660bd40aSGleb Smirnoff } else {
599660bd40aSGleb Smirnoff if ((error = sbwait(so, SO_SND)) != 0) {
600660bd40aSGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
601660bd40aSGleb Smirnoff goto out;
602660bd40aSGleb Smirnoff } else
603660bd40aSGleb Smirnoff goto restart;
604660bd40aSGleb Smirnoff }
605660bd40aSGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
606660bd40aSGleb Smirnoff
607660bd40aSGleb Smirnoff if (nb == NULL) {
608*ea03febeSGleb Smirnoff NL_LOG(LOG_DEBUG3, "success");
609660bd40aSGleb Smirnoff NLP_LOCK(nlp);
610660bd40aSGleb Smirnoff nl_schedule_taskqueue(nlp);
611660bd40aSGleb Smirnoff NLP_UNLOCK(nlp);
6127e5bf684SAlexander V. Chernikov }
6137e5bf684SAlexander V. Chernikov
614660bd40aSGleb Smirnoff out:
615660bd40aSGleb Smirnoff SOCK_IO_SEND_UNLOCK(so);
616*ea03febeSGleb Smirnoff if (nb != NULL) {
617*ea03febeSGleb Smirnoff NL_LOG(LOG_DEBUG3, "failure, error %d", error);
61817083b94SGleb Smirnoff nl_buf_free(nb);
619*ea03febeSGleb Smirnoff }
620660bd40aSGleb Smirnoff return (error);
6217e5bf684SAlexander V. Chernikov }
6227e5bf684SAlexander V. Chernikov
623ff5ad900SGleb Smirnoff /* Create control data for recvmsg(2) on Netlink socket. */
624ff5ad900SGleb Smirnoff static struct mbuf *
nl_createcontrol(struct nlpcb * nlp)625ff5ad900SGleb Smirnoff nl_createcontrol(struct nlpcb *nlp)
626ff5ad900SGleb Smirnoff {
627ff5ad900SGleb Smirnoff struct {
628ff5ad900SGleb Smirnoff struct nlattr nla;
629ff5ad900SGleb Smirnoff uint32_t val;
630ff5ad900SGleb Smirnoff } data[] = {
631ff5ad900SGleb Smirnoff {
632ff5ad900SGleb Smirnoff .nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t),
633ff5ad900SGleb Smirnoff .nla.nla_type = NLMSGINFO_ATTR_PROCESS_ID,
634ff5ad900SGleb Smirnoff .val = nlp->nl_process_id,
635ff5ad900SGleb Smirnoff },
636ff5ad900SGleb Smirnoff {
637ff5ad900SGleb Smirnoff .nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t),
638ff5ad900SGleb Smirnoff .nla.nla_type = NLMSGINFO_ATTR_PORT_ID,
639ff5ad900SGleb Smirnoff .val = nlp->nl_port,
640ff5ad900SGleb Smirnoff },
641ff5ad900SGleb Smirnoff };
642ff5ad900SGleb Smirnoff
643ff5ad900SGleb Smirnoff return (sbcreatecontrol(data, sizeof(data), NETLINK_MSG_INFO,
644ff5ad900SGleb Smirnoff SOL_NETLINK, M_WAITOK));
645ff5ad900SGleb Smirnoff }
646ff5ad900SGleb Smirnoff
/*
 * pr_soreceive handler for Netlink sockets: copy queued Netlink messages
 * from the receive buffer's nl_buf queue into the caller's uio.
 *
 * so       - socket to read from
 * psa      - if not NULL, receives a copy of a source address with
 *            nl_pid 0
 * uio      - destination of the data; must not be NULL (asserted)
 * mp       - must be NULL (asserted)
 * controlp - if not NULL and NLF_MSG_INFO is set on the PCB, receives the
 *            mbuf built by nl_createcontrol()
 * flagsp   - optional in/out MSG_* flags.  MSG_PEEK, MSG_TRUNC,
 *            MSG_DONTWAIT and MSG_NBIO are examined; MSG_TRUNC is OR-ed in
 *            when the first message did not fit into the supplied buffer.
 *
 * Returns 0 or an errno value: EWOULDBLOCK when the queue is empty and the
 * request is non-blocking, or an error from SOCK_IO_RECV_LOCK(), sbwait()
 * or uiomove().
 */
6477e5bf684SAlexander V. Chernikov static int
nl_soreceive(struct socket * so,struct sockaddr ** psa,struct uio * uio,struct mbuf ** mp,struct mbuf ** controlp,int * flagsp)64817083b94SGleb Smirnoff nl_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
64917083b94SGleb Smirnoff struct mbuf **mp, struct mbuf **controlp, int *flagsp)
6507e5bf684SAlexander V. Chernikov {
65117083b94SGleb Smirnoff static const struct sockaddr_nl nl_empty_src = {
65217083b94SGleb Smirnoff .nl_len = sizeof(struct sockaddr_nl),
65317083b94SGleb Smirnoff .nl_family = PF_NETLINK,
65417083b94SGleb Smirnoff .nl_pid = 0 /* comes from the kernel */
65517083b94SGleb Smirnoff };
65617083b94SGleb Smirnoff struct sockbuf *sb = &so->so_rcv;
657ff5ad900SGleb Smirnoff struct nlpcb *nlp = sotonlpcb(so);
6587e19c018SGleb Smirnoff struct nl_buf *first, *last, *nb, *next;
6597e19c018SGleb Smirnoff struct nlmsghdr *hdr;
66017083b94SGleb Smirnoff int flags, error;
6617e19c018SGleb Smirnoff u_int len, overflow, partoff, partlen, msgrcv, datalen;
66217083b94SGleb Smirnoff bool nonblock, trunc, peek;
66317083b94SGleb Smirnoff
66417083b94SGleb Smirnoff MPASS(mp == NULL && uio != NULL);
66517083b94SGleb Smirnoff
6667e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
66717083b94SGleb Smirnoff
66817083b94SGleb Smirnoff if (psa != NULL)
66917083b94SGleb Smirnoff *psa = sodupsockaddr((const struct sockaddr *)&nl_empty_src,
67017083b94SGleb Smirnoff M_WAITOK);
67117083b94SGleb Smirnoff
672ff5ad900SGleb Smirnoff if (controlp != NULL && (nlp->nl_flags & NLF_MSG_INFO))
673ff5ad900SGleb Smirnoff *controlp = nl_createcontrol(nlp);
674ff5ad900SGleb Smirnoff
/*
 * A MSG_TRUNC passed in by the caller is remembered in 'trunc' and
 * stripped from 'flags', so 'flags' carries MSG_TRUNC only when it is
 * set below as an output indication.
 */
67517083b94SGleb Smirnoff flags = flagsp != NULL ? *flagsp & ~MSG_TRUNC : 0;
67617083b94SGleb Smirnoff trunc = flagsp != NULL ? *flagsp & MSG_TRUNC : false;
67717083b94SGleb Smirnoff nonblock = (so->so_state & SS_NBIO) ||
67817083b94SGleb Smirnoff (flags & (MSG_DONTWAIT | MSG_NBIO));
67917083b94SGleb Smirnoff peek = flags & MSG_PEEK;
68017083b94SGleb Smirnoff
68117083b94SGleb Smirnoff error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
68217083b94SGleb Smirnoff if (__predict_false(error))
68317083b94SGleb Smirnoff return (error);
68417083b94SGleb Smirnoff
6857e19c018SGleb Smirnoff len = 0;
6867e19c018SGleb Smirnoff overflow = 0;
6877e19c018SGleb Smirnoff msgrcv = 0;
6887e19c018SGleb Smirnoff datalen = 0;
6897e19c018SGleb Smirnoff
69017083b94SGleb Smirnoff SOCK_RECVBUF_LOCK(so);
/* Wait for the first nl_buf to be queued, unless non-blocking. */
6917e19c018SGleb Smirnoff while ((first = TAILQ_FIRST(&sb->nl_queue)) == NULL) {
69217083b94SGleb Smirnoff if (nonblock) {
69317083b94SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
69417083b94SGleb Smirnoff SOCK_IO_RECV_UNLOCK(so);
69517083b94SGleb Smirnoff return (EWOULDBLOCK);
69617083b94SGleb Smirnoff }
69717083b94SGleb Smirnoff error = sbwait(so, SO_RCV);
69817083b94SGleb Smirnoff if (error) {
69917083b94SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
70017083b94SGleb Smirnoff SOCK_IO_RECV_UNLOCK(so);
70117083b94SGleb Smirnoff return (error);
70217083b94SGleb Smirnoff }
70317083b94SGleb Smirnoff }
70417083b94SGleb Smirnoff
70517083b94SGleb Smirnoff /*
7067e19c018SGleb Smirnoff * Netlink socket buffer consists of a queue of nl_bufs, but for the
7077e19c018SGleb Smirnoff * userland there should be no boundaries. However, there are Netlink
7087e19c018SGleb Smirnoff * messages, that shouldn't be split. Internal invariant is that a
7097e19c018SGleb Smirnoff * message never spans two nl_bufs.
7107e19c018SGleb Smirnoff * If a large userland buffer is provided, we would traverse the queue
7117e19c018SGleb Smirnoff * until either queue end is reached or the buffer is fulfilled. If
7127e19c018SGleb Smirnoff * an application provides a buffer that isn't able to fit a single
7137e19c018SGleb Smirnoff * message, we would truncate it and lose its tail. This is the only
7147e19c018SGleb Smirnoff * condition where we would lose data. If buffer is able to fit at
7157e19c018SGleb Smirnoff * least one message, we would return it and won't truncate the next.
7167e19c018SGleb Smirnoff *
7177e19c018SGleb Smirnoff * We use same code for normal and MSG_PEEK case. At first queue pass
7187e19c018SGleb Smirnoff * we scan nl_bufs and count length. In case we can read entire buffer
7197e19c018SGleb Smirnoff * at one write everything is trivial. In case we can not, we save
7207e19c018SGleb Smirnoff * pointer to the last (or partial) nl_buf and in the !peek case we
7217e19c018SGleb Smirnoff * split the queue into two pieces. We can safely drop the queue lock,
7227e19c018SGleb Smirnoff * as kernel would only append nl_bufs to the end of the queue, and
7237e19c018SGleb Smirnoff * we are the exclusive owner of queue beginning due to sleepable lock.
7247e19c018SGleb Smirnoff * At the second pass we copy data out and in !peek case free nl_bufs.
72517083b94SGleb Smirnoff */
7267e19c018SGleb Smirnoff TAILQ_FOREACH(nb, &sb->nl_queue, tailq) {
7277e19c018SGleb Smirnoff u_int offset;
7287e19c018SGleb Smirnoff
729f75d7facSGleb Smirnoff MPASS(nb->offset < nb->datalen);
7307e19c018SGleb Smirnoff offset = nb->offset;
7317e19c018SGleb Smirnoff while (offset < nb->datalen) {
7327e19c018SGleb Smirnoff hdr = (struct nlmsghdr *)&nb->data[offset];
733d9b1f6fbSGleb Smirnoff MPASS(nb->offset + hdr->nlmsg_len <= nb->datalen);
/*
 * This message does not fit into what is left of the user
 * buffer after the 'len' bytes already counted.
 */
7347e19c018SGleb Smirnoff if (uio->uio_resid < len + hdr->nlmsg_len) {
7357e19c018SGleb Smirnoff overflow = len + hdr->nlmsg_len -
7367e19c018SGleb Smirnoff uio->uio_resid;
7377e19c018SGleb Smirnoff partoff = nb->offset;
/*
 * Earlier messages of this nl_buf did fit: hand them out as a
 * partial chunk and keep the rest of the nl_buf queued.
 */
7387e19c018SGleb Smirnoff if (offset > partoff) {
7397e19c018SGleb Smirnoff partlen = offset - partoff;
7407e19c018SGleb Smirnoff if (!peek) {
7417e19c018SGleb Smirnoff nb->offset = offset;
7427e19c018SGleb Smirnoff datalen += partlen;
7437e19c018SGleb Smirnoff }
/*
 * Nothing has been counted yet, so not even the very first
 * message fits: it is truncated to the size of the user buffer.
 */
7447e19c018SGleb Smirnoff } else if (len == 0 && uio->uio_resid > 0) {
7457e19c018SGleb Smirnoff flags |= MSG_TRUNC;
7467e19c018SGleb Smirnoff partlen = uio->uio_resid;
747e6f4c314SGleb Smirnoff if (peek)
748e6f4c314SGleb Smirnoff goto nospace;
7497e19c018SGleb Smirnoff datalen += hdr->nlmsg_len;
750e6f4c314SGleb Smirnoff if (nb->offset + hdr->nlmsg_len ==
751e6f4c314SGleb Smirnoff nb->datalen) {
752e6f4c314SGleb Smirnoff /*
753e6f4c314SGleb Smirnoff * Avoid leaving empty nb.
754e6f4c314SGleb Smirnoff * Process last nb normally.
755e6f4c314SGleb Smirnoff * Trust uiomove() to care
756e6f4c314SGleb Smirnoff * about negative uio_resid.
757e6f4c314SGleb Smirnoff */
758e6f4c314SGleb Smirnoff nb = TAILQ_NEXT(nb, tailq);
759e6f4c314SGleb Smirnoff overflow = 0;
760e6f4c314SGleb Smirnoff partlen = 0;
761e6f4c314SGleb Smirnoff } else
762e6f4c314SGleb Smirnoff nb->offset += hdr->nlmsg_len;
763e6f4c314SGleb Smirnoff msgrcv++;
7647e19c018SGleb Smirnoff } else
7657e19c018SGleb Smirnoff partlen = 0;
7667e19c018SGleb Smirnoff goto nospace;
7677e19c018SGleb Smirnoff }
7687e19c018SGleb Smirnoff len += hdr->nlmsg_len;
7697e19c018SGleb Smirnoff offset += hdr->nlmsg_len;
7707e19c018SGleb Smirnoff MPASS(offset <= nb->buflen);
7717e19c018SGleb Smirnoff msgrcv++;
7727e19c018SGleb Smirnoff }
7737e19c018SGleb Smirnoff MPASS(offset == nb->datalen);
774d9b1f6fbSGleb Smirnoff datalen += nb->datalen - nb->offset;
7757e19c018SGleb Smirnoff }
7767e19c018SGleb Smirnoff nospace:
/*
 * 'last' is the first nl_buf that is not handed out in full, or NULL
 * when the whole queue was scanned without running out of space.
 */
7777e19c018SGleb Smirnoff last = nb;
/*
 * When consuming, everything in front of 'last' is cut off the queue
 * (the whole queue if 'last' is NULL) and the buffer accounting is
 * reduced by the number of bytes taken.
 */
7787e19c018SGleb Smirnoff if (!peek) {
7797e19c018SGleb Smirnoff if (last == NULL)
7807e19c018SGleb Smirnoff TAILQ_INIT(&sb->nl_queue);
7817e19c018SGleb Smirnoff else {
7827e19c018SGleb Smirnoff /* XXXGL: create TAILQ_SPLIT */
7837e19c018SGleb Smirnoff TAILQ_FIRST(&sb->nl_queue) = last;
7847e19c018SGleb Smirnoff last->tailq.tqe_prev = &TAILQ_FIRST(&sb->nl_queue);
7857e19c018SGleb Smirnoff }
786d9b1f6fbSGleb Smirnoff MPASS(sb->sb_acc >= datalen);
7877e19c018SGleb Smirnoff sb->sb_acc -= datalen;
7887e19c018SGleb Smirnoff sb->sb_ccc -= datalen;
78917083b94SGleb Smirnoff }
79017083b94SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
79117083b94SGleb Smirnoff
/*
 * Second pass: copy out the nl_bufs in front of 'last'.  After a
 * uiomove() failure the copying stops, but when not peeking the
 * remaining nl_bufs are still freed.
 */
7927e19c018SGleb Smirnoff for (nb = first; nb != last; nb = next) {
7937e19c018SGleb Smirnoff next = TAILQ_NEXT(nb, tailq);
7947e19c018SGleb Smirnoff if (__predict_true(error == 0))
7957e19c018SGleb Smirnoff error = uiomove(&nb->data[nb->offset],
7967e19c018SGleb Smirnoff (int)(nb->datalen - nb->offset), uio);
7977e19c018SGleb Smirnoff if (!peek)
79817083b94SGleb Smirnoff nl_buf_free(nb);
7997e19c018SGleb Smirnoff }
/* Here nb == last: copy the partial chunk of it, if there is one. */
8007e19c018SGleb Smirnoff if (last != NULL && partlen > 0 && __predict_true(error == 0))
8017e19c018SGleb Smirnoff error = uiomove(&nb->data[partoff], (int)partlen, uio);
8027e19c018SGleb Smirnoff
/*
 * If the caller passed MSG_TRUNC, the number of bytes that did not
 * fit is subtracted from uio_resid, which makes it negative.
 */
8037e19c018SGleb Smirnoff if (trunc && overflow > 0) {
8047e19c018SGleb Smirnoff uio->uio_resid -= overflow;
8057e19c018SGleb Smirnoff MPASS(uio->uio_resid < 0);
8067e19c018SGleb Smirnoff } else
8077e19c018SGleb Smirnoff MPASS(uio->uio_resid >= 0);
80817083b94SGleb Smirnoff
80917083b94SGleb Smirnoff if (uio->uio_td)
8107e19c018SGleb Smirnoff uio->uio_td->td_ru.ru_msgrcv += msgrcv;
81117083b94SGleb Smirnoff
81217083b94SGleb Smirnoff if (flagsp != NULL)
81317083b94SGleb Smirnoff *flagsp |= flags;
81417083b94SGleb Smirnoff
81517083b94SGleb Smirnoff SOCK_IO_RECV_UNLOCK(so);
8167e5bf684SAlexander V. Chernikov
8177e5bf684SAlexander V. Chernikov nl_on_transmit(sotonlpcb(so));
8187e5bf684SAlexander V. Chernikov
81917083b94SGleb Smirnoff return (error);
8207e5bf684SAlexander V. Chernikov }
8217e5bf684SAlexander V. Chernikov
8227e5bf684SAlexander V. Chernikov static int
nl_getoptflag(int sopt_name)8237e5bf684SAlexander V. Chernikov nl_getoptflag(int sopt_name)
8247e5bf684SAlexander V. Chernikov {
8257e5bf684SAlexander V. Chernikov switch (sopt_name) {
8267e5bf684SAlexander V. Chernikov case NETLINK_CAP_ACK:
8277e5bf684SAlexander V. Chernikov return (NLF_CAP_ACK);
8287e5bf684SAlexander V. Chernikov case NETLINK_EXT_ACK:
8297e5bf684SAlexander V. Chernikov return (NLF_EXT_ACK);
8307e5bf684SAlexander V. Chernikov case NETLINK_GET_STRICT_CHK:
8317e5bf684SAlexander V. Chernikov return (NLF_STRICT);
83230d7e724SAlexander V. Chernikov case NETLINK_MSG_INFO:
83330d7e724SAlexander V. Chernikov return (NLF_MSG_INFO);
8347e5bf684SAlexander V. Chernikov }
8357e5bf684SAlexander V. Chernikov
8367e5bf684SAlexander V. Chernikov return (0);
8377e5bf684SAlexander V. Chernikov }
8387e5bf684SAlexander V. Chernikov
8397e5bf684SAlexander V. Chernikov static int
nl_ctloutput(struct socket * so,struct sockopt * sopt)8407e5bf684SAlexander V. Chernikov nl_ctloutput(struct socket *so, struct sockopt *sopt)
8417e5bf684SAlexander V. Chernikov {
8427e5bf684SAlexander V. Chernikov struct nlpcb *nlp = sotonlpcb(so);
8437e5bf684SAlexander V. Chernikov uint32_t flag;
8447e5bf684SAlexander V. Chernikov int optval, error = 0;
8457e5bf684SAlexander V. Chernikov NLCTL_TRACKER;
8467e5bf684SAlexander V. Chernikov
8477e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "%ssockopt(%p, %d)", (sopt->sopt_dir) ? "set" : "get",
8487e5bf684SAlexander V. Chernikov so, sopt->sopt_name);
8497e5bf684SAlexander V. Chernikov
8507e5bf684SAlexander V. Chernikov switch (sopt->sopt_dir) {
8517e5bf684SAlexander V. Chernikov case SOPT_SET:
8527e5bf684SAlexander V. Chernikov switch (sopt->sopt_name) {
8537e5bf684SAlexander V. Chernikov case NETLINK_ADD_MEMBERSHIP:
8547e5bf684SAlexander V. Chernikov case NETLINK_DROP_MEMBERSHIP:
8553f70fca9SAlexander V. Chernikov error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
8563f70fca9SAlexander V. Chernikov if (error != 0)
8573f70fca9SAlexander V. Chernikov break;
8584dfd380eSAlexander V. Chernikov if (optval <= 0 || optval >= NLP_MAX_GROUPS) {
8597e5bf684SAlexander V. Chernikov error = ERANGE;
8607e5bf684SAlexander V. Chernikov break;
8617e5bf684SAlexander V. Chernikov }
8624dfd380eSAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "ADD/DEL group %d", (uint32_t)optval);
8637e5bf684SAlexander V. Chernikov
864841dcdcdSGleb Smirnoff NLCTL_WLOCK();
8657e5bf684SAlexander V. Chernikov if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP)
866164dec88SGleb Smirnoff nlp_join_group(nlp, optval);
8677e5bf684SAlexander V. Chernikov else
868164dec88SGleb Smirnoff nlp_leave_group(nlp, optval);
869841dcdcdSGleb Smirnoff NLCTL_WUNLOCK();
8707e5bf684SAlexander V. Chernikov break;
8717e5bf684SAlexander V. Chernikov case NETLINK_CAP_ACK:
8727e5bf684SAlexander V. Chernikov case NETLINK_EXT_ACK:
8737e5bf684SAlexander V. Chernikov case NETLINK_GET_STRICT_CHK:
87430d7e724SAlexander V. Chernikov case NETLINK_MSG_INFO:
8753f70fca9SAlexander V. Chernikov error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval));
8763f70fca9SAlexander V. Chernikov if (error != 0)
8773f70fca9SAlexander V. Chernikov break;
8787e5bf684SAlexander V. Chernikov
8797e5bf684SAlexander V. Chernikov flag = nl_getoptflag(sopt->sopt_name);
8807e5bf684SAlexander V. Chernikov
88130d7e724SAlexander V. Chernikov if ((flag == NLF_MSG_INFO) && nlp->nl_linux) {
88230d7e724SAlexander V. Chernikov error = EINVAL;
88330d7e724SAlexander V. Chernikov break;
88430d7e724SAlexander V. Chernikov }
88530d7e724SAlexander V. Chernikov
886841dcdcdSGleb Smirnoff NLCTL_WLOCK();
8877e5bf684SAlexander V. Chernikov if (optval != 0)
8887e5bf684SAlexander V. Chernikov nlp->nl_flags |= flag;
8897e5bf684SAlexander V. Chernikov else
8907e5bf684SAlexander V. Chernikov nlp->nl_flags &= ~flag;
891841dcdcdSGleb Smirnoff NLCTL_WUNLOCK();
8927e5bf684SAlexander V. Chernikov break;
8937e5bf684SAlexander V. Chernikov default:
8947e5bf684SAlexander V. Chernikov error = ENOPROTOOPT;
8957e5bf684SAlexander V. Chernikov }
8967e5bf684SAlexander V. Chernikov break;
8977e5bf684SAlexander V. Chernikov case SOPT_GET:
8987e5bf684SAlexander V. Chernikov switch (sopt->sopt_name) {
8997e5bf684SAlexander V. Chernikov case NETLINK_LIST_MEMBERSHIPS:
900841dcdcdSGleb Smirnoff NLCTL_RLOCK();
901164dec88SGleb Smirnoff optval = nlp_get_groups_compat(nlp);
902841dcdcdSGleb Smirnoff NLCTL_RUNLOCK();
9037e5bf684SAlexander V. Chernikov error = sooptcopyout(sopt, &optval, sizeof(optval));
9047e5bf684SAlexander V. Chernikov break;
9057e5bf684SAlexander V. Chernikov case NETLINK_CAP_ACK:
9067e5bf684SAlexander V. Chernikov case NETLINK_EXT_ACK:
9077e5bf684SAlexander V. Chernikov case NETLINK_GET_STRICT_CHK:
90830d7e724SAlexander V. Chernikov case NETLINK_MSG_INFO:
909841dcdcdSGleb Smirnoff NLCTL_RLOCK();
9107e5bf684SAlexander V. Chernikov optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0;
911841dcdcdSGleb Smirnoff NLCTL_RUNLOCK();
9127e5bf684SAlexander V. Chernikov error = sooptcopyout(sopt, &optval, sizeof(optval));
9137e5bf684SAlexander V. Chernikov break;
9147e5bf684SAlexander V. Chernikov default:
9157e5bf684SAlexander V. Chernikov error = ENOPROTOOPT;
9167e5bf684SAlexander V. Chernikov }
9177e5bf684SAlexander V. Chernikov break;
9187e5bf684SAlexander V. Chernikov default:
9197e5bf684SAlexander V. Chernikov error = ENOPROTOOPT;
9207e5bf684SAlexander V. Chernikov }
9217e5bf684SAlexander V. Chernikov
9227e5bf684SAlexander V. Chernikov return (error);
9237e5bf684SAlexander V. Chernikov }
9247e5bf684SAlexander V. Chernikov
9257e5bf684SAlexander V. Chernikov static int
sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS)92628a5d88fSAlexander V. Chernikov sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS)
92728a5d88fSAlexander V. Chernikov {
92828a5d88fSAlexander V. Chernikov int error = 0;
92928a5d88fSAlexander V. Chernikov u_long tmp_maxsockbuf = nl_maxsockbuf;
93028a5d88fSAlexander V. Chernikov
93128a5d88fSAlexander V. Chernikov error = sysctl_handle_long(oidp, &tmp_maxsockbuf, arg2, req);
93228a5d88fSAlexander V. Chernikov if (error || !req->newptr)
93328a5d88fSAlexander V. Chernikov return (error);
93428a5d88fSAlexander V. Chernikov if (tmp_maxsockbuf < MSIZE + MCLBYTES)
93528a5d88fSAlexander V. Chernikov return (EINVAL);
93628a5d88fSAlexander V. Chernikov nl_maxsockbuf = tmp_maxsockbuf;
93728a5d88fSAlexander V. Chernikov
93828a5d88fSAlexander V. Chernikov return (0);
93928a5d88fSAlexander V. Chernikov }
94028a5d88fSAlexander V. Chernikov
94128a5d88fSAlexander V. Chernikov static int
nl_setsbopt(struct socket * so,struct sockopt * sopt)9427e5bf684SAlexander V. Chernikov nl_setsbopt(struct socket *so, struct sockopt *sopt)
9437e5bf684SAlexander V. Chernikov {
9447e5bf684SAlexander V. Chernikov int error, optval;
9457e5bf684SAlexander V. Chernikov bool result;
9467e5bf684SAlexander V. Chernikov
9477e5bf684SAlexander V. Chernikov if (sopt->sopt_name != SO_RCVBUF)
9487e5bf684SAlexander V. Chernikov return (sbsetopt(so, sopt));
9497e5bf684SAlexander V. Chernikov
9507e5bf684SAlexander V. Chernikov /* Allow to override max buffer size in certain conditions */
9517e5bf684SAlexander V. Chernikov
9527e5bf684SAlexander V. Chernikov error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
9537e5bf684SAlexander V. Chernikov if (error != 0)
9547e5bf684SAlexander V. Chernikov return (error);
9557e5bf684SAlexander V. Chernikov NL_LOG(LOG_DEBUG2, "socket %p, PID %d, SO_RCVBUF=%d", so, curproc->p_pid, optval);
9567e5bf684SAlexander V. Chernikov if (optval > sb_max_adj) {
9577e5bf684SAlexander V. Chernikov if (priv_check(curthread, PRIV_NET_ROUTE) != 0)
9587e5bf684SAlexander V. Chernikov return (EPERM);
9597e5bf684SAlexander V. Chernikov }
9607e5bf684SAlexander V. Chernikov
9617e5bf684SAlexander V. Chernikov SOCK_RECVBUF_LOCK(so);
9627e5bf684SAlexander V. Chernikov result = sbreserve_locked_limit(so, SO_RCV, optval, nl_maxsockbuf, curthread);
9637e5bf684SAlexander V. Chernikov SOCK_RECVBUF_UNLOCK(so);
9647e5bf684SAlexander V. Chernikov
9657e5bf684SAlexander V. Chernikov return (result ? 0 : ENOBUFS);
9667e5bf684SAlexander V. Chernikov }
9677e5bf684SAlexander V. Chernikov
/*
 * Protocol switch members shared by every Netlink socket type.  The macro
 * expands to a list of designated initializers and is meant to be placed
 * inside a struct protosw initializer after the type-specific members.
 */
#define	NETLINK_PROTOSW						\
	.pr_flags = PR_ATOMIC | PR_ADDR | PR_SOCKBUF,		\
	.pr_attach = nl_attach,					\
	.pr_bind = nl_bind,					\
	.pr_connect = nl_connect,				\
	.pr_disconnect = nl_disconnect,				\
	.pr_close = nl_close,					\
	.pr_sosend = nl_sosend,					\
	.pr_soreceive = nl_soreceive,				\
	.pr_sockaddr = nl_sockaddr,				\
	.pr_ctloutput = nl_ctloutput,				\
	.pr_setsbopt = nl_setsbopt
9800079d177SAlexander V. Chernikov
9810079d177SAlexander V. Chernikov static struct protosw netlink_raw_sw = {
9820079d177SAlexander V. Chernikov .pr_type = SOCK_RAW,
9830079d177SAlexander V. Chernikov NETLINK_PROTOSW
9840079d177SAlexander V. Chernikov };
9850079d177SAlexander V. Chernikov
9860079d177SAlexander V. Chernikov static struct protosw netlink_dgram_sw = {
9870079d177SAlexander V. Chernikov .pr_type = SOCK_DGRAM,
9880079d177SAlexander V. Chernikov NETLINK_PROTOSW
9897e5bf684SAlexander V. Chernikov };
9907e5bf684SAlexander V. Chernikov
9917e5bf684SAlexander V. Chernikov static struct domain netlinkdomain = {
9927e5bf684SAlexander V. Chernikov .dom_family = PF_NETLINK,
9937e5bf684SAlexander V. Chernikov .dom_name = "netlink",
9947e5bf684SAlexander V. Chernikov .dom_flags = DOMF_UNLOADABLE,
9950079d177SAlexander V. Chernikov .dom_nprotosw = 2,
9960079d177SAlexander V. Chernikov .dom_protosw = { &netlink_raw_sw, &netlink_dgram_sw },
9977e5bf684SAlexander V. Chernikov };
9987e5bf684SAlexander V. Chernikov
9997e5bf684SAlexander V. Chernikov DOMAIN_SET(netlink);
1000