xref: /freebsd/sys/netinet/ip_output.c (revision 0ff2d00d2aa37cd883ffd8c7363dddef9cba267e)
1c398230bSWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1990, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
15fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
335d6d7e75SGleb Smirnoff #include "opt_inet.h"
346a800098SYoshinobu Inoue #include "opt_ipsec.h"
35b2e60773SJohn Baldwin #include "opt_kern_tls.h"
3653dcc544SMike Silbersack #include "opt_mbuf_stress_test.h"
3777a01441SJohn Baldwin #include "opt_ratelimit.h"
3857f60867SMark Johnston #include "opt_route.h"
399c423972SAdrian Chadd #include "opt_rss.h"
4077a01441SJohn Baldwin #include "opt_sctp.h"
41fbd1372aSJoerg Wunsch 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
4326f9a767SRodney W. Grimes #include <sys/systm.h>
44f0f6d643SLuigi Rizzo #include <sys/kernel.h>
45b2e60773SJohn Baldwin #include <sys/ktls.h>
46cc0a3c8cSAndrey V. Elsukov #include <sys/lock.h>
47df8bae1dSRodney W. Grimes #include <sys/malloc.h>
48df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
49acd3428bSRobert Watson #include <sys/priv.h>
50c26fe973SBjoern A. Zeeb #include <sys/proc.h>
51df8bae1dSRodney W. Grimes #include <sys/protosw.h>
5257f60867SMark Johnston #include <sys/sdt.h>
53df8bae1dSRodney W. Grimes #include <sys/socket.h>
54df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
559d9edc56SMike Silbersack #include <sys/sysctl.h>
56c26fe973SBjoern A. Zeeb #include <sys/ucred.h>
57df8bae1dSRodney W. Grimes 
58df8bae1dSRodney W. Grimes #include <net/if.h>
5976039bc8SGleb Smirnoff #include <net/if_var.h>
603d0d5b21SJustin Hibbits #include <net/if_private.h>
61868aabb4SRichard Scheffenegger #include <net/if_vlan_var.h>
623ef5e21dSQing Li #include <net/if_llatbl.h>
63868aabb4SRichard Scheffenegger #include <net/ethernet.h>
649b932e9eSAndre Oppermann #include <net/netisr.h>
65c21fd232SAndre Oppermann #include <net/pfil.h>
66df8bae1dSRodney W. Grimes #include <net/route.h>
67983066f0SAlexander V. Chernikov #include <net/route/nhop.h>
68b2bdc62aSAdrian Chadd #include <net/rss_config.h>
694b79449eSBjoern A. Zeeb #include <net/vnet.h>
70df8bae1dSRodney W. Grimes 
71df8bae1dSRodney W. Grimes #include <netinet/in.h>
726ca363ebSGleb Smirnoff #include <netinet/in_fib.h>
7357f60867SMark Johnston #include <netinet/in_kdtrace.h>
74df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip.h>
76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h>
77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
789c423972SAdrian Chadd #include <netinet/in_rss.h>
79df8bae1dSRodney W. Grimes #include <netinet/in_var.h>
80df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
81ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
821fdbfb90STom Jones 
831fdbfb90STom Jones #include <netinet/udp.h>
841fdbfb90STom Jones #include <netinet/udp_var.h>
851fdbfb90STom Jones 
8695033af9SMark Johnston #if defined(SCTP) || defined(SCTP_SUPPORT)
872f4afd21SRandall Stewart #include <netinet/sctp.h>
882f4afd21SRandall Stewart #include <netinet/sctp_crc32.h>
892f4afd21SRandall Stewart #endif
90df8bae1dSRodney W. Grimes 
91fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
926a800098SYoshinobu Inoue 
931dfcf0d2SAndre Oppermann #include <machine/in_cksum.h>
941dfcf0d2SAndre Oppermann 
95aed55708SRobert Watson #include <security/mac/mac_framework.h>
96aed55708SRobert Watson 
/*
 * Knob that exists only in MBUF_STRESS_TEST builds.  It is exported
 * read-write (CTLFLAG_RW) as "mbuf_frag_size" under the _net_inet_ip
 * sysctl node and defaults to 0.
 */
9753dcc544SMike Silbersack #ifdef MBUF_STRESS_TEST
9805b9d121SBjoern A. Zeeb static int mbuf_frag_size = 0;
999d9edc56SMike Silbersack SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
1009d9edc56SMike Silbersack 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
1019d9edc56SMike Silbersack #endif
1029d9edc56SMike Silbersack 
/*
 * Forward declaration.  ip_output() calls this as
 * ip_mloopback(ifp, m, hlen) when a multicast datagram is to be looped
 * back.
 */
103331dff07SAlexander V. Chernikov static void	ip_mloopback(struct ifnet *, const struct mbuf *, int);
104afed1b49SDarren Reed 
/*
 * Defined outside this file.  ip_output() consults it to decide on
 * multicast loopback when the caller supplied no multicast options
 * (imo == NULL).
 */
1058b889dbbSBruce M Simpson extern int in_mcast_loop;
10693e0e116SJulian Elischer 
107d9f2a782SErmal Luçi static inline int
10805fc9d78SKristof Provost ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,
10905fc9d78SKristof Provost     struct inpcb *inp, struct sockaddr_in *dst, int *fibnum, int *error)
110d9f2a782SErmal Luçi {
111d9f2a782SErmal Luçi 	struct m_tag *fwd_tag = NULL;
1128f980c01SMark Johnston 	struct mbuf *m;
113d9f2a782SErmal Luçi 	struct in_addr odst;
114d9f2a782SErmal Luçi 	struct ip *ip;
11505fc9d78SKristof Provost 
1168f980c01SMark Johnston 	m = *mp;
117d9f2a782SErmal Luçi 	ip = mtod(m, struct ip *);
118d9f2a782SErmal Luçi 
119d9f2a782SErmal Luçi 	/* Run through list of hooks for output packets. */
120d9f2a782SErmal Luçi 	odst.s_addr = ip->ip_dst.s_addr;
121a2256150SGleb Smirnoff 	switch (pfil_mbuf_out(V_inet_pfil_head, mp, ifp, inp)) {
122b252313fSGleb Smirnoff 	case PFIL_DROPPED:
1238d5c56daSGleb Smirnoff 		*error = EACCES;
124b252313fSGleb Smirnoff 		/* FALLTHROUGH */
125b252313fSGleb Smirnoff 	case PFIL_CONSUMED:
126d9f2a782SErmal Luçi 		return 1; /* Finished */
127b252313fSGleb Smirnoff 	case PFIL_PASS:
128b252313fSGleb Smirnoff 		*error = 0;
129b252313fSGleb Smirnoff 	}
130b252313fSGleb Smirnoff 	m = *mp;
131d9f2a782SErmal Luçi 	ip = mtod(m, struct ip *);
132d9f2a782SErmal Luçi 
133d9f2a782SErmal Luçi 	/* See if destination IP address was changed by packet filter. */
134d9f2a782SErmal Luçi 	if (odst.s_addr != ip->ip_dst.s_addr) {
135d9f2a782SErmal Luçi 		m->m_flags |= M_SKIP_FIREWALL;
136d9f2a782SErmal Luçi 		/* If destination is now ourself drop to ip_input(). */
137d9f2a782SErmal Luçi 		if (in_localip(ip->ip_dst)) {
138d9f2a782SErmal Luçi 			m->m_flags |= M_FASTFWD_OURS;
139d9f2a782SErmal Luçi 			if (m->m_pkthdr.rcvif == NULL)
140d9f2a782SErmal Luçi 				m->m_pkthdr.rcvif = V_loif;
141d9f2a782SErmal Luçi 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
142d9f2a782SErmal Luçi 				m->m_pkthdr.csum_flags |=
143d9f2a782SErmal Luçi 					CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
144d9f2a782SErmal Luçi 				m->m_pkthdr.csum_data = 0xffff;
145d9f2a782SErmal Luçi 			}
146d9f2a782SErmal Luçi 			m->m_pkthdr.csum_flags |=
147d9f2a782SErmal Luçi 				CSUM_IP_CHECKED | CSUM_IP_VALID;
14895033af9SMark Johnston #if defined(SCTP) || defined(SCTP_SUPPORT)
149d9f2a782SErmal Luçi 			if (m->m_pkthdr.csum_flags & CSUM_SCTP)
150d9f2a782SErmal Luçi 				m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
151d9f2a782SErmal Luçi #endif
152d9f2a782SErmal Luçi 			*error = netisr_queue(NETISR_IP, m);
153d9f2a782SErmal Luçi 			return 1; /* Finished */
154d9f2a782SErmal Luçi 		}
155d9f2a782SErmal Luçi 
156d9f2a782SErmal Luçi 		bzero(dst, sizeof(*dst));
157d9f2a782SErmal Luçi 		dst->sin_family = AF_INET;
158d9f2a782SErmal Luçi 		dst->sin_len = sizeof(*dst);
159d9f2a782SErmal Luçi 		dst->sin_addr = ip->ip_dst;
160d9f2a782SErmal Luçi 
161d9f2a782SErmal Luçi 		return -1; /* Reloop */
162d9f2a782SErmal Luçi 	}
163d9f2a782SErmal Luçi 	/* See if fib was changed by packet filter. */
164d9f2a782SErmal Luçi 	if ((*fibnum) != M_GETFIB(m)) {
165d9f2a782SErmal Luçi 		m->m_flags |= M_SKIP_FIREWALL;
166d9f2a782SErmal Luçi 		*fibnum = M_GETFIB(m);
167d9f2a782SErmal Luçi 		return -1; /* Reloop for FIB change */
168d9f2a782SErmal Luçi 	}
169d9f2a782SErmal Luçi 
170d9f2a782SErmal Luçi 	/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
171d9f2a782SErmal Luçi 	if (m->m_flags & M_FASTFWD_OURS) {
172d9f2a782SErmal Luçi 		if (m->m_pkthdr.rcvif == NULL)
173d9f2a782SErmal Luçi 			m->m_pkthdr.rcvif = V_loif;
174d9f2a782SErmal Luçi 		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
175d9f2a782SErmal Luçi 			m->m_pkthdr.csum_flags |=
176d9f2a782SErmal Luçi 				CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
177d9f2a782SErmal Luçi 			m->m_pkthdr.csum_data = 0xffff;
178d9f2a782SErmal Luçi 		}
17995033af9SMark Johnston #if defined(SCTP) || defined(SCTP_SUPPORT)
180d9f2a782SErmal Luçi 		if (m->m_pkthdr.csum_flags & CSUM_SCTP)
181d9f2a782SErmal Luçi 			m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
182d9f2a782SErmal Luçi #endif
183d9f2a782SErmal Luçi 		m->m_pkthdr.csum_flags |=
184d9f2a782SErmal Luçi 			CSUM_IP_CHECKED | CSUM_IP_VALID;
185d9f2a782SErmal Luçi 
186d9f2a782SErmal Luçi 		*error = netisr_queue(NETISR_IP, m);
187d9f2a782SErmal Luçi 		return 1; /* Finished */
188d9f2a782SErmal Luçi 	}
189d9f2a782SErmal Luçi 	/* Or forward to some other address? */
190d9f2a782SErmal Luçi 	if ((m->m_flags & M_IP_NEXTHOP) &&
191d9f2a782SErmal Luçi 	    ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) {
192d9f2a782SErmal Luçi 		bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
193d9f2a782SErmal Luçi 		m->m_flags |= M_SKIP_FIREWALL;
194d9f2a782SErmal Luçi 		m->m_flags &= ~M_IP_NEXTHOP;
195d9f2a782SErmal Luçi 		m_tag_delete(m, fwd_tag);
196d9f2a782SErmal Luçi 
197d9f2a782SErmal Luçi 		return -1; /* Reloop for CHANGE of dst */
198d9f2a782SErmal Luçi 	}
199d9f2a782SErmal Luçi 
200d9f2a782SErmal Luçi 	return 0;
201d9f2a782SErmal Luçi }
202d9f2a782SErmal Luçi 
203fb3bc596SJohn Baldwin static int
204fb3bc596SJohn Baldwin ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
20562e1a437SZhenlei Huang     const struct sockaddr *gw, struct route *ro, bool stamp_tag)
206fb3bc596SJohn Baldwin {
207b2e60773SJohn Baldwin #ifdef KERN_TLS
208b2e60773SJohn Baldwin 	struct ktls_session *tls = NULL;
209b2e60773SJohn Baldwin #endif
210fb3bc596SJohn Baldwin 	struct m_snd_tag *mst;
211fb3bc596SJohn Baldwin 	int error;
212fb3bc596SJohn Baldwin 
213fb3bc596SJohn Baldwin 	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
214fb3bc596SJohn Baldwin 	mst = NULL;
215fb3bc596SJohn Baldwin 
216b2e60773SJohn Baldwin #ifdef KERN_TLS
217b2e60773SJohn Baldwin 	/*
218b2e60773SJohn Baldwin 	 * If this is an unencrypted TLS record, save a reference to
219b2e60773SJohn Baldwin 	 * the record.  This local reference is used to call
220b2e60773SJohn Baldwin 	 * ktls_output_eagain after the mbuf has been freed (thus
221b2e60773SJohn Baldwin 	 * dropping the mbuf's reference) in if_output.
222b2e60773SJohn Baldwin 	 */
223b2e60773SJohn Baldwin 	if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) {
2247b6c99d0SGleb Smirnoff 		tls = ktls_hold(m->m_next->m_epg_tls);
225b2e60773SJohn Baldwin 		mst = tls->snd_tag;
226b2e60773SJohn Baldwin 
227b2e60773SJohn Baldwin 		/*
228b2e60773SJohn Baldwin 		 * If a TLS session doesn't have a valid tag, it must
229b2e60773SJohn Baldwin 		 * have had an earlier ifp mismatch, so drop this
230b2e60773SJohn Baldwin 		 * packet.
231b2e60773SJohn Baldwin 		 */
232b2e60773SJohn Baldwin 		if (mst == NULL) {
2339ba11796SAndrew Gallatin 			m_freem(m);
234b2e60773SJohn Baldwin 			error = EAGAIN;
235b2e60773SJohn Baldwin 			goto done;
236b2e60773SJohn Baldwin 		}
2376043ac20SAndrew Gallatin 		/*
2386043ac20SAndrew Gallatin 		 * Always stamp tags that include NIC ktls.
2396043ac20SAndrew Gallatin 		 */
2406043ac20SAndrew Gallatin 		stamp_tag = true;
241b2e60773SJohn Baldwin 	}
242b2e60773SJohn Baldwin #endif
243fb3bc596SJohn Baldwin #ifdef RATELIMIT
244b2e60773SJohn Baldwin 	if (inp != NULL && mst == NULL) {
245fb3bc596SJohn Baldwin 		if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 ||
246fb3bc596SJohn Baldwin 		    (inp->inp_snd_tag != NULL &&
247fb3bc596SJohn Baldwin 		    inp->inp_snd_tag->ifp != ifp))
248fb3bc596SJohn Baldwin 			in_pcboutput_txrtlmt(inp, ifp, m);
249fb3bc596SJohn Baldwin 
250fb3bc596SJohn Baldwin 		if (inp->inp_snd_tag != NULL)
251fb3bc596SJohn Baldwin 			mst = inp->inp_snd_tag;
252fb3bc596SJohn Baldwin 	}
253fb3bc596SJohn Baldwin #endif
25435c7bb34SRandall Stewart 	if (stamp_tag && mst != NULL) {
255fb3bc596SJohn Baldwin 		KASSERT(m->m_pkthdr.rcvif == NULL,
256fb3bc596SJohn Baldwin 		    ("trying to add a send tag to a forwarded packet"));
257fb3bc596SJohn Baldwin 		if (mst->ifp != ifp) {
2589ba11796SAndrew Gallatin 			m_freem(m);
259fb3bc596SJohn Baldwin 			error = EAGAIN;
260fb3bc596SJohn Baldwin 			goto done;
261fb3bc596SJohn Baldwin 		}
262fb3bc596SJohn Baldwin 
263fb3bc596SJohn Baldwin 		/* stamp send tag on mbuf */
264fb3bc596SJohn Baldwin 		m->m_pkthdr.snd_tag = m_snd_tag_ref(mst);
265fb3bc596SJohn Baldwin 		m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
266fb3bc596SJohn Baldwin 	}
267fb3bc596SJohn Baldwin 
26862e1a437SZhenlei Huang 	error = (*ifp->if_output)(ifp, m, gw, ro);
269fb3bc596SJohn Baldwin 
270fb3bc596SJohn Baldwin done:
271fb3bc596SJohn Baldwin 	/* Check for route change invalidating send tags. */
272b2e60773SJohn Baldwin #ifdef KERN_TLS
273b2e60773SJohn Baldwin 	if (tls != NULL) {
274b2e60773SJohn Baldwin 		if (error == EAGAIN)
275b2e60773SJohn Baldwin 			error = ktls_output_eagain(inp, tls);
276b2e60773SJohn Baldwin 		ktls_free(tls);
277b2e60773SJohn Baldwin 	}
278b2e60773SJohn Baldwin #endif
279fb3bc596SJohn Baldwin #ifdef RATELIMIT
280fb3bc596SJohn Baldwin 	if (error == EAGAIN)
281fb3bc596SJohn Baldwin 		in_pcboutput_eagain(inp);
282fb3bc596SJohn Baldwin #endif
283fb3bc596SJohn Baldwin 	return (error);
284fb3bc596SJohn Baldwin }
285fb3bc596SJohn Baldwin 
286983066f0SAlexander V. Chernikov /* rte<>ro_flags translation */
287983066f0SAlexander V. Chernikov static inline void
2889748eb74SAlexander V. Chernikov rt_update_ro_flags(struct route *ro, const struct nhop_object *nh)
289983066f0SAlexander V. Chernikov {
2909748eb74SAlexander V. Chernikov 	int nh_flags = nh->nh_flags;
291983066f0SAlexander V. Chernikov 
292983066f0SAlexander V. Chernikov 	ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
293983066f0SAlexander V. Chernikov 
294983066f0SAlexander V. Chernikov 	ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0;
295983066f0SAlexander V. Chernikov 	ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0;
296983066f0SAlexander V. Chernikov 	ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0;
297983066f0SAlexander V. Chernikov }
298983066f0SAlexander V. Chernikov 
299df8bae1dSRodney W. Grimes /*
300df8bae1dSRodney W. Grimes  * IP output.  The packet in mbuf chain m contains a skeletal IP
301df8bae1dSRodney W. Grimes  * header (with len, off, ttl, proto, tos, src, dst).
302df8bae1dSRodney W. Grimes  * The mbuf chain containing the packet will be freed.
303df8bae1dSRodney W. Grimes  * The mbuf opt, if present, will not be freed.
304bf984051SGleb Smirnoff  * If route ro is present and has ro_nh initialized, route lookup would be
305bf984051SGleb Smirnoff  * skipped and ro->ro_nh would be used. If ro is present but ro->ro_nh is NULL,
306bf984051SGleb Smirnoff  * then result of route lookup is stored in ro->ro_nh.
307bf984051SGleb Smirnoff  *
3083de758d3SRobert Watson  * In the IP forwarding case, the packet will arrive with options already
3093de758d3SRobert Watson  * inserted, so must have a NULL opt pointer.
310df8bae1dSRodney W. Grimes  */
311df8bae1dSRodney W. Grimes int
312f2565d68SRobert Watson ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
313f2565d68SRobert Watson     struct ip_moptions *imo, struct inpcb *inp)
314df8bae1dSRodney W. Grimes {
3151e78ac21SJeffrey Hsu 	struct ip *ip;
316fc74d005SBjoern A. Zeeb 	struct ifnet *ifp = NULL;	/* keep compiler happy */
317ac9d7e26SMax Laier 	struct mbuf *m0;
31823bf9953SPoul-Henning Kamp 	int hlen = sizeof (struct ip);
319374ce248SMitchell Horne 	int mtu = 0;
320ca8b83b0SLuigi Rizzo 	int error = 0;
321868aabb4SRichard Scheffenegger 	int vlan_pcp = -1;
32262e1a437SZhenlei Huang 	struct sockaddr_in *dst;
32362e1a437SZhenlei Huang 	const struct sockaddr *gw;
324374ce248SMitchell Horne 	struct in_ifaddr *ia = NULL;
3256ca363ebSGleb Smirnoff 	struct in_addr src;
32621d172a3SGleb Smirnoff 	int isbroadcast;
327078468edSGleb Smirnoff 	uint16_t ip_len, ip_off;
32862e1a437SZhenlei Huang 	struct route iproute;
3299c57a5b6SHiroki Sato 	uint32_t fibnum;
330fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
3311a499816SEdward Tomasz Napierala 	int no_route_but_check_spd = 0;
3321a499816SEdward Tomasz Napierala #endif
33377a01441SJohn Baldwin 
334ac9d7e26SMax Laier 	M_ASSERTPKTHDR(m);
335b9555453SGleb Smirnoff 	NET_EPOCH_ASSERT();
33636e8826fSMax Laier 
337bc97ba51SJulian Elischer 	if (inp != NULL) {
338baa45840SRobert Watson 		INP_LOCK_ASSERT(inp);
33962e1ba83SRobert Watson 		M_SETFIB(m, inp->inp_inc.inc_fibnum);
340c2529042SHans Petter Selasky 		if ((flags & IP_NODEFAULTFLOWID) == 0) {
34180cb9f21SKip Macy 			m->m_pkthdr.flowid = inp->inp_flowid;
3429c423972SAdrian Chadd 			M_HASHTYPE_SET(m, inp->inp_flowtype);
34380cb9f21SKip Macy 		}
344868aabb4SRichard Scheffenegger 		if ((inp->inp_flags2 & INP_2PCP_SET) != 0)
345868aabb4SRichard Scheffenegger 			vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >>
346868aabb4SRichard Scheffenegger 			    INP_2PCP_SHIFT;
34750575ce1SAndrew Gallatin #ifdef NUMA
34850575ce1SAndrew Gallatin 		m->m_pkthdr.numa_domain = inp->inp_numa_domain;
34950575ce1SAndrew Gallatin #endif
350bc97ba51SJulian Elischer 	}
35162e1ba83SRobert Watson 
352df8bae1dSRodney W. Grimes 	if (opt) {
353ca8b83b0SLuigi Rizzo 		int len = 0;
354df8bae1dSRodney W. Grimes 		m = ip_insertoptions(m, opt, &len);
355cb7641e8SMaxim Konovalov 		if (len != 0)
356ca8b83b0SLuigi Rizzo 			hlen = len; /* ip->ip_hl is updated above */
357df8bae1dSRodney W. Grimes 	}
358df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
3598f134647SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
3608f134647SGleb Smirnoff 	ip_off = ntohs(ip->ip_off);
3613efc3014SJulian Elischer 
362df8bae1dSRodney W. Grimes 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
36353be11f6SPoul-Henning Kamp 		ip->ip_v = IPVERSION;
36453be11f6SPoul-Henning Kamp 		ip->ip_hl = hlen >> 2;
3656d947416SGleb Smirnoff 		ip_fillid(ip);
366df8bae1dSRodney W. Grimes 	} else {
367ca8b83b0SLuigi Rizzo 		/* Header already set, fetch hlen from there */
36853be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
369df8bae1dSRodney W. Grimes 	}
3703924dfa7SMichael Tuexen 	if ((flags & IP_FORWARDING) == 0)
3713924dfa7SMichael Tuexen 		IPSTAT_INC(ips_localout);
3729c9137eaSGarrett Wollman 
373054692a4SAlexander V. Chernikov 	/*
374054692a4SAlexander V. Chernikov 	 * dst/gw handling:
375054692a4SAlexander V. Chernikov 	 *
3763c065f2fSGleb Smirnoff 	 * gw is readonly but can point either to dst OR rt_gateway,
3773c065f2fSGleb Smirnoff 	 * therefore we need restore gw if we're redoing lookup.
378054692a4SAlexander V. Chernikov 	 */
3799c57a5b6SHiroki Sato 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
38062e1a437SZhenlei Huang 	if (ro == NULL) {
38162e1a437SZhenlei Huang 		ro = &iproute;
38262e1a437SZhenlei Huang 		bzero(ro, sizeof (*ro));
38362e1a437SZhenlei Huang 	}
3846ca363ebSGleb Smirnoff 	dst = (struct sockaddr_in *)&ro->ro_dst;
38562e1a437SZhenlei Huang 	if (ro->ro_nh == NULL) {
386df8bae1dSRodney W. Grimes 		dst->sin_family = AF_INET;
387df8bae1dSRodney W. Grimes 		dst->sin_len = sizeof(*dst);
3889b932e9eSAndre Oppermann 		dst->sin_addr = ip->ip_dst;
389df8bae1dSRodney W. Grimes 	}
39062e1a437SZhenlei Huang 	gw = (const struct sockaddr *)dst;
391d9f2a782SErmal Luçi again:
39284cc0778SGeorge V. Neville-Neil 	/*
39384cc0778SGeorge V. Neville-Neil 	 * Validate route against routing table additions;
39484cc0778SGeorge V. Neville-Neil 	 * a better/more specific route might have been added.
39584cc0778SGeorge V. Neville-Neil 	 */
39662e1a437SZhenlei Huang 	if (inp != NULL && ro->ro_nh != NULL)
397983066f0SAlexander V. Chernikov 		NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
39884cc0778SGeorge V. Neville-Neil 	/*
39984cc0778SGeorge V. Neville-Neil 	 * If there is a cached route,
40084cc0778SGeorge V. Neville-Neil 	 * check that it is to the same destination
40184cc0778SGeorge V. Neville-Neil 	 * and is still up.  If not, free it and try again.
40284cc0778SGeorge V. Neville-Neil 	 * The address family should also be checked in case of sharing the
40384cc0778SGeorge V. Neville-Neil 	 * cache with IPv6.
40484cc0778SGeorge V. Neville-Neil 	 * Also check whether routing cache needs invalidation.
40584cc0778SGeorge V. Neville-Neil 	 */
40662e1a437SZhenlei Huang 	if (ro->ro_nh != NULL &&
407174fb9dbSAlexander V. Chernikov 	    ((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET ||
4086ca363ebSGleb Smirnoff 	    dst->sin_addr.s_addr != ip->ip_dst.s_addr))
409fc21c53fSRyan Stone 		RO_INVALIDATE_CACHE(ro);
410d9f2a782SErmal Luçi 	ia = NULL;
411df8bae1dSRodney W. Grimes 	/*
412a3fd02d8SBruce M Simpson 	 * If routing to interface only, short circuit routing lookup.
413a3fd02d8SBruce M Simpson 	 * The use of an all-ones broadcast address implies this; an
414a3fd02d8SBruce M Simpson 	 * interface is specified by the broadcast address of an interface,
415a3fd02d8SBruce M Simpson 	 * or the destination address of a ptp interface.
416df8bae1dSRodney W. Grimes 	 */
417a3fd02d8SBruce M Simpson 	if (flags & IP_SENDONES) {
4184f8585e0SAlan Somers 		if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst),
41958a39d8cSAlan Somers 						      M_GETFIB(m)))) == NULL &&
4204f8585e0SAlan Somers 		    (ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
42158a39d8cSAlan Somers 						    M_GETFIB(m)))) == NULL) {
42286425c62SRobert Watson 			IPSTAT_INC(ips_noroute);
423a3fd02d8SBruce M Simpson 			error = ENETUNREACH;
424a3fd02d8SBruce M Simpson 			goto bad;
425a3fd02d8SBruce M Simpson 		}
426a3fd02d8SBruce M Simpson 		ip->ip_dst.s_addr = INADDR_BROADCAST;
427a3fd02d8SBruce M Simpson 		dst->sin_addr = ip->ip_dst;
428a3fd02d8SBruce M Simpson 		ifp = ia->ia_ifp;
4296ca363ebSGleb Smirnoff 		mtu = ifp->if_mtu;
430a3fd02d8SBruce M Simpson 		ip->ip_ttl = 1;
431a3fd02d8SBruce M Simpson 		isbroadcast = 1;
4326ca363ebSGleb Smirnoff 		src = IA_SIN(ia)->sin_addr;
433a3fd02d8SBruce M Simpson 	} else if (flags & IP_ROUTETOIF) {
4344f8585e0SAlan Somers 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
43558a39d8cSAlan Somers 						    M_GETFIB(m)))) == NULL &&
4364f8585e0SAlan Somers 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst), 0,
43758a39d8cSAlan Somers 						M_GETFIB(m)))) == NULL) {
43886425c62SRobert Watson 			IPSTAT_INC(ips_noroute);
439df8bae1dSRodney W. Grimes 			error = ENETUNREACH;
440df8bae1dSRodney W. Grimes 			goto bad;
441df8bae1dSRodney W. Grimes 		}
442df8bae1dSRodney W. Grimes 		ifp = ia->ia_ifp;
4436ca363ebSGleb Smirnoff 		mtu = ifp->if_mtu;
444df8bae1dSRodney W. Grimes 		ip->ip_ttl = 1;
4456c1bd558SRyan Stone 		isbroadcast = ifp->if_flags & IFF_BROADCAST ?
4466c1bd558SRyan Stone 		    in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
4476ca363ebSGleb Smirnoff 		src = IA_SIN(ia)->sin_addr;
4483afefa39SDaniel C. Sobral 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
44938c1bc35SRuslan Ermilov 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
4503afefa39SDaniel C. Sobral 		/*
45138c1bc35SRuslan Ermilov 		 * Bypass the normal routing lookup for multicast
45238c1bc35SRuslan Ermilov 		 * packets if the interface is specified.
4533afefa39SDaniel C. Sobral 		 */
45438c1bc35SRuslan Ermilov 		ifp = imo->imo_multicast_ifp;
4556ca363ebSGleb Smirnoff 		mtu = ifp->if_mtu;
4562144431cSGleb Smirnoff 		IFP_TO_IA(ifp, ia);
45738c1bc35SRuslan Ermilov 		isbroadcast = 0;	/* fool gcc */
45854bb7ac0SGleb Smirnoff 		/* Interface may have no addresses. */
45954bb7ac0SGleb Smirnoff 		if (ia != NULL)
4606ca363ebSGleb Smirnoff 			src = IA_SIN(ia)->sin_addr;
46154bb7ac0SGleb Smirnoff 		else
46254bb7ac0SGleb Smirnoff 			src.s_addr = INADDR_ANY;
46362e1a437SZhenlei Huang 	} else if (ro != &iproute) {
464983066f0SAlexander V. Chernikov 		if (ro->ro_nh == NULL) {
4652c17fe93SGarrett Wollman 			/*
4666ca363ebSGleb Smirnoff 			 * We want to do any cloning requested by the link
4676ca363ebSGleb Smirnoff 			 * layer, as this is probably required in all cases
4686ca363ebSGleb Smirnoff 			 * for correct operation (as it is for ARP).
4692c17fe93SGarrett Wollman 			 */
4704043ee3cSAlexander V. Chernikov 			uint32_t flowid;
4714043ee3cSAlexander V. Chernikov 			flowid = m->m_pkthdr.flowid;
4724043ee3cSAlexander V. Chernikov 			ro->ro_nh = fib4_lookup(fibnum, dst->sin_addr, 0,
4734043ee3cSAlexander V. Chernikov 			    NHR_REF, flowid);
4744043ee3cSAlexander V. Chernikov 
475174fb9dbSAlexander V. Chernikov 			if (ro->ro_nh == NULL || (!NH_IS_VALID(ro->ro_nh))) {
476fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
4771a499816SEdward Tomasz Napierala 				/*
4781a499816SEdward Tomasz Napierala 				 * There is no route for this packet, but it is
4791a499816SEdward Tomasz Napierala 				 * possible that a matching SPD entry exists.
4801a499816SEdward Tomasz Napierala 				 */
4811a499816SEdward Tomasz Napierala 				no_route_but_check_spd = 1;
4821a499816SEdward Tomasz Napierala 				goto sendit;
4831a499816SEdward Tomasz Napierala #endif
48486425c62SRobert Watson 				IPSTAT_INC(ips_noroute);
485df8bae1dSRodney W. Grimes 				error = EHOSTUNREACH;
486df8bae1dSRodney W. Grimes 				goto bad;
487df8bae1dSRodney W. Grimes 			}
4886ca363ebSGleb Smirnoff 		}
4899748eb74SAlexander V. Chernikov 		struct nhop_object *nh = ro->ro_nh;
4909748eb74SAlexander V. Chernikov 
4919748eb74SAlexander V. Chernikov 		ia = ifatoia(nh->nh_ifa);
4929748eb74SAlexander V. Chernikov 		ifp = nh->nh_ifp;
4939748eb74SAlexander V. Chernikov 		counter_u64_add(nh->nh_pksent, 1);
4949748eb74SAlexander V. Chernikov 		rt_update_ro_flags(ro, nh);
4959748eb74SAlexander V. Chernikov 		if (nh->nh_flags & NHF_GATEWAY)
49662e1a437SZhenlei Huang 			gw = &nh->gw_sa;
4979748eb74SAlexander V. Chernikov 		if (nh->nh_flags & NHF_HOST)
4989748eb74SAlexander V. Chernikov 			isbroadcast = (nh->nh_flags & NHF_BROADCAST);
49962e1a437SZhenlei Huang 		else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET))
50062e1a437SZhenlei Huang 			isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia);
5016c1bd558SRyan Stone 		else
5026c1bd558SRyan Stone 			isbroadcast = 0;
5039748eb74SAlexander V. Chernikov 		mtu = nh->nh_mtu;
5046ca363ebSGleb Smirnoff 		src = IA_SIN(ia)->sin_addr;
5056ca363ebSGleb Smirnoff 	} else {
5063553b300SAlexander V. Chernikov 		struct nhop_object *nh;
5076ca363ebSGleb Smirnoff 
5087d98cc09SAndrey V. Elsukov 		nh = fib4_lookup(M_GETFIB(m), dst->sin_addr, 0, NHR_NONE,
5092259a030SAlexander V. Chernikov 		    m->m_pkthdr.flowid);
5103553b300SAlexander V. Chernikov 		if (nh == NULL) {
5116ca363ebSGleb Smirnoff #if defined(IPSEC) || defined(IPSEC_SUPPORT)
5126ca363ebSGleb Smirnoff 			/*
5136ca363ebSGleb Smirnoff 			 * There is no route for this packet, but it is
5146ca363ebSGleb Smirnoff 			 * possible that a matching SPD entry exists.
5156ca363ebSGleb Smirnoff 			 */
5166ca363ebSGleb Smirnoff 			no_route_but_check_spd = 1;
5176ca363ebSGleb Smirnoff 			goto sendit;
5186ca363ebSGleb Smirnoff #endif
5196ca363ebSGleb Smirnoff 			IPSTAT_INC(ips_noroute);
5206ca363ebSGleb Smirnoff 			error = EHOSTUNREACH;
5216ca363ebSGleb Smirnoff 			goto bad;
5226ca363ebSGleb Smirnoff 		}
5233553b300SAlexander V. Chernikov 		ifp = nh->nh_ifp;
5243553b300SAlexander V. Chernikov 		mtu = nh->nh_mtu;
52562e1a437SZhenlei Huang 		rt_update_ro_flags(ro, nh);
5263553b300SAlexander V. Chernikov 		if (nh->nh_flags & NHF_GATEWAY)
52762e1a437SZhenlei Huang 			gw = &nh->gw_sa;
5283553b300SAlexander V. Chernikov 		ia = ifatoia(nh->nh_ifa);
5293553b300SAlexander V. Chernikov 		src = IA_SIN(ia)->sin_addr;
5303553b300SAlexander V. Chernikov 		isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
5316ca363ebSGleb Smirnoff 		    (NHF_HOST | NHF_BROADCAST)) ||
5326ca363ebSGleb Smirnoff 		    ((ifp->if_flags & IFF_BROADCAST) &&
53362e1a437SZhenlei Huang 		    (gw->sa_family == AF_INET) &&
53462e1a437SZhenlei Huang 		    in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia)));
5356ca363ebSGleb Smirnoff 	}
5366ca363ebSGleb Smirnoff 
537c744cde4SBjoern A. Zeeb 	/* Catch a possible divide by zero later. */
538983066f0SAlexander V. Chernikov 	KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p",
5396ca363ebSGleb Smirnoff 	    __func__, mtu, ro,
540983066f0SAlexander V. Chernikov 	    (ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp));
541d9f2a782SErmal Luçi 
5429b932e9eSAndre Oppermann 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
543df8bae1dSRodney W. Grimes 		m->m_flags |= M_MCAST;
544df8bae1dSRodney W. Grimes 		/*
545183e1c86SGleb Smirnoff 		 * IP destination address is multicast.  Make sure "gw"
546183e1c86SGleb Smirnoff 		 * still points to the address in "ro".  (It may have been
547183e1c86SGleb Smirnoff 		 * changed to point to a gateway address, above.)
548183e1c86SGleb Smirnoff 		 */
54962e1a437SZhenlei Huang 		gw = (const struct sockaddr *)dst;
550183e1c86SGleb Smirnoff 		/*
551df8bae1dSRodney W. Grimes 		 * See if the caller provided any multicast options
552df8bae1dSRodney W. Grimes 		 */
553df8bae1dSRodney W. Grimes 		if (imo != NULL) {
554df8bae1dSRodney W. Grimes 			ip->ip_ttl = imo->imo_multicast_ttl;
5551c5de19aSGarrett Wollman 			if (imo->imo_multicast_vif != -1)
5561c5de19aSGarrett Wollman 				ip->ip_src.s_addr =
557bbb4330bSLuigi Rizzo 				    ip_mcast_src ?
558bbb4330bSLuigi Rizzo 				    ip_mcast_src(imo->imo_multicast_vif) :
559bbb4330bSLuigi Rizzo 				    INADDR_ANY;
560df8bae1dSRodney W. Grimes 		} else
561df8bae1dSRodney W. Grimes 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
562df8bae1dSRodney W. Grimes 		/*
563df8bae1dSRodney W. Grimes 		 * Confirm that the outgoing interface supports multicast.
564df8bae1dSRodney W. Grimes 		 */
5651c5de19aSGarrett Wollman 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
566df8bae1dSRodney W. Grimes 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
56786425c62SRobert Watson 				IPSTAT_INC(ips_noroute);
568df8bae1dSRodney W. Grimes 				error = ENETUNREACH;
569df8bae1dSRodney W. Grimes 				goto bad;
570df8bae1dSRodney W. Grimes 			}
5711c5de19aSGarrett Wollman 		}
572df8bae1dSRodney W. Grimes 		/*
573df8bae1dSRodney W. Grimes 		 * If source address not specified yet, use address
574df8bae1dSRodney W. Grimes 		 * of outgoing interface.
575df8bae1dSRodney W. Grimes 		 */
5766ca363ebSGleb Smirnoff 		if (ip->ip_src.s_addr == INADDR_ANY)
5776ca363ebSGleb Smirnoff 			ip->ip_src = src;
578df8bae1dSRodney W. Grimes 
5798b889dbbSBruce M Simpson 		if ((imo == NULL && in_mcast_loop) ||
5808b889dbbSBruce M Simpson 		    (imo && imo->imo_multicast_loop)) {
581df8bae1dSRodney W. Grimes 			/*
5828b889dbbSBruce M Simpson 			 * Loop back multicast datagram if not expressly
5838b889dbbSBruce M Simpson 			 * forbidden to do so, even if we are not a member
5848b889dbbSBruce M Simpson 			 * of the group; ip_input() will filter it later,
5858b889dbbSBruce M Simpson 			 * thus deferring a hash lookup and mutex acquisition
5868b889dbbSBruce M Simpson 			 * at the expense of a cheap copy using m_copym().
587df8bae1dSRodney W. Grimes 			 */
588331dff07SAlexander V. Chernikov 			ip_mloopback(ifp, m, hlen);
5898b889dbbSBruce M Simpson 		} else {
590df8bae1dSRodney W. Grimes 			/*
591df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, perform
592df8bae1dSRodney W. Grimes 			 * multicast forwarding as if the packet had just
593df8bae1dSRodney W. Grimes 			 * arrived on the interface to which we are about
594df8bae1dSRodney W. Grimes 			 * to send.  The multicast forwarding function
595df8bae1dSRodney W. Grimes 			 * recursively calls this function, using the
596df8bae1dSRodney W. Grimes 			 * IP_FORWARDING flag to prevent infinite recursion.
597df8bae1dSRodney W. Grimes 			 *
598df8bae1dSRodney W. Grimes 			 * Multicasts that are looped back by ip_mloopback(),
599df8bae1dSRodney W. Grimes 			 * above, will be forwarded by the ip_input() routine,
600df8bae1dSRodney W. Grimes 			 * if necessary.
601df8bae1dSRodney W. Grimes 			 */
602603724d3SBjoern A. Zeeb 			if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
603f0068c4aSGarrett Wollman 				/*
604bbb4330bSLuigi Rizzo 				 * If rsvp daemon is not running, do not
605f0068c4aSGarrett Wollman 				 * set ip_moptions. This ensures that the packet
606f0068c4aSGarrett Wollman 				 * is multicast and not just sent down one link
607f0068c4aSGarrett Wollman 				 * as prescribed by rsvpd.
608f0068c4aSGarrett Wollman 				 */
609603724d3SBjoern A. Zeeb 				if (!V_rsvp_on)
610f0068c4aSGarrett Wollman 					imo = NULL;
611bbb4330bSLuigi Rizzo 				if (ip_mforward &&
612bbb4330bSLuigi Rizzo 				    ip_mforward(ip, ifp, m, imo) != 0) {
613df8bae1dSRodney W. Grimes 					m_freem(m);
614df8bae1dSRodney W. Grimes 					goto done;
615df8bae1dSRodney W. Grimes 				}
616df8bae1dSRodney W. Grimes 			}
617df8bae1dSRodney W. Grimes 		}
6185e9ae478SGarrett Wollman 
619df8bae1dSRodney W. Grimes 		/*
620df8bae1dSRodney W. Grimes 		 * Multicasts with a time-to-live of zero may be looped-
621df8bae1dSRodney W. Grimes 		 * back, above, but must not be transmitted on a network.
622df8bae1dSRodney W. Grimes 		 * Also, multicasts addressed to the loopback interface
623df8bae1dSRodney W. Grimes 		 * are not sent -- the above call to ip_mloopback() will
6248b889dbbSBruce M Simpson 		 * loop back a copy. ip_input() will drop the copy if
6258b889dbbSBruce M Simpson 		 * this host does not belong to the destination group on
6268b889dbbSBruce M Simpson 		 * the loopback interface.
627df8bae1dSRodney W. Grimes 		 */
628f5fea3ddSPaul Traina 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
629df8bae1dSRodney W. Grimes 			m_freem(m);
630df8bae1dSRodney W. Grimes 			goto done;
631df8bae1dSRodney W. Grimes 		}
632df8bae1dSRodney W. Grimes 
633df8bae1dSRodney W. Grimes 		goto sendit;
634df8bae1dSRodney W. Grimes 	}
6356a7c943cSAndre Oppermann 
636df8bae1dSRodney W. Grimes 	/*
6372b25acc1SLuigi Rizzo 	 * If the source address is not specified yet, use the address
638cb459254SJohn-Mark Gurney 	 * of the outgoing interface.
639df8bae1dSRodney W. Grimes 	 */
6406ca363ebSGleb Smirnoff 	if (ip->ip_src.s_addr == INADDR_ANY)
6416ca363ebSGleb Smirnoff 		ip->ip_src = src;
6426a7c943cSAndre Oppermann 
643ca7a789aSMax Laier 	/*
644df8bae1dSRodney W. Grimes 	 * Look for broadcast address and
6458c3f5566SRuslan Ermilov 	 * verify user is allowed to send
646df8bae1dSRodney W. Grimes 	 * such a packet.
647df8bae1dSRodney W. Grimes 	 */
6489f9b3dc4SGarrett Wollman 	if (isbroadcast) {
649df8bae1dSRodney W. Grimes 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
650df8bae1dSRodney W. Grimes 			error = EADDRNOTAVAIL;
651df8bae1dSRodney W. Grimes 			goto bad;
652df8bae1dSRodney W. Grimes 		}
653df8bae1dSRodney W. Grimes 		if ((flags & IP_ALLOWBROADCAST) == 0) {
654df8bae1dSRodney W. Grimes 			error = EACCES;
655df8bae1dSRodney W. Grimes 			goto bad;
656df8bae1dSRodney W. Grimes 		}
657df8bae1dSRodney W. Grimes 		/* don't allow broadcast messages to be fragmented */
6588f134647SGleb Smirnoff 		if (ip_len > mtu) {
659df8bae1dSRodney W. Grimes 			error = EMSGSIZE;
660df8bae1dSRodney W. Grimes 			goto bad;
661df8bae1dSRodney W. Grimes 		}
662df8bae1dSRodney W. Grimes 		m->m_flags |= M_BCAST;
6639f9b3dc4SGarrett Wollman 	} else {
664df8bae1dSRodney W. Grimes 		m->m_flags &= ~M_BCAST;
6659f9b3dc4SGarrett Wollman 	}
666df8bae1dSRodney W. Grimes 
667f1743588SDarren Reed sendit:
668fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
669fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
670*0ff2d00dSKonstantin Belousov 		struct ip ip_hdr;
671b0e02076SKonstantin Belousov 
67200524fd4SKonstantin Belousov 		if ((error = IPSEC_OUTPUT(ipv4, ifp, m, inp, mtu)) != 0) {
673fcf59617SAndrey V. Elsukov 			if (error == EINPROGRESS)
674fcf59617SAndrey V. Elsukov 				error = 0;
6751dfcf0d2SAndre Oppermann 			goto done;
676fcf59617SAndrey V. Elsukov 		}
677*0ff2d00dSKonstantin Belousov 
678*0ff2d00dSKonstantin Belousov 		/* Update variables that are affected by ipsec4_output(). */
679*0ff2d00dSKonstantin Belousov 		m_copydata(m, 0, sizeof(ip_hdr), (char *)&ip_hdr);
680*0ff2d00dSKonstantin Belousov 		hlen = ip_hdr.ip_hl << 2;
68133841545SHajimu UMEMOTO 	}
682*0ff2d00dSKonstantin Belousov 
6831a499816SEdward Tomasz Napierala 	/*
6841a499816SEdward Tomasz Napierala 	 * Check if there was a route for this packet; return error if not.
6851a499816SEdward Tomasz Napierala 	 */
6861a499816SEdward Tomasz Napierala 	if (no_route_but_check_spd) {
6871a499816SEdward Tomasz Napierala 		IPSTAT_INC(ips_noroute);
6881a499816SEdward Tomasz Napierala 		error = EHOSTUNREACH;
6891a499816SEdward Tomasz Napierala 		goto bad;
6901a499816SEdward Tomasz Napierala 	}
691b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
69233841545SHajimu UMEMOTO 
693c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
694b252313fSGleb Smirnoff 	if (PFIL_HOOKED_OUT(V_inet_pfil_head)) {
69505fc9d78SKristof Provost 		switch (ip_output_pfil(&m, ifp, flags, inp, dst, &fibnum,
69605fc9d78SKristof Provost 		    &error)) {
697d9f2a782SErmal Luçi 		case 1: /* Finished */
698c4ac87eaSDarren Reed 			goto done;
6999b932e9eSAndre Oppermann 
700d9f2a782SErmal Luçi 		case 0: /* Continue normally */
701c4ac87eaSDarren Reed 			ip = mtod(m, struct ip *);
702185c1cddSKristof Provost 			ip_len = ntohs(ip->ip_len);
703d9f2a782SErmal Luçi 			break;
704fed1c7e9SSøren Schmidt 
705d9f2a782SErmal Luçi 		case -1: /* Need to try again */
706d9f2a782SErmal Luçi 			/* Reset everything for a new round */
7076ca363ebSGleb Smirnoff 			if (ro != NULL) {
708983066f0SAlexander V. Chernikov 				RO_NHFREE(ro);
7094fb3a820SAlexander V. Chernikov 				ro->ro_prepend = NULL;
7106ca363ebSGleb Smirnoff 			}
71162e1a437SZhenlei Huang 			gw = (const struct sockaddr *)dst;
712d9f2a782SErmal Luçi 			ip = mtod(m, struct ip *);
7139b932e9eSAndre Oppermann 			goto again;
714d9f2a782SErmal Luçi 		}
715099dd043SAndre Oppermann 	}
7162b25acc1SLuigi Rizzo 
717868aabb4SRichard Scheffenegger 	if (vlan_pcp > -1)
718868aabb4SRichard Scheffenegger 		EVL_APPLY_PRI(m, vlan_pcp);
719868aabb4SRichard Scheffenegger 
7206c1c6ae5SRodney W. Grimes 	/* IN_LOOPBACK must not appear on the wire - RFC1122. */
7216c1c6ae5SRodney W. Grimes 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
7226c1c6ae5SRodney W. Grimes 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
72351c8ec4aSRuslan Ermilov 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
72486425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
72551c8ec4aSRuslan Ermilov 			error = EADDRNOTAVAIL;
72651c8ec4aSRuslan Ermilov 			goto bad;
72751c8ec4aSRuslan Ermilov 		}
72851c8ec4aSRuslan Ermilov 	}
72951c8ec4aSRuslan Ermilov 
73044775b16SMark Johnston 	/* Ensure the packet data is mapped if the interface requires it. */
73144775b16SMark Johnston 	if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
732b0e02076SKonstantin Belousov 		struct mbuf *m1;
733b0e02076SKonstantin Belousov 
734b0e02076SKonstantin Belousov 		error = mb_unmapped_to_ext(m, &m1);
735b0e02076SKonstantin Belousov 		if (error != 0) {
736b0e02076SKonstantin Belousov 			if (error == EINVAL) {
737b0e02076SKonstantin Belousov 				if_printf(ifp, "TLS packet\n");
738b0e02076SKonstantin Belousov 				/* XXXKIB */
739b0e02076SKonstantin Belousov 			} else if (error == ENOMEM) {
74044775b16SMark Johnston 				error = ENOBUFS;
741b0e02076SKonstantin Belousov 			}
742b0e02076SKonstantin Belousov 			IPSTAT_INC(ips_odropped);
74344775b16SMark Johnston 			goto bad;
744b0e02076SKonstantin Belousov 		} else {
745b0e02076SKonstantin Belousov 			m = m1;
74644775b16SMark Johnston 		}
74744775b16SMark Johnston 	}
74844775b16SMark Johnston 
749206a3274SRuslan Ermilov 	m->m_pkthdr.csum_flags |= CSUM_IP;
750078468edSGleb Smirnoff 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
751db4f9cc7SJonathan Lemon 		in_delayed_cksum(m);
752078468edSGleb Smirnoff 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
753db4f9cc7SJonathan Lemon 	}
75495033af9SMark Johnston #if defined(SCTP) || defined(SCTP_SUPPORT)
755078468edSGleb Smirnoff 	if (m->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
7561966e5b5SRandall Stewart 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
757078468edSGleb Smirnoff 		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
7582f4afd21SRandall Stewart 	}
7592f4afd21SRandall Stewart #endif
760db4f9cc7SJonathan Lemon 
761e7319babSPoul-Henning Kamp 	/*
762db4f9cc7SJonathan Lemon 	 * If small enough for interface, or the interface will take
763233dcce1SAndre Oppermann 	 * care of the fragmentation for us, we can just send directly.
764b092fd6cSNavdeep Parhar 	 * Note that if_vxlan could have requested TSO even though the outer
765b092fd6cSNavdeep Parhar 	 * frame is UDP.  It is correct to not fragment such datagrams and
766b092fd6cSNavdeep Parhar 	 * instead just pass them on to the driver.
767df8bae1dSRodney W. Grimes 	 */
76821d172a3SGleb Smirnoff 	if (ip_len <= mtu ||
769b092fd6cSNavdeep Parhar 	    (m->m_pkthdr.csum_flags & ifp->if_hwassist &
770b092fd6cSNavdeep Parhar 	    (CSUM_TSO | CSUM_INNER_TSO)) != 0) {
771df8bae1dSRodney W. Grimes 		ip->ip_sum = 0;
772078468edSGleb Smirnoff 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
773df8bae1dSRodney W. Grimes 			ip->ip_sum = in_cksum(m, hlen);
774078468edSGleb Smirnoff 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
775078468edSGleb Smirnoff 		}
7765da9f8faSJosef Karthauser 
777233dcce1SAndre Oppermann 		/*
778233dcce1SAndre Oppermann 		 * Record statistics for this interface address.
779233dcce1SAndre Oppermann 		 * With CSUM_TSO the byte/packet count will be slightly
780233dcce1SAndre Oppermann 		 * incorrect because we count the IP+TCP headers only
781233dcce1SAndre Oppermann 		 * once instead of for every generated packet.
782233dcce1SAndre Oppermann 		 */
78338c1bc35SRuslan Ermilov 		if (!(flags & IP_FORWARDING) && ia) {
784b092fd6cSNavdeep Parhar 			if (m->m_pkthdr.csum_flags &
785b092fd6cSNavdeep Parhar 			    (CSUM_TSO | CSUM_INNER_TSO))
7867caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_opackets,
7877caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len / m->m_pkthdr.tso_segsz);
788233dcce1SAndre Oppermann 			else
7897caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
7907caf4ab7SGleb Smirnoff 
7917caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len);
7925da9f8faSJosef Karthauser 		}
79353dcc544SMike Silbersack #ifdef MBUF_STRESS_TEST
7943390d476SMike Silbersack 		if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
795eb1b1807SGleb Smirnoff 			m = m_fragment(m, M_NOWAIT, mbuf_frag_size);
7969d9edc56SMike Silbersack #endif
797147f74d1SAndre Oppermann 		/*
798147f74d1SAndre Oppermann 		 * Reset layer specific mbuf flags
799147f74d1SAndre Oppermann 		 * to avoid confusing lower layers.
800147f74d1SAndre Oppermann 		 */
80186bd0491SAndre Oppermann 		m_clrprotoflags(m);
80257f60867SMark Johnston 		IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL);
80335c7bb34SRandall Stewart 		error = ip_output_send(inp, ifp, m, gw, ro,
80435c7bb34SRandall Stewart 		    (flags & IP_NO_SND_TAG_RL) ? false : true);
805df8bae1dSRodney W. Grimes 		goto done;
806df8bae1dSRodney W. Grimes 	}
8071e78ac21SJeffrey Hsu 
808233dcce1SAndre Oppermann 	/* Balk when DF bit is set or the interface didn't support TSO. */
809b092fd6cSNavdeep Parhar 	if ((ip_off & IP_DF) ||
810b092fd6cSNavdeep Parhar 	    (m->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_INNER_TSO))) {
811df8bae1dSRodney W. Grimes 		error = EMSGSIZE;
81286425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
813df8bae1dSRodney W. Grimes 		goto bad;
814df8bae1dSRodney W. Grimes 	}
8151e78ac21SJeffrey Hsu 
8161e78ac21SJeffrey Hsu 	/*
8171e78ac21SJeffrey Hsu 	 * Too large for interface; fragment if possible. If successful,
8181e78ac21SJeffrey Hsu 	 * on return, m will point to a list of packets to be sent.
8191e78ac21SJeffrey Hsu 	 */
820078468edSGleb Smirnoff 	error = ip_fragment(ip, &m, mtu, ifp->if_hwassist);
8211e78ac21SJeffrey Hsu 	if (error)
822df8bae1dSRodney W. Grimes 		goto bad;
8231e78ac21SJeffrey Hsu 	for (; m; m = m0) {
824df8bae1dSRodney W. Grimes 		m0 = m->m_nextpkt;
825b375c9ecSLuigi Rizzo 		m->m_nextpkt = 0;
82607203494SDaniel C. Sobral 		if (error == 0) {
827fe937674SJosef Karthauser 			/* Record statistics for this interface address. */
82807203494SDaniel C. Sobral 			if (ia != NULL) {
8297caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_opackets, 1);
8307caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_obytes,
8317caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
83207203494SDaniel C. Sobral 			}
833147f74d1SAndre Oppermann 			/*
834147f74d1SAndre Oppermann 			 * Reset layer specific mbuf flags
835147f74d1SAndre Oppermann 			 * to avoid confusing lower layers.
836147f74d1SAndre Oppermann 			 */
83786bd0491SAndre Oppermann 			m_clrprotoflags(m);
838fe937674SJosef Karthauser 
8392e77d270SAndrey V. Elsukov 			IP_PROBE(send, NULL, NULL, mtod(m, struct ip *), ifp,
8402e77d270SAndrey V. Elsukov 			    mtod(m, struct ip *), NULL);
84135c7bb34SRandall Stewart 			error = ip_output_send(inp, ifp, m, gw, ro, true);
842fe937674SJosef Karthauser 		} else
843df8bae1dSRodney W. Grimes 			m_freem(m);
844df8bae1dSRodney W. Grimes 	}
845df8bae1dSRodney W. Grimes 
846df8bae1dSRodney W. Grimes 	if (error == 0)
84786425c62SRobert Watson 		IPSTAT_INC(ips_fragmented);
8481e78ac21SJeffrey Hsu 
849df8bae1dSRodney W. Grimes done:
850df8bae1dSRodney W. Grimes 	return (error);
851df8bae1dSRodney W. Grimes  bad:
8523528d68fSBill Paul 	m_freem(m);
853df8bae1dSRodney W. Grimes 	goto done;
854df8bae1dSRodney W. Grimes }
855df8bae1dSRodney W. Grimes 
8561e78ac21SJeffrey Hsu /*
8571e78ac21SJeffrey Hsu  * Create a chain of fragments which fit the given mtu. m_frag points to the
8581e78ac21SJeffrey Hsu  * mbuf to be fragmented; on return it points to the chain with the fragments.
8591e78ac21SJeffrey Hsu  * Return 0 if no error. If error, m_frag may contain a partially built
8601e78ac21SJeffrey Hsu  * chain of fragments that should be freed by the caller.
8611e78ac21SJeffrey Hsu  *
8621e78ac21SJeffrey Hsu  * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
8631e78ac21SJeffrey Hsu  */
8641e78ac21SJeffrey Hsu int
8651e78ac21SJeffrey Hsu ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
866078468edSGleb Smirnoff     u_long if_hwassist_flags)
8671e78ac21SJeffrey Hsu {
8681e78ac21SJeffrey Hsu 	int error = 0;
8691e78ac21SJeffrey Hsu 	int hlen = ip->ip_hl << 2;
8701e78ac21SJeffrey Hsu 	int len = (mtu - hlen) & ~7;	/* size of payload in each fragment */
8711e78ac21SJeffrey Hsu 	int off;
8721e78ac21SJeffrey Hsu 	struct mbuf *m0 = *m_frag;	/* the original packet		*/
8731e78ac21SJeffrey Hsu 	int firstlen;
8741e78ac21SJeffrey Hsu 	struct mbuf **mnext;
8751e78ac21SJeffrey Hsu 	int nfrags;
87621d172a3SGleb Smirnoff 	uint16_t ip_len, ip_off;
8771e78ac21SJeffrey Hsu 
87821d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
87921d172a3SGleb Smirnoff 	ip_off = ntohs(ip->ip_off);
88021d172a3SGleb Smirnoff 
8811e78ac21SJeffrey Hsu 	/*
882da6715bbSGleb Smirnoff 	 * Packet shall not have "Don't Fragment" flag and have at least 8
883da6715bbSGleb Smirnoff 	 * bytes of payload.
8841e78ac21SJeffrey Hsu 	 */
885da6715bbSGleb Smirnoff 	if (__predict_false((ip_off & IP_DF) || len < 8)) {
886da6715bbSGleb Smirnoff 		IPSTAT_INC(ips_cantfrag);
887da6715bbSGleb Smirnoff 		return (EMSGSIZE);
888da6715bbSGleb Smirnoff 	}
8891e78ac21SJeffrey Hsu 
8901e78ac21SJeffrey Hsu 	/*
8911e78ac21SJeffrey Hsu 	 * If the interface will not calculate checksums on
8921e78ac21SJeffrey Hsu 	 * fragmented packets, then do it here.
8931e78ac21SJeffrey Hsu 	 */
894da2299c5SAndre Oppermann 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
8951e78ac21SJeffrey Hsu 		in_delayed_cksum(m0);
8961e78ac21SJeffrey Hsu 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
8971e78ac21SJeffrey Hsu 	}
89895033af9SMark Johnston #if defined(SCTP) || defined(SCTP_SUPPORT)
899da2299c5SAndre Oppermann 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP) {
9001966e5b5SRandall Stewart 		sctp_delayed_cksum(m0, hlen);
9012f4afd21SRandall Stewart 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
9022f4afd21SRandall Stewart 	}
9032f4afd21SRandall Stewart #endif
9041e78ac21SJeffrey Hsu 	if (len > PAGE_SIZE) {
9051e78ac21SJeffrey Hsu 		/*
9061e78ac21SJeffrey Hsu 		 * Fragment large datagrams such that each segment
9071e78ac21SJeffrey Hsu 		 * contains a multiple of PAGE_SIZE amount of data,
9081e78ac21SJeffrey Hsu 		 * plus headers. This enables a receiver to perform
9091e78ac21SJeffrey Hsu 		 * page-flipping zero-copy optimizations.
9101e78ac21SJeffrey Hsu 		 *
9111e78ac21SJeffrey Hsu 		 * XXX When does this help given that sender and receiver
9121e78ac21SJeffrey Hsu 		 * could have different page sizes, and also mtu could
9131e78ac21SJeffrey Hsu 		 * be less than the receiver's page size ?
9141e78ac21SJeffrey Hsu 		 */
9151e78ac21SJeffrey Hsu 		int newlen;
9161e78ac21SJeffrey Hsu 
9179c0f6aa7SHans Petter Selasky 		off = MIN(mtu, m0->m_pkthdr.len);
9181e78ac21SJeffrey Hsu 
9191e78ac21SJeffrey Hsu 		/*
9201e78ac21SJeffrey Hsu 		 * firstlen (off - hlen) must be aligned on an
9211e78ac21SJeffrey Hsu 		 * 8-byte boundary
9221e78ac21SJeffrey Hsu 		 */
9231e78ac21SJeffrey Hsu 		if (off < hlen)
9241e78ac21SJeffrey Hsu 			goto smart_frag_failure;
9251e78ac21SJeffrey Hsu 		off = ((off - hlen) & ~7) + hlen;
9261e78ac21SJeffrey Hsu 		newlen = (~PAGE_MASK) & mtu;
9271e78ac21SJeffrey Hsu 		if ((newlen + sizeof (struct ip)) > mtu) {
9281e78ac21SJeffrey Hsu 			/* we failed, go back the default */
9291e78ac21SJeffrey Hsu smart_frag_failure:
9301e78ac21SJeffrey Hsu 			newlen = len;
9311e78ac21SJeffrey Hsu 			off = hlen + len;
9321e78ac21SJeffrey Hsu 		}
9331e78ac21SJeffrey Hsu 		len = newlen;
9341e78ac21SJeffrey Hsu 
9351e78ac21SJeffrey Hsu 	} else {
9361e78ac21SJeffrey Hsu 		off = hlen + len;
9371e78ac21SJeffrey Hsu 	}
9381e78ac21SJeffrey Hsu 
9391e78ac21SJeffrey Hsu 	firstlen = off - hlen;
9401e78ac21SJeffrey Hsu 	mnext = &m0->m_nextpkt;		/* pointer to next packet */
9411e78ac21SJeffrey Hsu 
9421e78ac21SJeffrey Hsu 	/*
9431e78ac21SJeffrey Hsu 	 * Loop through length of segment after first fragment,
9441e78ac21SJeffrey Hsu 	 * make new header and copy data of each part and link onto chain.
9451e78ac21SJeffrey Hsu 	 * Here, m0 is the original packet, m is the fragment being created.
9461e78ac21SJeffrey Hsu 	 * The fragments are linked off the m_nextpkt of the original
9471e78ac21SJeffrey Hsu 	 * packet, which after processing serves as the first fragment.
9481e78ac21SJeffrey Hsu 	 */
94921d172a3SGleb Smirnoff 	for (nfrags = 1; off < ip_len; off += len, nfrags++) {
9501e78ac21SJeffrey Hsu 		struct ip *mhip;	/* ip header on the fragment */
9511e78ac21SJeffrey Hsu 		struct mbuf *m;
9521e78ac21SJeffrey Hsu 		int mhlen = sizeof (struct ip);
9531e78ac21SJeffrey Hsu 
954dc4ad05eSGleb Smirnoff 		m = m_gethdr(M_NOWAIT, MT_DATA);
9550b17fba7SAndre Oppermann 		if (m == NULL) {
9561e78ac21SJeffrey Hsu 			error = ENOBUFS;
95786425c62SRobert Watson 			IPSTAT_INC(ips_odropped);
9581e78ac21SJeffrey Hsu 			goto done;
9591e78ac21SJeffrey Hsu 		}
960c4c4346fSHans Petter Selasky 		/*
961c4c4346fSHans Petter Selasky 		 * Make sure the complete packet header gets copied
962c4c4346fSHans Petter Selasky 		 * from the originating mbuf to the newly created
963c4c4346fSHans Petter Selasky 		 * mbuf. This also ensures that existing firewall
964c4c4346fSHans Petter Selasky 		 * classification(s), VLAN tags and so on get copied
965c4c4346fSHans Petter Selasky 		 * to the resulting fragmented packet(s):
966c4c4346fSHans Petter Selasky 		 */
967c4c4346fSHans Petter Selasky 		if (m_dup_pkthdr(m, m0, M_NOWAIT) == 0) {
968c4c4346fSHans Petter Selasky 			m_free(m);
969c4c4346fSHans Petter Selasky 			error = ENOBUFS;
970c4c4346fSHans Petter Selasky 			IPSTAT_INC(ips_odropped);
971c4c4346fSHans Petter Selasky 			goto done;
972c4c4346fSHans Petter Selasky 		}
9731e78ac21SJeffrey Hsu 		/*
9741e78ac21SJeffrey Hsu 		 * In the first mbuf, leave room for the link header, then
9751e78ac21SJeffrey Hsu 		 * copy the original IP header including options. The payload
9768b889dbbSBruce M Simpson 		 * goes into an additional mbuf chain returned by m_copym().
9771e78ac21SJeffrey Hsu 		 */
9781e78ac21SJeffrey Hsu 		m->m_data += max_linkhdr;
9791e78ac21SJeffrey Hsu 		mhip = mtod(m, struct ip *);
9801e78ac21SJeffrey Hsu 		*mhip = *ip;
9811e78ac21SJeffrey Hsu 		if (hlen > sizeof (struct ip)) {
9821e78ac21SJeffrey Hsu 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
9831e78ac21SJeffrey Hsu 			mhip->ip_v = IPVERSION;
9841e78ac21SJeffrey Hsu 			mhip->ip_hl = mhlen >> 2;
9851e78ac21SJeffrey Hsu 		}
9861e78ac21SJeffrey Hsu 		m->m_len = mhlen;
98721d172a3SGleb Smirnoff 		/* XXX do we need to add ip_off below ? */
98821d172a3SGleb Smirnoff 		mhip->ip_off = ((off - hlen) >> 3) + ip_off;
989fb86dfcdSAndre Oppermann 		if (off + len >= ip_len)
99021d172a3SGleb Smirnoff 			len = ip_len - off;
991fb86dfcdSAndre Oppermann 		else
9921e78ac21SJeffrey Hsu 			mhip->ip_off |= IP_MF;
9931e78ac21SJeffrey Hsu 		mhip->ip_len = htons((u_short)(len + mhlen));
994eb1b1807SGleb Smirnoff 		m->m_next = m_copym(m0, off, len, M_NOWAIT);
9950b17fba7SAndre Oppermann 		if (m->m_next == NULL) {	/* copy failed */
9961e78ac21SJeffrey Hsu 			m_free(m);
9971e78ac21SJeffrey Hsu 			error = ENOBUFS;	/* ??? */
99886425c62SRobert Watson 			IPSTAT_INC(ips_odropped);
9991e78ac21SJeffrey Hsu 			goto done;
10001e78ac21SJeffrey Hsu 		}
10011e78ac21SJeffrey Hsu 		m->m_pkthdr.len = mhlen + len;
10021e78ac21SJeffrey Hsu #ifdef MAC
100330d239bcSRobert Watson 		mac_netinet_fragment(m0, m);
10041e78ac21SJeffrey Hsu #endif
10051e78ac21SJeffrey Hsu 		mhip->ip_off = htons(mhip->ip_off);
10061e78ac21SJeffrey Hsu 		mhip->ip_sum = 0;
1007078468edSGleb Smirnoff 		if (m->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
10081e78ac21SJeffrey Hsu 			mhip->ip_sum = in_cksum(m, mhlen);
1009078468edSGleb Smirnoff 			m->m_pkthdr.csum_flags &= ~CSUM_IP;
1010078468edSGleb Smirnoff 		}
10111e78ac21SJeffrey Hsu 		*mnext = m;
10121e78ac21SJeffrey Hsu 		mnext = &m->m_nextpkt;
10131e78ac21SJeffrey Hsu 	}
101486425c62SRobert Watson 	IPSTAT_ADD(ips_ofragments, nfrags);
10151e78ac21SJeffrey Hsu 
10161e78ac21SJeffrey Hsu 	/*
10171e78ac21SJeffrey Hsu 	 * Update first fragment by trimming what's been copied out
10181e78ac21SJeffrey Hsu 	 * and updating header.
10191e78ac21SJeffrey Hsu 	 */
102021d172a3SGleb Smirnoff 	m_adj(m0, hlen + firstlen - ip_len);
10211e78ac21SJeffrey Hsu 	m0->m_pkthdr.len = hlen + firstlen;
10221e78ac21SJeffrey Hsu 	ip->ip_len = htons((u_short)m0->m_pkthdr.len);
102321d172a3SGleb Smirnoff 	ip->ip_off = htons(ip_off | IP_MF);
10241e78ac21SJeffrey Hsu 	ip->ip_sum = 0;
1025078468edSGleb Smirnoff 	if (m0->m_pkthdr.csum_flags & CSUM_IP & ~if_hwassist_flags) {
10261e78ac21SJeffrey Hsu 		ip->ip_sum = in_cksum(m0, hlen);
1027078468edSGleb Smirnoff 		m0->m_pkthdr.csum_flags &= ~CSUM_IP;
1028078468edSGleb Smirnoff 	}
10291e78ac21SJeffrey Hsu 
10301e78ac21SJeffrey Hsu done:
10311e78ac21SJeffrey Hsu 	*m_frag = m0;
10321e78ac21SJeffrey Hsu 	return error;
10331e78ac21SJeffrey Hsu }
10341e78ac21SJeffrey Hsu 
10351c238475SJonathan Lemon void
1036db4f9cc7SJonathan Lemon in_delayed_cksum(struct mbuf *m)
1037db4f9cc7SJonathan Lemon {
1038db4f9cc7SJonathan Lemon 	struct ip *ip;
10391fdbfb90STom Jones 	struct udphdr *uh;
10401fdbfb90STom Jones 	uint16_t cklen, csum, offset;
1041db4f9cc7SJonathan Lemon 
1042db4f9cc7SJonathan Lemon 	ip = mtod(m, struct ip *);
104353be11f6SPoul-Henning Kamp 	offset = ip->ip_hl << 2 ;
10441fdbfb90STom Jones 
10451fdbfb90STom Jones 	if (m->m_pkthdr.csum_flags & CSUM_UDP) {
10461fdbfb90STom Jones 		/* if udp header is not in the first mbuf copy udplen */
1047b6e87011STom Jones 		if (offset + sizeof(struct udphdr) > m->m_len) {
1048590d0a43SAndrey V. Elsukov 			m_copydata(m, offset + offsetof(struct udphdr,
1049590d0a43SAndrey V. Elsukov 			    uh_ulen), sizeof(cklen), (caddr_t)&cklen);
1050b6e87011STom Jones 			cklen = ntohs(cklen);
1051b6e87011STom Jones 		} else {
1052590d0a43SAndrey V. Elsukov 			uh = (struct udphdr *)mtodo(m, offset);
10531fdbfb90STom Jones 			cklen = ntohs(uh->uh_ulen);
10541fdbfb90STom Jones 		}
10551fdbfb90STom Jones 		csum = in_cksum_skip(m, cklen + offset, offset);
10561fdbfb90STom Jones 		if (csum == 0)
1057206a3274SRuslan Ermilov 			csum = 0xffff;
10581fdbfb90STom Jones 	} else {
10591fdbfb90STom Jones 		cklen = ntohs(ip->ip_len);
10601fdbfb90STom Jones 		csum = in_cksum_skip(m, cklen, offset);
10611fdbfb90STom Jones 	}
1062db4f9cc7SJonathan Lemon 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
1063db4f9cc7SJonathan Lemon 
1064590d0a43SAndrey V. Elsukov 	if (offset + sizeof(csum) > m->m_len)
1065590d0a43SAndrey V. Elsukov 		m_copyback(m, offset, sizeof(csum), (caddr_t)&csum);
1066590d0a43SAndrey V. Elsukov 	else
1067590d0a43SAndrey V. Elsukov 		*(u_short *)mtodo(m, offset) = csum;
1068db4f9cc7SJonathan Lemon }
1069db4f9cc7SJonathan Lemon 
1070df8bae1dSRodney W. Grimes /*
1071df8bae1dSRodney W. Grimes  * IP socket option processing.
1072df8bae1dSRodney W. Grimes  */
1073df8bae1dSRodney W. Grimes int
1074f2565d68SRobert Watson ip_ctloutput(struct socket *so, struct sockopt *sopt)
1075df8bae1dSRodney W. Grimes {
1076cfe8b629SGarrett Wollman 	struct inpcb *inp = sotoinpcb(so);
1077cfe8b629SGarrett Wollman 	int	error, optval;
1078dc847eb6SAdrian Chadd #ifdef	RSS
1079dc847eb6SAdrian Chadd 	uint32_t rss_bucket;
1080dc847eb6SAdrian Chadd 	int retval;
1081dc847eb6SAdrian Chadd #endif
1082df8bae1dSRodney W. Grimes 
1083cfe8b629SGarrett Wollman 	error = optval = 0;
1084cfe8b629SGarrett Wollman 	if (sopt->sopt_level != IPPROTO_IP) {
1085fc06cd42SMikolaj Golub 		error = EINVAL;
1086fc06cd42SMikolaj Golub 
1087fc06cd42SMikolaj Golub 		if (sopt->sopt_level == SOL_SOCKET &&
1088fc06cd42SMikolaj Golub 		    sopt->sopt_dir == SOPT_SET) {
1089fc06cd42SMikolaj Golub 			switch (sopt->sopt_name) {
1090fc06cd42SMikolaj Golub 			case SO_SETFIB:
1091fc06cd42SMikolaj Golub 				INP_WLOCK(inp);
1092fc06cd42SMikolaj Golub 				inp->inp_inc.inc_fibnum = so->so_fibnum;
1093fc06cd42SMikolaj Golub 				INP_WUNLOCK(inp);
1094fc06cd42SMikolaj Golub 				error = 0;
1095fc06cd42SMikolaj Golub 				break;
1096f3e7afe2SHans Petter Selasky 			case SO_MAX_PACING_RATE:
1097f3e7afe2SHans Petter Selasky #ifdef RATELIMIT
1098f3e7afe2SHans Petter Selasky 				INP_WLOCK(inp);
1099f3e7afe2SHans Petter Selasky 				inp->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
1100f3e7afe2SHans Petter Selasky 				INP_WUNLOCK(inp);
1101f3e7afe2SHans Petter Selasky 				error = 0;
1102f3e7afe2SHans Petter Selasky #else
1103f3e7afe2SHans Petter Selasky 				error = EOPNOTSUPP;
1104f3e7afe2SHans Petter Selasky #endif
1105f3e7afe2SHans Petter Selasky 				break;
1106fc06cd42SMikolaj Golub 			default:
1107fc06cd42SMikolaj Golub 				break;
1108fc06cd42SMikolaj Golub 			}
1109fc06cd42SMikolaj Golub 		}
1110fc06cd42SMikolaj Golub 		return (error);
1111cfe8b629SGarrett Wollman 	}
1112df8bae1dSRodney W. Grimes 
1113cfe8b629SGarrett Wollman 	switch (sopt->sopt_dir) {
1114cfe8b629SGarrett Wollman 	case SOPT_SET:
1115cfe8b629SGarrett Wollman 		switch (sopt->sopt_name) {
1116df8bae1dSRodney W. Grimes 		case IP_OPTIONS:
1117df8bae1dSRodney W. Grimes #ifdef notyet
1118df8bae1dSRodney W. Grimes 		case IP_RETOPTS:
1119df8bae1dSRodney W. Grimes #endif
1120cfe8b629SGarrett Wollman 		{
1121cfe8b629SGarrett Wollman 			struct mbuf *m;
1122cfe8b629SGarrett Wollman 			if (sopt->sopt_valsize > MLEN) {
1123cfe8b629SGarrett Wollman 				error = EMSGSIZE;
1124cfe8b629SGarrett Wollman 				break;
1125cfe8b629SGarrett Wollman 			}
1126dc4ad05eSGleb Smirnoff 			m = m_get(sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA);
11270b17fba7SAndre Oppermann 			if (m == NULL) {
1128cfe8b629SGarrett Wollman 				error = ENOBUFS;
1129cfe8b629SGarrett Wollman 				break;
1130cfe8b629SGarrett Wollman 			}
1131cfe8b629SGarrett Wollman 			m->m_len = sopt->sopt_valsize;
1132cfe8b629SGarrett Wollman 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1133cfe8b629SGarrett Wollman 					    m->m_len);
1134635354c4SMaxim Konovalov 			if (error) {
1135635354c4SMaxim Konovalov 				m_free(m);
1136635354c4SMaxim Konovalov 				break;
1137635354c4SMaxim Konovalov 			}
11388501a69cSRobert Watson 			INP_WLOCK(inp);
1139993d9505SRobert Watson 			error = ip_pcbopts(inp, sopt->sopt_name, m);
11408501a69cSRobert Watson 			INP_WUNLOCK(inp);
1141993d9505SRobert Watson 			return (error);
1142cfe8b629SGarrett Wollman 		}
1143df8bae1dSRodney W. Grimes 
1144f44270e7SPawel Jakub Dawidek 		case IP_BINDANY:
1145f44270e7SPawel Jakub Dawidek 			if (sopt->sopt_td != NULL) {
1146f44270e7SPawel Jakub Dawidek 				error = priv_check(sopt->sopt_td,
1147f44270e7SPawel Jakub Dawidek 				    PRIV_NETINET_BINDANY);
1148f44270e7SPawel Jakub Dawidek 				if (error)
1149be9347e3SAdrian Chadd 					break;
1150be9347e3SAdrian Chadd 			}
1151cef27294SAdrian Chadd 			/* FALLTHROUGH */
1152df8bae1dSRodney W. Grimes 		case IP_TOS:
1153df8bae1dSRodney W. Grimes 		case IP_TTL:
1154936cd18dSAndre Oppermann 		case IP_MINTTL:
1155df8bae1dSRodney W. Grimes 		case IP_RECVOPTS:
1156df8bae1dSRodney W. Grimes 		case IP_RECVRETOPTS:
1157dce33a45SErmal Luçi 		case IP_ORIGDSTADDR:
1158df8bae1dSRodney W. Grimes 		case IP_RECVDSTADDR:
11594957466bSMatthew N. Dodd 		case IP_RECVTTL:
116082c23ebaSBill Fenner 		case IP_RECVIF:
11618afa2304SBruce M Simpson 		case IP_ONESBCAST:
1162b2828ad2SAndre Oppermann 		case IP_DONTFRAG:
11633cca425bSMichael Tuexen 		case IP_RECVTOS:
11649d3ddf43SAdrian Chadd 		case IP_RECVFLOWID:
11659d3ddf43SAdrian Chadd #ifdef	RSS
11669d3ddf43SAdrian Chadd 		case IP_RECVRSSBUCKETID:
11679d3ddf43SAdrian Chadd #endif
1168868aabb4SRichard Scheffenegger 		case IP_VLAN_PCP:
1169cfe8b629SGarrett Wollman 			error = sooptcopyin(sopt, &optval, sizeof optval,
1170cfe8b629SGarrett Wollman 					    sizeof optval);
1171cfe8b629SGarrett Wollman 			if (error)
1172cfe8b629SGarrett Wollman 				break;
1173df8bae1dSRodney W. Grimes 
1174cfe8b629SGarrett Wollman 			switch (sopt->sopt_name) {
1175df8bae1dSRodney W. Grimes 			case IP_TOS:
1176ca98b82cSDavid Greenman 				inp->inp_ip_tos = optval;
1177df8bae1dSRodney W. Grimes 				break;
1178df8bae1dSRodney W. Grimes 
1179df8bae1dSRodney W. Grimes 			case IP_TTL:
1180ca98b82cSDavid Greenman 				inp->inp_ip_ttl = optval;
1181df8bae1dSRodney W. Grimes 				break;
1182936cd18dSAndre Oppermann 
1183936cd18dSAndre Oppermann 			case IP_MINTTL:
1184a603c811SRobert Watson 				if (optval >= 0 && optval <= MAXTTL)
1185936cd18dSAndre Oppermann 					inp->inp_ip_minttl = optval;
1186936cd18dSAndre Oppermann 				else
1187936cd18dSAndre Oppermann 					error = EINVAL;
1188936cd18dSAndre Oppermann 				break;
1189936cd18dSAndre Oppermann 
1190a138d217SRobert Watson #define	OPTSET(bit) do {						\
11918501a69cSRobert Watson 	INP_WLOCK(inp);							\
1192df8bae1dSRodney W. Grimes 	if (optval)							\
1193df8bae1dSRodney W. Grimes 		inp->inp_flags |= bit;					\
1194df8bae1dSRodney W. Grimes 	else								\
1195a138d217SRobert Watson 		inp->inp_flags &= ~bit;					\
11968501a69cSRobert Watson 	INP_WUNLOCK(inp);						\
1197a138d217SRobert Watson } while (0)
1198df8bae1dSRodney W. Grimes 
11990a100a6fSAdrian Chadd #define	OPTSET2(bit, val) do {						\
12000a100a6fSAdrian Chadd 	INP_WLOCK(inp);							\
12010a100a6fSAdrian Chadd 	if (val)							\
12020a100a6fSAdrian Chadd 		inp->inp_flags2 |= bit;					\
12030a100a6fSAdrian Chadd 	else								\
12040a100a6fSAdrian Chadd 		inp->inp_flags2 &= ~bit;				\
12050a100a6fSAdrian Chadd 	INP_WUNLOCK(inp);						\
12060a100a6fSAdrian Chadd } while (0)
12070a100a6fSAdrian Chadd 
1208df8bae1dSRodney W. Grimes 			case IP_RECVOPTS:
1209df8bae1dSRodney W. Grimes 				OPTSET(INP_RECVOPTS);
1210df8bae1dSRodney W. Grimes 				break;
1211df8bae1dSRodney W. Grimes 
1212df8bae1dSRodney W. Grimes 			case IP_RECVRETOPTS:
1213df8bae1dSRodney W. Grimes 				OPTSET(INP_RECVRETOPTS);
1214df8bae1dSRodney W. Grimes 				break;
1215df8bae1dSRodney W. Grimes 
1216df8bae1dSRodney W. Grimes 			case IP_RECVDSTADDR:
1217df8bae1dSRodney W. Grimes 				OPTSET(INP_RECVDSTADDR);
1218df8bae1dSRodney W. Grimes 				break;
121982c23ebaSBill Fenner 
1220dce33a45SErmal Luçi 			case IP_ORIGDSTADDR:
1221dce33a45SErmal Luçi 				OPTSET2(INP_ORIGDSTADDR, optval);
1222dce33a45SErmal Luçi 				break;
1223dce33a45SErmal Luçi 
12244957466bSMatthew N. Dodd 			case IP_RECVTTL:
12254957466bSMatthew N. Dodd 				OPTSET(INP_RECVTTL);
12264957466bSMatthew N. Dodd 				break;
12274957466bSMatthew N. Dodd 
122882c23ebaSBill Fenner 			case IP_RECVIF:
122982c23ebaSBill Fenner 				OPTSET(INP_RECVIF);
123082c23ebaSBill Fenner 				break;
12316a800098SYoshinobu Inoue 
12328afa2304SBruce M Simpson 			case IP_ONESBCAST:
12338afa2304SBruce M Simpson 				OPTSET(INP_ONESBCAST);
12348afa2304SBruce M Simpson 				break;
1235b2828ad2SAndre Oppermann 			case IP_DONTFRAG:
1236b2828ad2SAndre Oppermann 				OPTSET(INP_DONTFRAG);
1237b2828ad2SAndre Oppermann 				break;
1238f44270e7SPawel Jakub Dawidek 			case IP_BINDANY:
1239f44270e7SPawel Jakub Dawidek 				OPTSET(INP_BINDANY);
1240be9347e3SAdrian Chadd 				break;
12413cca425bSMichael Tuexen 			case IP_RECVTOS:
12423cca425bSMichael Tuexen 				OPTSET(INP_RECVTOS);
12433cca425bSMichael Tuexen 				break;
12449d3ddf43SAdrian Chadd 			case IP_RECVFLOWID:
12459d3ddf43SAdrian Chadd 				OPTSET2(INP_RECVFLOWID, optval);
12469d3ddf43SAdrian Chadd 				break;
12470a100a6fSAdrian Chadd #ifdef RSS
12489d3ddf43SAdrian Chadd 			case IP_RECVRSSBUCKETID:
12499d3ddf43SAdrian Chadd 				OPTSET2(INP_RECVRSSBUCKETID, optval);
12509d3ddf43SAdrian Chadd 				break;
12510a100a6fSAdrian Chadd #endif
1252868aabb4SRichard Scheffenegger 			case IP_VLAN_PCP:
1253868aabb4SRichard Scheffenegger 				if ((optval >= -1) && (optval <=
1254868aabb4SRichard Scheffenegger 				    (INP_2PCP_MASK >> INP_2PCP_SHIFT))) {
1255868aabb4SRichard Scheffenegger 					if (optval == -1) {
1256868aabb4SRichard Scheffenegger 						INP_WLOCK(inp);
1257868aabb4SRichard Scheffenegger 						inp->inp_flags2 &=
1258868aabb4SRichard Scheffenegger 						    ~(INP_2PCP_SET |
1259868aabb4SRichard Scheffenegger 						      INP_2PCP_MASK);
1260868aabb4SRichard Scheffenegger 						INP_WUNLOCK(inp);
1261868aabb4SRichard Scheffenegger 					} else {
1262868aabb4SRichard Scheffenegger 						INP_WLOCK(inp);
1263868aabb4SRichard Scheffenegger 						inp->inp_flags2 |=
1264868aabb4SRichard Scheffenegger 						    INP_2PCP_SET;
1265868aabb4SRichard Scheffenegger 						inp->inp_flags2 &=
1266868aabb4SRichard Scheffenegger 						    ~INP_2PCP_MASK;
1267868aabb4SRichard Scheffenegger 						inp->inp_flags2 |=
1268868aabb4SRichard Scheffenegger 						    optval << INP_2PCP_SHIFT;
1269868aabb4SRichard Scheffenegger 						INP_WUNLOCK(inp);
1270868aabb4SRichard Scheffenegger 					}
1271868aabb4SRichard Scheffenegger 				} else
1272868aabb4SRichard Scheffenegger 					error = EINVAL;
1273868aabb4SRichard Scheffenegger 				break;
1274df8bae1dSRodney W. Grimes 			}
1275df8bae1dSRodney W. Grimes 			break;
1276df8bae1dSRodney W. Grimes #undef OPTSET
12770a100a6fSAdrian Chadd #undef OPTSET2
1278df8bae1dSRodney W. Grimes 
127971498f30SBruce M Simpson 		/*
128071498f30SBruce M Simpson 		 * Multicast socket options are processed by the in_mcast
128171498f30SBruce M Simpson 		 * module.
128271498f30SBruce M Simpson 		 */
1283df8bae1dSRodney W. Grimes 		case IP_MULTICAST_IF:
1284f0068c4aSGarrett Wollman 		case IP_MULTICAST_VIF:
1285df8bae1dSRodney W. Grimes 		case IP_MULTICAST_TTL:
1286df8bae1dSRodney W. Grimes 		case IP_MULTICAST_LOOP:
1287df8bae1dSRodney W. Grimes 		case IP_ADD_MEMBERSHIP:
1288df8bae1dSRodney W. Grimes 		case IP_DROP_MEMBERSHIP:
128971498f30SBruce M Simpson 		case IP_ADD_SOURCE_MEMBERSHIP:
129071498f30SBruce M Simpson 		case IP_DROP_SOURCE_MEMBERSHIP:
129171498f30SBruce M Simpson 		case IP_BLOCK_SOURCE:
129271498f30SBruce M Simpson 		case IP_UNBLOCK_SOURCE:
129371498f30SBruce M Simpson 		case IP_MSFILTER:
129471498f30SBruce M Simpson 		case MCAST_JOIN_GROUP:
129571498f30SBruce M Simpson 		case MCAST_LEAVE_GROUP:
129671498f30SBruce M Simpson 		case MCAST_JOIN_SOURCE_GROUP:
129771498f30SBruce M Simpson 		case MCAST_LEAVE_SOURCE_GROUP:
129871498f30SBruce M Simpson 		case MCAST_BLOCK_SOURCE:
129971498f30SBruce M Simpson 		case MCAST_UNBLOCK_SOURCE:
130071498f30SBruce M Simpson 			error = inp_setmoptions(inp, sopt);
1301df8bae1dSRodney W. Grimes 			break;
1302df8bae1dSRodney W. Grimes 
130333b3ac06SPeter Wemm 		case IP_PORTRANGE:
1304cfe8b629SGarrett Wollman 			error = sooptcopyin(sopt, &optval, sizeof optval,
1305cfe8b629SGarrett Wollman 					    sizeof optval);
1306cfe8b629SGarrett Wollman 			if (error)
1307cfe8b629SGarrett Wollman 				break;
130833b3ac06SPeter Wemm 
13098501a69cSRobert Watson 			INP_WLOCK(inp);
131033b3ac06SPeter Wemm 			switch (optval) {
131133b3ac06SPeter Wemm 			case IP_PORTRANGE_DEFAULT:
131233b3ac06SPeter Wemm 				inp->inp_flags &= ~(INP_LOWPORT);
131333b3ac06SPeter Wemm 				inp->inp_flags &= ~(INP_HIGHPORT);
131433b3ac06SPeter Wemm 				break;
131533b3ac06SPeter Wemm 
131633b3ac06SPeter Wemm 			case IP_PORTRANGE_HIGH:
131733b3ac06SPeter Wemm 				inp->inp_flags &= ~(INP_LOWPORT);
131833b3ac06SPeter Wemm 				inp->inp_flags |= INP_HIGHPORT;
131933b3ac06SPeter Wemm 				break;
132033b3ac06SPeter Wemm 
132133b3ac06SPeter Wemm 			case IP_PORTRANGE_LOW:
132233b3ac06SPeter Wemm 				inp->inp_flags &= ~(INP_HIGHPORT);
132333b3ac06SPeter Wemm 				inp->inp_flags |= INP_LOWPORT;
132433b3ac06SPeter Wemm 				break;
132533b3ac06SPeter Wemm 
132633b3ac06SPeter Wemm 			default:
132733b3ac06SPeter Wemm 				error = EINVAL;
132833b3ac06SPeter Wemm 				break;
132933b3ac06SPeter Wemm 			}
13308501a69cSRobert Watson 			INP_WUNLOCK(inp);
1331ce8c72b1SPeter Wemm 			break;
133233b3ac06SPeter Wemm 
1333fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
13346a800098SYoshinobu Inoue 		case IP_IPSEC_POLICY:
1335fcf59617SAndrey V. Elsukov 			if (IPSEC_ENABLED(ipv4)) {
1336fcf59617SAndrey V. Elsukov 				error = IPSEC_PCBCTL(ipv4, inp, sopt);
13376a800098SYoshinobu Inoue 				break;
13386a800098SYoshinobu Inoue 			}
1339fcf59617SAndrey V. Elsukov 			/* FALLTHROUGH */
1340b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
13416a800098SYoshinobu Inoue 
1342df8bae1dSRodney W. Grimes 		default:
1343df8bae1dSRodney W. Grimes 			error = ENOPROTOOPT;
1344df8bae1dSRodney W. Grimes 			break;
1345df8bae1dSRodney W. Grimes 		}
1346df8bae1dSRodney W. Grimes 		break;
1347df8bae1dSRodney W. Grimes 
1348cfe8b629SGarrett Wollman 	case SOPT_GET:
1349cfe8b629SGarrett Wollman 		switch (sopt->sopt_name) {
1350df8bae1dSRodney W. Grimes 		case IP_OPTIONS:
1351df8bae1dSRodney W. Grimes 		case IP_RETOPTS:
1352e5e3e746SMatt Macy 			INP_RLOCK(inp);
1353e5e3e746SMatt Macy 			if (inp->inp_options) {
1354e5e3e746SMatt Macy 				struct mbuf *options;
1355e5e3e746SMatt Macy 
135610731c54SMichael Tuexen 				options = m_copym(inp->inp_options, 0,
135710731c54SMichael Tuexen 				    M_COPYALL, M_NOWAIT);
1358e5e3e746SMatt Macy 				INP_RUNLOCK(inp);
1359e5e3e746SMatt Macy 				if (options != NULL) {
1360cfe8b629SGarrett Wollman 					error = sooptcopyout(sopt,
1361e5e3e746SMatt Macy 							     mtod(options, char *),
1362e5e3e746SMatt Macy 							     options->m_len);
1363e5e3e746SMatt Macy 					m_freem(options);
1364e5e3e746SMatt Macy 				} else
1365e5e3e746SMatt Macy 					error = ENOMEM;
1366e5e3e746SMatt Macy 			} else {
1367e5e3e746SMatt Macy 				INP_RUNLOCK(inp);
1368cfe8b629SGarrett Wollman 				sopt->sopt_valsize = 0;
1369e5e3e746SMatt Macy 			}
1370df8bae1dSRodney W. Grimes 			break;
1371df8bae1dSRodney W. Grimes 
1372df8bae1dSRodney W. Grimes 		case IP_TOS:
1373df8bae1dSRodney W. Grimes 		case IP_TTL:
1374936cd18dSAndre Oppermann 		case IP_MINTTL:
1375df8bae1dSRodney W. Grimes 		case IP_RECVOPTS:
1376df8bae1dSRodney W. Grimes 		case IP_RECVRETOPTS:
1377dce33a45SErmal Luçi 		case IP_ORIGDSTADDR:
1378df8bae1dSRodney W. Grimes 		case IP_RECVDSTADDR:
13794957466bSMatthew N. Dodd 		case IP_RECVTTL:
138082c23ebaSBill Fenner 		case IP_RECVIF:
1381cfe8b629SGarrett Wollman 		case IP_PORTRANGE:
13828afa2304SBruce M Simpson 		case IP_ONESBCAST:
1383b2828ad2SAndre Oppermann 		case IP_DONTFRAG:
13845f6bf451SAttilio Rao 		case IP_BINDANY:
13853cca425bSMichael Tuexen 		case IP_RECVTOS:
13869c423972SAdrian Chadd 		case IP_FLOWID:
13879c423972SAdrian Chadd 		case IP_FLOWTYPE:
13889d3ddf43SAdrian Chadd 		case IP_RECVFLOWID:
13890a100a6fSAdrian Chadd #ifdef	RSS
13900a100a6fSAdrian Chadd 		case IP_RSSBUCKETID:
13919d3ddf43SAdrian Chadd 		case IP_RECVRSSBUCKETID:
13920a100a6fSAdrian Chadd #endif
1393868aabb4SRichard Scheffenegger 		case IP_VLAN_PCP:
1394cfe8b629SGarrett Wollman 			switch (sopt->sopt_name) {
1395df8bae1dSRodney W. Grimes 			case IP_TOS:
1396ca98b82cSDavid Greenman 				optval = inp->inp_ip_tos;
1397df8bae1dSRodney W. Grimes 				break;
1398df8bae1dSRodney W. Grimes 
1399df8bae1dSRodney W. Grimes 			case IP_TTL:
1400ca98b82cSDavid Greenman 				optval = inp->inp_ip_ttl;
1401df8bae1dSRodney W. Grimes 				break;
1402df8bae1dSRodney W. Grimes 
1403936cd18dSAndre Oppermann 			case IP_MINTTL:
1404936cd18dSAndre Oppermann 				optval = inp->inp_ip_minttl;
1405936cd18dSAndre Oppermann 				break;
1406936cd18dSAndre Oppermann 
1407df8bae1dSRodney W. Grimes #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
14080a100a6fSAdrian Chadd #define	OPTBIT2(bit)	(inp->inp_flags2 & bit ? 1 : 0)
1409df8bae1dSRodney W. Grimes 
1410df8bae1dSRodney W. Grimes 			case IP_RECVOPTS:
1411df8bae1dSRodney W. Grimes 				optval = OPTBIT(INP_RECVOPTS);
1412df8bae1dSRodney W. Grimes 				break;
1413df8bae1dSRodney W. Grimes 
1414df8bae1dSRodney W. Grimes 			case IP_RECVRETOPTS:
1415df8bae1dSRodney W. Grimes 				optval = OPTBIT(INP_RECVRETOPTS);
1416df8bae1dSRodney W. Grimes 				break;
1417df8bae1dSRodney W. Grimes 
1418df8bae1dSRodney W. Grimes 			case IP_RECVDSTADDR:
1419df8bae1dSRodney W. Grimes 				optval = OPTBIT(INP_RECVDSTADDR);
1420df8bae1dSRodney W. Grimes 				break;
142182c23ebaSBill Fenner 
1422dce33a45SErmal Luçi 			case IP_ORIGDSTADDR:
1423dce33a45SErmal Luçi 				optval = OPTBIT2(INP_ORIGDSTADDR);
1424dce33a45SErmal Luçi 				break;
1425dce33a45SErmal Luçi 
14264957466bSMatthew N. Dodd 			case IP_RECVTTL:
14274957466bSMatthew N. Dodd 				optval = OPTBIT(INP_RECVTTL);
14284957466bSMatthew N. Dodd 				break;
14294957466bSMatthew N. Dodd 
143082c23ebaSBill Fenner 			case IP_RECVIF:
143182c23ebaSBill Fenner 				optval = OPTBIT(INP_RECVIF);
143282c23ebaSBill Fenner 				break;
1433cfe8b629SGarrett Wollman 
1434cfe8b629SGarrett Wollman 			case IP_PORTRANGE:
1435cfe8b629SGarrett Wollman 				if (inp->inp_flags & INP_HIGHPORT)
1436cfe8b629SGarrett Wollman 					optval = IP_PORTRANGE_HIGH;
1437cfe8b629SGarrett Wollman 				else if (inp->inp_flags & INP_LOWPORT)
1438cfe8b629SGarrett Wollman 					optval = IP_PORTRANGE_LOW;
1439cfe8b629SGarrett Wollman 				else
1440cfe8b629SGarrett Wollman 					optval = 0;
1441cfe8b629SGarrett Wollman 				break;
14426a800098SYoshinobu Inoue 
14438afa2304SBruce M Simpson 			case IP_ONESBCAST:
14448afa2304SBruce M Simpson 				optval = OPTBIT(INP_ONESBCAST);
14458afa2304SBruce M Simpson 				break;
1446b2828ad2SAndre Oppermann 			case IP_DONTFRAG:
1447b2828ad2SAndre Oppermann 				optval = OPTBIT(INP_DONTFRAG);
1448b2828ad2SAndre Oppermann 				break;
14495f6bf451SAttilio Rao 			case IP_BINDANY:
14505f6bf451SAttilio Rao 				optval = OPTBIT(INP_BINDANY);
14515f6bf451SAttilio Rao 				break;
14523cca425bSMichael Tuexen 			case IP_RECVTOS:
14533cca425bSMichael Tuexen 				optval = OPTBIT(INP_RECVTOS);
14543cca425bSMichael Tuexen 				break;
14559c423972SAdrian Chadd 			case IP_FLOWID:
14569c423972SAdrian Chadd 				optval = inp->inp_flowid;
14579c423972SAdrian Chadd 				break;
14589c423972SAdrian Chadd 			case IP_FLOWTYPE:
14599c423972SAdrian Chadd 				optval = inp->inp_flowtype;
14609c423972SAdrian Chadd 				break;
14619d3ddf43SAdrian Chadd 			case IP_RECVFLOWID:
14629d3ddf43SAdrian Chadd 				optval = OPTBIT2(INP_RECVFLOWID);
14639d3ddf43SAdrian Chadd 				break;
14649c423972SAdrian Chadd #ifdef	RSS
14657847796aSAdrian Chadd 			case IP_RSSBUCKETID:
14667847796aSAdrian Chadd 				retval = rss_hash2bucket(inp->inp_flowid,
14677847796aSAdrian Chadd 				    inp->inp_flowtype,
14687847796aSAdrian Chadd 				    &rss_bucket);
14697847796aSAdrian Chadd 				if (retval == 0)
14707847796aSAdrian Chadd 					optval = rss_bucket;
14717847796aSAdrian Chadd 				else
14727847796aSAdrian Chadd 					error = EINVAL;
14739c423972SAdrian Chadd 				break;
14749d3ddf43SAdrian Chadd 			case IP_RECVRSSBUCKETID:
14759d3ddf43SAdrian Chadd 				optval = OPTBIT2(INP_RECVRSSBUCKETID);
14769d3ddf43SAdrian Chadd 				break;
14779c423972SAdrian Chadd #endif
1478868aabb4SRichard Scheffenegger 			case IP_VLAN_PCP:
1479868aabb4SRichard Scheffenegger 				if (OPTBIT2(INP_2PCP_SET)) {
1480868aabb4SRichard Scheffenegger 					optval = (inp->inp_flags2 &
1481868aabb4SRichard Scheffenegger 					    INP_2PCP_MASK) >> INP_2PCP_SHIFT;
1482868aabb4SRichard Scheffenegger 				} else {
1483868aabb4SRichard Scheffenegger 					optval = -1;
1484868aabb4SRichard Scheffenegger 				}
1485868aabb4SRichard Scheffenegger 				break;
1486df8bae1dSRodney W. Grimes 			}
1487cfe8b629SGarrett Wollman 			error = sooptcopyout(sopt, &optval, sizeof optval);
1488df8bae1dSRodney W. Grimes 			break;
1489df8bae1dSRodney W. Grimes 
149071498f30SBruce M Simpson 		/*
149171498f30SBruce M Simpson 		 * Multicast socket options are processed by the in_mcast
149271498f30SBruce M Simpson 		 * module.
149371498f30SBruce M Simpson 		 */
1494df8bae1dSRodney W. Grimes 		case IP_MULTICAST_IF:
1495f0068c4aSGarrett Wollman 		case IP_MULTICAST_VIF:
1496df8bae1dSRodney W. Grimes 		case IP_MULTICAST_TTL:
1497df8bae1dSRodney W. Grimes 		case IP_MULTICAST_LOOP:
149871498f30SBruce M Simpson 		case IP_MSFILTER:
149971498f30SBruce M Simpson 			error = inp_getmoptions(inp, sopt);
150033b3ac06SPeter Wemm 			break;
150133b3ac06SPeter Wemm 
1502fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
15036a800098SYoshinobu Inoue 		case IP_IPSEC_POLICY:
1504fcf59617SAndrey V. Elsukov 			if (IPSEC_ENABLED(ipv4)) {
1505fcf59617SAndrey V. Elsukov 				error = IPSEC_PCBCTL(ipv4, inp, sopt);
15066a800098SYoshinobu Inoue 				break;
15076a800098SYoshinobu Inoue 			}
1508fcf59617SAndrey V. Elsukov 			/* FALLTHROUGH */
1509b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
15106a800098SYoshinobu Inoue 
1511df8bae1dSRodney W. Grimes 		default:
1512df8bae1dSRodney W. Grimes 			error = ENOPROTOOPT;
1513df8bae1dSRodney W. Grimes 			break;
1514df8bae1dSRodney W. Grimes 		}
1515df8bae1dSRodney W. Grimes 		break;
1516df8bae1dSRodney W. Grimes 	}
1517df8bae1dSRodney W. Grimes 	return (error);
1518df8bae1dSRodney W. Grimes }
1519df8bae1dSRodney W. Grimes 
1520df8bae1dSRodney W. Grimes /*
1521df8bae1dSRodney W. Grimes  * Routine called from ip_output() to loop back a copy of an IP multicast
1522df8bae1dSRodney W. Grimes  * packet to the input queue of a specified interface.  Note that this
1523df8bae1dSRodney W. Grimes  * calls the output routine of the loopback "driver", but with an interface
1524f5fea3ddSPaul Traina  * pointer that might NOT be a loopback interface -- evil, but easier than
1525f5fea3ddSPaul Traina  * replicating that code here.
1526df8bae1dSRodney W. Grimes  */
1527df8bae1dSRodney W. Grimes static void
1528331dff07SAlexander V. Chernikov ip_mloopback(struct ifnet *ifp, const struct mbuf *m, int hlen)
1529df8bae1dSRodney W. Grimes {
1530331dff07SAlexander V. Chernikov 	struct ip *ip;
1531df8bae1dSRodney W. Grimes 	struct mbuf *copym;
1532df8bae1dSRodney W. Grimes 
1533e4762f75SGeorge V. Neville-Neil 	/*
1534e4762f75SGeorge V. Neville-Neil 	 * Make a deep copy of the packet because we're going to
1535e4762f75SGeorge V. Neville-Neil 	 * modify the pack in order to generate checksums.
1536e4762f75SGeorge V. Neville-Neil 	 */
1537eb1b1807SGleb Smirnoff 	copym = m_dup(m, M_NOWAIT);
1538f0cace5dSRobert Watson 	if (copym != NULL && (!M_WRITABLE(copym) || copym->m_len < hlen))
153986b1d6d2SBill Fenner 		copym = m_pullup(copym, hlen);
1540df8bae1dSRodney W. Grimes 	if (copym != NULL) {
1541390cdc6aSRuslan Ermilov 		/* If needed, compute the checksum and mark it as valid. */
1542390cdc6aSRuslan Ermilov 		if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1543390cdc6aSRuslan Ermilov 			in_delayed_cksum(copym);
1544390cdc6aSRuslan Ermilov 			copym->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
1545390cdc6aSRuslan Ermilov 			copym->m_pkthdr.csum_flags |=
1546390cdc6aSRuslan Ermilov 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1547390cdc6aSRuslan Ermilov 			copym->m_pkthdr.csum_data = 0xffff;
1548390cdc6aSRuslan Ermilov 		}
1549df8bae1dSRodney W. Grimes 		/*
1550df8bae1dSRodney W. Grimes 		 * We don't bother to fragment if the IP length is greater
1551df8bae1dSRodney W. Grimes 		 * than the interface's MTU.  Can this possibly matter?
1552df8bae1dSRodney W. Grimes 		 */
15538f134647SGleb Smirnoff 		ip = mtod(copym, struct ip *);
1554df8bae1dSRodney W. Grimes 		ip->ip_sum = 0;
155586b1d6d2SBill Fenner 		ip->ip_sum = in_cksum(copym, hlen);
1556331dff07SAlexander V. Chernikov 		if_simloop(ifp, copym, AF_INET, 0);
1557df8bae1dSRodney W. Grimes 	}
1558df8bae1dSRodney W. Grimes }
1559