1 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 2 3 /*- 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /* 32 * My grandfather said that there's a devil inside tunnelling technology... 33 * 34 * We have surprisingly many protocols that want packets with IP protocol 35 * #4 or #41. Here's a list of protocols that want protocol #41: 36 * RFC1933 configured tunnel 37 * RFC1933 automatic tunnel 38 * RFC2401 IPsec tunnel 39 * RFC2473 IPv6 generic packet tunnelling 40 * RFC2529 6over4 tunnel 41 * mobile-ip6 (uses RFC2473) 42 * RFC3056 6to4 tunnel 43 * isatap tunnel 44 * Here's a list of protocol that want protocol #4: 45 * RFC1853 IPv4-in-IPv4 tunnelling 46 * RFC2003 IPv4 encapsulation within IPv4 47 * RFC2344 reverse tunnelling for mobile-ip4 48 * RFC2401 IPsec tunnel 49 * Well, what can I say. They impose different en/decapsulation mechanism 50 * from each other, so they need separate protocol handler. The only one 51 * we can easily determine by protocol # is IPsec, which always has 52 * AH/ESP/IPComp header right after outer IP header. 53 * 54 * So, clearly good old protosw does not work for protocol #4 and #41. 55 * The code will let you match protocol via src/dst address pair. 56 */ 57 /* XXX is M_NETADDR correct? */ 58 59 #include <sys/cdefs.h> 60 __FBSDID("$FreeBSD$"); 61 62 #include "opt_mrouting.h" 63 #include "opt_inet.h" 64 #include "opt_inet6.h" 65 66 #include <sys/param.h> 67 #include <sys/systm.h> 68 #include <sys/socket.h> 69 #include <sys/sockio.h> 70 #include <sys/mbuf.h> 71 #include <sys/errno.h> 72 #include <sys/protosw.h> 73 #include <sys/queue.h> 74 75 #include <net/if.h> 76 #include <net/route.h> 77 78 #include <netinet/in.h> 79 #include <netinet/in_systm.h> 80 #include <netinet/ip.h> 81 #include <netinet/ip_var.h> 82 #include <netinet/ip_encap.h> 83 84 #ifdef INET6 85 #include <netinet/ip6.h> 86 #include <netinet6/ip6_var.h> 87 #endif 88 89 #include <machine/stdarg.h> 90 91 #include <sys/kernel.h> 92 #include <sys/malloc.h> 93 static MALLOC_DEFINE(M_NETADDR, "encap_export_host", "Export host address structure"); 94 95 static void encap_add(struct encaptab *); 96 static int mask_match(const struct encaptab *, const struct sockaddr *, 97 const struct sockaddr *); 98 static void encap_fillarg(struct mbuf *, void *); 99 100 /* 101 * All global variables in ip_encap.c are locked using encapmtx. 102 */ 103 static struct mtx encapmtx; 104 MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); 105 static LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(encaptab); 106 107 /* 108 * We currently keey encap_init() for source code compatibility reasons -- 109 * it's referenced by KAME pieces in netinet6. 110 */ 111 void 112 encap_init(void) 113 { 114 } 115 116 #ifdef INET 117 int 118 encap4_input(struct mbuf **mp, int *offp, int proto) 119 { 120 struct ip *ip; 121 struct mbuf *m; 122 struct sockaddr_in s, d; 123 const struct protosw *psw; 124 struct encaptab *ep, *match; 125 void *arg; 126 int matchprio, off, prio; 127 128 m = *mp; 129 off = *offp; 130 ip = mtod(m, struct ip *); 131 132 bzero(&s, sizeof(s)); 133 s.sin_family = AF_INET; 134 s.sin_len = sizeof(struct sockaddr_in); 135 s.sin_addr = ip->ip_src; 136 bzero(&d, sizeof(d)); 137 d.sin_family = AF_INET; 138 d.sin_len = sizeof(struct sockaddr_in); 139 d.sin_addr = ip->ip_dst; 140 141 arg = NULL; 142 psw = NULL; 143 match = NULL; 144 matchprio = 0; 145 mtx_lock(&encapmtx); 146 LIST_FOREACH(ep, &encaptab, chain) { 147 if (ep->af != AF_INET) 148 continue; 149 if (ep->proto >= 0 && ep->proto != proto) 150 continue; 151 if (ep->func) 152 prio = (*ep->func)(m, off, proto, ep->arg); 153 else { 154 /* 155 * it's inbound traffic, we need to match in reverse 156 * order 157 */ 158 prio = mask_match(ep, (struct sockaddr *)&d, 159 (struct sockaddr *)&s); 160 } 161 162 /* 163 * We prioritize the matches by using bit length of the 164 * matches. mask_match() and user-supplied matching function 165 * should return the bit length of the matches (for example, 166 * if both src/dst are matched for IPv4, 64 should be returned). 167 * 0 or negative return value means "it did not match". 168 * 169 * The question is, since we have two "mask" portion, we 170 * cannot really define total order between entries. 171 * For example, which of these should be preferred? 172 * mask_match() returns 48 (32 + 16) for both of them. 173 * src=3ffe::/16, dst=3ffe:501::/32 174 * src=3ffe:501::/32, dst=3ffe::/16 175 * 176 * We need to loop through all the possible candidates 177 * to get the best match - the search takes O(n) for 178 * n attachments (i.e. interfaces). 179 */ 180 if (prio <= 0) 181 continue; 182 if (prio > matchprio) { 183 matchprio = prio; 184 match = ep; 185 } 186 } 187 if (match != NULL) { 188 psw = match->psw; 189 arg = match->arg; 190 } 191 mtx_unlock(&encapmtx); 192 193 if (match != NULL) { 194 /* found a match, "match" has the best one */ 195 if (psw != NULL && psw->pr_input != NULL) { 196 encap_fillarg(m, arg); 197 (*psw->pr_input)(mp, offp, proto); 198 } else 199 m_freem(m); 200 return (IPPROTO_DONE); 201 } 202 203 /* last resort: inject to raw socket */ 204 return (rip_input(mp, offp, proto)); 205 } 206 #endif 207 208 #ifdef INET6 209 int 210 encap6_input(struct mbuf **mp, int *offp, int proto) 211 { 212 struct mbuf *m = *mp; 213 struct ip6_hdr *ip6; 214 struct sockaddr_in6 s, d; 215 const struct protosw *psw; 216 struct encaptab *ep, *match; 217 void *arg; 218 int prio, matchprio; 219 220 ip6 = mtod(m, struct ip6_hdr *); 221 222 bzero(&s, sizeof(s)); 223 s.sin6_family = AF_INET6; 224 s.sin6_len = sizeof(struct sockaddr_in6); 225 s.sin6_addr = ip6->ip6_src; 226 bzero(&d, sizeof(d)); 227 d.sin6_family = AF_INET6; 228 d.sin6_len = sizeof(struct sockaddr_in6); 229 d.sin6_addr = ip6->ip6_dst; 230 231 arg = NULL; 232 psw = NULL; 233 match = NULL; 234 matchprio = 0; 235 mtx_lock(&encapmtx); 236 LIST_FOREACH(ep, &encaptab, chain) { 237 if (ep->af != AF_INET6) 238 continue; 239 if (ep->proto >= 0 && ep->proto != proto) 240 continue; 241 if (ep->func) 242 prio = (*ep->func)(m, *offp, proto, ep->arg); 243 else { 244 /* 245 * it's inbound traffic, we need to match in reverse 246 * order 247 */ 248 prio = mask_match(ep, (struct sockaddr *)&d, 249 (struct sockaddr *)&s); 250 } 251 252 /* see encap4_input() for issues here */ 253 if (prio <= 0) 254 continue; 255 if (prio > matchprio) { 256 matchprio = prio; 257 match = ep; 258 } 259 } 260 if (match != NULL) { 261 psw = match->psw; 262 arg = match->arg; 263 } 264 mtx_unlock(&encapmtx); 265 266 if (match != NULL) { 267 /* found a match */ 268 if (psw != NULL && psw->pr_input != NULL) { 269 encap_fillarg(m, arg); 270 return (*psw->pr_input)(mp, offp, proto); 271 } else { 272 m_freem(m); 273 return (IPPROTO_DONE); 274 } 275 } 276 277 /* last resort: inject to raw socket */ 278 return rip6_input(mp, offp, proto); 279 } 280 #endif 281 282 /*lint -sem(encap_add, custodial(1)) */ 283 static void 284 encap_add(struct encaptab *ep) 285 { 286 287 mtx_assert(&encapmtx, MA_OWNED); 288 LIST_INSERT_HEAD(&encaptab, ep, chain); 289 } 290 291 /* 292 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 293 * length of mask (sm and dm) is assumed to be same as sp/dp. 294 * Return value will be necessary as input (cookie) for encap_detach(). 295 */ 296 const struct encaptab * 297 encap_attach(int af, int proto, const struct sockaddr *sp, 298 const struct sockaddr *sm, const struct sockaddr *dp, 299 const struct sockaddr *dm, const struct protosw *psw, void *arg) 300 { 301 struct encaptab *ep; 302 303 /* sanity check on args */ 304 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) 305 return (NULL); 306 if (sp->sa_len != dp->sa_len) 307 return (NULL); 308 if (af != sp->sa_family || af != dp->sa_family) 309 return (NULL); 310 311 /* check if anyone have already attached with exactly same config */ 312 mtx_lock(&encapmtx); 313 LIST_FOREACH(ep, &encaptab, chain) { 314 if (ep->af != af) 315 continue; 316 if (ep->proto != proto) 317 continue; 318 if (ep->src.ss_len != sp->sa_len || 319 bcmp(&ep->src, sp, sp->sa_len) != 0 || 320 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 321 continue; 322 if (ep->dst.ss_len != dp->sa_len || 323 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 324 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 325 continue; 326 327 mtx_unlock(&encapmtx); 328 return (NULL); 329 } 330 331 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 332 if (ep == NULL) { 333 mtx_unlock(&encapmtx); 334 return (NULL); 335 } 336 bzero(ep, sizeof(*ep)); 337 338 ep->af = af; 339 ep->proto = proto; 340 bcopy(sp, &ep->src, sp->sa_len); 341 bcopy(sm, &ep->srcmask, sp->sa_len); 342 bcopy(dp, &ep->dst, dp->sa_len); 343 bcopy(dm, &ep->dstmask, dp->sa_len); 344 ep->psw = psw; 345 ep->arg = arg; 346 347 encap_add(ep); 348 mtx_unlock(&encapmtx); 349 return (ep); 350 } 351 352 const struct encaptab * 353 encap_attach_func(int af, int proto, 354 int (*func)(const struct mbuf *, int, int, void *), 355 const struct protosw *psw, void *arg) 356 { 357 struct encaptab *ep; 358 359 /* sanity check on args */ 360 if (!func) 361 return (NULL); 362 363 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 364 if (ep == NULL) 365 return (NULL); 366 bzero(ep, sizeof(*ep)); 367 368 ep->af = af; 369 ep->proto = proto; 370 ep->func = func; 371 ep->psw = psw; 372 ep->arg = arg; 373 374 mtx_lock(&encapmtx); 375 encap_add(ep); 376 mtx_unlock(&encapmtx); 377 return (ep); 378 } 379 380 int 381 encap_detach(const struct encaptab *cookie) 382 { 383 const struct encaptab *ep = cookie; 384 struct encaptab *p; 385 386 mtx_lock(&encapmtx); 387 LIST_FOREACH(p, &encaptab, chain) { 388 if (p == ep) { 389 LIST_REMOVE(p, chain); 390 mtx_unlock(&encapmtx); 391 free(p, M_NETADDR); /*XXX*/ 392 return 0; 393 } 394 } 395 mtx_unlock(&encapmtx); 396 397 return EINVAL; 398 } 399 400 static int 401 mask_match(const struct encaptab *ep, const struct sockaddr *sp, 402 const struct sockaddr *dp) 403 { 404 struct sockaddr_storage s; 405 struct sockaddr_storage d; 406 int i; 407 const u_int8_t *p, *q; 408 u_int8_t *r; 409 int matchlen; 410 411 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 412 return 0; 413 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 414 return 0; 415 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 416 return 0; 417 418 matchlen = 0; 419 420 p = (const u_int8_t *)sp; 421 q = (const u_int8_t *)&ep->srcmask; 422 r = (u_int8_t *)&s; 423 for (i = 0 ; i < sp->sa_len; i++) { 424 r[i] = p[i] & q[i]; 425 /* XXX estimate */ 426 matchlen += (q[i] ? 8 : 0); 427 } 428 429 p = (const u_int8_t *)dp; 430 q = (const u_int8_t *)&ep->dstmask; 431 r = (u_int8_t *)&d; 432 for (i = 0 ; i < dp->sa_len; i++) { 433 r[i] = p[i] & q[i]; 434 /* XXX rough estimate */ 435 matchlen += (q[i] ? 8 : 0); 436 } 437 438 /* need to overwrite len/family portion as we don't compare them */ 439 s.ss_len = sp->sa_len; 440 s.ss_family = sp->sa_family; 441 d.ss_len = dp->sa_len; 442 d.ss_family = dp->sa_family; 443 444 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 445 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 446 return matchlen; 447 } else 448 return 0; 449 } 450 451 static void 452 encap_fillarg(struct mbuf *m, void *arg) 453 { 454 struct m_tag *tag; 455 456 if (arg != NULL) { 457 tag = m_tag_get(PACKET_TAG_ENCAP, sizeof(void *), M_NOWAIT); 458 if (tag != NULL) { 459 *(void**)(tag+1) = arg; 460 m_tag_prepend(m, tag); 461 } 462 } 463 } 464 465 void * 466 encap_getarg(struct mbuf *m) 467 { 468 void *p = NULL; 469 struct m_tag *tag; 470 471 tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); 472 if (tag) { 473 p = *(void**)(tag+1); 474 m_tag_delete(m, tag); 475 } 476 return p; 477 } 478