1 /* $FreeBSD$ */ 2 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * 6to4 tunnel 44 * Here's a list of protocol that want protocol #4: 45 * RFC1853 IPv4-in-IPv4 tunnelling 46 * RFC2003 IPv4 encapsulation within IPv4 47 * RFC2344 reverse tunnelling for mobile-ip4 48 * RFC2401 IPsec tunnel 49 * Well, what can I say. They impose different en/decapsulation mechanism 50 * from each other, so they need separate protocol handler. The only one 51 * we can easily determine by protocol # is IPsec, which always has 52 * AH/ESP/IPComp header right after outer IP header. 53 * 54 * So, clearly good old protosw does not work for protocol #4 and #41. 55 * The code will let you match protocol via src/dst address pair. 56 */ 57 /* XXX is M_NETADDR correct? */ 58 59 #include "opt_mrouting.h" 60 #include "opt_inet.h" 61 #include "opt_inet6.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/socket.h> 66 #include <sys/sockio.h> 67 #include <sys/mbuf.h> 68 #include <sys/errno.h> 69 #include <sys/protosw.h> 70 #include <sys/queue.h> 71 72 #include <net/if.h> 73 #include <net/route.h> 74 75 #include <netinet/in.h> 76 #include <netinet/in_var.h> 77 #include <netinet/in_systm.h> 78 #include <netinet/ip.h> 79 #include <netinet/ip_var.h> 80 #include <netinet/ip_encap.h> 81 82 #ifdef INET6 83 #include <netinet/ip6.h> 84 #include <netinet6/ip6_var.h> 85 #include <netinet6/ip6protosw.h> 86 #endif 87 88 #include <machine/stdarg.h> 89 90 #include <net/net_osdep.h> 91 92 #include <sys/kernel.h> 93 #include <sys/malloc.h> 94 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 95 96 static void encap_add __P((struct encaptab *)); 97 static int mask_match __P((const struct encaptab *, const struct sockaddr *, 98 const struct sockaddr *)); 99 static void encap_fillarg __P((struct mbuf *, const struct encaptab *)); 100 101 #ifndef LIST_HEAD_INITIALIZER 102 /* rely upon BSS initialization */ 103 LIST_HEAD(, encaptab) encaptab; 104 #else 105 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 106 #endif 107 108 void 109 encap_init() 110 { 111 static int initialized = 0; 112 113 if (initialized) 114 return; 115 initialized++; 116 #if 0 117 /* 118 * we cannot use LIST_INIT() here, since drivers may want to call 119 * encap_attach(), on driver attach. encap_init() will be called 120 * on AF_INET{,6} initialization, which happens after driver 121 * initialization - using LIST_INIT() here can nuke encap_attach() 122 * from drivers. 123 */ 124 LIST_INIT(&encaptab); 125 #endif 126 } 127 128 #ifdef INET 129 void 130 #if __STDC__ 131 encap4_input(struct mbuf *m, ...) 132 #else 133 encap4_input(m, va_alist) 134 struct mbuf *m; 135 va_dcl 136 #endif 137 { 138 int off, proto; 139 struct ip *ip; 140 struct sockaddr_in s, d; 141 const struct protosw *psw; 142 struct encaptab *ep, *match; 143 va_list ap; 144 int prio, matchprio; 145 146 va_start(ap, m); 147 off = va_arg(ap, int); 148 va_end(ap); 149 150 ip = mtod(m, struct ip *); 151 proto = ip->ip_p; 152 153 bzero(&s, sizeof(s)); 154 s.sin_family = AF_INET; 155 s.sin_len = sizeof(struct sockaddr_in); 156 s.sin_addr = ip->ip_src; 157 bzero(&d, sizeof(d)); 158 d.sin_family = AF_INET; 159 d.sin_len = sizeof(struct sockaddr_in); 160 d.sin_addr = ip->ip_dst; 161 162 match = NULL; 163 matchprio = 0; 164 LIST_FOREACH(ep, &encaptab, chain) { 165 if (ep->af != AF_INET) 166 continue; 167 if (ep->proto >= 0 && ep->proto != proto) 168 continue; 169 if (ep->func) 170 prio = (*ep->func)(m, off, proto, ep->arg); 171 else { 172 /* 173 * it's inbound traffic, we need to match in reverse 174 * order 175 */ 176 prio = mask_match(ep, (struct sockaddr *)&d, 177 (struct sockaddr *)&s); 178 } 179 180 /* 181 * We prioritize the matches by using bit length of the 182 * matches. mask_match() and user-supplied matching function 183 * should return the bit length of the matches (for example, 184 * if both src/dst are matched for IPv4, 64 should be returned). 185 * 0 or negative return value means "it did not match". 186 * 187 * The question is, since we have two "mask" portion, we 188 * cannot really define total order between entries. 189 * For example, which of these should be preferred? 190 * mask_match() returns 48 (32 + 16) for both of them. 191 * src=3ffe::/16, dst=3ffe:501::/32 192 * src=3ffe:501::/32, dst=3ffe::/16 193 * 194 * We need to loop through all the possible candidates 195 * to get the best match - the search takes O(n) for 196 * n attachments (i.e. interfaces). 197 */ 198 if (prio <= 0) 199 continue; 200 if (prio > matchprio) { 201 matchprio = prio; 202 match = ep; 203 } 204 } 205 206 if (match) { 207 /* found a match, "match" has the best one */ 208 psw = match->psw; 209 if (psw && psw->pr_input) { 210 encap_fillarg(m, match); 211 (*psw->pr_input)(m, off); 212 } else 213 m_freem(m); 214 return; 215 } 216 217 /* for backward compatibility - messy... */ 218 /* XXX 219 * I THINK they meant to call ip_input() 220 * The original code called ipip_input() 221 * which just calls rip_input() 222 * which makes no sense. 223 * (It is also not compiled in in LINT) 224 */ 225 if (proto == IPPROTO_IPV4) { 226 m_adj(m, off); 227 ip_input(m/*, off */); 228 return; 229 } 230 231 /* last resort: inject to raw socket */ 232 rip_input(m, off); 233 } 234 #endif 235 236 #ifdef INET6 237 int 238 encap6_input(mp, offp, proto) 239 struct mbuf **mp; 240 int *offp; 241 int proto; 242 { 243 struct mbuf *m = *mp; 244 struct ip6_hdr *ip6; 245 struct sockaddr_in6 s, d; 246 const struct ip6protosw *psw; 247 struct encaptab *ep, *match; 248 int prio, matchprio; 249 250 ip6 = mtod(m, struct ip6_hdr *); 251 252 bzero(&s, sizeof(s)); 253 s.sin6_family = AF_INET6; 254 s.sin6_len = sizeof(struct sockaddr_in6); 255 s.sin6_addr = ip6->ip6_src; 256 bzero(&d, sizeof(d)); 257 d.sin6_family = AF_INET6; 258 d.sin6_len = sizeof(struct sockaddr_in6); 259 d.sin6_addr = ip6->ip6_dst; 260 261 match = NULL; 262 matchprio = 0; 263 LIST_FOREACH(ep, &encaptab, chain) { 264 if (ep->af != AF_INET6) 265 continue; 266 if (ep->proto >= 0 && ep->proto != proto) 267 continue; 268 if (ep->func) 269 prio = (*ep->func)(m, *offp, proto, ep->arg); 270 else { 271 /* 272 * it's inbound traffic, we need to match in reverse 273 * order 274 */ 275 prio = mask_match(ep, (struct sockaddr *)&d, 276 (struct sockaddr *)&s); 277 } 278 279 /* see encap4_input() for issues here */ 280 if (prio <= 0) 281 continue; 282 if (prio > matchprio) { 283 matchprio = prio; 284 match = ep; 285 } 286 } 287 288 if (match) { 289 /* found a match */ 290 psw = (const struct ip6protosw *)match->psw; 291 if (psw && psw->pr_input) { 292 encap_fillarg(m, match); 293 return (*psw->pr_input)(mp, offp, proto); 294 } else { 295 m_freem(m); 296 return IPPROTO_DONE; 297 } 298 } 299 300 /* last resort: inject to raw socket */ 301 return rip6_input(mp, offp, proto); 302 } 303 #endif 304 305 static void 306 encap_add(ep) 307 struct encaptab *ep; 308 { 309 310 LIST_INSERT_HEAD(&encaptab, ep, chain); 311 } 312 313 /* 314 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 315 * length of mask (sm and dm) is assumed to be same as sp/dp. 316 * Return value will be necessary as input (cookie) for encap_detach(). 317 */ 318 const struct encaptab * 319 encap_attach(af, proto, sp, sm, dp, dm, psw, arg) 320 int af; 321 int proto; 322 const struct sockaddr *sp, *sm; 323 const struct sockaddr *dp, *dm; 324 const struct protosw *psw; 325 void *arg; 326 { 327 struct encaptab *ep; 328 int error; 329 int s; 330 331 s = splnet(); 332 /* sanity check on args */ 333 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) { 334 error = EINVAL; 335 goto fail; 336 } 337 if (sp->sa_len != dp->sa_len) { 338 error = EINVAL; 339 goto fail; 340 } 341 if (af != sp->sa_family || af != dp->sa_family) { 342 error = EINVAL; 343 goto fail; 344 } 345 346 /* check if anyone have already attached with exactly same config */ 347 LIST_FOREACH(ep, &encaptab, chain) { 348 if (ep->af != af) 349 continue; 350 if (ep->proto != proto) 351 continue; 352 if (ep->src.ss_len != sp->sa_len || 353 bcmp(&ep->src, sp, sp->sa_len) != 0 || 354 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 355 continue; 356 if (ep->dst.ss_len != dp->sa_len || 357 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 358 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 359 continue; 360 361 error = EEXIST; 362 goto fail; 363 } 364 365 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 366 if (ep == NULL) { 367 error = ENOBUFS; 368 goto fail; 369 } 370 bzero(ep, sizeof(*ep)); 371 372 ep->af = af; 373 ep->proto = proto; 374 bcopy(sp, &ep->src, sp->sa_len); 375 bcopy(sm, &ep->srcmask, sp->sa_len); 376 bcopy(dp, &ep->dst, dp->sa_len); 377 bcopy(dm, &ep->dstmask, dp->sa_len); 378 ep->psw = psw; 379 ep->arg = arg; 380 381 encap_add(ep); 382 383 error = 0; 384 splx(s); 385 return ep; 386 387 fail: 388 splx(s); 389 return NULL; 390 } 391 392 const struct encaptab * 393 encap_attach_func(af, proto, func, psw, arg) 394 int af; 395 int proto; 396 int (*func) __P((const struct mbuf *, int, int, void *)); 397 const struct protosw *psw; 398 void *arg; 399 { 400 struct encaptab *ep; 401 int error; 402 int s; 403 404 s = splnet(); 405 /* sanity check on args */ 406 if (!func) { 407 error = EINVAL; 408 goto fail; 409 } 410 411 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 412 if (ep == NULL) { 413 error = ENOBUFS; 414 goto fail; 415 } 416 bzero(ep, sizeof(*ep)); 417 418 ep->af = af; 419 ep->proto = proto; 420 ep->func = func; 421 ep->psw = psw; 422 ep->arg = arg; 423 424 encap_add(ep); 425 426 error = 0; 427 splx(s); 428 return ep; 429 430 fail: 431 splx(s); 432 return NULL; 433 } 434 435 int 436 encap_detach(cookie) 437 const struct encaptab *cookie; 438 { 439 const struct encaptab *ep = cookie; 440 struct encaptab *p; 441 442 LIST_FOREACH(p, &encaptab, chain) { 443 if (p == ep) { 444 LIST_REMOVE(p, chain); 445 free(p, M_NETADDR); /*XXX*/ 446 return 0; 447 } 448 } 449 450 return EINVAL; 451 } 452 453 static int 454 mask_match(ep, sp, dp) 455 const struct encaptab *ep; 456 const struct sockaddr *sp; 457 const struct sockaddr *dp; 458 { 459 struct sockaddr_storage s; 460 struct sockaddr_storage d; 461 int i; 462 const u_int8_t *p, *q; 463 u_int8_t *r; 464 int matchlen; 465 466 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 467 return 0; 468 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 469 return 0; 470 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 471 return 0; 472 473 matchlen = 0; 474 475 p = (const u_int8_t *)sp; 476 q = (const u_int8_t *)&ep->srcmask; 477 r = (u_int8_t *)&s; 478 for (i = 0 ; i < sp->sa_len; i++) { 479 r[i] = p[i] & q[i]; 480 /* XXX estimate */ 481 matchlen += (q[i] ? 8 : 0); 482 } 483 484 p = (const u_int8_t *)dp; 485 q = (const u_int8_t *)&ep->dstmask; 486 r = (u_int8_t *)&d; 487 for (i = 0 ; i < dp->sa_len; i++) { 488 r[i] = p[i] & q[i]; 489 /* XXX rough estimate */ 490 matchlen += (q[i] ? 8 : 0); 491 } 492 493 /* need to overwrite len/family portion as we don't compare them */ 494 s.ss_len = sp->sa_len; 495 s.ss_family = sp->sa_family; 496 d.ss_len = dp->sa_len; 497 d.ss_family = dp->sa_family; 498 499 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 500 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 501 return matchlen; 502 } else 503 return 0; 504 } 505 506 static void 507 encap_fillarg(m, ep) 508 struct mbuf *m; 509 const struct encaptab *ep; 510 { 511 #if 0 512 m->m_pkthdr.aux = ep->arg; 513 #else 514 struct mbuf *n; 515 516 n = m_aux_add(m, AF_INET, IPPROTO_IPV4); 517 if (n) { 518 *mtod(n, void **) = ep->arg; 519 n->m_len = sizeof(void *); 520 } 521 #endif 522 } 523 524 void * 525 encap_getarg(m) 526 struct mbuf *m; 527 { 528 void *p; 529 #if 0 530 p = m->m_pkthdr.aux; 531 m->m_pkthdr.aux = NULL; 532 return p; 533 #else 534 struct mbuf *n; 535 536 p = NULL; 537 n = m_aux_find(m, AF_INET, IPPROTO_IPV4); 538 if (n) { 539 if (n->m_len == sizeof(void *)) 540 p = *mtod(n, void **); 541 m_aux_delete(m, n); 542 } 543 return p; 544 #endif 545 } 546