1 /* $FreeBSD$ */ 2 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * 6to4 tunnel 44 * Here's a list of protocol that want protocol #4: 45 * RFC1853 IPv4-in-IPv4 tunnelling 46 * RFC2003 IPv4 encapsulation within IPv4 47 * RFC2344 reverse tunnelling for mobile-ip4 48 * RFC2401 IPsec tunnel 49 * Well, what can I say. They impose different en/decapsulation mechanism 50 * from each other, so they need separate protocol handler. The only one 51 * we can easily determine by protocol # is IPsec, which always has 52 * AH/ESP/IPComp header right after outer IP header. 53 * 54 * So, clearly good old protosw does not work for protocol #4 and #41. 55 * The code will let you match protocol via src/dst address pair. 56 */ 57 /* XXX is M_NETADDR correct? */ 58 59 #include "opt_mrouting.h" 60 #include "opt_inet.h" 61 #include "opt_inet6.h" 62 63 #include <sys/param.h> 64 #include <sys/systm.h> 65 #include <sys/socket.h> 66 #include <sys/sockio.h> 67 #include <sys/mbuf.h> 68 #include <sys/errno.h> 69 #include <sys/protosw.h> 70 #include <sys/queue.h> 71 72 #include <net/if.h> 73 #include <net/route.h> 74 75 #include <netinet/in.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include <netinet/ip_var.h> 79 #include <netinet/ip_encap.h> 80 #include <netinet/ipprotosw.h> 81 82 #ifdef INET6 83 #include <netinet/ip6.h> 84 #include <netinet6/ip6_var.h> 85 #include <netinet6/ip6protosw.h> 86 #endif 87 88 #include <machine/stdarg.h> 89 90 #include <net/net_osdep.h> 91 92 #include <sys/kernel.h> 93 #include <sys/malloc.h> 94 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 95 96 static void encap_add __P((struct encaptab *)); 97 static int mask_match __P((const struct encaptab *, const struct sockaddr *, 98 const struct sockaddr *)); 99 static void encap_fillarg __P((struct mbuf *, const struct encaptab *)); 100 101 #ifndef LIST_HEAD_INITIALIZER 102 /* rely upon BSS initialization */ 103 LIST_HEAD(, encaptab) encaptab; 104 #else 105 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 106 #endif 107 108 void 109 encap_init() 110 { 111 static int initialized = 0; 112 113 if (initialized) 114 return; 115 initialized++; 116 #if 0 117 /* 118 * we cannot use LIST_INIT() here, since drivers may want to call 119 * encap_attach(), on driver attach. encap_init() will be called 120 * on AF_INET{,6} initialization, which happens after driver 121 * initialization - using LIST_INIT() here can nuke encap_attach() 122 * from drivers. 123 */ 124 LIST_INIT(&encaptab); 125 #endif 126 } 127 128 #ifdef INET 129 void 130 #if __STDC__ 131 encap4_input(struct mbuf *m, ...) 132 #else 133 encap4_input(m, va_alist) 134 struct mbuf *m; 135 va_dcl 136 #endif 137 { 138 int off, proto; 139 struct ip *ip; 140 struct sockaddr_in s, d; 141 const struct ipprotosw *psw; 142 struct encaptab *ep, *match; 143 va_list ap; 144 int prio, matchprio; 145 146 va_start(ap, m); 147 off = va_arg(ap, int); 148 proto = va_arg(ap, int); 149 va_end(ap); 150 151 ip = mtod(m, struct ip *); 152 153 bzero(&s, sizeof(s)); 154 s.sin_family = AF_INET; 155 s.sin_len = sizeof(struct sockaddr_in); 156 s.sin_addr = ip->ip_src; 157 bzero(&d, sizeof(d)); 158 d.sin_family = AF_INET; 159 d.sin_len = sizeof(struct sockaddr_in); 160 d.sin_addr = ip->ip_dst; 161 162 match = NULL; 163 matchprio = 0; 164 LIST_FOREACH(ep, &encaptab, chain) { 165 if (ep->af != AF_INET) 166 continue; 167 if (ep->proto >= 0 && ep->proto != proto) 168 continue; 169 if (ep->func) 170 prio = (*ep->func)(m, off, proto, ep->arg); 171 else { 172 /* 173 * it's inbound traffic, we need to match in reverse 174 * order 175 */ 176 prio = mask_match(ep, (struct sockaddr *)&d, 177 (struct sockaddr *)&s); 178 } 179 180 /* 181 * We prioritize the matches by using bit length of the 182 * matches. mask_match() and user-supplied matching function 183 * should return the bit length of the matches (for example, 184 * if both src/dst are matched for IPv4, 64 should be returned). 185 * 0 or negative return value means "it did not match". 186 * 187 * The question is, since we have two "mask" portion, we 188 * cannot really define total order between entries. 189 * For example, which of these should be preferred? 190 * mask_match() returns 48 (32 + 16) for both of them. 191 * src=3ffe::/16, dst=3ffe:501::/32 192 * src=3ffe:501::/32, dst=3ffe::/16 193 * 194 * We need to loop through all the possible candidates 195 * to get the best match - the search takes O(n) for 196 * n attachments (i.e. interfaces). 197 */ 198 if (prio <= 0) 199 continue; 200 if (prio > matchprio) { 201 matchprio = prio; 202 match = ep; 203 } 204 } 205 206 if (match) { 207 /* found a match, "match" has the best one */ 208 psw = (const struct ipprotosw *)match->psw; 209 if (psw && psw->pr_input) { 210 encap_fillarg(m, match); 211 (*psw->pr_input)(m, off, proto); 212 } else 213 m_freem(m); 214 return; 215 } 216 217 /* last resort: inject to raw socket */ 218 rip_input(m, off, proto); 219 } 220 #endif 221 222 #ifdef INET6 223 int 224 encap6_input(mp, offp, proto) 225 struct mbuf **mp; 226 int *offp; 227 int proto; 228 { 229 struct mbuf *m = *mp; 230 struct ip6_hdr *ip6; 231 struct sockaddr_in6 s, d; 232 const struct ip6protosw *psw; 233 struct encaptab *ep, *match; 234 int prio, matchprio; 235 236 ip6 = mtod(m, struct ip6_hdr *); 237 238 bzero(&s, sizeof(s)); 239 s.sin6_family = AF_INET6; 240 s.sin6_len = sizeof(struct sockaddr_in6); 241 s.sin6_addr = ip6->ip6_src; 242 bzero(&d, sizeof(d)); 243 d.sin6_family = AF_INET6; 244 d.sin6_len = sizeof(struct sockaddr_in6); 245 d.sin6_addr = ip6->ip6_dst; 246 247 match = NULL; 248 matchprio = 0; 249 LIST_FOREACH(ep, &encaptab, chain) { 250 if (ep->af != AF_INET6) 251 continue; 252 if (ep->proto >= 0 && ep->proto != proto) 253 continue; 254 if (ep->func) 255 prio = (*ep->func)(m, *offp, proto, ep->arg); 256 else { 257 /* 258 * it's inbound traffic, we need to match in reverse 259 * order 260 */ 261 prio = mask_match(ep, (struct sockaddr *)&d, 262 (struct sockaddr *)&s); 263 } 264 265 /* see encap4_input() for issues here */ 266 if (prio <= 0) 267 continue; 268 if (prio > matchprio) { 269 matchprio = prio; 270 match = ep; 271 } 272 } 273 274 if (match) { 275 /* found a match */ 276 psw = (const struct ip6protosw *)match->psw; 277 if (psw && psw->pr_input) { 278 encap_fillarg(m, match); 279 return (*psw->pr_input)(mp, offp, proto); 280 } else { 281 m_freem(m); 282 return IPPROTO_DONE; 283 } 284 } 285 286 /* last resort: inject to raw socket */ 287 return rip6_input(mp, offp, proto); 288 } 289 #endif 290 291 static void 292 encap_add(ep) 293 struct encaptab *ep; 294 { 295 296 LIST_INSERT_HEAD(&encaptab, ep, chain); 297 } 298 299 /* 300 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 301 * length of mask (sm and dm) is assumed to be same as sp/dp. 302 * Return value will be necessary as input (cookie) for encap_detach(). 303 */ 304 const struct encaptab * 305 encap_attach(af, proto, sp, sm, dp, dm, psw, arg) 306 int af; 307 int proto; 308 const struct sockaddr *sp, *sm; 309 const struct sockaddr *dp, *dm; 310 const struct protosw *psw; 311 void *arg; 312 { 313 struct encaptab *ep; 314 int error; 315 int s; 316 317 s = splnet(); 318 /* sanity check on args */ 319 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) { 320 error = EINVAL; 321 goto fail; 322 } 323 if (sp->sa_len != dp->sa_len) { 324 error = EINVAL; 325 goto fail; 326 } 327 if (af != sp->sa_family || af != dp->sa_family) { 328 error = EINVAL; 329 goto fail; 330 } 331 332 /* check if anyone have already attached with exactly same config */ 333 LIST_FOREACH(ep, &encaptab, chain) { 334 if (ep->af != af) 335 continue; 336 if (ep->proto != proto) 337 continue; 338 if (ep->src.ss_len != sp->sa_len || 339 bcmp(&ep->src, sp, sp->sa_len) != 0 || 340 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 341 continue; 342 if (ep->dst.ss_len != dp->sa_len || 343 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 344 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 345 continue; 346 347 error = EEXIST; 348 goto fail; 349 } 350 351 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 352 if (ep == NULL) { 353 error = ENOBUFS; 354 goto fail; 355 } 356 bzero(ep, sizeof(*ep)); 357 358 ep->af = af; 359 ep->proto = proto; 360 bcopy(sp, &ep->src, sp->sa_len); 361 bcopy(sm, &ep->srcmask, sp->sa_len); 362 bcopy(dp, &ep->dst, dp->sa_len); 363 bcopy(dm, &ep->dstmask, dp->sa_len); 364 ep->psw = psw; 365 ep->arg = arg; 366 367 encap_add(ep); 368 369 error = 0; 370 splx(s); 371 return ep; 372 373 fail: 374 splx(s); 375 return NULL; 376 } 377 378 const struct encaptab * 379 encap_attach_func(af, proto, func, psw, arg) 380 int af; 381 int proto; 382 int (*func) __P((const struct mbuf *, int, int, void *)); 383 const struct protosw *psw; 384 void *arg; 385 { 386 struct encaptab *ep; 387 int error; 388 int s; 389 390 s = splnet(); 391 /* sanity check on args */ 392 if (!func) { 393 error = EINVAL; 394 goto fail; 395 } 396 397 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 398 if (ep == NULL) { 399 error = ENOBUFS; 400 goto fail; 401 } 402 bzero(ep, sizeof(*ep)); 403 404 ep->af = af; 405 ep->proto = proto; 406 ep->func = func; 407 ep->psw = psw; 408 ep->arg = arg; 409 410 encap_add(ep); 411 412 error = 0; 413 splx(s); 414 return ep; 415 416 fail: 417 splx(s); 418 return NULL; 419 } 420 421 int 422 encap_detach(cookie) 423 const struct encaptab *cookie; 424 { 425 const struct encaptab *ep = cookie; 426 struct encaptab *p; 427 428 LIST_FOREACH(p, &encaptab, chain) { 429 if (p == ep) { 430 LIST_REMOVE(p, chain); 431 free(p, M_NETADDR); /*XXX*/ 432 return 0; 433 } 434 } 435 436 return EINVAL; 437 } 438 439 static int 440 mask_match(ep, sp, dp) 441 const struct encaptab *ep; 442 const struct sockaddr *sp; 443 const struct sockaddr *dp; 444 { 445 struct sockaddr_storage s; 446 struct sockaddr_storage d; 447 int i; 448 const u_int8_t *p, *q; 449 u_int8_t *r; 450 int matchlen; 451 452 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 453 return 0; 454 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 455 return 0; 456 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 457 return 0; 458 459 matchlen = 0; 460 461 p = (const u_int8_t *)sp; 462 q = (const u_int8_t *)&ep->srcmask; 463 r = (u_int8_t *)&s; 464 for (i = 0 ; i < sp->sa_len; i++) { 465 r[i] = p[i] & q[i]; 466 /* XXX estimate */ 467 matchlen += (q[i] ? 8 : 0); 468 } 469 470 p = (const u_int8_t *)dp; 471 q = (const u_int8_t *)&ep->dstmask; 472 r = (u_int8_t *)&d; 473 for (i = 0 ; i < dp->sa_len; i++) { 474 r[i] = p[i] & q[i]; 475 /* XXX rough estimate */ 476 matchlen += (q[i] ? 8 : 0); 477 } 478 479 /* need to overwrite len/family portion as we don't compare them */ 480 s.ss_len = sp->sa_len; 481 s.ss_family = sp->sa_family; 482 d.ss_len = dp->sa_len; 483 d.ss_family = dp->sa_family; 484 485 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 486 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 487 return matchlen; 488 } else 489 return 0; 490 } 491 492 static void 493 encap_fillarg(m, ep) 494 struct mbuf *m; 495 const struct encaptab *ep; 496 { 497 #if 0 498 m->m_pkthdr.aux = ep->arg; 499 #else 500 struct mbuf *n; 501 502 n = m_aux_add(m, AF_INET, IPPROTO_IPV4); 503 if (n) { 504 *mtod(n, void **) = ep->arg; 505 n->m_len = sizeof(void *); 506 } 507 #endif 508 } 509 510 void * 511 encap_getarg(m) 512 struct mbuf *m; 513 { 514 void *p; 515 #if 0 516 p = m->m_pkthdr.aux; 517 m->m_pkthdr.aux = NULL; 518 return p; 519 #else 520 struct mbuf *n; 521 522 p = NULL; 523 n = m_aux_find(m, AF_INET, IPPROTO_IPV4); 524 if (n) { 525 if (n->m_len == sizeof(void *)) 526 p = *mtod(n, void **); 527 m_aux_delete(m, n); 528 } 529 return p; 530 #endif 531 } 532