1 /* $FreeBSD$ */ 2 /* $KAME: ip_encap.c,v 1.41 2001/03/15 08:35:08 itojun Exp $ */ 3 4 /*- 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 /* 33 * My grandfather said that there's a devil inside tunnelling technology... 34 * 35 * We have surprisingly many protocols that want packets with IP protocol 36 * #4 or #41. Here's a list of protocols that want protocol #41: 37 * RFC1933 configured tunnel 38 * RFC1933 automatic tunnel 39 * RFC2401 IPsec tunnel 40 * RFC2473 IPv6 generic packet tunnelling 41 * RFC2529 6over4 tunnel 42 * mobile-ip6 (uses RFC2473) 43 * RFC3056 6to4 tunnel 44 * isatap tunnel 45 * Here's a list of protocol that want protocol #4: 46 * RFC1853 IPv4-in-IPv4 tunnelling 47 * RFC2003 IPv4 encapsulation within IPv4 48 * RFC2344 reverse tunnelling for mobile-ip4 49 * RFC2401 IPsec tunnel 50 * Well, what can I say. They impose different en/decapsulation mechanism 51 * from each other, so they need separate protocol handler. The only one 52 * we can easily determine by protocol # is IPsec, which always has 53 * AH/ESP/IPComp header right after outer IP header. 54 * 55 * So, clearly good old protosw does not work for protocol #4 and #41. 56 * The code will let you match protocol via src/dst address pair. 57 */ 58 /* XXX is M_NETADDR correct? */ 59 60 #include "opt_mrouting.h" 61 #include "opt_inet.h" 62 #include "opt_inet6.h" 63 64 #include <sys/param.h> 65 #include <sys/systm.h> 66 #include <sys/socket.h> 67 #include <sys/sockio.h> 68 #include <sys/mbuf.h> 69 #include <sys/errno.h> 70 #include <sys/protosw.h> 71 #include <sys/queue.h> 72 73 #include <net/if.h> 74 #include <net/route.h> 75 76 #include <netinet/in.h> 77 #include <netinet/in_systm.h> 78 #include <netinet/ip.h> 79 #include <netinet/ip_var.h> 80 #include <netinet/ip_encap.h> 81 82 #ifdef INET6 83 #include <netinet/ip6.h> 84 #include <netinet6/ip6_var.h> 85 #include <netinet6/ip6protosw.h> 86 #endif 87 88 #include <machine/stdarg.h> 89 90 #include <net/net_osdep.h> 91 92 #include <sys/kernel.h> 93 #include <sys/malloc.h> 94 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); 95 96 static void encap_add(struct encaptab *); 97 static int mask_match(const struct encaptab *, const struct sockaddr *, 98 const struct sockaddr *); 99 static void encap_fillarg(struct mbuf *, const struct encaptab *); 100 101 /* 102 * All global variables in ip_encap.c are locked using encapmtx. 103 */ 104 static struct mtx encapmtx; 105 MTX_SYSINIT(encapmtx, &encapmtx, "encapmtx", MTX_DEF); 106 LIST_HEAD(, encaptab) encaptab = LIST_HEAD_INITIALIZER(&encaptab); 107 108 /* 109 * We currently keey encap_init() for source code compatibility reasons -- 110 * it's referenced by KAME pieces in netinet6. 111 */ 112 void 113 encap_init() 114 { 115 } 116 117 #ifdef INET 118 void 119 encap4_input(m, off) 120 struct mbuf *m; 121 int off; 122 { 123 struct ip *ip; 124 int proto; 125 struct sockaddr_in s, d; 126 const struct protosw *psw; 127 struct encaptab *ep, *match; 128 int prio, matchprio; 129 130 ip = mtod(m, struct ip *); 131 proto = ip->ip_p; 132 133 bzero(&s, sizeof(s)); 134 s.sin_family = AF_INET; 135 s.sin_len = sizeof(struct sockaddr_in); 136 s.sin_addr = ip->ip_src; 137 bzero(&d, sizeof(d)); 138 d.sin_family = AF_INET; 139 d.sin_len = sizeof(struct sockaddr_in); 140 d.sin_addr = ip->ip_dst; 141 142 match = NULL; 143 matchprio = 0; 144 mtx_lock(&encapmtx); 145 LIST_FOREACH(ep, &encaptab, chain) { 146 if (ep->af != AF_INET) 147 continue; 148 if (ep->proto >= 0 && ep->proto != proto) 149 continue; 150 if (ep->func) 151 prio = (*ep->func)(m, off, proto, ep->arg); 152 else { 153 /* 154 * it's inbound traffic, we need to match in reverse 155 * order 156 */ 157 prio = mask_match(ep, (struct sockaddr *)&d, 158 (struct sockaddr *)&s); 159 } 160 161 /* 162 * We prioritize the matches by using bit length of the 163 * matches. mask_match() and user-supplied matching function 164 * should return the bit length of the matches (for example, 165 * if both src/dst are matched for IPv4, 64 should be returned). 166 * 0 or negative return value means "it did not match". 167 * 168 * The question is, since we have two "mask" portion, we 169 * cannot really define total order between entries. 170 * For example, which of these should be preferred? 171 * mask_match() returns 48 (32 + 16) for both of them. 172 * src=3ffe::/16, dst=3ffe:501::/32 173 * src=3ffe:501::/32, dst=3ffe::/16 174 * 175 * We need to loop through all the possible candidates 176 * to get the best match - the search takes O(n) for 177 * n attachments (i.e. interfaces). 178 */ 179 if (prio <= 0) 180 continue; 181 if (prio > matchprio) { 182 matchprio = prio; 183 match = ep; 184 } 185 } 186 mtx_unlock(&encapmtx); 187 188 if (match) { 189 /* found a match, "match" has the best one */ 190 psw = match->psw; 191 if (psw && psw->pr_input) { 192 encap_fillarg(m, match); 193 (*psw->pr_input)(m, off); 194 } else 195 m_freem(m); 196 return; 197 } 198 199 /* last resort: inject to raw socket */ 200 rip_input(m, off); 201 } 202 #endif 203 204 #ifdef INET6 205 int 206 encap6_input(mp, offp, proto) 207 struct mbuf **mp; 208 int *offp; 209 int proto; 210 { 211 struct mbuf *m = *mp; 212 struct ip6_hdr *ip6; 213 struct sockaddr_in6 s, d; 214 const struct ip6protosw *psw; 215 struct encaptab *ep, *match; 216 int prio, matchprio; 217 218 ip6 = mtod(m, struct ip6_hdr *); 219 220 bzero(&s, sizeof(s)); 221 s.sin6_family = AF_INET6; 222 s.sin6_len = sizeof(struct sockaddr_in6); 223 s.sin6_addr = ip6->ip6_src; 224 bzero(&d, sizeof(d)); 225 d.sin6_family = AF_INET6; 226 d.sin6_len = sizeof(struct sockaddr_in6); 227 d.sin6_addr = ip6->ip6_dst; 228 229 match = NULL; 230 matchprio = 0; 231 mtx_lock(&encapmtx); 232 LIST_FOREACH(ep, &encaptab, chain) { 233 if (ep->af != AF_INET6) 234 continue; 235 if (ep->proto >= 0 && ep->proto != proto) 236 continue; 237 if (ep->func) 238 prio = (*ep->func)(m, *offp, proto, ep->arg); 239 else { 240 /* 241 * it's inbound traffic, we need to match in reverse 242 * order 243 */ 244 prio = mask_match(ep, (struct sockaddr *)&d, 245 (struct sockaddr *)&s); 246 } 247 248 /* see encap4_input() for issues here */ 249 if (prio <= 0) 250 continue; 251 if (prio > matchprio) { 252 matchprio = prio; 253 match = ep; 254 } 255 } 256 mtx_unlock(&encapmtx); 257 258 if (match) { 259 /* found a match */ 260 psw = (const struct ip6protosw *)match->psw; 261 if (psw && psw->pr_input) { 262 encap_fillarg(m, match); 263 return (*psw->pr_input)(mp, offp, proto); 264 } else { 265 m_freem(m); 266 return IPPROTO_DONE; 267 } 268 } 269 270 /* last resort: inject to raw socket */ 271 return rip6_input(mp, offp, proto); 272 } 273 #endif 274 275 /*lint -sem(encap_add, custodial(1)) */ 276 static void 277 encap_add(ep) 278 struct encaptab *ep; 279 { 280 281 mtx_assert(&encapmtx, MA_OWNED); 282 LIST_INSERT_HEAD(&encaptab, ep, chain); 283 } 284 285 /* 286 * sp (src ptr) is always my side, and dp (dst ptr) is always remote side. 287 * length of mask (sm and dm) is assumed to be same as sp/dp. 288 * Return value will be necessary as input (cookie) for encap_detach(). 289 */ 290 const struct encaptab * 291 encap_attach(af, proto, sp, sm, dp, dm, psw, arg) 292 int af; 293 int proto; 294 const struct sockaddr *sp, *sm; 295 const struct sockaddr *dp, *dm; 296 const struct protosw *psw; 297 void *arg; 298 { 299 struct encaptab *ep; 300 301 /* sanity check on args */ 302 if (sp->sa_len > sizeof(ep->src) || dp->sa_len > sizeof(ep->dst)) 303 return (NULL); 304 if (sp->sa_len != dp->sa_len) 305 return (NULL); 306 if (af != sp->sa_family || af != dp->sa_family) 307 return (NULL); 308 309 /* check if anyone have already attached with exactly same config */ 310 mtx_lock(&encapmtx); 311 LIST_FOREACH(ep, &encaptab, chain) { 312 if (ep->af != af) 313 continue; 314 if (ep->proto != proto) 315 continue; 316 if (ep->src.ss_len != sp->sa_len || 317 bcmp(&ep->src, sp, sp->sa_len) != 0 || 318 bcmp(&ep->srcmask, sm, sp->sa_len) != 0) 319 continue; 320 if (ep->dst.ss_len != dp->sa_len || 321 bcmp(&ep->dst, dp, dp->sa_len) != 0 || 322 bcmp(&ep->dstmask, dm, dp->sa_len) != 0) 323 continue; 324 325 mtx_unlock(&encapmtx); 326 return (NULL); 327 } 328 329 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 330 if (ep == NULL) { 331 mtx_unlock(&encapmtx); 332 return (NULL); 333 } 334 bzero(ep, sizeof(*ep)); 335 336 ep->af = af; 337 ep->proto = proto; 338 bcopy(sp, &ep->src, sp->sa_len); 339 bcopy(sm, &ep->srcmask, sp->sa_len); 340 bcopy(dp, &ep->dst, dp->sa_len); 341 bcopy(dm, &ep->dstmask, dp->sa_len); 342 ep->psw = psw; 343 ep->arg = arg; 344 345 encap_add(ep); 346 mtx_unlock(&encapmtx); 347 return (ep); 348 } 349 350 const struct encaptab * 351 encap_attach_func(af, proto, func, psw, arg) 352 int af; 353 int proto; 354 int (*func)(const struct mbuf *, int, int, void *); 355 const struct protosw *psw; 356 void *arg; 357 { 358 struct encaptab *ep; 359 360 /* sanity check on args */ 361 if (!func) 362 return (NULL); 363 364 ep = malloc(sizeof(*ep), M_NETADDR, M_NOWAIT); /*XXX*/ 365 if (ep == NULL) 366 return (NULL); 367 bzero(ep, sizeof(*ep)); 368 369 ep->af = af; 370 ep->proto = proto; 371 ep->func = func; 372 ep->psw = psw; 373 ep->arg = arg; 374 375 mtx_lock(&encapmtx); 376 encap_add(ep); 377 mtx_unlock(&encapmtx); 378 return (ep); 379 } 380 381 int 382 encap_detach(cookie) 383 const struct encaptab *cookie; 384 { 385 const struct encaptab *ep = cookie; 386 struct encaptab *p; 387 388 mtx_lock(&encapmtx); 389 LIST_FOREACH(p, &encaptab, chain) { 390 if (p == ep) { 391 LIST_REMOVE(p, chain); 392 mtx_unlock(&encapmtx); 393 free(p, M_NETADDR); /*XXX*/ 394 return 0; 395 } 396 } 397 mtx_unlock(&encapmtx); 398 399 return EINVAL; 400 } 401 402 static int 403 mask_match(ep, sp, dp) 404 const struct encaptab *ep; 405 const struct sockaddr *sp; 406 const struct sockaddr *dp; 407 { 408 struct sockaddr_storage s; 409 struct sockaddr_storage d; 410 int i; 411 const u_int8_t *p, *q; 412 u_int8_t *r; 413 int matchlen; 414 415 if (sp->sa_len > sizeof(s) || dp->sa_len > sizeof(d)) 416 return 0; 417 if (sp->sa_family != ep->af || dp->sa_family != ep->af) 418 return 0; 419 if (sp->sa_len != ep->src.ss_len || dp->sa_len != ep->dst.ss_len) 420 return 0; 421 422 matchlen = 0; 423 424 p = (const u_int8_t *)sp; 425 q = (const u_int8_t *)&ep->srcmask; 426 r = (u_int8_t *)&s; 427 for (i = 0 ; i < sp->sa_len; i++) { 428 r[i] = p[i] & q[i]; 429 /* XXX estimate */ 430 matchlen += (q[i] ? 8 : 0); 431 } 432 433 p = (const u_int8_t *)dp; 434 q = (const u_int8_t *)&ep->dstmask; 435 r = (u_int8_t *)&d; 436 for (i = 0 ; i < dp->sa_len; i++) { 437 r[i] = p[i] & q[i]; 438 /* XXX rough estimate */ 439 matchlen += (q[i] ? 8 : 0); 440 } 441 442 /* need to overwrite len/family portion as we don't compare them */ 443 s.ss_len = sp->sa_len; 444 s.ss_family = sp->sa_family; 445 d.ss_len = dp->sa_len; 446 d.ss_family = dp->sa_family; 447 448 if (bcmp(&s, &ep->src, ep->src.ss_len) == 0 && 449 bcmp(&d, &ep->dst, ep->dst.ss_len) == 0) { 450 return matchlen; 451 } else 452 return 0; 453 } 454 455 static void 456 encap_fillarg(m, ep) 457 struct mbuf *m; 458 const struct encaptab *ep; 459 { 460 struct m_tag *tag; 461 462 tag = m_tag_get(PACKET_TAG_ENCAP, sizeof (void*), M_NOWAIT); 463 if (tag) { 464 *(void**)(tag+1) = ep->arg; 465 m_tag_prepend(m, tag); 466 } 467 } 468 469 void * 470 encap_getarg(m) 471 struct mbuf *m; 472 { 473 void *p = NULL; 474 struct m_tag *tag; 475 476 tag = m_tag_find(m, PACKET_TAG_ENCAP, NULL); 477 if (tag) { 478 p = *(void**)(tag+1); 479 m_tag_delete(m, tag); 480 } 481 return p; 482 } 483