/*
 * xfrm_input.c - common (address-family independent) IPsec receive path.
 *
 * Changes:
 *	YOSHIFUJI Hideaki @USAGI
 *		Split up af-specific portion
 *
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/ip_tunnels.h>
#include <net/ip6_tunnel.h>

/* Slab cache for struct sec_path (the per-skb list of xfrm states it
 * traversed on input); allocated from atomic context in secpath_dup().
 */
static struct kmem_cache *secpath_cachep __read_mostly;

/* Protects writers of xfrm_input_afinfo[]; readers use RCU. */
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
/* Per-address-family input callbacks, indexed by afinfo->family. */
static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO];

/* GRO aggregation context used when a tunnel-mode packet is fully
 * decapsulated and reinjected into the stack (see xfrm_input()).
 * xfrm_napi_dev is a dummy netdev serving only as the NAPI anchor.
 */
static struct gro_cells gro_cells;
static struct net_device xfrm_napi_dev;

/**
 * xfrm_input_register_afinfo - register per-family input callbacks
 * @afinfo: callback table; afinfo->family selects the slot
 *
 * Returns 0 on success, -EINVAL for a NULL table, -EAFNOSUPPORT for an
 * out-of-range family, -EEXIST if the slot is already taken.
 */
int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL))
		err = -EEXIST;
	else
		/* Publish with a write barrier so RCU readers see a
		 * fully-initialized table.
		 */
		rcu_assign_pointer(xfrm_input_afinfo[afinfo->family], afinfo);
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_input_register_afinfo);

/**
 * xfrm_input_unregister_afinfo - remove per-family input callbacks
 * @afinfo: the table previously passed to xfrm_input_register_afinfo()
 *
 * Returns 0 on success (including when the slot was already empty),
 * -EINVAL if a different table occupies the slot, -EAFNOSUPPORT for an
 * out-of-range family.  Waits for a grace period so in-flight RCU
 * readers finish before the caller may free @afinfo.
 */
int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo)
{
	int err = 0;

	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock_bh(&xfrm_input_afinfo_lock);
	if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) {
		if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo))
			err = -EINVAL;
		else
			RCU_INIT_POINTER(xfrm_input_afinfo[afinfo->family], NULL);
	}
	spin_unlock_bh(&xfrm_input_afinfo_lock);
	synchronize_rcu();
	return err;
}
EXPORT_SYMBOL(xfrm_input_unregister_afinfo);

/* Look up the afinfo table for @family under rcu_read_lock().
 *
 * NOTE: on success the RCU read-side critical section is deliberately
 * left open; the caller must release it via xfrm_input_put_afinfo().
 * On failure (NULL return) the lock has already been dropped.
 */
static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family)
{
	struct xfrm_input_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_input_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}

/* Counterpart of a successful xfrm_input_get_afinfo(): closes the RCU
 * read-side critical section opened there.
 */
static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo)
{
	rcu_read_unlock();
}

/* Invoke the per-family receive callback (e.g. protocol error/OK
 * notification) for @skb.  @err < 0 signals the error path.
 * Returns the callback's result, or -EAFNOSUPPORT when no handler is
 * registered for @family.
 */
static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol,
		       int err)
{
	int ret;
	struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family);

	if (!afinfo)
		return -EAFNOSUPPORT;

	ret = afinfo->callback(skb, protocol, err);
	xfrm_input_put_afinfo(afinfo);

	return ret;
}

/* Final teardown of a sec_path once its refcount hits zero: drop the
 * reference on every xfrm_state it recorded and free the container.
 */
void __secpath_destroy(struct sec_path *sp)
{
	int i;
	for (i = 0; i < sp->len; i++)
		xfrm_state_put(sp->xvec[i]);
	kmem_cache_free(secpath_cachep, sp);
}
EXPORT_SYMBOL(__secpath_destroy);

/**
 * secpath_dup - allocate a fresh sec_path, optionally copying @src
 * @src: existing path to clone, or NULL for an empty one
 *
 * Takes a reference on each xfrm_state copied from @src; the new path
 * starts with refcnt 1.  GFP_ATOMIC because this runs in the packet
 * receive path.  Returns NULL on allocation failure.
 */
struct sec_path *secpath_dup(struct sec_path *src)
{
	struct sec_path *sp;

	sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC);
	if (!sp)
		return NULL;

	sp->len = 0;
	if (src) {
		int i;

		memcpy(sp, src, sizeof(*sp));
		for (i = 0; i < sp->len; i++)
			xfrm_state_hold(sp->xvec[i]);
	}
	/* memcpy above also copied src's refcnt; reset it for the clone. */
	atomic_set(&sp->refcnt, 1);
	return sp;
}
EXPORT_SYMBOL(secpath_dup);

/* Fetch spi and seq from ipsec header */

/*
 * Returns 0 with *spi/*seq filled in, 1 for a non-IPsec nexthdr (caller
 * treats this as end of the chain), or -EINVAL when the header cannot
 * be pulled.  For IPCOMP the 16-bit CPI is widened into the spi slot
 * and there is no sequence number.
 */
int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
{
	int offset, offset_seq;
	int hlen;

	switch (nexthdr) {
	case IPPROTO_AH:
		hlen = sizeof(struct ip_auth_hdr);
		offset = offsetof(struct ip_auth_hdr, spi);
		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
		break;
	case IPPROTO_ESP:
		hlen = sizeof(struct ip_esp_hdr);
		offset = offsetof(struct ip_esp_hdr, spi);
		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
		break;
	case IPPROTO_COMP:
		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
			return -EINVAL;
		/* CPI lives 2 bytes into the IPCOMP header; promote the
		 * 16-bit value to a 32-bit "spi" for uniform state lookup.
		 */
		*spi = htonl(ntohs(*(__be16 *)(skb_transport_header(skb) + 2)));
		*seq = 0;
		return 0;
	default:
		return 1;
	}

	if (!pskb_may_pull(skb, hlen))
		return -EINVAL;

	*spi = *(__be32 *)(skb_transport_header(skb) + offset);
	*seq = *(__be32 *)(skb_transport_header(skb) + offset_seq);
	return 0;
}

/*
 * Prepare an skb for the inner-mode input handler: run the outer
 * family's extract_input hook, then resolve the inner mode (which for
 * an AF_UNSPEC selector depends on the decapsulated protocol) and set
 * skb->protocol to the inner family's ethertype.
 */
int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct xfrm_mode *inner_mode = x->inner_mode;
	int err;

	err = x->outer_mode->afinfo->extract_input(x, skb);
	if (err)
		return err;

	if (x->sel.family == AF_UNSPEC) {
		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
		if (inner_mode == NULL)
			return -EAFNOSUPPORT;
	}

	skb->protocol = inner_mode->afinfo->eth_proto;
	return inner_mode->input2(x, skb);
}
EXPORT_SYMBOL(xfrm_prepare_input);

/**
 * xfrm_input - main IPsec receive processing
 * @skb:        packet to process
 * @nexthdr:    IPsec protocol of the outermost header (AH/ESP/COMP)
 * @spi:        SPI if the caller already parsed it, 0 to parse here
 * @encap_type: UDP encapsulation type; negative means async resumption
 *              (re-entered from xfrm_input_resume() after offloaded
 *              crypto completed - control jumps straight to "resume")
 *
 * Walks the chain of IPsec headers: for each one, looks up the state,
 * validates it (lifetime, replay, encap match) under x->lock, runs the
 * type's input transform, and records the state in skb->sp.  Tunnel
 * mode terminates the loop and reinjects the inner packet through GRO;
 * transport mode hands off to the family's transport_finish.
 *
 * Always consumes the skb; returns 0 (or the transport_finish result).
 */
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
	struct net *net = dev_net(skb->dev);
	int err;
	__be32 seq;
	__be32 seq_hi;
	struct xfrm_state *x = NULL;
	xfrm_address_t *daddr;
	struct xfrm_mode *inner_mode;
	u32 mark = skb->mark;
	unsigned int family;
	int decaps = 0;
	int async = 0;

	/* A negative encap_type indicates async resumption. */
	if (encap_type < 0) {
		async = 1;
		/* State and sequence number were stashed in the skb cb
		 * before the async crypto call; jump back into the loop.
		 */
		x = xfrm_input_state(skb);
		seq = XFRM_SKB_CB(skb)->seq.input.low;
		family = x->outer_mode->afinfo->family;
		goto resume;
	}

	daddr = (xfrm_address_t *)(skb_network_header(skb) +
				   XFRM_SPI_SKB_CB(skb)->daddroff);
	family = XFRM_SPI_SKB_CB(skb)->family;

	/* if tunnel is present override skb->mark value with tunnel i_key */
	switch (family) {
	case AF_INET:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
		break;
	case AF_INET6:
		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
		break;
	}

	/* Allocate new secpath or COW existing one. */
	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
		struct sec_path *sp;

		sp = secpath_dup(skb->sp);
		if (!sp) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
			goto drop;
		}
		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		goto drop;
	}

	/* One iteration per IPsec header in the bundle. */
	do {
		if (skb->sp->len == XFRM_MAX_DEPTH) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
			goto drop;
		}

		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
		if (x == NULL) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
			xfrm_audit_state_notfound(skb, family, spi, seq);
			goto drop;
		}

		/* Record the state; the secpath now owns the lookup's
		 * reference and drops it in __secpath_destroy().
		 */
		skb->sp->xvec[skb->sp->len++] = x;

		spin_lock(&x->lock);

		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
			if (x->km.state == XFRM_STATE_ACQ)
				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
			else
				XFRM_INC_STATS(net,
					       LINUX_MIB_XFRMINSTATEINVALID);
			goto drop_unlock;
		}

		if ((x->encap ? x->encap->encap_type : 0) != encap_type) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
			goto drop_unlock;
		}

		if (x->repl->check(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		if (xfrm_state_check_expire(x)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED);
			goto drop_unlock;
		}

		spin_unlock(&x->lock);

		if (xfrm_tunnel_check(skb, x, family)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		seq_hi = htonl(xfrm_replay_seqhi(x, seq));

		/* Stash seq for possible async resumption and for the
		 * transform (e.g. ESN ICV computation).
		 */
		XFRM_SKB_CB(skb)->seq.input.low = seq;
		XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;

		/* Pin dst and device across a potential async crypto
		 * round trip; dev_put() happens at "resume" below.
		 */
		skb_dst_force(skb);
		dev_hold(skb->dev);

		nexthdr = x->type->input(x, skb);

		/* Async crypto in flight; we re-enter at "resume" via
		 * xfrm_input_resume() when it completes.
		 */
		if (nexthdr == -EINPROGRESS)
			return 0;
resume:
		dev_put(skb->dev);

		spin_lock(&x->lock);
		if (nexthdr <= 0) {
			if (nexthdr == -EBADMSG) {
				xfrm_audit_state_icvfail(x, skb,
							 x->type->proto);
				x->stats.integrity_failed++;
			}
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR);
			goto drop_unlock;
		}

		/* only the first xfrm gets the encap type */
		encap_type = 0;

		/* The replay window may have moved while the packet was
		 * off in async crypto; re-validate before advancing.
		 */
		if (async && x->repl->recheck(x, skb, seq)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
			goto drop_unlock;
		}

		x->repl->advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;

		inner_mode = x->inner_mode;

		if (x->sel.family == AF_UNSPEC) {
			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
			if (inner_mode == NULL) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
				goto drop;
			}
		}

		if (inner_mode->input(x, skb)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
			goto drop;
		}

		/* Tunnel mode fully decapsulated the packet: stop
		 * chaining and reinject it below.
		 */
		if (x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) {
			decaps = 1;
			break;
		}

		/*
		 * We need the inner address.  However, we only get here for
		 * transport mode so the outer address is identical.
		 */
		daddr = &x->id.daddr;
		family = x->outer_mode->afinfo->family;

		/* err > 0 means "not an IPsec header": loop terminates. */
		err = xfrm_parse_spi(skb, nexthdr, &spi, &seq);
		if (err < 0) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
			goto drop;
		}
	} while (!err);

	err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
	if (err)
		goto drop;

	nf_reset(skb);

	if (decaps) {
		skb_dst_drop(skb);
		gro_cells_receive(&gro_cells, skb);
		return 0;
	} else {
		return x->inner_mode->afinfo->transport_finish(skb, async);
	}

drop_unlock:
	spin_unlock(&x->lock);
drop:
	/* x may be NULL (lookup failure) or type-less on early errors. */
	xfrm_rcv_cb(skb, family, x && x->type ? x->type->proto : nexthdr, -1);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(xfrm_input);

/* Re-enter xfrm_input() after async crypto completion; the negative
 * encap_type is the marker that selects the "resume" path.
 */
int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
{
	return xfrm_input(skb, nexthdr, 0, -1);
}
EXPORT_SYMBOL(xfrm_input_resume);

/* Boot-time setup: dummy NAPI device + GRO cells for decapsulated
 * packet reinjection, and the sec_path slab cache.  On gro_cells_init()
 * failure, cells stays NULL and gro_cells_receive() falls back to the
 * non-GRO path (presumably netif_rx - behavior lives in gro_cells).
 */
void __init xfrm_input_init(void)
{
	int err;

	init_dummy_netdev(&xfrm_napi_dev);
	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
	if (err)
		gro_cells.cells = NULL;

	secpath_cachep = kmem_cache_create("secpath_cache",
					   sizeof(struct sec_path),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);
}