1 /* 2 * Stateless NAT actions 3 * 4 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au> 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License as published by the Free 8 * Software Foundation; either version 2 of the License, or (at your option) 9 * any later version. 10 */ 11 12 #include <linux/errno.h> 13 #include <linux/init.h> 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/netfilter.h> 17 #include <linux/rtnetlink.h> 18 #include <linux/skbuff.h> 19 #include <linux/slab.h> 20 #include <linux/spinlock.h> 21 #include <linux/string.h> 22 #include <linux/tc_act/tc_nat.h> 23 #include <net/act_api.h> 24 #include <net/icmp.h> 25 #include <net/ip.h> 26 #include <net/netlink.h> 27 #include <net/tc_act/tc_nat.h> 28 #include <net/tcp.h> 29 #include <net/udp.h> 30 31 32 #define NAT_TAB_MASK 15 33 static u32 nat_idx_gen; 34 35 static struct tcf_hashinfo nat_hash_info; 36 37 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { 38 [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) }, 39 }; 40 41 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, 42 struct tc_action *a, int ovr, int bind) 43 { 44 struct nlattr *tb[TCA_NAT_MAX + 1]; 45 struct tc_nat *parm; 46 int ret = 0, err; 47 struct tcf_nat *p; 48 struct tcf_common *pc; 49 50 if (nla == NULL) 51 return -EINVAL; 52 53 err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy); 54 if (err < 0) 55 return err; 56 57 if (tb[TCA_NAT_PARMS] == NULL) 58 return -EINVAL; 59 parm = nla_data(tb[TCA_NAT_PARMS]); 60 61 pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info); 62 if (!pc) { 63 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 64 &nat_idx_gen, &nat_hash_info); 65 if (IS_ERR(pc)) 66 return PTR_ERR(pc); 67 p = to_tcf_nat(pc); 68 ret = ACT_P_CREATED; 69 } else { 70 p = to_tcf_nat(pc); 71 if (!ovr) { 72 tcf_hash_release(pc, bind, &nat_hash_info); 73 return -EEXIST; 74 } 75 } 76 77 spin_lock_bh(&p->tcf_lock); 78 p->old_addr = parm->old_addr; 79 p->new_addr = parm->new_addr; 80 p->mask = parm->mask; 81 p->flags = parm->flags; 82 83 p->tcf_action = parm->action; 84 spin_unlock_bh(&p->tcf_lock); 85 86 if (ret == ACT_P_CREATED) 87 tcf_hash_insert(pc, &nat_hash_info); 88 89 return ret; 90 } 91 92 static int tcf_nat_cleanup(struct tc_action *a, int bind) 93 { 94 struct tcf_nat *p = a->priv; 95 96 return tcf_hash_release(&p->common, bind, &nat_hash_info); 97 } 98 99 static int tcf_nat(struct sk_buff *skb, const struct tc_action *a, 100 struct tcf_result *res) 101 { 102 struct tcf_nat *p = a->priv; 103 struct iphdr *iph; 104 __be32 old_addr; 105 __be32 new_addr; 106 __be32 mask; 107 __be32 addr; 108 int egress; 109 int action; 110 int ihl; 111 int noff; 112 113 spin_lock(&p->tcf_lock); 114 115 p->tcf_tm.lastuse = jiffies; 116 old_addr = p->old_addr; 117 new_addr = p->new_addr; 118 mask = p->mask; 119 egress = p->flags & TCA_NAT_FLAG_EGRESS; 120 action = p->tcf_action; 121 122 bstats_update(&p->tcf_bstats, skb); 123 124 spin_unlock(&p->tcf_lock); 125 126 if (unlikely(action == TC_ACT_SHOT)) 127 goto drop; 128 129 noff = skb_network_offset(skb); 130 if (!pskb_may_pull(skb, sizeof(*iph) + noff)) 131 goto drop; 132 133 iph = ip_hdr(skb); 134 135 if (egress) 136 addr = iph->saddr; 137 else 138 addr = iph->daddr; 139 140 if (!((old_addr ^ addr) & mask)) { 141 if (skb_cloned(skb) && 142 !skb_clone_writable(skb, sizeof(*iph) + noff) && 143 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 144 goto drop; 145 146 new_addr &= mask; 147 new_addr |= addr & ~mask; 148 149 /* Rewrite IP header */ 150 iph = ip_hdr(skb); 151 if (egress) 152 iph->saddr = new_addr; 153 else 154 iph->daddr = new_addr; 155 156 csum_replace4(&iph->check, addr, new_addr); 157 } else if ((iph->frag_off & htons(IP_OFFSET)) || 158 iph->protocol != IPPROTO_ICMP) { 159 goto out; 160 } 161 162 ihl = iph->ihl * 4; 163 164 /* It would be nice to share code with stateful NAT. */ 165 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { 166 case IPPROTO_TCP: 167 { 168 struct tcphdr *tcph; 169 170 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) || 171 (skb_cloned(skb) && 172 !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) && 173 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 174 goto drop; 175 176 tcph = (void *)(skb_network_header(skb) + ihl); 177 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1); 178 break; 179 } 180 case IPPROTO_UDP: 181 { 182 struct udphdr *udph; 183 184 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) || 185 (skb_cloned(skb) && 186 !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) && 187 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 188 goto drop; 189 190 udph = (void *)(skb_network_header(skb) + ihl); 191 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { 192 inet_proto_csum_replace4(&udph->check, skb, addr, 193 new_addr, 1); 194 if (!udph->check) 195 udph->check = CSUM_MANGLED_0; 196 } 197 break; 198 } 199 case IPPROTO_ICMP: 200 { 201 struct icmphdr *icmph; 202 203 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff)) 204 goto drop; 205 206 icmph = (void *)(skb_network_header(skb) + ihl); 207 208 if ((icmph->type != ICMP_DEST_UNREACH) && 209 (icmph->type != ICMP_TIME_EXCEEDED) && 210 (icmph->type != ICMP_PARAMETERPROB)) 211 break; 212 213 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) + 214 noff)) 215 goto drop; 216 217 icmph = (void *)(skb_network_header(skb) + ihl); 218 iph = (void *)(icmph + 1); 219 if (egress) 220 addr = iph->daddr; 221 else 222 addr = iph->saddr; 223 224 if ((old_addr ^ addr) & mask) 225 break; 226 227 if (skb_cloned(skb) && 228 !skb_clone_writable(skb, ihl + sizeof(*icmph) + 229 sizeof(*iph) + noff) && 230 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 231 goto drop; 232 233 icmph = (void *)(skb_network_header(skb) + ihl); 234 iph = (void *)(icmph + 1); 235 236 new_addr &= mask; 237 new_addr |= addr & ~mask; 238 239 /* XXX Fix up the inner checksums. */ 240 if (egress) 241 iph->daddr = new_addr; 242 else 243 iph->saddr = new_addr; 244 245 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 246 0); 247 break; 248 } 249 default: 250 break; 251 } 252 253 out: 254 return action; 255 256 drop: 257 spin_lock(&p->tcf_lock); 258 p->tcf_qstats.drops++; 259 spin_unlock(&p->tcf_lock); 260 return TC_ACT_SHOT; 261 } 262 263 static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, 264 int bind, int ref) 265 { 266 unsigned char *b = skb_tail_pointer(skb); 267 struct tcf_nat *p = a->priv; 268 struct tc_nat opt = { 269 .old_addr = p->old_addr, 270 .new_addr = p->new_addr, 271 .mask = p->mask, 272 .flags = p->flags, 273 274 .index = p->tcf_index, 275 .action = p->tcf_action, 276 .refcnt = p->tcf_refcnt - ref, 277 .bindcnt = p->tcf_bindcnt - bind, 278 }; 279 struct tcf_t t; 280 281 if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt)) 282 goto nla_put_failure; 283 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 284 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 285 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 286 if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t)) 287 goto nla_put_failure; 288 289 return skb->len; 290 291 nla_put_failure: 292 nlmsg_trim(skb, b); 293 return -1; 294 } 295 296 static struct tc_action_ops act_nat_ops = { 297 .kind = "nat", 298 .hinfo = &nat_hash_info, 299 .type = TCA_ACT_NAT, 300 .capab = TCA_CAP_NONE, 301 .owner = THIS_MODULE, 302 .act = tcf_nat, 303 .dump = tcf_nat_dump, 304 .cleanup = tcf_nat_cleanup, 305 .init = tcf_nat_init, 306 }; 307 308 MODULE_DESCRIPTION("Stateless NAT actions"); 309 MODULE_LICENSE("GPL"); 310 311 static int __init nat_init_module(void) 312 { 313 int err = tcf_hashinfo_init(&nat_hash_info, NAT_TAB_MASK); 314 if (err) 315 return err; 316 return tcf_register_action(&act_nat_ops); 317 } 318 319 static void __exit nat_cleanup_module(void) 320 { 321 tcf_unregister_action(&act_nat_ops); 322 tcf_hashinfo_destroy(&nat_hash_info); 323 } 324 325 module_init(nat_init_module); 326 module_exit(nat_cleanup_module); 327