1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * IPv4 Forwarding Information Base: policy rules. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * Thomas Graf <tgraf@suug.ch> 11 * 12 * Fixes: 13 * Rani Assaf : local_rule cannot be deleted 14 * Marc Boucher : routing by fwmark 15 */ 16 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/netdevice.h> 20 #include <linux/netlink.h> 21 #include <linux/inetdevice.h> 22 #include <linux/init.h> 23 #include <linux/list.h> 24 #include <linux/rcupdate.h> 25 #include <linux/export.h> 26 #include <net/inet_dscp.h> 27 #include <net/ip.h> 28 #include <net/route.h> 29 #include <net/tcp.h> 30 #include <net/ip_fib.h> 31 #include <net/nexthop.h> 32 #include <net/fib_rules.h> 33 #include <linux/indirect_call_wrapper.h> 34 35 struct fib4_rule { 36 struct fib_rule common; 37 u8 dst_len; 38 u8 src_len; 39 dscp_t dscp; 40 u8 dscp_full:1; /* DSCP or TOS selector */ 41 __be32 src; 42 __be32 srcmask; 43 __be32 dst; 44 __be32 dstmask; 45 #ifdef CONFIG_IP_ROUTE_CLASSID 46 u32 tclassid; 47 #endif 48 }; 49 50 static bool fib4_rule_matchall(const struct fib_rule *rule) 51 { 52 struct fib4_rule *r = container_of(rule, struct fib4_rule, common); 53 54 if (r->dst_len || r->src_len || r->dscp) 55 return false; 56 return fib_rule_matchall(rule); 57 } 58 59 bool fib4_rule_default(const struct fib_rule *rule) 60 { 61 if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || 62 rule->l3mdev) 63 return false; 64 if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN && 65 rule->table != RT_TABLE_DEFAULT) 66 return false; 67 return true; 68 } 69 EXPORT_SYMBOL_GPL(fib4_rule_default); 70 71 int fib4_rules_dump(struct net *net, struct notifier_block *nb, 72 struct netlink_ext_ack *extack) 73 { 74 return fib_rules_dump(net, nb, AF_INET, extack); 75 } 76 77 unsigned int fib4_rules_seq_read(const struct net *net) 78 { 79 return fib_rules_seq_read(net, AF_INET); 80 } 81 82 int __fib_lookup(struct net *net, struct flowi4 *flp, 83 struct fib_result *res, unsigned int flags) 84 { 85 struct fib_lookup_arg arg = { 86 .result = res, 87 .flags = flags, 88 }; 89 int err; 90 91 /* update flow if oif or iif point to device enslaved to l3mdev */ 92 l3mdev_update_flow(net, flowi4_to_flowi(flp)); 93 94 err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); 95 #ifdef CONFIG_IP_ROUTE_CLASSID 96 if (arg.rule) 97 res->tclassid = ((struct fib4_rule *)arg.rule)->tclassid; 98 else 99 res->tclassid = 0; 100 #endif 101 102 if (err == -ESRCH) 103 err = -ENETUNREACH; 104 105 return err; 106 } 107 EXPORT_SYMBOL_GPL(__fib_lookup); 108 109 INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule, 110 struct flowi *flp, int flags, 111 struct fib_lookup_arg *arg) 112 { 113 int err = -EAGAIN; 114 struct fib_table *tbl; 115 u32 tb_id; 116 117 switch (rule->action) { 118 case FR_ACT_TO_TBL: 119 break; 120 121 case FR_ACT_UNREACHABLE: 122 return -ENETUNREACH; 123 124 case FR_ACT_PROHIBIT: 125 return -EACCES; 126 127 case FR_ACT_BLACKHOLE: 128 default: 129 return -EINVAL; 130 } 131 132 rcu_read_lock(); 133 134 tb_id = fib_rule_get_table(rule, arg); 135 tbl = fib_get_table(rule->fr_net, tb_id); 136 if (tbl) 137 err = fib_table_lookup(tbl, &flp->u.ip4, 138 (struct fib_result *)arg->result, 139 arg->flags); 140 141 rcu_read_unlock(); 142 return err; 143 } 144 145 INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, 146 int flags, 147 struct fib_lookup_arg *arg) 148 { 149 struct fib_result *result = arg->result; 150 struct net_device *dev = NULL; 151 152 if (result->fi) { 153 struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0); 154 155 dev = nhc->nhc_dev; 156 } 157 158 /* do not accept result if the route does 159 * not meet the required prefix length 160 */ 161 if (result->prefixlen <= rule->suppress_prefixlen) 162 goto suppress_route; 163 164 /* do not accept result if the route uses a device 165 * belonging to a forbidden interface group 166 */ 167 if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup) 168 goto suppress_route; 169 170 return false; 171 172 suppress_route: 173 if (!(arg->flags & FIB_LOOKUP_NOREF)) 174 fib_info_put(result->fi); 175 return true; 176 } 177 178 INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, 179 struct flowi *fl, int flags) 180 { 181 struct fib4_rule *r = (struct fib4_rule *) rule; 182 struct flowi4 *fl4 = &fl->u.ip4; 183 __be32 daddr = fl4->daddr; 184 __be32 saddr = fl4->saddr; 185 186 if (((saddr ^ r->src) & r->srcmask) || 187 ((daddr ^ r->dst) & r->dstmask)) 188 return 0; 189 190 /* When DSCP selector is used we need to match on the entire DSCP field 191 * in the flow information structure. When TOS selector is used we need 192 * to mask the upper three DSCP bits prior to matching to maintain 193 * legacy behavior. 194 */ 195 if (r->dscp_full && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos)) 196 return 0; 197 else if (!r->dscp_full && r->dscp && 198 !fib_dscp_masked_match(r->dscp, fl4)) 199 return 0; 200 201 if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) 202 return 0; 203 204 if (fib_rule_port_range_set(&rule->sport_range) && 205 !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport)) 206 return 0; 207 208 if (fib_rule_port_range_set(&rule->dport_range) && 209 !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport)) 210 return 0; 211 212 return 1; 213 } 214 215 static struct fib_table *fib_empty_table(struct net *net) 216 { 217 u32 id = 1; 218 219 while (1) { 220 if (!fib_get_table(net, id)) 221 return fib_new_table(net, id); 222 223 if (id++ == RT_TABLE_MAX) 224 break; 225 } 226 return NULL; 227 } 228 229 static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4, 230 struct netlink_ext_ack *extack) 231 { 232 if (rule4->dscp) { 233 NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP"); 234 return -EINVAL; 235 } 236 237 rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 238 rule4->dscp_full = true; 239 240 return 0; 241 } 242 243 static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 244 struct fib_rule_hdr *frh, 245 struct nlattr **tb, 246 struct netlink_ext_ack *extack) 247 { 248 struct net *net = sock_net(skb->sk); 249 int err = -EINVAL; 250 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 251 252 if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { 253 NL_SET_ERR_MSG(extack, 254 "Flow label cannot be specified for IPv4 FIB rules"); 255 goto errout; 256 } 257 258 if (!inet_validate_dscp(frh->tos)) { 259 NL_SET_ERR_MSG(extack, 260 "Invalid dsfield (tos): ECN bits must be 0"); 261 goto errout; 262 } 263 /* IPv4 currently doesn't handle high order DSCP bits correctly */ 264 if (frh->tos & ~IPTOS_TOS_MASK) { 265 NL_SET_ERR_MSG(extack, "Invalid tos"); 266 goto errout; 267 } 268 rule4->dscp = inet_dsfield_to_dscp(frh->tos); 269 270 if (tb[FRA_DSCP] && 271 fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0) 272 goto errout; 273 274 /* split local/main if they are not already split */ 275 err = fib_unmerge(net); 276 if (err) 277 goto errout; 278 279 if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) { 280 if (rule->action == FR_ACT_TO_TBL) { 281 struct fib_table *table; 282 283 table = fib_empty_table(net); 284 if (!table) { 285 err = -ENOBUFS; 286 goto errout; 287 } 288 289 rule->table = table->tb_id; 290 } 291 } 292 293 if (frh->src_len) 294 rule4->src = nla_get_in_addr(tb[FRA_SRC]); 295 296 if (frh->dst_len) 297 rule4->dst = nla_get_in_addr(tb[FRA_DST]); 298 299 #ifdef CONFIG_IP_ROUTE_CLASSID 300 if (tb[FRA_FLOW]) { 301 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); 302 if (rule4->tclassid) 303 atomic_inc(&net->ipv4.fib_num_tclassid_users); 304 } 305 #endif 306 307 if (fib_rule_requires_fldissect(rule)) 308 net->ipv4.fib_rules_require_fldissect++; 309 310 rule4->src_len = frh->src_len; 311 rule4->srcmask = inet_make_mask(rule4->src_len); 312 rule4->dst_len = frh->dst_len; 313 rule4->dstmask = inet_make_mask(rule4->dst_len); 314 315 net->ipv4.fib_has_custom_rules = true; 316 317 err = 0; 318 errout: 319 return err; 320 } 321 322 static int fib4_rule_delete(struct fib_rule *rule) 323 { 324 struct net *net = rule->fr_net; 325 int err; 326 327 /* split local/main if they are not already split */ 328 err = fib_unmerge(net); 329 if (err) 330 goto errout; 331 332 #ifdef CONFIG_IP_ROUTE_CLASSID 333 if (((struct fib4_rule *)rule)->tclassid) 334 atomic_dec(&net->ipv4.fib_num_tclassid_users); 335 #endif 336 net->ipv4.fib_has_custom_rules = true; 337 338 if (net->ipv4.fib_rules_require_fldissect && 339 fib_rule_requires_fldissect(rule)) 340 net->ipv4.fib_rules_require_fldissect--; 341 errout: 342 return err; 343 } 344 345 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 346 struct nlattr **tb) 347 { 348 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 349 350 if (frh->src_len && (rule4->src_len != frh->src_len)) 351 return 0; 352 353 if (frh->dst_len && (rule4->dst_len != frh->dst_len)) 354 return 0; 355 356 if (frh->tos && 357 (rule4->dscp_full || 358 inet_dscp_to_dsfield(rule4->dscp) != frh->tos)) 359 return 0; 360 361 if (tb[FRA_DSCP]) { 362 dscp_t dscp; 363 364 dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2); 365 if (!rule4->dscp_full || rule4->dscp != dscp) 366 return 0; 367 } 368 369 #ifdef CONFIG_IP_ROUTE_CLASSID 370 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 371 return 0; 372 #endif 373 374 if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC]))) 375 return 0; 376 377 if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST]))) 378 return 0; 379 380 return 1; 381 } 382 383 static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 384 struct fib_rule_hdr *frh) 385 { 386 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 387 388 frh->dst_len = rule4->dst_len; 389 frh->src_len = rule4->src_len; 390 391 if (rule4->dscp_full) { 392 frh->tos = 0; 393 if (nla_put_u8(skb, FRA_DSCP, 394 inet_dscp_to_dsfield(rule4->dscp) >> 2)) 395 goto nla_put_failure; 396 } else { 397 frh->tos = inet_dscp_to_dsfield(rule4->dscp); 398 } 399 400 if ((rule4->dst_len && 401 nla_put_in_addr(skb, FRA_DST, rule4->dst)) || 402 (rule4->src_len && 403 nla_put_in_addr(skb, FRA_SRC, rule4->src))) 404 goto nla_put_failure; 405 #ifdef CONFIG_IP_ROUTE_CLASSID 406 if (rule4->tclassid && 407 nla_put_u32(skb, FRA_FLOW, rule4->tclassid)) 408 goto nla_put_failure; 409 #endif 410 return 0; 411 412 nla_put_failure: 413 return -ENOBUFS; 414 } 415 416 static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) 417 { 418 return nla_total_size(4) /* dst */ 419 + nla_total_size(4) /* src */ 420 + nla_total_size(4) /* flow */ 421 + nla_total_size(1); /* dscp */ 422 } 423 424 static void fib4_rule_flush_cache(struct fib_rules_ops *ops) 425 { 426 rt_cache_flush(ops->fro_net); 427 } 428 429 static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { 430 .family = AF_INET, 431 .rule_size = sizeof(struct fib4_rule), 432 .addr_size = sizeof(u32), 433 .action = fib4_rule_action, 434 .suppress = fib4_rule_suppress, 435 .match = fib4_rule_match, 436 .configure = fib4_rule_configure, 437 .delete = fib4_rule_delete, 438 .compare = fib4_rule_compare, 439 .fill = fib4_rule_fill, 440 .nlmsg_payload = fib4_rule_nlmsg_payload, 441 .flush_cache = fib4_rule_flush_cache, 442 .nlgroup = RTNLGRP_IPV4_RULE, 443 .owner = THIS_MODULE, 444 }; 445 446 static int fib_default_rules_init(struct fib_rules_ops *ops) 447 { 448 int err; 449 450 err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL); 451 if (err < 0) 452 return err; 453 err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN); 454 if (err < 0) 455 return err; 456 err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT); 457 if (err < 0) 458 return err; 459 return 0; 460 } 461 462 int __net_init fib4_rules_init(struct net *net) 463 { 464 int err; 465 struct fib_rules_ops *ops; 466 467 ops = fib_rules_register(&fib4_rules_ops_template, net); 468 if (IS_ERR(ops)) 469 return PTR_ERR(ops); 470 471 err = fib_default_rules_init(ops); 472 if (err < 0) 473 goto fail; 474 net->ipv4.rules_ops = ops; 475 net->ipv4.fib_has_custom_rules = false; 476 net->ipv4.fib_rules_require_fldissect = 0; 477 return 0; 478 479 fail: 480 /* also cleans all rules already added */ 481 fib_rules_unregister(ops); 482 return err; 483 } 484 485 void __net_exit fib4_rules_exit(struct net *net) 486 { 487 fib_rules_unregister(net->ipv4.rules_ops); 488 } 489