1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * INET An implementation of the TCP/IP protocol suite for the LINUX 4 * operating system. INET is implemented using the BSD Socket 5 * interface as the means of communication with the user level. 6 * 7 * IPv4 Forwarding Information Base: policy rules. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * Thomas Graf <tgraf@suug.ch> 11 * 12 * Fixes: 13 * Rani Assaf : local_rule cannot be deleted 14 * Marc Boucher : routing by fwmark 15 */ 16 17 #include <linux/types.h> 18 #include <linux/kernel.h> 19 #include <linux/netdevice.h> 20 #include <linux/netlink.h> 21 #include <linux/inetdevice.h> 22 #include <linux/init.h> 23 #include <linux/list.h> 24 #include <linux/rcupdate.h> 25 #include <linux/export.h> 26 #include <net/inet_dscp.h> 27 #include <net/ip.h> 28 #include <net/route.h> 29 #include <net/tcp.h> 30 #include <net/ip_fib.h> 31 #include <net/nexthop.h> 32 #include <net/fib_rules.h> 33 #include <linux/indirect_call_wrapper.h> 34 35 struct fib4_rule { 36 struct fib_rule common; 37 u8 dst_len; 38 u8 src_len; 39 dscp_t dscp; 40 dscp_t dscp_mask; 41 u8 dscp_full:1; /* DSCP or TOS selector */ 42 __be32 src; 43 __be32 srcmask; 44 __be32 dst; 45 __be32 dstmask; 46 #ifdef CONFIG_IP_ROUTE_CLASSID 47 u32 tclassid; 48 #endif 49 }; 50 51 static bool fib4_rule_matchall(const struct fib_rule *rule) 52 { 53 struct fib4_rule *r = container_of(rule, struct fib4_rule, common); 54 55 if (r->dst_len || r->src_len || r->dscp) 56 return false; 57 return fib_rule_matchall(rule); 58 } 59 60 bool fib4_rule_default(const struct fib_rule *rule) 61 { 62 if (!fib4_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || 63 rule->l3mdev) 64 return false; 65 if (rule->table != RT_TABLE_LOCAL && rule->table != RT_TABLE_MAIN && 66 rule->table != RT_TABLE_DEFAULT) 67 return false; 68 return true; 69 } 70 EXPORT_SYMBOL_GPL(fib4_rule_default); 71 72 int fib4_rules_dump(struct net *net, struct notifier_block *nb, 73 struct netlink_ext_ack *extack) 74 { 75 return fib_rules_dump(net, nb, AF_INET, extack); 76 } 77 78 unsigned int fib4_rules_seq_read(const struct net *net) 79 { 80 return fib_rules_seq_read(net, AF_INET); 81 } 82 83 int __fib_lookup(struct net *net, struct flowi4 *flp, 84 struct fib_result *res, unsigned int flags) 85 { 86 struct fib_lookup_arg arg = { 87 .result = res, 88 .flags = flags, 89 }; 90 int err; 91 92 /* update flow if oif or iif point to device enslaved to l3mdev */ 93 l3mdev_update_flow(net, flowi4_to_flowi(flp)); 94 95 err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); 96 #ifdef CONFIG_IP_ROUTE_CLASSID 97 if (arg.rule) 98 res->tclassid = ((struct fib4_rule *)arg.rule)->tclassid; 99 else 100 res->tclassid = 0; 101 #endif 102 103 if (err == -ESRCH) 104 err = -ENETUNREACH; 105 106 return err; 107 } 108 EXPORT_SYMBOL_GPL(__fib_lookup); 109 110 INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule, 111 struct flowi *flp, int flags, 112 struct fib_lookup_arg *arg) 113 { 114 int err = -EAGAIN; 115 struct fib_table *tbl; 116 u32 tb_id; 117 118 switch (rule->action) { 119 case FR_ACT_TO_TBL: 120 break; 121 122 case FR_ACT_UNREACHABLE: 123 return -ENETUNREACH; 124 125 case FR_ACT_PROHIBIT: 126 return -EACCES; 127 128 case FR_ACT_BLACKHOLE: 129 default: 130 return -EINVAL; 131 } 132 133 rcu_read_lock(); 134 135 tb_id = fib_rule_get_table(rule, arg); 136 tbl = fib_get_table(rule->fr_net, tb_id); 137 if (tbl) 138 err = fib_table_lookup(tbl, &flp->u.ip4, 139 (struct fib_result *)arg->result, 140 arg->flags); 141 142 rcu_read_unlock(); 143 return err; 144 } 145 146 INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, 147 int flags, 148 struct fib_lookup_arg *arg) 149 { 150 struct fib_result *result = arg->result; 151 struct net_device *dev = NULL; 152 153 if (result->fi) { 154 struct fib_nh_common *nhc = fib_info_nhc(result->fi, 0); 155 156 dev = nhc->nhc_dev; 157 } 158 159 /* do not accept result if the route does 160 * not meet the required prefix length 161 */ 162 if (result->prefixlen <= rule->suppress_prefixlen) 163 goto suppress_route; 164 165 /* do not accept result if the route uses a device 166 * belonging to a forbidden interface group 167 */ 168 if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup) 169 goto suppress_route; 170 171 return false; 172 173 suppress_route: 174 if (!(arg->flags & FIB_LOOKUP_NOREF)) 175 fib_info_put(result->fi); 176 return true; 177 } 178 179 INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule, 180 struct flowi *fl, int flags) 181 { 182 struct fib4_rule *r = (struct fib4_rule *) rule; 183 struct flowi4 *fl4 = &fl->u.ip4; 184 __be32 daddr = fl4->daddr; 185 __be32 saddr = fl4->saddr; 186 187 if (((saddr ^ r->src) & r->srcmask) || 188 ((daddr ^ r->dst) & r->dstmask)) 189 return 0; 190 191 /* When DSCP selector is used we need to match on the entire DSCP field 192 * in the flow information structure. When TOS selector is used we need 193 * to mask the upper three DSCP bits prior to matching to maintain 194 * legacy behavior. 195 */ 196 if (r->dscp_full && 197 (r->dscp ^ inet_dsfield_to_dscp(fl4->flowi4_tos)) & r->dscp_mask) 198 return 0; 199 else if (!r->dscp_full && r->dscp && 200 !fib_dscp_masked_match(r->dscp, fl4)) 201 return 0; 202 203 if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) 204 return 0; 205 206 if (!fib_rule_port_match(&rule->sport_range, rule->sport_mask, 207 fl4->fl4_sport)) 208 return 0; 209 210 if (!fib_rule_port_match(&rule->dport_range, rule->dport_mask, 211 fl4->fl4_dport)) 212 return 0; 213 214 return 1; 215 } 216 217 static struct fib_table *fib_empty_table(struct net *net) 218 { 219 u32 id = 1; 220 221 while (1) { 222 if (!fib_get_table(net, id)) 223 return fib_new_table(net, id); 224 225 if (id++ == RT_TABLE_MAX) 226 break; 227 } 228 return NULL; 229 } 230 231 static int fib4_nl2rule_dscp(const struct nlattr *nla, struct fib4_rule *rule4, 232 struct netlink_ext_ack *extack) 233 { 234 if (rule4->dscp) { 235 NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP"); 236 return -EINVAL; 237 } 238 239 rule4->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 240 rule4->dscp_mask = inet_dsfield_to_dscp(INET_DSCP_MASK); 241 rule4->dscp_full = true; 242 243 return 0; 244 } 245 246 static int fib4_nl2rule_dscp_mask(const struct nlattr *nla, 247 struct fib4_rule *rule4, 248 struct netlink_ext_ack *extack) 249 { 250 dscp_t dscp_mask; 251 252 if (!rule4->dscp_full) { 253 NL_SET_ERR_MSG_ATTR(extack, nla, 254 "Cannot specify DSCP mask without DSCP value"); 255 return -EINVAL; 256 } 257 258 dscp_mask = inet_dsfield_to_dscp(nla_get_u8(nla) << 2); 259 if (rule4->dscp & ~dscp_mask) { 260 NL_SET_ERR_MSG_ATTR(extack, nla, "Invalid DSCP mask"); 261 return -EINVAL; 262 } 263 264 rule4->dscp_mask = dscp_mask; 265 266 return 0; 267 } 268 269 static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 270 struct fib_rule_hdr *frh, 271 struct nlattr **tb, 272 struct netlink_ext_ack *extack) 273 { 274 struct fib4_rule *rule4 = (struct fib4_rule *)rule; 275 struct net *net = rule->fr_net; 276 int err = -EINVAL; 277 278 if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { 279 NL_SET_ERR_MSG(extack, 280 "Flow label cannot be specified for IPv4 FIB rules"); 281 goto errout; 282 } 283 284 if (!inet_validate_dscp(frh->tos)) { 285 NL_SET_ERR_MSG(extack, 286 "Invalid dsfield (tos): ECN bits must be 0"); 287 goto errout; 288 } 289 /* IPv4 currently doesn't handle high order DSCP bits correctly */ 290 if (frh->tos & ~IPTOS_TOS_MASK) { 291 NL_SET_ERR_MSG(extack, "Invalid tos"); 292 goto errout; 293 } 294 rule4->dscp = inet_dsfield_to_dscp(frh->tos); 295 296 if (tb[FRA_DSCP] && 297 fib4_nl2rule_dscp(tb[FRA_DSCP], rule4, extack) < 0) 298 goto errout; 299 300 if (tb[FRA_DSCP_MASK] && 301 fib4_nl2rule_dscp_mask(tb[FRA_DSCP_MASK], rule4, extack) < 0) 302 goto errout; 303 304 /* split local/main if they are not already split */ 305 err = fib_unmerge(net); 306 if (err) 307 goto errout; 308 309 if (rule->table == RT_TABLE_UNSPEC && !rule->l3mdev) { 310 if (rule->action == FR_ACT_TO_TBL) { 311 struct fib_table *table; 312 313 table = fib_empty_table(net); 314 if (!table) { 315 err = -ENOBUFS; 316 goto errout; 317 } 318 319 rule->table = table->tb_id; 320 } 321 } 322 323 if (frh->src_len) 324 rule4->src = nla_get_in_addr(tb[FRA_SRC]); 325 326 if (frh->dst_len) 327 rule4->dst = nla_get_in_addr(tb[FRA_DST]); 328 329 #ifdef CONFIG_IP_ROUTE_CLASSID 330 if (tb[FRA_FLOW]) { 331 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); 332 if (rule4->tclassid) 333 atomic_inc(&net->ipv4.fib_num_tclassid_users); 334 } 335 #endif 336 337 if (fib_rule_requires_fldissect(rule)) 338 net->ipv4.fib_rules_require_fldissect++; 339 340 rule4->src_len = frh->src_len; 341 rule4->srcmask = inet_make_mask(rule4->src_len); 342 rule4->dst_len = frh->dst_len; 343 rule4->dstmask = inet_make_mask(rule4->dst_len); 344 345 net->ipv4.fib_has_custom_rules = true; 346 347 err = 0; 348 errout: 349 return err; 350 } 351 352 static int fib4_rule_delete(struct fib_rule *rule) 353 { 354 struct net *net = rule->fr_net; 355 int err; 356 357 /* split local/main if they are not already split */ 358 err = fib_unmerge(net); 359 if (err) 360 goto errout; 361 362 #ifdef CONFIG_IP_ROUTE_CLASSID 363 if (((struct fib4_rule *)rule)->tclassid) 364 atomic_dec(&net->ipv4.fib_num_tclassid_users); 365 #endif 366 net->ipv4.fib_has_custom_rules = true; 367 368 if (net->ipv4.fib_rules_require_fldissect && 369 fib_rule_requires_fldissect(rule)) 370 net->ipv4.fib_rules_require_fldissect--; 371 errout: 372 return err; 373 } 374 375 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, 376 struct nlattr **tb) 377 { 378 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 379 380 if (frh->src_len && (rule4->src_len != frh->src_len)) 381 return 0; 382 383 if (frh->dst_len && (rule4->dst_len != frh->dst_len)) 384 return 0; 385 386 if (frh->tos && 387 (rule4->dscp_full || 388 inet_dscp_to_dsfield(rule4->dscp) != frh->tos)) 389 return 0; 390 391 if (tb[FRA_DSCP]) { 392 dscp_t dscp; 393 394 dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2); 395 if (!rule4->dscp_full || rule4->dscp != dscp) 396 return 0; 397 } 398 399 if (tb[FRA_DSCP_MASK]) { 400 dscp_t dscp_mask; 401 402 dscp_mask = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP_MASK]) << 2); 403 if (!rule4->dscp_full || rule4->dscp_mask != dscp_mask) 404 return 0; 405 } 406 407 #ifdef CONFIG_IP_ROUTE_CLASSID 408 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 409 return 0; 410 #endif 411 412 if (frh->src_len && (rule4->src != nla_get_in_addr(tb[FRA_SRC]))) 413 return 0; 414 415 if (frh->dst_len && (rule4->dst != nla_get_in_addr(tb[FRA_DST]))) 416 return 0; 417 418 return 1; 419 } 420 421 static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, 422 struct fib_rule_hdr *frh) 423 { 424 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 425 426 frh->dst_len = rule4->dst_len; 427 frh->src_len = rule4->src_len; 428 429 if (rule4->dscp_full) { 430 frh->tos = 0; 431 if (nla_put_u8(skb, FRA_DSCP, 432 inet_dscp_to_dsfield(rule4->dscp) >> 2) || 433 nla_put_u8(skb, FRA_DSCP_MASK, 434 inet_dscp_to_dsfield(rule4->dscp_mask) >> 2)) 435 goto nla_put_failure; 436 } else { 437 frh->tos = inet_dscp_to_dsfield(rule4->dscp); 438 } 439 440 if ((rule4->dst_len && 441 nla_put_in_addr(skb, FRA_DST, rule4->dst)) || 442 (rule4->src_len && 443 nla_put_in_addr(skb, FRA_SRC, rule4->src))) 444 goto nla_put_failure; 445 #ifdef CONFIG_IP_ROUTE_CLASSID 446 if (rule4->tclassid && 447 nla_put_u32(skb, FRA_FLOW, rule4->tclassid)) 448 goto nla_put_failure; 449 #endif 450 return 0; 451 452 nla_put_failure: 453 return -ENOBUFS; 454 } 455 456 static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) 457 { 458 return nla_total_size(4) /* dst */ 459 + nla_total_size(4) /* src */ 460 + nla_total_size(4) /* flow */ 461 + nla_total_size(1) /* dscp */ 462 + nla_total_size(1); /* dscp mask */ 463 } 464 465 static void fib4_rule_flush_cache(struct fib_rules_ops *ops) 466 { 467 rt_cache_flush(ops->fro_net); 468 } 469 470 static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { 471 .family = AF_INET, 472 .rule_size = sizeof(struct fib4_rule), 473 .addr_size = sizeof(u32), 474 .action = fib4_rule_action, 475 .suppress = fib4_rule_suppress, 476 .match = fib4_rule_match, 477 .configure = fib4_rule_configure, 478 .delete = fib4_rule_delete, 479 .compare = fib4_rule_compare, 480 .fill = fib4_rule_fill, 481 .nlmsg_payload = fib4_rule_nlmsg_payload, 482 .flush_cache = fib4_rule_flush_cache, 483 .nlgroup = RTNLGRP_IPV4_RULE, 484 .owner = THIS_MODULE, 485 }; 486 487 static int fib_default_rules_init(struct fib_rules_ops *ops) 488 { 489 int err; 490 491 err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL); 492 if (err < 0) 493 return err; 494 err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN); 495 if (err < 0) 496 return err; 497 err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT); 498 if (err < 0) 499 return err; 500 return 0; 501 } 502 503 int __net_init fib4_rules_init(struct net *net) 504 { 505 int err; 506 struct fib_rules_ops *ops; 507 508 ops = fib_rules_register(&fib4_rules_ops_template, net); 509 if (IS_ERR(ops)) 510 return PTR_ERR(ops); 511 512 err = fib_default_rules_init(ops); 513 if (err < 0) 514 goto fail; 515 net->ipv4.rules_ops = ops; 516 net->ipv4.fib_has_custom_rules = false; 517 net->ipv4.fib_rules_require_fldissect = 0; 518 return 0; 519 520 fail: 521 /* also cleans all rules already added */ 522 fib_rules_unregister(ops); 523 return err; 524 } 525 526 void __net_exit fib4_rules_exit(struct net *net) 527 { 528 fib_rules_unregister(net->ipv4.rules_ops); 529 } 530