1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Monitoring code for network dropped packet alerts 4 * 5 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> 6 */ 7 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 10 #include <linux/netdevice.h> 11 #include <linux/etherdevice.h> 12 #include <linux/string.h> 13 #include <linux/if_arp.h> 14 #include <linux/inetdevice.h> 15 #include <linux/inet.h> 16 #include <linux/interrupt.h> 17 #include <linux/netpoll.h> 18 #include <linux/sched.h> 19 #include <linux/delay.h> 20 #include <linux/types.h> 21 #include <linux/workqueue.h> 22 #include <linux/netlink.h> 23 #include <linux/net_dropmon.h> 24 #include <linux/percpu.h> 25 #include <linux/timer.h> 26 #include <linux/bitops.h> 27 #include <linux/slab.h> 28 #include <linux/module.h> 29 #include <net/genetlink.h> 30 #include <net/netevent.h> 31 #include <net/flow_offload.h> 32 #include <net/devlink.h> 33 34 #include <trace/events/skb.h> 35 #include <trace/events/napi.h> 36 #include <trace/events/devlink.h> 37 38 #include <asm/unaligned.h> 39 40 #define TRACE_ON 1 41 #define TRACE_OFF 0 42 43 /* 44 * Globals, our netlink socket pointer 45 * and the work handle that will send up 46 * netlink alerts 47 */ 48 static int trace_state = TRACE_OFF; 49 static bool monitor_hw; 50 51 #undef EM 52 #undef EMe 53 54 #define EM(a, b) [a] = #b, 55 #define EMe(a, b) [a] = #b 56 57 /* drop_reasons is used to translate 'enum skb_drop_reason' to string, 58 * which is reported to user space. 59 */ 60 static const char * const drop_reasons[] = { 61 TRACE_SKB_DROP_REASON 62 }; 63 64 /* net_dm_mutex 65 * 66 * An overall lock guarding every operation coming from userspace. 67 * It also guards the global 'hw_stats_list' list. 68 */ 69 static DEFINE_MUTEX(net_dm_mutex); 70 71 struct net_dm_stats { 72 u64 dropped; 73 struct u64_stats_sync syncp; 74 }; 75 76 #define NET_DM_MAX_HW_TRAP_NAME_LEN 40 77 78 struct net_dm_hw_entry { 79 char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN]; 80 u32 count; 81 }; 82 83 struct net_dm_hw_entries { 84 u32 num_entries; 85 struct net_dm_hw_entry entries[]; 86 }; 87 88 struct per_cpu_dm_data { 89 spinlock_t lock; /* Protects 'skb', 'hw_entries' and 90 * 'send_timer' 91 */ 92 union { 93 struct sk_buff *skb; 94 struct net_dm_hw_entries *hw_entries; 95 }; 96 struct sk_buff_head drop_queue; 97 struct work_struct dm_alert_work; 98 struct timer_list send_timer; 99 struct net_dm_stats stats; 100 }; 101 102 struct dm_hw_stat_delta { 103 struct net_device *dev; 104 unsigned long last_rx; 105 struct list_head list; 106 struct rcu_head rcu; 107 unsigned long last_drop_val; 108 }; 109 110 static struct genl_family net_drop_monitor_family; 111 112 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); 113 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data); 114 115 static int dm_hit_limit = 64; 116 static int dm_delay = 1; 117 static unsigned long dm_hw_check_delta = 2*HZ; 118 static LIST_HEAD(hw_stats_list); 119 120 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY; 121 static u32 net_dm_trunc_len; 122 static u32 net_dm_queue_len = 1000; 123 124 struct net_dm_alert_ops { 125 void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, 126 void *location, 127 enum skb_drop_reason reason); 128 void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, 129 int work, int budget); 130 void (*work_item_func)(struct work_struct *work); 131 void (*hw_work_item_func)(struct work_struct *work); 132 void (*hw_trap_probe)(void *ignore, const struct devlink *devlink, 133 struct sk_buff *skb, 134 const struct devlink_trap_metadata *metadata); 135 }; 136 137 struct net_dm_skb_cb { 138 union { 139 struct devlink_trap_metadata *hw_metadata; 140 void *pc; 141 }; 142 enum skb_drop_reason reason; 143 }; 144 145 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0])) 146 147 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) 148 { 149 size_t al; 150 struct net_dm_alert_msg *msg; 151 struct nlattr *nla; 152 struct sk_buff *skb; 153 unsigned long flags; 154 void *msg_header; 155 156 al = sizeof(struct net_dm_alert_msg); 157 al += dm_hit_limit * sizeof(struct net_dm_drop_point); 158 al += sizeof(struct nlattr); 159 160 skb = genlmsg_new(al, GFP_KERNEL); 161 162 if (!skb) 163 goto err; 164 165 msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, 166 0, NET_DM_CMD_ALERT); 167 if (!msg_header) { 168 nlmsg_free(skb); 169 skb = NULL; 170 goto err; 171 } 172 nla = nla_reserve(skb, NLA_UNSPEC, 173 sizeof(struct net_dm_alert_msg)); 174 if (!nla) { 175 nlmsg_free(skb); 176 skb = NULL; 177 goto err; 178 } 179 msg = nla_data(nla); 180 memset(msg, 0, al); 181 goto out; 182 183 err: 184 mod_timer(&data->send_timer, jiffies + HZ / 10); 185 out: 186 spin_lock_irqsave(&data->lock, flags); 187 swap(data->skb, skb); 188 spin_unlock_irqrestore(&data->lock, flags); 189 190 if (skb) { 191 struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; 192 struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); 193 194 genlmsg_end(skb, genlmsg_data(gnlh)); 195 } 196 197 return skb; 198 } 199 200 static const struct genl_multicast_group dropmon_mcgrps[] = { 201 { .name = "events", }, 202 }; 203 204 static void send_dm_alert(struct work_struct *work) 205 { 206 struct sk_buff *skb; 207 struct per_cpu_dm_data *data; 208 209 data = container_of(work, struct per_cpu_dm_data, dm_alert_work); 210 211 skb = reset_per_cpu_data(data); 212 213 if (skb) 214 genlmsg_multicast(&net_drop_monitor_family, skb, 0, 215 0, GFP_KERNEL); 216 } 217 218 /* 219 * This is the timer function to delay the sending of an alert 220 * in the event that more drops will arrive during the 221 * hysteresis period. 222 */ 223 static void sched_send_work(struct timer_list *t) 224 { 225 struct per_cpu_dm_data *data = from_timer(data, t, send_timer); 226 227 schedule_work(&data->dm_alert_work); 228 } 229 230 static void trace_drop_common(struct sk_buff *skb, void *location) 231 { 232 struct net_dm_alert_msg *msg; 233 struct net_dm_drop_point *point; 234 struct nlmsghdr *nlh; 235 struct nlattr *nla; 236 int i; 237 struct sk_buff *dskb; 238 struct per_cpu_dm_data *data; 239 unsigned long flags; 240 241 local_irq_save(flags); 242 data = this_cpu_ptr(&dm_cpu_data); 243 spin_lock(&data->lock); 244 dskb = data->skb; 245 246 if (!dskb) 247 goto out; 248 249 nlh = (struct nlmsghdr *)dskb->data; 250 nla = genlmsg_data(nlmsg_data(nlh)); 251 msg = nla_data(nla); 252 point = msg->points; 253 for (i = 0; i < msg->entries; i++) { 254 if (!memcmp(&location, &point->pc, sizeof(void *))) { 255 point->count++; 256 goto out; 257 } 258 point++; 259 } 260 if (msg->entries == dm_hit_limit) 261 goto out; 262 /* 263 * We need to create a new entry 264 */ 265 __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); 266 nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); 267 memcpy(point->pc, &location, sizeof(void *)); 268 point->count = 1; 269 msg->entries++; 270 271 if (!timer_pending(&data->send_timer)) { 272 data->send_timer.expires = jiffies + dm_delay * HZ; 273 add_timer(&data->send_timer); 274 } 275 276 out: 277 spin_unlock_irqrestore(&data->lock, flags); 278 } 279 280 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, 281 void *location, 282 enum skb_drop_reason reason) 283 { 284 trace_drop_common(skb, location); 285 } 286 287 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, 288 int work, int budget) 289 { 290 struct dm_hw_stat_delta *new_stat; 291 292 /* 293 * Don't check napi structures with no associated device 294 */ 295 if (!napi->dev) 296 return; 297 298 rcu_read_lock(); 299 list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { 300 /* 301 * only add a note to our monitor buffer if: 302 * 1) this is the dev we received on 303 * 2) its after the last_rx delta 304 * 3) our rx_dropped count has gone up 305 */ 306 if ((new_stat->dev == napi->dev) && 307 (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && 308 (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { 309 trace_drop_common(NULL, NULL); 310 new_stat->last_drop_val = napi->dev->stats.rx_dropped; 311 new_stat->last_rx = jiffies; 312 break; 313 } 314 } 315 rcu_read_unlock(); 316 } 317 318 static struct net_dm_hw_entries * 319 net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data) 320 { 321 struct net_dm_hw_entries *hw_entries; 322 unsigned long flags; 323 324 hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit), 325 GFP_KERNEL); 326 if (!hw_entries) { 327 /* If the memory allocation failed, we try to perform another 328 * allocation in 1/10 second. Otherwise, the probe function 329 * will constantly bail out. 330 */ 331 mod_timer(&hw_data->send_timer, jiffies + HZ / 10); 332 } 333 334 spin_lock_irqsave(&hw_data->lock, flags); 335 swap(hw_data->hw_entries, hw_entries); 336 spin_unlock_irqrestore(&hw_data->lock, flags); 337 338 return hw_entries; 339 } 340 341 static int net_dm_hw_entry_put(struct sk_buff *msg, 342 const struct net_dm_hw_entry *hw_entry) 343 { 344 struct nlattr *attr; 345 346 attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY); 347 if (!attr) 348 return -EMSGSIZE; 349 350 if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name)) 351 goto nla_put_failure; 352 353 if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count)) 354 goto nla_put_failure; 355 356 nla_nest_end(msg, attr); 357 358 return 0; 359 360 nla_put_failure: 361 nla_nest_cancel(msg, attr); 362 return -EMSGSIZE; 363 } 364 365 static int net_dm_hw_entries_put(struct sk_buff *msg, 366 const struct net_dm_hw_entries *hw_entries) 367 { 368 struct nlattr *attr; 369 int i; 370 371 attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES); 372 if (!attr) 373 return -EMSGSIZE; 374 375 for (i = 0; i < hw_entries->num_entries; i++) { 376 int rc; 377 378 rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]); 379 if (rc) 380 goto nla_put_failure; 381 } 382 383 nla_nest_end(msg, attr); 384 385 return 0; 386 387 nla_put_failure: 388 nla_nest_cancel(msg, attr); 389 return -EMSGSIZE; 390 } 391 392 static int 393 net_dm_hw_summary_report_fill(struct sk_buff *msg, 394 const struct net_dm_hw_entries *hw_entries) 395 { 396 struct net_dm_alert_msg anc_hdr = { 0 }; 397 void *hdr; 398 int rc; 399 400 hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, 401 NET_DM_CMD_ALERT); 402 if (!hdr) 403 return -EMSGSIZE; 404 405 /* We need to put the ancillary header in order not to break user 406 * space. 407 */ 408 if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr)) 409 goto nla_put_failure; 410 411 rc = net_dm_hw_entries_put(msg, hw_entries); 412 if (rc) 413 goto nla_put_failure; 414 415 genlmsg_end(msg, hdr); 416 417 return 0; 418 419 nla_put_failure: 420 genlmsg_cancel(msg, hdr); 421 return -EMSGSIZE; 422 } 423 424 static void net_dm_hw_summary_work(struct work_struct *work) 425 { 426 struct net_dm_hw_entries *hw_entries; 427 struct per_cpu_dm_data *hw_data; 428 struct sk_buff *msg; 429 int rc; 430 431 hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); 432 433 hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); 434 if (!hw_entries) 435 return; 436 437 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 438 if (!msg) 439 goto out; 440 441 rc = net_dm_hw_summary_report_fill(msg, hw_entries); 442 if (rc) { 443 nlmsg_free(msg); 444 goto out; 445 } 446 447 genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); 448 449 out: 450 kfree(hw_entries); 451 } 452 453 static void 454 net_dm_hw_trap_summary_probe(void *ignore, const struct devlink *devlink, 455 struct sk_buff *skb, 456 const struct devlink_trap_metadata *metadata) 457 { 458 struct net_dm_hw_entries *hw_entries; 459 struct net_dm_hw_entry *hw_entry; 460 struct per_cpu_dm_data *hw_data; 461 unsigned long flags; 462 int i; 463 464 if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL) 465 return; 466 467 hw_data = this_cpu_ptr(&dm_hw_cpu_data); 468 spin_lock_irqsave(&hw_data->lock, flags); 469 hw_entries = hw_data->hw_entries; 470 471 if (!hw_entries) 472 goto out; 473 474 for (i = 0; i < hw_entries->num_entries; i++) { 475 hw_entry = &hw_entries->entries[i]; 476 if (!strncmp(hw_entry->trap_name, metadata->trap_name, 477 NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) { 478 hw_entry->count++; 479 goto out; 480 } 481 } 482 if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit)) 483 goto out; 484 485 hw_entry = &hw_entries->entries[hw_entries->num_entries]; 486 strlcpy(hw_entry->trap_name, metadata->trap_name, 487 NET_DM_MAX_HW_TRAP_NAME_LEN - 1); 488 hw_entry->count = 1; 489 hw_entries->num_entries++; 490 491 if (!timer_pending(&hw_data->send_timer)) { 492 hw_data->send_timer.expires = jiffies + dm_delay * HZ; 493 add_timer(&hw_data->send_timer); 494 } 495 496 out: 497 spin_unlock_irqrestore(&hw_data->lock, flags); 498 } 499 500 static const struct net_dm_alert_ops net_dm_alert_summary_ops = { 501 .kfree_skb_probe = trace_kfree_skb_hit, 502 .napi_poll_probe = trace_napi_poll_hit, 503 .work_item_func = send_dm_alert, 504 .hw_work_item_func = net_dm_hw_summary_work, 505 .hw_trap_probe = net_dm_hw_trap_summary_probe, 506 }; 507 508 static void net_dm_packet_trace_kfree_skb_hit(void *ignore, 509 struct sk_buff *skb, 510 void *location, 511 enum skb_drop_reason reason) 512 { 513 ktime_t tstamp = ktime_get_real(); 514 struct per_cpu_dm_data *data; 515 struct net_dm_skb_cb *cb; 516 struct sk_buff *nskb; 517 unsigned long flags; 518 519 if (!skb_mac_header_was_set(skb)) 520 return; 521 522 nskb = skb_clone(skb, GFP_ATOMIC); 523 if (!nskb) 524 return; 525 526 if ((unsigned int)reason >= SKB_DROP_REASON_MAX) 527 reason = SKB_DROP_REASON_NOT_SPECIFIED; 528 cb = NET_DM_SKB_CB(nskb); 529 cb->reason = reason; 530 cb->pc = location; 531 /* Override the timestamp because we care about the time when the 532 * packet was dropped. 533 */ 534 nskb->tstamp = tstamp; 535 536 data = this_cpu_ptr(&dm_cpu_data); 537 538 spin_lock_irqsave(&data->drop_queue.lock, flags); 539 if (skb_queue_len(&data->drop_queue) < net_dm_queue_len) 540 __skb_queue_tail(&data->drop_queue, nskb); 541 else 542 goto unlock_free; 543 spin_unlock_irqrestore(&data->drop_queue.lock, flags); 544 545 schedule_work(&data->dm_alert_work); 546 547 return; 548 549 unlock_free: 550 spin_unlock_irqrestore(&data->drop_queue.lock, flags); 551 u64_stats_update_begin(&data->stats.syncp); 552 data->stats.dropped++; 553 u64_stats_update_end(&data->stats.syncp); 554 consume_skb(nskb); 555 } 556 557 static void net_dm_packet_trace_napi_poll_hit(void *ignore, 558 struct napi_struct *napi, 559 int work, int budget) 560 { 561 } 562 563 static size_t net_dm_in_port_size(void) 564 { 565 /* NET_DM_ATTR_IN_PORT nest */ 566 return nla_total_size(0) + 567 /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */ 568 nla_total_size(sizeof(u32)) + 569 /* NET_DM_ATTR_PORT_NETDEV_NAME */ 570 nla_total_size(IFNAMSIZ + 1); 571 } 572 573 #define NET_DM_MAX_SYMBOL_LEN 40 574 575 static size_t net_dm_packet_report_size(size_t payload_len, 576 enum skb_drop_reason reason) 577 { 578 size_t size; 579 580 size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); 581 582 return NLMSG_ALIGN(size) + 583 /* NET_DM_ATTR_ORIGIN */ 584 nla_total_size(sizeof(u16)) + 585 /* NET_DM_ATTR_PC */ 586 nla_total_size(sizeof(u64)) + 587 /* NET_DM_ATTR_SYMBOL */ 588 nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) + 589 /* NET_DM_ATTR_IN_PORT */ 590 net_dm_in_port_size() + 591 /* NET_DM_ATTR_TIMESTAMP */ 592 nla_total_size(sizeof(u64)) + 593 /* NET_DM_ATTR_ORIG_LEN */ 594 nla_total_size(sizeof(u32)) + 595 /* NET_DM_ATTR_PROTO */ 596 nla_total_size(sizeof(u16)) + 597 /* NET_DM_ATTR_REASON */ 598 nla_total_size(strlen(drop_reasons[reason]) + 1) + 599 /* NET_DM_ATTR_PAYLOAD */ 600 nla_total_size(payload_len); 601 } 602 603 static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex, 604 const char *name) 605 { 606 struct nlattr *attr; 607 608 attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT); 609 if (!attr) 610 return -EMSGSIZE; 611 612 if (ifindex && 613 nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex)) 614 goto nla_put_failure; 615 616 if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name)) 617 goto nla_put_failure; 618 619 nla_nest_end(msg, attr); 620 621 return 0; 622 623 nla_put_failure: 624 nla_nest_cancel(msg, attr); 625 return -EMSGSIZE; 626 } 627 628 static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, 629 size_t payload_len) 630 { 631 struct net_dm_skb_cb *cb = NET_DM_SKB_CB(skb); 632 char buf[NET_DM_MAX_SYMBOL_LEN]; 633 struct nlattr *attr; 634 void *hdr; 635 int rc; 636 637 hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, 638 NET_DM_CMD_PACKET_ALERT); 639 if (!hdr) 640 return -EMSGSIZE; 641 642 if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW)) 643 goto nla_put_failure; 644 645 if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, (u64)(uintptr_t)cb->pc, 646 NET_DM_ATTR_PAD)) 647 goto nla_put_failure; 648 649 if (nla_put_string(msg, NET_DM_ATTR_REASON, 650 drop_reasons[cb->reason])) 651 goto nla_put_failure; 652 653 snprintf(buf, sizeof(buf), "%pS", cb->pc); 654 if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf)) 655 goto nla_put_failure; 656 657 rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL); 658 if (rc) 659 goto nla_put_failure; 660 661 if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, 662 ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) 663 goto nla_put_failure; 664 665 if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) 666 goto nla_put_failure; 667 668 if (!payload_len) 669 goto out; 670 671 if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) 672 goto nla_put_failure; 673 674 attr = skb_put(msg, nla_total_size(payload_len)); 675 attr->nla_type = NET_DM_ATTR_PAYLOAD; 676 attr->nla_len = nla_attr_size(payload_len); 677 if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) 678 goto nla_put_failure; 679 680 out: 681 genlmsg_end(msg, hdr); 682 683 return 0; 684 685 nla_put_failure: 686 genlmsg_cancel(msg, hdr); 687 return -EMSGSIZE; 688 } 689 690 #define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO) 691 692 static void net_dm_packet_report(struct sk_buff *skb) 693 { 694 struct sk_buff *msg; 695 size_t payload_len; 696 int rc; 697 698 /* Make sure we start copying the packet from the MAC header */ 699 if (skb->data > skb_mac_header(skb)) 700 skb_push(skb, skb->data - skb_mac_header(skb)); 701 else 702 skb_pull(skb, skb_mac_header(skb) - skb->data); 703 704 /* Ensure packet fits inside a single netlink attribute */ 705 payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); 706 if (net_dm_trunc_len) 707 payload_len = min_t(size_t, net_dm_trunc_len, payload_len); 708 709 msg = nlmsg_new(net_dm_packet_report_size(payload_len, 710 NET_DM_SKB_CB(skb)->reason), 711 GFP_KERNEL); 712 if (!msg) 713 goto out; 714 715 rc = net_dm_packet_report_fill(msg, skb, payload_len); 716 if (rc) { 717 nlmsg_free(msg); 718 goto out; 719 } 720 721 genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); 722 723 out: 724 consume_skb(skb); 725 } 726 727 static void net_dm_packet_work(struct work_struct *work) 728 { 729 struct per_cpu_dm_data *data; 730 struct sk_buff_head list; 731 struct sk_buff *skb; 732 unsigned long flags; 733 734 data = container_of(work, struct per_cpu_dm_data, dm_alert_work); 735 736 __skb_queue_head_init(&list); 737 738 spin_lock_irqsave(&data->drop_queue.lock, flags); 739 skb_queue_splice_tail_init(&data->drop_queue, &list); 740 spin_unlock_irqrestore(&data->drop_queue.lock, flags); 741 742 while ((skb = __skb_dequeue(&list))) 743 net_dm_packet_report(skb); 744 } 745 746 static size_t 747 net_dm_flow_action_cookie_size(const struct devlink_trap_metadata *hw_metadata) 748 { 749 return hw_metadata->fa_cookie ? 750 nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0; 751 } 752 753 static size_t 754 net_dm_hw_packet_report_size(size_t payload_len, 755 const struct devlink_trap_metadata *hw_metadata) 756 { 757 size_t size; 758 759 size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); 760 761 return NLMSG_ALIGN(size) + 762 /* NET_DM_ATTR_ORIGIN */ 763 nla_total_size(sizeof(u16)) + 764 /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */ 765 nla_total_size(strlen(hw_metadata->trap_group_name) + 1) + 766 /* NET_DM_ATTR_HW_TRAP_NAME */ 767 nla_total_size(strlen(hw_metadata->trap_name) + 1) + 768 /* NET_DM_ATTR_IN_PORT */ 769 net_dm_in_port_size() + 770 /* NET_DM_ATTR_FLOW_ACTION_COOKIE */ 771 net_dm_flow_action_cookie_size(hw_metadata) + 772 /* NET_DM_ATTR_TIMESTAMP */ 773 nla_total_size(sizeof(u64)) + 774 /* NET_DM_ATTR_ORIG_LEN */ 775 nla_total_size(sizeof(u32)) + 776 /* NET_DM_ATTR_PROTO */ 777 nla_total_size(sizeof(u16)) + 778 /* NET_DM_ATTR_PAYLOAD */ 779 nla_total_size(payload_len); 780 } 781 782 static int net_dm_hw_packet_report_fill(struct sk_buff *msg, 783 struct sk_buff *skb, size_t payload_len) 784 { 785 struct devlink_trap_metadata *hw_metadata; 786 struct nlattr *attr; 787 void *hdr; 788 789 hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; 790 791 hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, 792 NET_DM_CMD_PACKET_ALERT); 793 if (!hdr) 794 return -EMSGSIZE; 795 796 if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW)) 797 goto nla_put_failure; 798 799 if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME, 800 hw_metadata->trap_group_name)) 801 goto nla_put_failure; 802 803 if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, 804 hw_metadata->trap_name)) 805 goto nla_put_failure; 806 807 if (hw_metadata->input_dev) { 808 struct net_device *dev = hw_metadata->input_dev; 809 int rc; 810 811 rc = net_dm_packet_report_in_port_put(msg, dev->ifindex, 812 dev->name); 813 if (rc) 814 goto nla_put_failure; 815 } 816 817 if (hw_metadata->fa_cookie && 818 nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE, 819 hw_metadata->fa_cookie->cookie_len, 820 hw_metadata->fa_cookie->cookie)) 821 goto nla_put_failure; 822 823 if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, 824 ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) 825 goto nla_put_failure; 826 827 if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) 828 goto nla_put_failure; 829 830 if (!payload_len) 831 goto out; 832 833 if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) 834 goto nla_put_failure; 835 836 attr = skb_put(msg, nla_total_size(payload_len)); 837 attr->nla_type = NET_DM_ATTR_PAYLOAD; 838 attr->nla_len = nla_attr_size(payload_len); 839 if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) 840 goto nla_put_failure; 841 842 out: 843 genlmsg_end(msg, hdr); 844 845 return 0; 846 847 nla_put_failure: 848 genlmsg_cancel(msg, hdr); 849 return -EMSGSIZE; 850 } 851 852 static struct devlink_trap_metadata * 853 net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) 854 { 855 const struct flow_action_cookie *fa_cookie; 856 struct devlink_trap_metadata *hw_metadata; 857 const char *trap_group_name; 858 const char *trap_name; 859 860 hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC); 861 if (!hw_metadata) 862 return NULL; 863 864 trap_group_name = kstrdup(metadata->trap_group_name, GFP_ATOMIC); 865 if (!trap_group_name) 866 goto free_hw_metadata; 867 hw_metadata->trap_group_name = trap_group_name; 868 869 trap_name = kstrdup(metadata->trap_name, GFP_ATOMIC); 870 if (!trap_name) 871 goto free_trap_group; 872 hw_metadata->trap_name = trap_name; 873 874 if (metadata->fa_cookie) { 875 size_t cookie_size = sizeof(*fa_cookie) + 876 metadata->fa_cookie->cookie_len; 877 878 fa_cookie = kmemdup(metadata->fa_cookie, cookie_size, 879 GFP_ATOMIC); 880 if (!fa_cookie) 881 goto free_trap_name; 882 hw_metadata->fa_cookie = fa_cookie; 883 } 884 885 hw_metadata->input_dev = metadata->input_dev; 886 dev_hold_track(hw_metadata->input_dev, &hw_metadata->dev_tracker, GFP_ATOMIC); 887 888 return hw_metadata; 889 890 free_trap_name: 891 kfree(trap_name); 892 free_trap_group: 893 kfree(trap_group_name); 894 free_hw_metadata: 895 kfree(hw_metadata); 896 return NULL; 897 } 898 899 static void 900 net_dm_hw_metadata_free(struct devlink_trap_metadata *hw_metadata) 901 { 902 dev_put_track(hw_metadata->input_dev, &hw_metadata->dev_tracker); 903 kfree(hw_metadata->fa_cookie); 904 kfree(hw_metadata->trap_name); 905 kfree(hw_metadata->trap_group_name); 906 kfree(hw_metadata); 907 } 908 909 static void net_dm_hw_packet_report(struct sk_buff *skb) 910 { 911 struct devlink_trap_metadata *hw_metadata; 912 struct sk_buff *msg; 913 size_t payload_len; 914 int rc; 915 916 if (skb->data > skb_mac_header(skb)) 917 skb_push(skb, skb->data - skb_mac_header(skb)); 918 else 919 skb_pull(skb, skb_mac_header(skb) - skb->data); 920 921 payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); 922 if (net_dm_trunc_len) 923 payload_len = min_t(size_t, net_dm_trunc_len, payload_len); 924 925 hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; 926 msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata), 927 GFP_KERNEL); 928 if (!msg) 929 goto out; 930 931 rc = net_dm_hw_packet_report_fill(msg, skb, payload_len); 932 if (rc) { 933 nlmsg_free(msg); 934 goto out; 935 } 936 937 genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); 938 939 out: 940 net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata); 941 consume_skb(skb); 942 } 943 944 static void net_dm_hw_packet_work(struct work_struct *work) 945 { 946 struct per_cpu_dm_data *hw_data; 947 struct sk_buff_head list; 948 struct sk_buff *skb; 949 unsigned long flags; 950 951 hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); 952 953 __skb_queue_head_init(&list); 954 955 spin_lock_irqsave(&hw_data->drop_queue.lock, flags); 956 skb_queue_splice_tail_init(&hw_data->drop_queue, &list); 957 spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); 958 959 while ((skb = __skb_dequeue(&list))) 960 net_dm_hw_packet_report(skb); 961 } 962 963 static void 964 net_dm_hw_trap_packet_probe(void *ignore, const struct devlink *devlink, 965 struct sk_buff *skb, 966 const struct devlink_trap_metadata *metadata) 967 { 968 struct devlink_trap_metadata *n_hw_metadata; 969 ktime_t tstamp = ktime_get_real(); 970 struct per_cpu_dm_data *hw_data; 971 struct sk_buff *nskb; 972 unsigned long flags; 973 974 if (metadata->trap_type == DEVLINK_TRAP_TYPE_CONTROL) 975 return; 976 977 if (!skb_mac_header_was_set(skb)) 978 return; 979 980 nskb = skb_clone(skb, GFP_ATOMIC); 981 if (!nskb) 982 return; 983 984 n_hw_metadata = net_dm_hw_metadata_copy(metadata); 985 if (!n_hw_metadata) 986 goto free; 987 988 NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata; 989 nskb->tstamp = tstamp; 990 991 hw_data = this_cpu_ptr(&dm_hw_cpu_data); 992 993 spin_lock_irqsave(&hw_data->drop_queue.lock, flags); 994 if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len) 995 __skb_queue_tail(&hw_data->drop_queue, nskb); 996 else 997 goto unlock_free; 998 spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); 999 1000 schedule_work(&hw_data->dm_alert_work); 1001 1002 return; 1003 1004 unlock_free: 1005 spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); 1006 u64_stats_update_begin(&hw_data->stats.syncp); 1007 hw_data->stats.dropped++; 1008 u64_stats_update_end(&hw_data->stats.syncp); 1009 net_dm_hw_metadata_free(n_hw_metadata); 1010 free: 1011 consume_skb(nskb); 1012 } 1013 1014 static const struct net_dm_alert_ops net_dm_alert_packet_ops = { 1015 .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit, 1016 .napi_poll_probe = net_dm_packet_trace_napi_poll_hit, 1017 .work_item_func = net_dm_packet_work, 1018 .hw_work_item_func = net_dm_hw_packet_work, 1019 .hw_trap_probe = net_dm_hw_trap_packet_probe, 1020 }; 1021 1022 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = { 1023 [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops, 1024 [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops, 1025 }; 1026 1027 #if IS_ENABLED(CONFIG_NET_DEVLINK) 1028 static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops) 1029 { 1030 return register_trace_devlink_trap_report(ops->hw_trap_probe, NULL); 1031 } 1032 1033 static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops) 1034 { 1035 unregister_trace_devlink_trap_report(ops->hw_trap_probe, NULL); 1036 tracepoint_synchronize_unregister(); 1037 } 1038 #else 1039 static int net_dm_hw_probe_register(const struct net_dm_alert_ops *ops) 1040 { 1041 return -EOPNOTSUPP; 1042 } 1043 1044 static void net_dm_hw_probe_unregister(const struct net_dm_alert_ops *ops) 1045 { 1046 } 1047 #endif 1048 1049 static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) 1050 { 1051 const struct net_dm_alert_ops *ops; 1052 int cpu, rc; 1053 1054 if (monitor_hw) { 1055 NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled"); 1056 return -EAGAIN; 1057 } 1058 1059 ops = net_dm_alert_ops_arr[net_dm_alert_mode]; 1060 1061 if (!try_module_get(THIS_MODULE)) { 1062 NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); 1063 return -ENODEV; 1064 } 1065 1066 for_each_possible_cpu(cpu) { 1067 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); 1068 struct net_dm_hw_entries *hw_entries; 1069 1070 INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func); 1071 timer_setup(&hw_data->send_timer, sched_send_work, 0); 1072 hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); 1073 kfree(hw_entries); 1074 } 1075 1076 rc = net_dm_hw_probe_register(ops); 1077 if (rc) { 1078 NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to devlink_trap_probe() tracepoint"); 1079 goto err_module_put; 1080 } 1081 1082 monitor_hw = true; 1083 1084 return 0; 1085 1086 err_module_put: 1087 for_each_possible_cpu(cpu) { 1088 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); 1089 struct sk_buff *skb; 1090 1091 del_timer_sync(&hw_data->send_timer); 1092 cancel_work_sync(&hw_data->dm_alert_work); 1093 while ((skb = __skb_dequeue(&hw_data->drop_queue))) { 1094 struct devlink_trap_metadata *hw_metadata; 1095 1096 hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; 1097 net_dm_hw_metadata_free(hw_metadata); 1098 consume_skb(skb); 1099 } 1100 } 1101 module_put(THIS_MODULE); 1102 return rc; 1103 } 1104 1105 static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) 1106 { 1107 const struct net_dm_alert_ops *ops; 1108 int cpu; 1109 1110 if (!monitor_hw) { 1111 NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); 1112 return; 1113 } 1114 1115 ops = net_dm_alert_ops_arr[net_dm_alert_mode]; 1116 1117 monitor_hw = false; 1118 1119 net_dm_hw_probe_unregister(ops); 1120 1121 for_each_possible_cpu(cpu) { 1122 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); 1123 struct sk_buff *skb; 1124 1125 del_timer_sync(&hw_data->send_timer); 1126 cancel_work_sync(&hw_data->dm_alert_work); 1127 while ((skb = __skb_dequeue(&hw_data->drop_queue))) { 1128 struct devlink_trap_metadata *hw_metadata; 1129 1130 hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; 1131 net_dm_hw_metadata_free(hw_metadata); 1132 consume_skb(skb); 1133 } 1134 } 1135 1136 module_put(THIS_MODULE); 1137 } 1138 1139 static int net_dm_trace_on_set(struct netlink_ext_ack *extack) 1140 { 1141 const struct net_dm_alert_ops *ops; 1142 int cpu, rc; 1143 1144 ops = net_dm_alert_ops_arr[net_dm_alert_mode]; 1145 1146 if (!try_module_get(THIS_MODULE)) { 1147 NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); 1148 return -ENODEV; 1149 } 1150 1151 for_each_possible_cpu(cpu) { 1152 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); 1153 struct sk_buff *skb; 1154 1155 INIT_WORK(&data->dm_alert_work, ops->work_item_func); 1156 timer_setup(&data->send_timer, sched_send_work, 0); 1157 /* Allocate a new per-CPU skb for the summary alert message and 1158 * free the old one which might contain stale data from 1159 * previous tracing. 1160 */ 1161 skb = reset_per_cpu_data(data); 1162 consume_skb(skb); 1163 } 1164 1165 rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL); 1166 if (rc) { 1167 NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint"); 1168 goto err_module_put; 1169 } 1170 1171 rc = register_trace_napi_poll(ops->napi_poll_probe, NULL); 1172 if (rc) { 1173 NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint"); 1174 goto err_unregister_trace; 1175 } 1176 1177 return 0; 1178 1179 err_unregister_trace: 1180 unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); 1181 err_module_put: 1182 for_each_possible_cpu(cpu) { 1183 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); 1184 struct sk_buff *skb; 1185 1186 del_timer_sync(&data->send_timer); 1187 cancel_work_sync(&data->dm_alert_work); 1188 while ((skb = __skb_dequeue(&data->drop_queue))) 1189 consume_skb(skb); 1190 } 1191 module_put(THIS_MODULE); 1192 return rc; 1193 } 1194 1195 static void net_dm_trace_off_set(void) 1196 { 1197 struct dm_hw_stat_delta *new_stat, *temp; 1198 const struct net_dm_alert_ops *ops; 1199 int cpu; 1200 1201 ops = net_dm_alert_ops_arr[net_dm_alert_mode]; 1202 1203 unregister_trace_napi_poll(ops->napi_poll_probe, NULL); 1204 unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); 1205 1206 tracepoint_synchronize_unregister(); 1207 1208 /* Make sure we do not send notifications to user space after request 1209 * to stop tracing returns. 1210 */ 1211 for_each_possible_cpu(cpu) { 1212 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); 1213 struct sk_buff *skb; 1214 1215 del_timer_sync(&data->send_timer); 1216 cancel_work_sync(&data->dm_alert_work); 1217 while ((skb = __skb_dequeue(&data->drop_queue))) 1218 consume_skb(skb); 1219 } 1220 1221 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { 1222 if (new_stat->dev == NULL) { 1223 list_del_rcu(&new_stat->list); 1224 kfree_rcu(new_stat, rcu); 1225 } 1226 } 1227 1228 module_put(THIS_MODULE); 1229 } 1230 1231 static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack) 1232 { 1233 int rc = 0; 1234 1235 if (state == trace_state) { 1236 NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state"); 1237 return -EAGAIN; 1238 } 1239 1240 switch (state) { 1241 case TRACE_ON: 1242 rc = net_dm_trace_on_set(extack); 1243 break; 1244 case TRACE_OFF: 1245 net_dm_trace_off_set(); 1246 break; 1247 default: 1248 rc = 1; 1249 break; 1250 } 1251 1252 if (!rc) 1253 trace_state = state; 1254 else 1255 rc = -EINPROGRESS; 1256 1257 return rc; 1258 } 1259 1260 static bool net_dm_is_monitoring(void) 1261 { 1262 return trace_state == TRACE_ON || monitor_hw; 1263 } 1264 1265 static int net_dm_alert_mode_get_from_info(struct genl_info *info, 1266 enum net_dm_alert_mode *p_alert_mode) 1267 { 1268 u8 val; 1269 1270 val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]); 1271 1272 switch (val) { 1273 case NET_DM_ALERT_MODE_SUMMARY: 1274 case NET_DM_ALERT_MODE_PACKET: 1275 *p_alert_mode = val; 1276 break; 1277 default: 1278 return -EINVAL; 1279 } 1280 1281 return 0; 1282 } 1283 1284 static int net_dm_alert_mode_set(struct genl_info *info) 1285 { 1286 struct netlink_ext_ack *extack = info->extack; 1287 enum net_dm_alert_mode alert_mode; 1288 int rc; 1289 1290 if (!info->attrs[NET_DM_ATTR_ALERT_MODE]) 1291 return 0; 1292 1293 rc = net_dm_alert_mode_get_from_info(info, &alert_mode); 1294 if (rc) { 1295 NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode"); 1296 return -EINVAL; 1297 } 1298 1299 net_dm_alert_mode = alert_mode; 1300 1301 return 0; 1302 } 1303 1304 static void net_dm_trunc_len_set(struct genl_info *info) 1305 { 1306 if (!info->attrs[NET_DM_ATTR_TRUNC_LEN]) 1307 return; 1308 1309 net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]); 1310 } 1311 1312 static void net_dm_queue_len_set(struct genl_info *info) 1313 { 1314 if (!info->attrs[NET_DM_ATTR_QUEUE_LEN]) 1315 return; 1316 1317 net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]); 1318 } 1319 1320 static int net_dm_cmd_config(struct sk_buff *skb, 1321 struct genl_info *info) 1322 { 1323 struct netlink_ext_ack *extack = info->extack; 1324 int rc; 1325 1326 if (net_dm_is_monitoring()) { 1327 NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring"); 1328 return -EBUSY; 1329 } 1330 1331 rc = net_dm_alert_mode_set(info); 1332 if (rc) 1333 return rc; 1334 1335 net_dm_trunc_len_set(info); 1336 1337 net_dm_queue_len_set(info); 1338 1339 return 0; 1340 } 1341 1342 static int net_dm_monitor_start(bool set_sw, bool set_hw, 1343 struct netlink_ext_ack *extack) 1344 { 1345 bool sw_set = false; 1346 int rc; 1347 1348 if (set_sw) { 1349 rc = set_all_monitor_traces(TRACE_ON, extack); 1350 if (rc) 1351 return rc; 1352 sw_set = true; 1353 } 1354 1355 if (set_hw) { 1356 rc = net_dm_hw_monitor_start(extack); 1357 if (rc) 1358 goto err_monitor_hw; 1359 } 1360 1361 return 0; 1362 1363 err_monitor_hw: 1364 if (sw_set) 1365 set_all_monitor_traces(TRACE_OFF, extack); 1366 return rc; 1367 } 1368 1369 static void net_dm_monitor_stop(bool set_sw, bool set_hw, 1370 struct netlink_ext_ack *extack) 1371 { 1372 if (set_hw) 1373 net_dm_hw_monitor_stop(extack); 1374 if (set_sw) 1375 set_all_monitor_traces(TRACE_OFF, extack); 1376 } 1377 1378 static int net_dm_cmd_trace(struct sk_buff *skb, 1379 struct genl_info *info) 1380 { 1381 bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS]; 1382 bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS]; 1383 struct netlink_ext_ack *extack = info->extack; 1384 1385 /* To maintain backward compatibility, we start / stop monitoring of 1386 * software drops if no flag is specified. 1387 */ 1388 if (!set_sw && !set_hw) 1389 set_sw = true; 1390 1391 switch (info->genlhdr->cmd) { 1392 case NET_DM_CMD_START: 1393 return net_dm_monitor_start(set_sw, set_hw, extack); 1394 case NET_DM_CMD_STOP: 1395 net_dm_monitor_stop(set_sw, set_hw, extack); 1396 return 0; 1397 } 1398 1399 return -EOPNOTSUPP; 1400 } 1401 1402 static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info) 1403 { 1404 void *hdr; 1405 1406 hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, 1407 &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW); 1408 if (!hdr) 1409 return -EMSGSIZE; 1410 1411 if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode)) 1412 goto nla_put_failure; 1413 1414 if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len)) 1415 goto nla_put_failure; 1416 1417 if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len)) 1418 goto nla_put_failure; 1419 1420 genlmsg_end(msg, hdr); 1421 1422 return 0; 1423 1424 nla_put_failure: 1425 genlmsg_cancel(msg, hdr); 1426 return -EMSGSIZE; 1427 } 1428 1429 static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info) 1430 { 1431 struct sk_buff *msg; 1432 int rc; 1433 1434 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1435 if (!msg) 1436 return -ENOMEM; 1437 1438 rc = net_dm_config_fill(msg, info); 1439 if (rc) 1440 goto free_msg; 1441 1442 return genlmsg_reply(msg, info); 1443 1444 free_msg: 1445 nlmsg_free(msg); 1446 return rc; 1447 } 1448 1449 static void net_dm_stats_read(struct net_dm_stats *stats) 1450 { 1451 int cpu; 1452 1453 memset(stats, 0, sizeof(*stats)); 1454 for_each_possible_cpu(cpu) { 1455 struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); 1456 struct net_dm_stats *cpu_stats = &data->stats; 1457 unsigned int start; 1458 u64 dropped; 1459 1460 do { 1461 start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); 1462 dropped = cpu_stats->dropped; 1463 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); 1464 1465 stats->dropped += dropped; 1466 } 1467 } 1468 1469 static int net_dm_stats_put(struct sk_buff *msg) 1470 { 1471 struct net_dm_stats stats; 1472 struct nlattr *attr; 1473 1474 net_dm_stats_read(&stats); 1475 1476 attr = nla_nest_start(msg, NET_DM_ATTR_STATS); 1477 if (!attr) 1478 return -EMSGSIZE; 1479 1480 if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, 1481 stats.dropped, NET_DM_ATTR_PAD)) 1482 goto nla_put_failure; 1483 1484 nla_nest_end(msg, attr); 1485 1486 return 0; 1487 1488 nla_put_failure: 1489 nla_nest_cancel(msg, attr); 1490 return -EMSGSIZE; 1491 } 1492 1493 static void net_dm_hw_stats_read(struct net_dm_stats *stats) 1494 { 1495 int cpu; 1496 1497 memset(stats, 0, sizeof(*stats)); 1498 for_each_possible_cpu(cpu) { 1499 struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); 1500 struct net_dm_stats *cpu_stats = &hw_data->stats; 1501 unsigned int start; 1502 u64 dropped; 1503 1504 do { 1505 start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); 1506 dropped = cpu_stats->dropped; 1507 } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); 1508 1509 stats->dropped += dropped; 1510 } 1511 } 1512 1513 static int net_dm_hw_stats_put(struct sk_buff *msg) 1514 { 1515 struct net_dm_stats stats; 1516 struct nlattr *attr; 1517 1518 net_dm_hw_stats_read(&stats); 1519 1520 attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS); 1521 if (!attr) 1522 return -EMSGSIZE; 1523 1524 if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, 1525 stats.dropped, NET_DM_ATTR_PAD)) 1526 goto nla_put_failure; 1527 1528 nla_nest_end(msg, attr); 1529 1530 return 0; 1531 1532 nla_put_failure: 1533 nla_nest_cancel(msg, attr); 1534 return -EMSGSIZE; 1535 } 1536 1537 static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info) 1538 { 1539 void *hdr; 1540 int rc; 1541 1542 hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, 1543 &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW); 1544 if (!hdr) 1545 return -EMSGSIZE; 1546 1547 rc = net_dm_stats_put(msg); 1548 if (rc) 1549 goto nla_put_failure; 1550 1551 rc = net_dm_hw_stats_put(msg); 1552 if (rc) 1553 goto nla_put_failure; 1554 1555 genlmsg_end(msg, hdr); 1556 1557 return 0; 1558 1559 nla_put_failure: 1560 genlmsg_cancel(msg, hdr); 1561 return -EMSGSIZE; 1562 } 1563 1564 static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info) 1565 { 1566 struct sk_buff *msg; 1567 int rc; 1568 1569 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1570 if (!msg) 1571 return -ENOMEM; 1572 1573 rc = net_dm_stats_fill(msg, info); 1574 if (rc) 1575 goto free_msg; 1576 1577 return genlmsg_reply(msg, info); 1578 1579 free_msg: 1580 nlmsg_free(msg); 1581 return rc; 1582 } 1583 1584 static int dropmon_net_event(struct notifier_block *ev_block, 1585 unsigned long event, void *ptr) 1586 { 1587 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1588 struct dm_hw_stat_delta *new_stat = NULL; 1589 struct dm_hw_stat_delta *tmp; 1590 1591 switch (event) { 1592 case NETDEV_REGISTER: 1593 new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); 1594 1595 if (!new_stat) 1596 goto out; 1597 1598 new_stat->dev = dev; 1599 new_stat->last_rx = jiffies; 1600 mutex_lock(&net_dm_mutex); 1601 list_add_rcu(&new_stat->list, &hw_stats_list); 1602 mutex_unlock(&net_dm_mutex); 1603 break; 1604 case NETDEV_UNREGISTER: 1605 mutex_lock(&net_dm_mutex); 1606 list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { 1607 if (new_stat->dev == dev) { 1608 new_stat->dev = NULL; 1609 if (trace_state == TRACE_OFF) { 1610 list_del_rcu(&new_stat->list); 1611 kfree_rcu(new_stat, rcu); 1612 break; 1613 } 1614 } 1615 } 1616 mutex_unlock(&net_dm_mutex); 1617 break; 1618 } 1619 out: 1620 return NOTIFY_DONE; 1621 } 1622 1623 static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = { 1624 [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 }, 1625 [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 }, 1626 [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 }, 1627 [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 }, 1628 [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG }, 1629 [NET_DM_ATTR_HW_DROPS] = {. type = NLA_FLAG }, 1630 }; 1631 1632 static const struct genl_small_ops dropmon_ops[] = { 1633 { 1634 .cmd = NET_DM_CMD_CONFIG, 1635 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1636 .doit = net_dm_cmd_config, 1637 .flags = GENL_ADMIN_PERM, 1638 }, 1639 { 1640 .cmd = NET_DM_CMD_START, 1641 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1642 .doit = net_dm_cmd_trace, 1643 }, 1644 { 1645 .cmd = NET_DM_CMD_STOP, 1646 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 1647 .doit = net_dm_cmd_trace, 1648 }, 1649 { 1650 .cmd = NET_DM_CMD_CONFIG_GET, 1651 .doit = net_dm_cmd_config_get, 1652 }, 1653 { 1654 .cmd = NET_DM_CMD_STATS_GET, 1655 .doit = net_dm_cmd_stats_get, 1656 }, 1657 }; 1658 1659 static int net_dm_nl_pre_doit(const struct genl_ops *ops, 1660 struct sk_buff *skb, struct genl_info *info) 1661 { 1662 mutex_lock(&net_dm_mutex); 1663 1664 return 0; 1665 } 1666 1667 static void net_dm_nl_post_doit(const struct genl_ops *ops, 1668 struct sk_buff *skb, struct genl_info *info) 1669 { 1670 mutex_unlock(&net_dm_mutex); 1671 } 1672 1673 static struct genl_family net_drop_monitor_family __ro_after_init = { 1674 .hdrsize = 0, 1675 .name = "NET_DM", 1676 .version = 2, 1677 .maxattr = NET_DM_ATTR_MAX, 1678 .policy = net_dm_nl_policy, 1679 .pre_doit = net_dm_nl_pre_doit, 1680 .post_doit = net_dm_nl_post_doit, 1681 .module = THIS_MODULE, 1682 .small_ops = dropmon_ops, 1683 .n_small_ops = ARRAY_SIZE(dropmon_ops), 1684 .mcgrps = dropmon_mcgrps, 1685 .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), 1686 }; 1687 1688 static struct notifier_block dropmon_net_notifier = { 1689 .notifier_call = dropmon_net_event 1690 }; 1691 1692 static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data) 1693 { 1694 spin_lock_init(&data->lock); 1695 skb_queue_head_init(&data->drop_queue); 1696 u64_stats_init(&data->stats.syncp); 1697 } 1698 1699 static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data) 1700 { 1701 WARN_ON(!skb_queue_empty(&data->drop_queue)); 1702 } 1703 1704 static void net_dm_cpu_data_init(int cpu) 1705 { 1706 struct per_cpu_dm_data *data; 1707 1708 data = &per_cpu(dm_cpu_data, cpu); 1709 __net_dm_cpu_data_init(data); 1710 } 1711 1712 static void net_dm_cpu_data_fini(int cpu) 1713 { 1714 struct per_cpu_dm_data *data; 1715 1716 data = &per_cpu(dm_cpu_data, cpu); 1717 /* At this point, we should have exclusive access 1718 * to this struct and can free the skb inside it. 1719 */ 1720 consume_skb(data->skb); 1721 __net_dm_cpu_data_fini(data); 1722 } 1723 1724 static void net_dm_hw_cpu_data_init(int cpu) 1725 { 1726 struct per_cpu_dm_data *hw_data; 1727 1728 hw_data = &per_cpu(dm_hw_cpu_data, cpu); 1729 __net_dm_cpu_data_init(hw_data); 1730 } 1731 1732 static void net_dm_hw_cpu_data_fini(int cpu) 1733 { 1734 struct per_cpu_dm_data *hw_data; 1735 1736 hw_data = &per_cpu(dm_hw_cpu_data, cpu); 1737 kfree(hw_data->hw_entries); 1738 __net_dm_cpu_data_fini(hw_data); 1739 } 1740 1741 static int __init init_net_drop_monitor(void) 1742 { 1743 int cpu, rc; 1744 1745 pr_info("Initializing network drop monitor service\n"); 1746 1747 if (sizeof(void *) > 8) { 1748 pr_err("Unable to store program counters on this arch, Drop monitor failed\n"); 1749 return -ENOSPC; 1750 } 1751 1752 rc = genl_register_family(&net_drop_monitor_family); 1753 if (rc) { 1754 pr_err("Could not create drop monitor netlink family\n"); 1755 return rc; 1756 } 1757 WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); 1758 1759 rc = register_netdevice_notifier(&dropmon_net_notifier); 1760 if (rc < 0) { 1761 pr_crit("Failed to register netdevice notifier\n"); 1762 goto out_unreg; 1763 } 1764 1765 rc = 0; 1766 1767 for_each_possible_cpu(cpu) { 1768 net_dm_cpu_data_init(cpu); 1769 net_dm_hw_cpu_data_init(cpu); 1770 } 1771 1772 goto out; 1773 1774 out_unreg: 1775 genl_unregister_family(&net_drop_monitor_family); 1776 out: 1777 return rc; 1778 } 1779 1780 static void exit_net_drop_monitor(void) 1781 { 1782 int cpu; 1783 1784 BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); 1785 1786 /* 1787 * Because of the module_get/put we do in the trace state change path 1788 * we are guaranteed not to have any current users when we get here 1789 */ 1790 1791 for_each_possible_cpu(cpu) { 1792 net_dm_hw_cpu_data_fini(cpu); 1793 net_dm_cpu_data_fini(cpu); 1794 } 1795 1796 BUG_ON(genl_unregister_family(&net_drop_monitor_family)); 1797 } 1798 1799 module_init(init_net_drop_monitor); 1800 module_exit(exit_net_drop_monitor); 1801 1802 MODULE_LICENSE("GPL v2"); 1803 MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); 1804 MODULE_ALIAS_GENL_FAMILY("NET_DM"); 1805 MODULE_DESCRIPTION("Monitoring code for network dropped packet alerts"); 1806