1 /* 2 * Monitoring code for network dropped packet alerts 3 * 4 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/netdevice.h> 10 #include <linux/etherdevice.h> 11 #include <linux/string.h> 12 #include <linux/if_arp.h> 13 #include <linux/inetdevice.h> 14 #include <linux/inet.h> 15 #include <linux/interrupt.h> 16 #include <linux/netpoll.h> 17 #include <linux/sched.h> 18 #include <linux/delay.h> 19 #include <linux/types.h> 20 #include <linux/workqueue.h> 21 #include <linux/netlink.h> 22 #include <linux/net_dropmon.h> 23 #include <linux/percpu.h> 24 #include <linux/timer.h> 25 #include <linux/bitops.h> 26 #include <linux/slab.h> 27 #include <linux/module.h> 28 #include <net/genetlink.h> 29 #include <net/netevent.h> 30 31 #include <trace/events/skb.h> 32 #include <trace/events/napi.h> 33 34 #include <asm/unaligned.h> 35 36 #define TRACE_ON 1 37 #define TRACE_OFF 0 38 39 /* 40 * Globals, our netlink socket pointer 41 * and the work handle that will send up 42 * netlink alerts 43 */ 44 static int trace_state = TRACE_OFF; 45 static DEFINE_MUTEX(trace_state_mutex); 46 47 struct per_cpu_dm_data { 48 spinlock_t lock; 49 struct sk_buff *skb; 50 struct work_struct dm_alert_work; 51 struct timer_list send_timer; 52 }; 53 54 struct dm_hw_stat_delta { 55 struct net_device *dev; 56 unsigned long last_rx; 57 struct list_head list; 58 struct rcu_head rcu; 59 unsigned long last_drop_val; 60 }; 61 62 static struct genl_family net_drop_monitor_family; 63 64 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); 65 66 static int dm_hit_limit = 64; 67 static int dm_delay = 1; 68 static unsigned long dm_hw_check_delta = 2*HZ; 69 static LIST_HEAD(hw_stats_list); 70 71 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) 72 { 73 size_t al; 74 struct net_dm_alert_msg *msg; 75 struct nlattr *nla; 76 struct sk_buff *skb; 77 unsigned long flags; 78 void *msg_header; 79 80 al = sizeof(struct net_dm_alert_msg); 81 al += dm_hit_limit * sizeof(struct net_dm_drop_point); 82 al += sizeof(struct nlattr); 83 84 skb = genlmsg_new(al, GFP_KERNEL); 85 86 if (!skb) 87 goto err; 88 89 msg_header = genlmsg_put(skb, 0, 0, &net_drop_monitor_family, 90 0, NET_DM_CMD_ALERT); 91 if (!msg_header) { 92 nlmsg_free(skb); 93 skb = NULL; 94 goto err; 95 } 96 nla = nla_reserve(skb, NLA_UNSPEC, 97 sizeof(struct net_dm_alert_msg)); 98 if (!nla) { 99 nlmsg_free(skb); 100 skb = NULL; 101 goto err; 102 } 103 msg = nla_data(nla); 104 memset(msg, 0, al); 105 goto out; 106 107 err: 108 mod_timer(&data->send_timer, jiffies + HZ / 10); 109 out: 110 spin_lock_irqsave(&data->lock, flags); 111 swap(data->skb, skb); 112 spin_unlock_irqrestore(&data->lock, flags); 113 114 if (skb) { 115 struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data; 116 struct genlmsghdr *gnlh = (struct genlmsghdr *)nlmsg_data(nlh); 117 118 genlmsg_end(skb, genlmsg_data(gnlh)); 119 } 120 121 return skb; 122 } 123 124 static const struct genl_multicast_group dropmon_mcgrps[] = { 125 { .name = "events", }, 126 }; 127 128 static void send_dm_alert(struct work_struct *work) 129 { 130 struct sk_buff *skb; 131 struct per_cpu_dm_data *data; 132 133 data = container_of(work, struct per_cpu_dm_data, dm_alert_work); 134 135 skb = reset_per_cpu_data(data); 136 137 if (skb) 138 genlmsg_multicast(&net_drop_monitor_family, skb, 0, 139 0, GFP_KERNEL); 140 } 141 142 /* 143 * This is the timer function to delay the sending of an alert 144 * in the event that more drops will arrive during the 145 * hysteresis period. 146 */ 147 static void sched_send_work(unsigned long _data) 148 { 149 struct per_cpu_dm_data *data = (struct per_cpu_dm_data *)_data; 150 151 schedule_work(&data->dm_alert_work); 152 } 153 154 static void trace_drop_common(struct sk_buff *skb, void *location) 155 { 156 struct net_dm_alert_msg *msg; 157 struct nlmsghdr *nlh; 158 struct nlattr *nla; 159 int i; 160 struct sk_buff *dskb; 161 struct per_cpu_dm_data *data; 162 unsigned long flags; 163 164 local_irq_save(flags); 165 data = this_cpu_ptr(&dm_cpu_data); 166 spin_lock(&data->lock); 167 dskb = data->skb; 168 169 if (!dskb) 170 goto out; 171 172 nlh = (struct nlmsghdr *)dskb->data; 173 nla = genlmsg_data(nlmsg_data(nlh)); 174 msg = nla_data(nla); 175 for (i = 0; i < msg->entries; i++) { 176 if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { 177 msg->points[i].count++; 178 goto out; 179 } 180 } 181 if (msg->entries == dm_hit_limit) 182 goto out; 183 /* 184 * We need to create a new entry 185 */ 186 __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); 187 nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); 188 memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); 189 msg->points[msg->entries].count = 1; 190 msg->entries++; 191 192 if (!timer_pending(&data->send_timer)) { 193 data->send_timer.expires = jiffies + dm_delay * HZ; 194 add_timer(&data->send_timer); 195 } 196 197 out: 198 spin_unlock_irqrestore(&data->lock, flags); 199 } 200 201 static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) 202 { 203 trace_drop_common(skb, location); 204 } 205 206 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, 207 int work, int budget) 208 { 209 struct dm_hw_stat_delta *new_stat; 210 211 /* 212 * Don't check napi structures with no associated device 213 */ 214 if (!napi->dev) 215 return; 216 217 rcu_read_lock(); 218 list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { 219 /* 220 * only add a note to our monitor buffer if: 221 * 1) this is the dev we received on 222 * 2) its after the last_rx delta 223 * 3) our rx_dropped count has gone up 224 */ 225 if ((new_stat->dev == napi->dev) && 226 (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && 227 (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { 228 trace_drop_common(NULL, NULL); 229 new_stat->last_drop_val = napi->dev->stats.rx_dropped; 230 new_stat->last_rx = jiffies; 231 break; 232 } 233 } 234 rcu_read_unlock(); 235 } 236 237 static int set_all_monitor_traces(int state) 238 { 239 int rc = 0; 240 struct dm_hw_stat_delta *new_stat = NULL; 241 struct dm_hw_stat_delta *temp; 242 243 mutex_lock(&trace_state_mutex); 244 245 if (state == trace_state) { 246 rc = -EAGAIN; 247 goto out_unlock; 248 } 249 250 switch (state) { 251 case TRACE_ON: 252 if (!try_module_get(THIS_MODULE)) { 253 rc = -ENODEV; 254 break; 255 } 256 257 rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); 258 rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); 259 break; 260 261 case TRACE_OFF: 262 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); 263 rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); 264 265 tracepoint_synchronize_unregister(); 266 267 /* 268 * Clean the device list 269 */ 270 list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { 271 if (new_stat->dev == NULL) { 272 list_del_rcu(&new_stat->list); 273 kfree_rcu(new_stat, rcu); 274 } 275 } 276 277 module_put(THIS_MODULE); 278 279 break; 280 default: 281 rc = 1; 282 break; 283 } 284 285 if (!rc) 286 trace_state = state; 287 else 288 rc = -EINPROGRESS; 289 290 out_unlock: 291 mutex_unlock(&trace_state_mutex); 292 293 return rc; 294 } 295 296 297 static int net_dm_cmd_config(struct sk_buff *skb, 298 struct genl_info *info) 299 { 300 return -ENOTSUPP; 301 } 302 303 static int net_dm_cmd_trace(struct sk_buff *skb, 304 struct genl_info *info) 305 { 306 switch (info->genlhdr->cmd) { 307 case NET_DM_CMD_START: 308 return set_all_monitor_traces(TRACE_ON); 309 case NET_DM_CMD_STOP: 310 return set_all_monitor_traces(TRACE_OFF); 311 } 312 313 return -ENOTSUPP; 314 } 315 316 static int dropmon_net_event(struct notifier_block *ev_block, 317 unsigned long event, void *ptr) 318 { 319 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 320 struct dm_hw_stat_delta *new_stat = NULL; 321 struct dm_hw_stat_delta *tmp; 322 323 switch (event) { 324 case NETDEV_REGISTER: 325 new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL); 326 327 if (!new_stat) 328 goto out; 329 330 new_stat->dev = dev; 331 new_stat->last_rx = jiffies; 332 mutex_lock(&trace_state_mutex); 333 list_add_rcu(&new_stat->list, &hw_stats_list); 334 mutex_unlock(&trace_state_mutex); 335 break; 336 case NETDEV_UNREGISTER: 337 mutex_lock(&trace_state_mutex); 338 list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { 339 if (new_stat->dev == dev) { 340 new_stat->dev = NULL; 341 if (trace_state == TRACE_OFF) { 342 list_del_rcu(&new_stat->list); 343 kfree_rcu(new_stat, rcu); 344 break; 345 } 346 } 347 } 348 mutex_unlock(&trace_state_mutex); 349 break; 350 } 351 out: 352 return NOTIFY_DONE; 353 } 354 355 static const struct genl_ops dropmon_ops[] = { 356 { 357 .cmd = NET_DM_CMD_CONFIG, 358 .doit = net_dm_cmd_config, 359 }, 360 { 361 .cmd = NET_DM_CMD_START, 362 .doit = net_dm_cmd_trace, 363 }, 364 { 365 .cmd = NET_DM_CMD_STOP, 366 .doit = net_dm_cmd_trace, 367 }, 368 }; 369 370 static struct genl_family net_drop_monitor_family __ro_after_init = { 371 .hdrsize = 0, 372 .name = "NET_DM", 373 .version = 2, 374 .module = THIS_MODULE, 375 .ops = dropmon_ops, 376 .n_ops = ARRAY_SIZE(dropmon_ops), 377 .mcgrps = dropmon_mcgrps, 378 .n_mcgrps = ARRAY_SIZE(dropmon_mcgrps), 379 }; 380 381 static struct notifier_block dropmon_net_notifier = { 382 .notifier_call = dropmon_net_event 383 }; 384 385 static int __init init_net_drop_monitor(void) 386 { 387 struct per_cpu_dm_data *data; 388 int cpu, rc; 389 390 pr_info("Initializing network drop monitor service\n"); 391 392 if (sizeof(void *) > 8) { 393 pr_err("Unable to store program counters on this arch, Drop monitor failed\n"); 394 return -ENOSPC; 395 } 396 397 rc = genl_register_family(&net_drop_monitor_family); 398 if (rc) { 399 pr_err("Could not create drop monitor netlink family\n"); 400 return rc; 401 } 402 WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); 403 404 rc = register_netdevice_notifier(&dropmon_net_notifier); 405 if (rc < 0) { 406 pr_crit("Failed to register netdevice notifier\n"); 407 goto out_unreg; 408 } 409 410 rc = 0; 411 412 for_each_possible_cpu(cpu) { 413 data = &per_cpu(dm_cpu_data, cpu); 414 INIT_WORK(&data->dm_alert_work, send_dm_alert); 415 init_timer(&data->send_timer); 416 data->send_timer.data = (unsigned long)data; 417 data->send_timer.function = sched_send_work; 418 spin_lock_init(&data->lock); 419 reset_per_cpu_data(data); 420 } 421 422 423 goto out; 424 425 out_unreg: 426 genl_unregister_family(&net_drop_monitor_family); 427 out: 428 return rc; 429 } 430 431 static void exit_net_drop_monitor(void) 432 { 433 struct per_cpu_dm_data *data; 434 int cpu; 435 436 BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); 437 438 /* 439 * Because of the module_get/put we do in the trace state change path 440 * we are guarnateed not to have any current users when we get here 441 * all we need to do is make sure that we don't have any running timers 442 * or pending schedule calls 443 */ 444 445 for_each_possible_cpu(cpu) { 446 data = &per_cpu(dm_cpu_data, cpu); 447 del_timer_sync(&data->send_timer); 448 cancel_work_sync(&data->dm_alert_work); 449 /* 450 * At this point, we should have exclusive access 451 * to this struct and can free the skb inside it 452 */ 453 kfree_skb(data->skb); 454 } 455 456 BUG_ON(genl_unregister_family(&net_drop_monitor_family)); 457 } 458 459 module_init(init_net_drop_monitor); 460 module_exit(exit_net_drop_monitor); 461 462 MODULE_LICENSE("GPL v2"); 463 MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); 464 MODULE_ALIAS_GENL_FAMILY("NET_DM"); 465