/*
 * Monitoring code for network dropped packet alerts
 *
 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
 */

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/netlink.h>
#include <linux/net_dropmon.h>
#include <linux/percpu.h>
#include <linux/timer.h>
#include <linux/bitops.h>
#include <net/genetlink.h>
#include <net/netevent.h>

#include <trace/events/skb.h>
#include <trace/events/napi.h>

#include <asm/unaligned.h>

#define TRACE_ON 1
#define TRACE_OFF 0

static void send_dm_alert(struct work_struct *unused);


/*
 * Globals: the current trace state, and the spinlock that protects
 * transitions between tracing on and off
 */
static int trace_state = TRACE_OFF;
static DEFINE_SPINLOCK(trace_state_lock);

struct per_cpu_dm_data {
	struct work_struct dm_alert_work;
	struct sk_buff *skb;
	atomic_t dm_hit_count;
	struct timer_list send_timer;
};

struct dm_hw_stat_delta {
	struct net_device *dev;
	struct list_head list;
	struct rcu_head rcu;
	unsigned long last_drop_val;
};

static struct genl_family net_drop_monitor_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = 0,
	.name = "NET_DM",
	.version = 2,
	.maxattr = NET_DM_CMD_MAX,
};

static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);

static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);

/*
 * Pre-allocate this cpu's alert skb: the genetlink header plus one
 * attribute large enough to hold dm_hit_limit drop points
 */
static void reset_per_cpu_data(struct per_cpu_dm_data *data)
{
	size_t al;
	struct net_dm_alert_msg *msg;
	struct nlattr *nla;

	al = sizeof(struct net_dm_alert_msg);
	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
	al += sizeof(struct nlattr);

	data->skb = genlmsg_new(al, GFP_KERNEL);
	genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
			0, NET_DM_CMD_ALERT);
	nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
	msg = nla_data(nla);
	memset(msg, 0, al);
	atomic_set(&data->dm_hit_count, dm_hit_limit);
}

static void send_dm_alert(struct work_struct *unused)
{
	struct sk_buff *skb;
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	/*
	 * Grab the skb we're about to send
	 */
	skb = data->skb;

	/*
	 * Replace it with a new one
	 */
	reset_per_cpu_data(data);

	/*
	 * Ship it!
	 */
	genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
}
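
/*
 * For reference, a minimal sketch of the userspace side of the multicast
 * above, using a raw NETLINK_GENERIC socket rather than libnl; the group
 * number mirrors NET_DM_GRP_ALERT, and error handling is omitted for
 * brevity.  This is illustrative only, not part of the kernel build:
 *
 *	#include <sys/socket.h>
 *	#include <linux/netlink.h>
 *	#include <linux/genetlink.h>
 *	#include <linux/net_dropmon.h>
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
 *	int grp = NET_DM_GRP_ALERT;
 *	struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
 *	char buf[8192];
 *
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *		   &grp, sizeof(grp));
 *	recv(fd, buf, sizeof(buf), 0);
 *
 * after which buf holds one nlmsghdr-framed NET_DM_CMD_ALERT message.
 */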
/*
 * This is the timer function to delay the sending of an alert
 * in the event that more drops will arrive during the
 * hysteresis period.  Note that it operates under the timer interrupt
 * so we don't need to disable preemption here
 */
static void sched_send_work(unsigned long unused)
{
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	schedule_work(&data->dm_alert_work);
}

static void trace_drop_common(struct sk_buff *skb, void *location)
{
	struct net_dm_alert_msg *msg;
	struct nlmsghdr *nlh;
	struct nlattr *nla;
	int i;
	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);

	if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
		/*
		 * we're already at zero, discard this hit
		 */
		goto out;
	}

	/*
	 * See if we already have an entry for this drop location;
	 * if so, just bump its count
	 */
	nlh = (struct nlmsghdr *)data->skb->data;
	nla = genlmsg_data(nlmsg_data(nlh));
	msg = nla_data(nla);
	for (i = 0; i < msg->entries; i++) {
		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
			msg->points[i].count++;
			goto out;
		}
	}

	/*
	 * We need to create a new entry
	 */
	__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
	msg->points[msg->entries].count = 1;
	msg->entries++;

	if (!timer_pending(&data->send_timer)) {
		data->send_timer.expires = jiffies + dm_delay * HZ;
		add_timer_on(&data->send_timer, smp_processor_id());
	}

out:
	return;
}

static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
{
	trace_drop_common(skb, location);
}

static void trace_napi_poll_hit(struct napi_struct *napi)
{
	struct dm_hw_stat_delta *new_stat;

	/*
	 * Ratelimit our check time to dm_hw_check_delta jiffies
	 */
	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
		if ((new_stat->dev == napi->dev) &&
		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
			break;
		}
	}
	rcu_read_unlock();
}


static void free_dm_hw_stat(struct rcu_head *head)
{
	struct dm_hw_stat_delta *n;
	n = container_of(head, struct dm_hw_stat_delta, rcu);
	kfree(n);
}

static int set_all_monitor_traces(int state)
{
	int rc = 0;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *temp;

	spin_lock(&trace_state_lock);

	switch (state) {
	case TRACE_ON:
		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
		rc |= register_trace_napi_poll(trace_napi_poll_hit);
		break;
	case TRACE_OFF:
		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);

		tracepoint_synchronize_unregister();

		/*
		 * Clean the device list
		 */
		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
			if (new_stat->dev == NULL) {
				list_del_rcu(&new_stat->list);
				call_rcu(&new_stat->rcu, free_dm_hw_stat);
			}
		}
		break;
	default:
		rc = 1;
		break;
	}

	if (!rc)
		trace_state = state;

	spin_unlock(&trace_state_lock);

	if (rc)
		return -EINPROGRESS;
	return rc;
}


static int net_dm_cmd_config(struct sk_buff *skb,
			struct genl_info *info)
{
	return -ENOTSUPP;
}
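
/*
 * A minimal sketch of the userspace request that reaches the handler
 * below, assuming family_id (a hypothetical variable here) has already
 * been resolved via a CTRL_CMD_GETFAMILY request to the generic netlink
 * controller, and fd is the socket sketched earlier:
 *
 *	struct {
 *		struct nlmsghdr nl;
 *		struct genlmsghdr genl;
 *	} req = {
 *		.nl = {
 *			.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN),
 *			.nlmsg_type = family_id,
 *			.nlmsg_flags = NLM_F_REQUEST,
 *		},
 *		.genl = {
 *			.cmd = NET_DM_CMD_START,
 *			.version = 2,
 *		},
 *	};
 *
 *	send(fd, &req, req.nl.nlmsg_len, 0);
 *
 * NET_DM_CMD_STOP works the same way with the command byte swapped.
 */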
static int net_dm_cmd_trace(struct sk_buff *skb,
			struct genl_info *info)
{
	switch (info->genlhdr->cmd) {
	case NET_DM_CMD_START:
		return set_all_monitor_traces(TRACE_ON);
		break;
	case NET_DM_CMD_STOP:
		return set_all_monitor_traces(TRACE_OFF);
		break;
	}

	return -ENOTSUPP;
}

static int dropmon_net_event(struct notifier_block *ev_block,
			unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *tmp;

	switch (event) {
	case NETDEV_REGISTER:
		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);

		if (!new_stat)
			goto out;

		new_stat->dev = dev;
		INIT_RCU_HEAD(&new_stat->rcu);
		spin_lock(&trace_state_lock);
		list_add_rcu(&new_stat->list, &hw_stats_list);
		spin_unlock(&trace_state_lock);
		break;
	case NETDEV_UNREGISTER:
		spin_lock(&trace_state_lock);
		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
			if (new_stat->dev == dev) {
				new_stat->dev = NULL;
				if (trace_state == TRACE_OFF) {
					list_del_rcu(&new_stat->list);
					call_rcu(&new_stat->rcu, free_dm_hw_stat);
					break;
				}
			}
		}
		spin_unlock(&trace_state_lock);
		break;
	}
out:
	return NOTIFY_DONE;
}

static struct genl_ops dropmon_ops[] = {
	{
		.cmd = NET_DM_CMD_CONFIG,
		.doit = net_dm_cmd_config,
	},
	{
		.cmd = NET_DM_CMD_START,
		.doit = net_dm_cmd_trace,
	},
	{
		.cmd = NET_DM_CMD_STOP,
		.doit = net_dm_cmd_trace,
	},
};

static struct notifier_block dropmon_net_notifier = {
	.notifier_call = dropmon_net_event
};

static int __init init_net_drop_monitor(void)
{
	int cpu;
	int rc, i, ret;
	struct per_cpu_dm_data *data;
	printk(KERN_INFO "Initializing network drop monitor service\n");

	if (sizeof(void *) > 8) {
		printk(KERN_ERR "Unable to store program counters on this arch, drop monitor failed\n");
		return -ENOSPC;
	}

	if (genl_register_family(&net_drop_monitor_family) < 0) {
		printk(KERN_ERR "Could not create drop monitor netlink family\n");
		return -EFAULT;
	}

	rc = -EFAULT;

	for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
		ret = genl_register_ops(&net_drop_monitor_family,
				&dropmon_ops[i]);
		if (ret) {
			printk(KERN_CRIT "Failed to register operation %d\n",
				dropmon_ops[i].cmd);
			goto out_unreg;
		}
	}

	rc = register_netdevice_notifier(&dropmon_net_notifier);
	if (rc < 0) {
		printk(KERN_CRIT "Failed to register netdevice notifier\n");
		goto out_unreg;
	}

	rc = 0;

	for_each_present_cpu(cpu) {
		data = &per_cpu(dm_cpu_data, cpu);
		reset_per_cpu_data(data);
		INIT_WORK(&data->dm_alert_work, send_dm_alert);
		init_timer(&data->send_timer);
		data->send_timer.data = cpu;
		data->send_timer.function = sched_send_work;
	}

	goto out;

out_unreg:
	genl_unregister_family(&net_drop_monitor_family);
out:
	return rc;
}

late_initcall(init_net_drop_monitor);
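
/*
 * For completeness, a minimal sketch of how a userspace listener might
 * walk the payload shipped by send_dm_alert().  buf is assumed to hold
 * one message from a recv() on the socket sketched earlier; the layout
 * mirrors reset_per_cpu_data(): nlmsghdr, then genlmsghdr (hdrsize is 0),
 * then a single attribute wrapping a net_dm_alert_msg.  Each pc field
 * holds a raw kernel program counter, which can be resolved to a symbol
 * by searching /proc/kallsyms:
 *
 *	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;
 *	struct genlmsghdr *glh = NLMSG_DATA(nlh);
 *	struct nlattr *nla = (struct nlattr *)((char *)glh + GENL_HDRLEN);
 *	struct net_dm_alert_msg *msg = (struct net_dm_alert_msg *)
 *						((char *)nla + NLA_HDRLEN);
 *	int i;
 *
 *	for (i = 0; i < msg->entries; i++)
 *		printf("pc %p: %u drops\n",
 *		       *(void **)msg->points[i].pc, msg->points[i].count);
 */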