1 /* SPDX-License-Identifier: GPL-2.0 2 * 3 * Copyright (c) 2019 Facebook 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of version 2 of the GNU General Public 7 * License as published by the Free Software Foundation. 8 * 9 * Include file for sample Host Bandwidth Manager (HBM) BPF programs 10 */ 11 #define KBUILD_MODNAME "foo" 12 #include <stddef.h> 13 #include <stdbool.h> 14 #include <uapi/linux/bpf.h> 15 #include <uapi/linux/if_ether.h> 16 #include <uapi/linux/if_packet.h> 17 #include <uapi/linux/ip.h> 18 #include <uapi/linux/ipv6.h> 19 #include <uapi/linux/in.h> 20 #include <uapi/linux/tcp.h> 21 #include <uapi/linux/filter.h> 22 #include <uapi/linux/pkt_cls.h> 23 #include <net/ipv6.h> 24 #include <net/inet_ecn.h> 25 #include <bpf/bpf_endian.h> 26 #include <bpf/bpf_helpers.h> 27 #include "hbm.h" 28 29 #define DROP_PKT 0 30 #define ALLOW_PKT 1 31 #define TCP_ECN_OK 1 32 #define CWR 2 33 34 #ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging 35 #undef bpf_printk 36 #define bpf_printk(fmt, ...) 37 #endif 38 39 #define INITIAL_CREDIT_PACKETS 100 40 #define MAX_BYTES_PER_PACKET 1500 41 #define MARK_THRESH (40 * MAX_BYTES_PER_PACKET) 42 #define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET) 43 #define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET)) 44 #define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH) 45 #define LARGE_PKT_THRESH 120 46 #define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET) 47 #define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET) 48 49 // Time base accounting for fq's EDT 50 #define BURST_SIZE_NS 100000 // 100us 51 #define MARK_THRESH_NS 50000 // 50us 52 #define DROP_THRESH_NS 500000 // 500us 53 // Reserve 20us of queuing for small packets (less than 120 bytes) 54 #define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000) 55 #define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS) 56 57 // rate in bytes per ns << 20 58 #define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) 59 #define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) 60 #define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate)) 61 62 struct { 63 __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); 64 __type(key, struct bpf_cgroup_storage_key); 65 __type(value, struct hbm_vqueue); 66 } queue_state SEC(".maps"); 67 68 struct { 69 __uint(type, BPF_MAP_TYPE_ARRAY); 70 __uint(max_entries, 1); 71 __type(key, u32); 72 __type(value, struct hbm_queue_stats); 73 } queue_stats SEC(".maps"); 74 75 struct hbm_pkt_info { 76 int cwnd; 77 int rtt; 78 int packets_out; 79 bool is_ip; 80 bool is_tcp; 81 short ecn; 82 }; 83 84 static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) 85 { 86 struct bpf_sock *sk; 87 struct bpf_tcp_sock *tp; 88 89 sk = skb->sk; 90 if (sk) { 91 sk = bpf_sk_fullsock(sk); 92 if (sk) { 93 if (sk->protocol == IPPROTO_TCP) { 94 tp = bpf_tcp_sock(sk); 95 if (tp) { 96 pkti->cwnd = tp->snd_cwnd; 97 pkti->rtt = tp->srtt_us >> 3; 98 pkti->packets_out = tp->packets_out; 99 return 0; 100 } 101 } 102 } 103 } 104 pkti->cwnd = 0; 105 pkti->rtt = 0; 106 pkti->packets_out = 0; 107 return 1; 108 } 109 110 static void hbm_get_pkt_info(struct __sk_buff *skb, 111 struct hbm_pkt_info *pkti) 112 { 113 struct iphdr iph; 114 struct ipv6hdr *ip6h; 115 116 pkti->cwnd = 0; 117 pkti->rtt = 0; 118 bpf_skb_load_bytes(skb, 0, &iph, 12); 119 if (iph.version == 6) { 120 ip6h = (struct ipv6hdr *)&iph; 121 pkti->is_ip = true; 122 pkti->is_tcp = (ip6h->nexthdr == 6); 123 pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; 124 } else if (iph.version == 4) { 125 pkti->is_ip = true; 126 pkti->is_tcp = (iph.protocol == 6); 127 pkti->ecn = iph.tos & INET_ECN_MASK; 128 } else { 129 pkti->is_ip = false; 130 pkti->is_tcp = false; 131 pkti->ecn = 0; 132 } 133 if (pkti->is_tcp) 134 get_tcp_info(skb, pkti); 135 } 136 137 static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) 138 { 139 bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); 140 qdp->lasttime = bpf_ktime_get_ns(); 141 qdp->credit = INIT_CREDIT; 142 qdp->rate = rate * 128; 143 } 144 145 static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, 146 int rate) 147 { 148 unsigned long long curtime; 149 150 curtime = bpf_ktime_get_ns(); 151 bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); 152 qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst 153 qdp->credit = 0; // not used 154 qdp->rate = rate * 128; 155 } 156 157 static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, 158 int len, 159 unsigned long long curtime, 160 bool congestion_flag, 161 bool drop_flag, 162 bool cwr_flag, 163 bool ecn_ce_flag, 164 struct hbm_pkt_info *pkti, 165 int credit) 166 { 167 int rv = ALLOW_PKT; 168 169 if (qsp != NULL) { 170 // Following is needed for work conserving 171 __sync_add_and_fetch(&(qsp->bytes_total), len); 172 if (qsp->stats) { 173 // Optionally update statistics 174 if (qsp->firstPacketTime == 0) 175 qsp->firstPacketTime = curtime; 176 qsp->lastPacketTime = curtime; 177 __sync_add_and_fetch(&(qsp->pkts_total), 1); 178 if (congestion_flag) { 179 __sync_add_and_fetch(&(qsp->pkts_marked), 1); 180 __sync_add_and_fetch(&(qsp->bytes_marked), len); 181 } 182 if (drop_flag) { 183 __sync_add_and_fetch(&(qsp->pkts_dropped), 1); 184 __sync_add_and_fetch(&(qsp->bytes_dropped), 185 len); 186 } 187 if (ecn_ce_flag) 188 __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); 189 if (pkti->cwnd) { 190 __sync_add_and_fetch(&(qsp->sum_cwnd), 191 pkti->cwnd); 192 __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); 193 } 194 if (pkti->rtt) 195 __sync_add_and_fetch(&(qsp->sum_rtt), 196 pkti->rtt); 197 __sync_add_and_fetch(&(qsp->sum_credit), credit); 198 199 if (drop_flag) 200 rv = DROP_PKT; 201 if (cwr_flag) 202 rv |= 2; 203 if (rv == DROP_PKT) 204 __sync_add_and_fetch(&(qsp->returnValCount[0]), 205 1); 206 else if (rv == ALLOW_PKT) 207 __sync_add_and_fetch(&(qsp->returnValCount[1]), 208 1); 209 else if (rv == 2) 210 __sync_add_and_fetch(&(qsp->returnValCount[2]), 211 1); 212 else if (rv == 3) 213 __sync_add_and_fetch(&(qsp->returnValCount[3]), 214 1); 215 } 216 } 217 } 218