xref: /linux/include/net/inet_ecn.h (revision 100f946b8d44b64bc0b8a8c30d283105031c0a77)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _INET_ECN_H_
3 #define _INET_ECN_H_
4 
5 #include <linux/ip.h>
6 #include <linux/skbuff.h>
7 #include <linux/if_vlan.h>
8 
9 #include <net/inet_sock.h>
10 #include <net/dsfield.h>
11 #include <net/checksum.h>
12 
/* RFC 3168 ECN field codepoints: the low two bits of the IPv4 TOS /
 * IPv6 Traffic Class byte.
 */
enum {
	INET_ECN_NOT_ECT = 0,	/* Not ECN-Capable Transport */
	INET_ECN_ECT_1 = 1,	/* ECN-Capable Transport, ECT(1) */
	INET_ECN_ECT_0 = 2,	/* ECN-Capable Transport, ECT(0) */
	INET_ECN_CE = 3,	/* Congestion Experienced */
	INET_ECN_MASK = 3,	/* mask covering both ECN bits */
};
20 
21 extern int sysctl_tunnel_ecn_log;
22 
23 static inline int INET_ECN_is_ce(__u8 dsfield)
24 {
25 	return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
26 }
27 
28 static inline int INET_ECN_is_not_ect(__u8 dsfield)
29 {
30 	return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT;
31 }
32 
33 static inline int INET_ECN_is_capable(__u8 dsfield)
34 {
35 	return dsfield & INET_ECN_ECT_0;
36 }
37 
38 /*
39  * RFC 3168 9.1.1
40  *  The full-functionality option for ECN encapsulation is to copy the
41  *  ECN codepoint of the inside header to the outside header on
42  *  encapsulation if the inside header is not-ECT or ECT, and to set the
43  *  ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
44  *  the inside header is CE.
45  */
46 static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
47 {
48 	outer &= ~INET_ECN_MASK;
49 	outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
50 					  INET_ECN_ECT_0;
51 	return outer;
52 }
53 
54 /* Apply either ECT(0) or ECT(1) */
55 static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
56 {
57 	__u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
58 
59 	/* Mask the complete byte in case the connection alternates between
60 	 * ECT(0) and ECT(1).
61 	 */
62 	inet_sk(sk)->tos &= ~INET_ECN_MASK;
63 	inet_sk(sk)->tos |= ect;
64 	if (inet6_sk(sk)) {
65 		inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
66 		inet6_sk(sk)->tclass |= ect;
67 	}
68 }
69 
/* Mark the socket as sending ECT(0) on transmitted packets. */
static inline void INET_ECN_xmit(struct sock *sk)
{
	__INET_ECN_xmit(sk, false);
}
74 
75 static inline void INET_ECN_dontxmit(struct sock *sk)
76 {
77 	inet_sk(sk)->tos &= ~INET_ECN_MASK;
78 	if (inet6_sk(sk) != NULL)
79 		inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
80 }
81 
/* Clear the ECN bits (traffic-class bits 20-21) of the IPv6 flow word
 * @label (network byte order).
 */
#define IP6_ECN_flow_init(label) do {		\
      (label) &= ~htonl(INET_ECN_MASK << 20);	\
    } while (0)
85 
/* If the socket's IPv6 traffic class passes INET_ECN_is_capable() (the
 * ECT(0) bit is set), set ECT(0) in the flow word @label being built.
 */
#define	IP6_ECN_flow_xmit(sk, label) do {				\
	if (INET_ECN_is_capable(inet6_sk(sk)->tclass))			\
		(label) |= htonl(INET_ECN_ECT_0 << 20);			\
    } while (0)
90 
/* Set Congestion Experienced in an IPv4 header, fixing the header
 * checksum incrementally via csum16_add().
 *
 * Returns 1 if CE was set (or was already present), 0 when the packet
 * is Not-ECT and therefore must not be marked.
 */
static inline int IP_ECN_set_ce(struct iphdr *iph)
{
	u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
	__be16 check_add;

	/*
	 * After the last operation we have (in binary):
	 * INET_ECN_NOT_ECT => 01
	 * INET_ECN_ECT_1   => 10
	 * INET_ECN_ECT_0   => 11
	 * INET_ECN_CE      => 00
	 */
	if (!(ecn & 2))
		return !ecn;	/* CE already set -> 1, Not-ECT -> 0 */

	/*
	 * The following gives us:
	 * INET_ECN_ECT_1 => check += htons(0xFFFD)
	 * INET_ECN_ECT_0 => check += htons(0xFFFE)
	 * i.e. the ones-complement delta for flipping tos to CE.
	 */
	check_add = (__force __be16)((__force u16)htons(0xFFFB) +
				     (__force u16)htons(ecn));

	iph->check = csum16_add(iph->check, check_add);
	iph->tos |= INET_ECN_CE;
	return 1;
}
118 
/* Switch an IPv4 header from ECT(0) to ECT(1), with incremental header
 * checksum fixup. Returns 1 on success, 0 if the packet was not ECT(0).
 */
static inline int IP_ECN_set_ect1(struct iphdr *iph)
{
	if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
		return 0;

	/* tos goes from ..10 to ..01, a net decrease of 1, which the
	 * checksum compensates by adding 1.
	 */
	iph->check = csum16_add(iph->check, htons(0x1));
	iph->tos ^= INET_ECN_MASK;
	return 1;
}
128 
129 static inline void IP_ECN_clear(struct iphdr *iph)
130 {
131 	iph->tos &= ~INET_ECN_MASK;
132 }
133 
134 static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
135 {
136 	dscp &= ~INET_ECN_MASK;
137 	ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
138 }
139 
140 struct ipv6hdr;
141 
142 /* Note:
143  * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE,
144  * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE
145  * In IPv6 case, no checksum compensates the change in IPv6 header,
146  * so we have to update skb->csum.
147  */
/* Set Congestion Experienced in an IPv6 header. Returns 1 if CE was set
 * (or already present), 0 when the packet is Not-ECT. Since IPv6 has no
 * header checksum, skb->csum must be adjusted directly for
 * CHECKSUM_COMPLETE packets.
 */
static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
{
	__be32 from, to;

	/* Not-ECT packets must not be marked. */
	if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
		return 0;

	/* OR both ECN bits (CE) into the traffic-class bits of the first
	 * 32-bit word of the IPv6 header.
	 */
	from = *(__be32 *)iph;
	to = from | htonl(INET_ECN_CE << 20);
	*(__be32 *)iph = to;
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
				     (__force __wsum)to);
	return 1;
}
163 
/* Switch an IPv6 header from ECT(0) to ECT(1). Returns 1 on success,
 * 0 if the packet was not ECT(0). skb->csum is updated for
 * CHECKSUM_COMPLETE packets since IPv6 has no header checksum to absorb
 * the change.
 */
static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
{
	__be32 from, to;

	if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
		return 0;

	/* XOR with the full mask flips 10 (ECT(0)) to 01 (ECT(1)). */
	from = *(__be32 *)iph;
	to = from ^ htonl(INET_ECN_MASK << 20);
	*(__be32 *)iph = to;
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
				     (__force __wsum)to);
	return 1;
}
179 
180 static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
181 {
182 	dscp &= ~INET_ECN_MASK;
183 	ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
184 }
185 
186 static inline int INET_ECN_set_ce(struct sk_buff *skb)
187 {
188 	switch (skb_protocol(skb, true)) {
189 	case cpu_to_be16(ETH_P_IP):
190 		if (skb_network_header(skb) + sizeof(struct iphdr) <=
191 		    skb_tail_pointer(skb))
192 			return IP_ECN_set_ce(ip_hdr(skb));
193 		break;
194 
195 	case cpu_to_be16(ETH_P_IPV6):
196 		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
197 		    skb_tail_pointer(skb))
198 			return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
199 		break;
200 	}
201 
202 	return 0;
203 }
204 
205 static inline int skb_get_dsfield(struct sk_buff *skb)
206 {
207 	switch (skb_protocol(skb, true)) {
208 	case cpu_to_be16(ETH_P_IP):
209 		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
210 			break;
211 		return ipv4_get_dsfield(ip_hdr(skb));
212 
213 	case cpu_to_be16(ETH_P_IPV6):
214 		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
215 			break;
216 		return ipv6_get_dsfield(ipv6_hdr(skb));
217 	}
218 
219 	return -1;
220 }
221 
222 static inline int INET_ECN_set_ect1(struct sk_buff *skb)
223 {
224 	switch (skb_protocol(skb, true)) {
225 	case cpu_to_be16(ETH_P_IP):
226 		if (skb_network_header(skb) + sizeof(struct iphdr) <=
227 		    skb_tail_pointer(skb))
228 			return IP_ECN_set_ect1(ip_hdr(skb));
229 		break;
230 
231 	case cpu_to_be16(ETH_P_IPV6):
232 		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
233 		    skb_tail_pointer(skb))
234 			return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
235 		break;
236 	}
237 
238 	return 0;
239 }
240 
241 /*
242  * RFC 6040 4.2
243  *  To decapsulate the inner header at the tunnel egress, a compliant
244  *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
245  *  intersection of the appropriate arriving inner header (row) and outer
246  *  header (column) in Figure 4
247  *
248  *      +---------+------------------------------------------------+
249  *      |Arriving |            Arriving Outer Header               |
250  *      |   Inner +---------+------------+------------+------------+
251  *      |  Header | Not-ECT | ECT(0)     | ECT(1)     |     CE     |
252  *      +---------+---------+------------+------------+------------+
253  *      | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
254  *      |  ECT(0) |  ECT(0) | ECT(0)     | ECT(1)     |     CE     |
255  *      |  ECT(1) |  ECT(1) | ECT(1) (!) | ECT(1)     |     CE     |
256  *      |    CE   |      CE |     CE     |     CE(!!!)|     CE     |
257  *      +---------+---------+------------+------------+------------+
258  *
259  *             Figure 4: New IP in IP Decapsulation Behaviour
260  *
261  *  returns 0 on success
262  *          1 if something is broken and should be logged (!!! above)
263  *          2 if packet should be dropped
264  */
265 static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
266 {
267 	if (INET_ECN_is_not_ect(inner)) {
268 		switch (outer & INET_ECN_MASK) {
269 		case INET_ECN_NOT_ECT:
270 			return 0;
271 		case INET_ECN_ECT_0:
272 		case INET_ECN_ECT_1:
273 			return 1;
274 		case INET_ECN_CE:
275 			return 2;
276 		}
277 	}
278 
279 	*set_ce = INET_ECN_is_ce(outer);
280 	return 0;
281 }
282 
283 static inline int INET_ECN_decapsulate(struct sk_buff *skb,
284 				       __u8 outer, __u8 inner)
285 {
286 	bool set_ce = false;
287 	int rc;
288 
289 	rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
290 	if (!rc) {
291 		if (set_ce)
292 			INET_ECN_set_ce(skb);
293 		else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
294 			INET_ECN_set_ect1(skb);
295 	}
296 
297 	return rc;
298 }
299 
300 static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
301 				     struct sk_buff *skb)
302 {
303 	__u8 inner;
304 
305 	switch (skb_protocol(skb, true)) {
306 	case htons(ETH_P_IP):
307 		inner = ip_hdr(skb)->tos;
308 		break;
309 	case htons(ETH_P_IPV6):
310 		inner = ipv6_get_dsfield(ipv6_hdr(skb));
311 		break;
312 	default:
313 		return 0;
314 	}
315 
316 	return INET_ECN_decapsulate(skb, oiph->tos, inner);
317 }
318 
319 static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
320 				      struct sk_buff *skb)
321 {
322 	__u8 inner;
323 
324 	switch (skb_protocol(skb, true)) {
325 	case htons(ETH_P_IP):
326 		inner = ip_hdr(skb)->tos;
327 		break;
328 	case htons(ETH_P_IPV6):
329 		inner = ipv6_get_dsfield(ipv6_hdr(skb));
330 		break;
331 	default:
332 		return 0;
333 	}
334 
335 	return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
336 }
337 #endif
338