1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * (C) 2015 Red Hat GmbH
4 * Author: Florian Westphal <fw@strlen.de>
5 */
6
7 #include <linux/module.h>
8 #include <linux/static_key.h>
9 #include <linux/hash.h>
10 #include <linux/siphash.h>
11 #include <linux/if_vlan.h>
12 #include <linux/init.h>
13 #include <linux/skbuff.h>
14 #include <linux/netlink.h>
15 #include <linux/netfilter.h>
16 #include <linux/netfilter/nfnetlink.h>
17 #include <linux/netfilter/nf_tables.h>
18 #include <net/netfilter/nf_conntrack.h>
19 #include <net/netfilter/nf_tables_core.h>
20 #include <net/netfilter/nf_tables.h>
21
/*
 * Upper bounds, in bytes, on how much of each packet header is copied into
 * a trace message.  They are used both to clamp the copy length in
 * nf_trace_fill_pkt_info() and to size the netlink message in
 * nft_trace_notify().  nf_trace_fill_ll_header() additionally asserts at
 * build time that struct vlan_ethhdr fits within the link-layer bound.
 */
#define NFT_TRACETYPE_LL_HSIZE 20
#define NFT_TRACETYPE_NETWORK_HSIZE 40
#define NFT_TRACETYPE_TRANSPORT_HSIZE 20

/*
 * Static key, initially false, exported to GPL modules.  It is neither
 * toggled nor tested in the visible part of this file; presumably the
 * users that enable tracing live elsewhere -- confirm against callers.
 */
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
28
/*
 * Append a netlink attribute of the given type to nlskb whose payload is
 * len bytes copied out of skb, starting at offset off.
 *
 * Nothing is added when len is zero.  Returns 0 on success and -1 when the
 * attribute cannot be reserved or skb_copy_bits() reports a failure.
 */
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
			     const struct sk_buff *skb,
			     int off, unsigned int len)
{
	struct nlattr *attr;

	if (!len)
		return 0;

	attr = nla_reserve(nlskb, type, len);
	if (!attr)
		return -1;

	/* copy straight into the reserved attribute payload */
	if (skb_copy_bits(skb, off, nla_data(attr), len))
		return -1;

	return 0;
}
44
nf_trace_fill_ll_header(struct sk_buff * nlskb,const struct sk_buff * skb)45 static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
46 const struct sk_buff *skb)
47 {
48 struct vlan_ethhdr veth;
49 int off;
50
51 BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
52
53 off = skb_mac_header(skb) - skb->data;
54 if (off != -ETH_HLEN)
55 return -1;
56
57 if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
58 return -1;
59
60 veth.h_vlan_proto = skb->vlan_proto;
61 veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
62 veth.h_vlan_encapsulated_proto = skb->protocol;
63
64 return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
65 }
66
nf_trace_fill_dev_info(struct sk_buff * nlskb,const struct net_device * indev,const struct net_device * outdev)67 static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
68 const struct net_device *indev,
69 const struct net_device *outdev)
70 {
71 if (indev) {
72 if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
73 htonl(indev->ifindex)))
74 return -1;
75
76 if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
77 htons(indev->type)))
78 return -1;
79 }
80
81 if (outdev) {
82 if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
83 htonl(outdev->ifindex)))
84 return -1;
85
86 if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
87 htons(outdev->type)))
88 return -1;
89 }
90
91 return 0;
92 }
93
nf_trace_fill_ct_info(struct sk_buff * nlskb,const struct sk_buff * skb)94 static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
95 const struct sk_buff *skb)
96 {
97 const struct nf_ct_hook *ct_hook;
98 enum ip_conntrack_info ctinfo;
99 const struct nf_conn *ct;
100 u32 state;
101
102 ct_hook = rcu_dereference(nf_ct_hook);
103 if (!ct_hook)
104 return 0;
105
106 ct = nf_ct_get(skb, &ctinfo);
107 if (!ct) {
108 if (ctinfo != IP_CT_UNTRACKED) /* not seen by conntrack or invalid */
109 return 0;
110
111 state = NF_CT_STATE_UNTRACKED_BIT;
112 } else {
113 state = NF_CT_STATE_BIT(ctinfo);
114 }
115
116 if (nla_put_be32(nlskb, NFTA_TRACE_CT_STATE, htonl(state)))
117 return -1;
118
119 if (ct) {
120 u32 id = ct_hook->get_id(&ct->ct_general);
121 u32 status = READ_ONCE(ct->status);
122 u8 dir = CTINFO2DIR(ctinfo);
123
124 if (nla_put_u8(nlskb, NFTA_TRACE_CT_DIRECTION, dir))
125 return -1;
126
127 if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
128 return -1;
129
130 /* Kernel implementation detail, withhold this from userspace for now */
131 status &= ~IPS_NAT_CLASH;
132
133 if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
134 return -1;
135 }
136
137 return 0;
138 }
139
nf_trace_fill_pkt_info(struct sk_buff * nlskb,const struct nft_pktinfo * pkt)140 static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
141 const struct nft_pktinfo *pkt)
142 {
143 const struct sk_buff *skb = pkt->skb;
144 int off = skb_network_offset(skb);
145 unsigned int len, nh_end;
146
147 nh_end = pkt->flags & NFT_PKTINFO_L4PROTO ? nft_thoff(pkt) : skb->len;
148 len = min_t(unsigned int, nh_end - skb_network_offset(skb),
149 NFT_TRACETYPE_NETWORK_HSIZE);
150 if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
151 return -1;
152
153 if (pkt->flags & NFT_PKTINFO_L4PROTO) {
154 len = min_t(unsigned int, skb->len - nft_thoff(pkt),
155 NFT_TRACETYPE_TRANSPORT_HSIZE);
156 if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
157 nft_thoff(pkt), len))
158 return -1;
159 }
160
161 if (!skb_mac_header_was_set(skb))
162 return 0;
163
164 if (skb_vlan_tag_get(skb))
165 return nf_trace_fill_ll_header(nlskb, skb);
166
167 off = skb_mac_header(skb) - skb->data;
168 len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
169 return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
170 skb, off, len);
171 }
172
nf_trace_fill_rule_info(struct sk_buff * nlskb,const struct nft_verdict * verdict,const struct nft_rule_dp * rule,const struct nft_traceinfo * info)173 static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
174 const struct nft_verdict *verdict,
175 const struct nft_rule_dp *rule,
176 const struct nft_traceinfo *info)
177 {
178 if (!rule || rule->is_last)
179 return 0;
180
181 /* a continue verdict with ->type == RETURN means that this is
182 * an implicit return (end of chain reached).
183 *
184 * Since no rule matched, the ->rule pointer is invalid.
185 */
186 if (info->type == NFT_TRACETYPE_RETURN &&
187 verdict->code == NFT_CONTINUE)
188 return 0;
189
190 return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
191 cpu_to_be64(rule->handle),
192 NFTA_TRACE_PAD);
193 }
194
nft_trace_have_verdict_chain(const struct nft_verdict * verdict,struct nft_traceinfo * info)195 static bool nft_trace_have_verdict_chain(const struct nft_verdict *verdict,
196 struct nft_traceinfo *info)
197 {
198 switch (info->type) {
199 case NFT_TRACETYPE_RETURN:
200 case NFT_TRACETYPE_RULE:
201 break;
202 default:
203 return false;
204 }
205
206 switch (verdict->code) {
207 case NFT_JUMP:
208 case NFT_GOTO:
209 break;
210 default:
211 return false;
212 }
213
214 return true;
215 }
216
nft_trace_get_chain(const struct nft_rule_dp * rule,const struct nft_traceinfo * info)217 static const struct nft_chain *nft_trace_get_chain(const struct nft_rule_dp *rule,
218 const struct nft_traceinfo *info)
219 {
220 const struct nft_rule_dp_last *last;
221
222 if (!rule)
223 return &info->basechain->chain;
224
225 while (!rule->is_last)
226 rule = nft_rule_next(rule);
227
228 last = (const struct nft_rule_dp_last *)rule;
229
230 if (WARN_ON_ONCE(!last->chain))
231 return &info->basechain->chain;
232
233 return last->chain;
234 }
235
nft_trace_notify(const struct nft_pktinfo * pkt,const struct nft_verdict * verdict,const struct nft_rule_dp * rule,struct nft_traceinfo * info)236 void nft_trace_notify(const struct nft_pktinfo *pkt,
237 const struct nft_verdict *verdict,
238 const struct nft_rule_dp *rule,
239 struct nft_traceinfo *info)
240 {
241 const struct nft_chain *chain;
242 struct nlmsghdr *nlh;
243 struct sk_buff *skb;
244 unsigned int size;
245 u32 mark = 0;
246 u16 event;
247
248 if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
249 return;
250
251 chain = nft_trace_get_chain(rule, info);
252
253 size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
254 nla_total_size(strlen(chain->table->name)) +
255 nla_total_size(strlen(chain->name)) +
256 nla_total_size_64bit(sizeof(__be64)) + /* rule handle */
257 nla_total_size(sizeof(__be32)) + /* trace type */
258 nla_total_size(0) + /* VERDICT, nested */
259 nla_total_size(sizeof(u32)) + /* verdict code */
260 nla_total_size(sizeof(u32)) + /* ct id */
261 nla_total_size(sizeof(u8)) + /* ct direction */
262 nla_total_size(sizeof(u32)) + /* ct state */
263 nla_total_size(sizeof(u32)) + /* ct status */
264 nla_total_size(sizeof(u32)) + /* trace id */
265 nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
266 nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
267 nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
268 nla_total_size(sizeof(u32)) + /* iif */
269 nla_total_size(sizeof(__be16)) + /* iiftype */
270 nla_total_size(sizeof(u32)) + /* oif */
271 nla_total_size(sizeof(__be16)) + /* oiftype */
272 nla_total_size(sizeof(u32)) + /* mark */
273 nla_total_size(sizeof(u32)) + /* nfproto */
274 nla_total_size(sizeof(u32)); /* policy */
275
276 if (nft_trace_have_verdict_chain(verdict, info))
277 size += nla_total_size(strlen(verdict->chain->name)); /* jump target */
278
279 skb = nlmsg_new(size, GFP_ATOMIC);
280 if (!skb)
281 return;
282
283 event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
284 nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
285 NFNETLINK_V0, 0);
286 if (!nlh)
287 goto nla_put_failure;
288
289 if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
290 goto nla_put_failure;
291
292 if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
293 goto nla_put_failure;
294
295 if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
296 goto nla_put_failure;
297
298 if (nla_put_string(skb, NFTA_TRACE_CHAIN, chain->name))
299 goto nla_put_failure;
300
301 if (nla_put_string(skb, NFTA_TRACE_TABLE, chain->table->name))
302 goto nla_put_failure;
303
304 if (nf_trace_fill_rule_info(skb, verdict, rule, info))
305 goto nla_put_failure;
306
307 switch (info->type) {
308 case NFT_TRACETYPE_UNSPEC:
309 case __NFT_TRACETYPE_MAX:
310 break;
311 case NFT_TRACETYPE_RETURN:
312 case NFT_TRACETYPE_RULE: {
313 unsigned int v;
314
315 if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict))
316 goto nla_put_failure;
317
318 /* pkt->skb undefined iff NF_STOLEN, disable dump */
319 v = verdict->code & NF_VERDICT_MASK;
320 if (v == NF_STOLEN)
321 info->packet_dumped = true;
322 else
323 mark = pkt->skb->mark;
324
325 break;
326 }
327 case NFT_TRACETYPE_POLICY:
328 mark = pkt->skb->mark;
329
330 if (nla_put_be32(skb, NFTA_TRACE_POLICY,
331 htonl(info->basechain->policy)))
332 goto nla_put_failure;
333 break;
334 }
335
336 if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
337 goto nla_put_failure;
338
339 if (!info->packet_dumped) {
340 if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
341 goto nla_put_failure;
342
343 if (nf_trace_fill_pkt_info(skb, pkt))
344 goto nla_put_failure;
345
346 if (nf_trace_fill_ct_info(skb, pkt->skb))
347 goto nla_put_failure;
348
349 info->packet_dumped = true;
350 }
351
352 nlmsg_end(skb, nlh);
353 nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
354 return;
355
356 nla_put_failure:
357 WARN_ON_ONCE(1);
358 kfree_skb(skb);
359 }
360
nft_trace_init(struct nft_traceinfo * info,const struct nft_pktinfo * pkt,const struct nft_chain * chain)361 void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
362 const struct nft_chain *chain)
363 {
364 static siphash_key_t trace_key __read_mostly;
365 struct sk_buff *skb = pkt->skb;
366
367 info->basechain = nft_base_chain(chain);
368 info->trace = true;
369 info->nf_trace = pkt->skb->nf_trace;
370 info->packet_dumped = false;
371
372 net_get_random_once(&trace_key, sizeof(trace_key));
373
374 info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
375 skb_get_hash_net(nft_net(pkt), skb),
376 skb->skb_iif,
377 &trace_key);
378 }
379