1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright 2013 Google Inc.
4 * Author: Willem de Bruijn (willemb@google.com)
5 *
6 * A basic test of packet socket fanout behavior.
7 *
8 * Control:
9 * - create fanout fails as expected with illegal flag combinations
10 * - join fanout fails as expected with diverging types or flags
11 *
12 * Datapath:
13 * Open a pair of packet sockets and a pair of INET sockets, send a known
14 * number of packets across the two INET sockets and count the number of
15 * packets enqueued onto the two packet sockets.
16 *
17 * The test currently runs for
18 * - PACKET_FANOUT_HASH
19 * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
20 * - PACKET_FANOUT_LB
21 * - PACKET_FANOUT_CPU
22 * - PACKET_FANOUT_ROLLOVER
23 * - PACKET_FANOUT_CBPF
24 * - PACKET_FANOUT_EBPF
25 *
26 * Todo:
27 * - functionality: PACKET_FANOUT_FLAG_DEFRAG
28 */
29
30 #define _GNU_SOURCE /* for sched_setaffinity */
31
32 #include <arpa/inet.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <linux/unistd.h> /* for __NR_bpf */
36 #include <linux/filter.h>
37 #include <linux/bpf.h>
38 #include <linux/if_packet.h>
39 #include <net/if.h>
40 #include <net/ethernet.h>
41 #include <netinet/ip.h>
42 #include <netinet/udp.h>
43 #include <poll.h>
44 #include <sched.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <sys/mman.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/stat.h>
53 #include <sys/types.h>
54 #include <unistd.h>
55
56 #include "psock_lib.h"
57 #include "../kselftest.h"
58
/* Number of blocks and of frames requested for each packet socket RX ring. */
#define RING_NUM_FRAMES 20

/*
 * Fanout group size limit used by sock_fanout_open(). When non-zero the group
 * is configured through struct fanout_args with this max_num_members; when
 * zero the plain integer form of the PACKET_FANOUT option is used instead.
 */
static uint32_t cfg_max_num_members;
62
loopback_set_up_down(int state_up)63 static void loopback_set_up_down(int state_up)
64 {
65 struct ifreq ifreq = {};
66 int fd, err;
67
68 fd = socket(AF_PACKET, SOCK_RAW, 0);
69 if (fd < 0) {
70 perror("socket loopback");
71 exit(1);
72 }
73 strcpy(ifreq.ifr_name, "lo");
74 err = ioctl(fd, SIOCGIFFLAGS, &ifreq);
75 if (err) {
76 perror("SIOCGIFFLAGS");
77 exit(1);
78 }
79 if (state_up != !!(ifreq.ifr_flags & IFF_UP)) {
80 ifreq.ifr_flags ^= IFF_UP;
81 err = ioctl(fd, SIOCSIFFLAGS, &ifreq);
82 if (err) {
83 perror("SIOCSIFFLAGS");
84 exit(1);
85 }
86 }
87 close(fd);
88 }
89
90 /* Open a socket in a given fanout mode.
91 * @return -1 if mode is bad, a valid socket otherwise */
sock_fanout_open(uint16_t typeflags,uint16_t group_id)92 static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
93 {
94 struct sockaddr_ll addr = {0};
95 struct fanout_args args;
96 int fd, val, err;
97
98 fd = socket(PF_PACKET, SOCK_RAW, 0);
99 if (fd < 0) {
100 perror("socket packet");
101 exit(1);
102 }
103
104 pair_udp_setfilter(fd);
105
106 addr.sll_family = AF_PACKET;
107 addr.sll_protocol = htons(ETH_P_IP);
108 addr.sll_ifindex = if_nametoindex("lo");
109 if (addr.sll_ifindex == 0) {
110 perror("if_nametoindex");
111 exit(1);
112 }
113 if (bind(fd, (void *) &addr, sizeof(addr))) {
114 perror("bind packet");
115 exit(1);
116 }
117
118 if (cfg_max_num_members) {
119 args.id = group_id;
120 args.type_flags = typeflags;
121 args.max_num_members = cfg_max_num_members;
122 err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args,
123 sizeof(args));
124 } else {
125 val = (((int) typeflags) << 16) | group_id;
126 err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val,
127 sizeof(val));
128 }
129 if (err) {
130 if (close(fd)) {
131 perror("close packet");
132 exit(1);
133 }
134 return -1;
135 }
136
137 return fd;
138 }
139
/*
 * Attach a classic BPF program as the fanout data of @fd.
 *
 * The two-instruction program loads the packet byte at absolute offset 80
 * into the accumulator and returns it. A failing setsockopt() exits with
 * status 1.
 */
static void sock_fanout_set_cbpf(int fd)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80),	/* ldb [80] */
		BPF_STMT(BPF_RET | BPF_A, 0),		/* ret A */
	};
	struct sock_fprog prog = {
		.len = ARRAY_SIZE(insns),
		.filter = insns,
	};
	int err;

	err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &prog,
			 sizeof(prog));
	if (!err)
		return;

	perror("fanout data cbpf");
	exit(1);
}
157
/*
 * Read back the PACKET_FANOUT option of @fd and split it into its halves.
 *
 * @fd:        packet socket to query
 * @typeflags: receives the upper 16 bits of the option value
 * @group_id:  receives the lower 16 bits of the option value
 *
 * A failing getsockopt() exits with status 1.
 */
static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
{
	/* unsigned, so that extracting the upper half never shifts a negative value */
	unsigned int sockopt = 0;
	socklen_t sockopt_len = sizeof(sockopt);

	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &sockopt, &sockopt_len)) {
		perror("failed to getsockopt");
		exit(1);
	}
	*typeflags = sockopt >> 16;
	*group_id = sockopt & 0xffff;
}
171
sock_fanout_set_ebpf(int fd)172 static void sock_fanout_set_ebpf(int fd)
173 {
174 static char log_buf[65536];
175
176 const int len_off = __builtin_offsetof(struct __sk_buff, len);
177 struct bpf_insn prog[] = {
178 { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },
179 { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 },
180 { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN },
181 { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 },
182 { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 },
183 { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR },
184 { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 },
185 { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },
186 { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
187 };
188 union bpf_attr attr;
189 int pfd;
190
191 memset(&attr, 0, sizeof(attr));
192 attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
193 attr.insns = (unsigned long) prog;
194 attr.insn_cnt = ARRAY_SIZE(prog);
195 attr.license = (unsigned long) "GPL";
196 attr.log_buf = (unsigned long) log_buf;
197 attr.log_size = sizeof(log_buf);
198 attr.log_level = 1;
199
200 pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
201 if (pfd < 0) {
202 perror("bpf");
203 fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
204 exit(1);
205 }
206
207 if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
208 perror("fanout data ebpf");
209 exit(1);
210 }
211
212 if (close(pfd)) {
213 perror("close ebpf");
214 exit(1);
215 }
216 }
217
sock_fanout_open_ring(int fd)218 static char *sock_fanout_open_ring(int fd)
219 {
220 struct tpacket_req req = {
221 .tp_block_size = getpagesize(),
222 .tp_frame_size = getpagesize(),
223 .tp_block_nr = RING_NUM_FRAMES,
224 .tp_frame_nr = RING_NUM_FRAMES,
225 };
226 char *ring;
227 int val = TPACKET_V2;
228
229 if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
230 sizeof(val))) {
231 perror("packetsock ring setsockopt version");
232 exit(1);
233 }
234 if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
235 sizeof(req))) {
236 perror("packetsock ring setsockopt");
237 exit(1);
238 }
239
240 ring = mmap(0, req.tp_block_size * req.tp_block_nr,
241 PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
242 if (ring == MAP_FAILED) {
243 perror("packetsock ring mmap");
244 exit(1);
245 }
246
247 return ring;
248 }
249
sock_fanout_read_ring(int fd,void * ring)250 static int sock_fanout_read_ring(int fd, void *ring)
251 {
252 struct tpacket2_hdr *header = ring;
253 int count = 0;
254
255 while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) {
256 count++;
257 header = ring + (count * getpagesize());
258 }
259
260 return count;
261 }
262
/*
 * Compare the queue lengths of two packet socket rings with @expect.
 *
 * @fds:    the two packet sockets
 * @rings:  their mapped receive rings
 * @expect: the two expected queue lengths; they may match the two sockets
 *          in either order
 *
 * Prints the observed and expected counts. Returns 0 on a match, 1 (after
 * printing a warning) otherwise.
 */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	int queued[2];
	int in_order, swapped;

	queued[0] = sock_fanout_read_ring(fds[0], rings[0]);
	queued[1] = sock_fanout_read_ring(fds[1], rings[1]);

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		queued[0], queued[1], expect[0], expect[1]);

	in_order = queued[0] == expect[0] && queued[1] == expect[1];
	swapped = queued[0] == expect[1] && queued[1] == expect[0];
	if (in_order || swapped)
		return 0;

	fprintf(stderr, "warning: incorrect queue lengths\n");
	return 1;
}
281
/*
 * Test that an unbound packet socket opened without a protocol can neither
 * create a fanout group nor join an existing one.
 */
static void test_unbound_fanout(void)
{
	/* PACKET_FANOUT_HASH, group id 1 */
	int fanout_opt = (PACKET_FANOUT_HASH << 16) | 1;
	int unbound, member;

	fprintf(stderr, "test: unbound fanout\n");

	unbound = socket(PF_PACKET, SOCK_RAW, 0);
	if (unbound < 0) {
		perror("socket packet");
		exit(1);
	}

	/* Creating a new fanout group must be rejected. */
	if (setsockopt(unbound, SOL_PACKET, PACKET_FANOUT, &fanout_opt,
		       sizeof(fanout_opt)) == 0) {
		fprintf(stderr, "ERROR: unbound socket fanout create\n");
		exit(1);
	}

	/* Let a properly bound socket create group 1 ... */
	member = sock_fanout_open(PACKET_FANOUT_HASH, 1);
	if (member == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	/* ... joining it from the unbound socket must be rejected too. */
	if (setsockopt(unbound, SOL_PACKET, PACKET_FANOUT, &fanout_opt,
		       sizeof(fanout_opt)) == 0) {
		fprintf(stderr, "ERROR: unbound socket fanout join\n");
		exit(1);
	}

	close(unbound);
	close(member);
}
316
/*
 * Test an illegal mode + flag combination: rollover mode together with the
 * rollover flag must be refused.
 */
static void test_control_single(void)
{
	int fd;

	fprintf(stderr, "test: control single socket\n");

	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
	if (fd == -1)
		return;

	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
	exit(1);
}
328
/*
 * Test that a group refuses members with a different mode or different flags.
 *
 * @toggle: when non-zero, loopback is taken down after the group has been
 *          created and brought back up once the last member has joined.
 */
static void test_control_group(int toggle)
{
	int creator, joiner;

	fputs(toggle ?
	      "test: control multiple sockets with link down toggle\n" :
	      "test: control multiple sockets\n", stderr);

	creator = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (creator == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	if (toggle)
		loopback_set_up_down(0);

	/* same mode, extra defrag flag */
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
		exit(1);
	}

	/* same mode, extra rollover flag */
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
		exit(1);
	}

	/* different mode */
	if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong mode\n");
		exit(1);
	}

	/* identical mode and flags: must be accepted */
	joiner = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (joiner == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}

	if (toggle)
		loopback_set_up_down(1);

	if (close(joiner) || close(creator)) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
372
373 /* Test illegal max_num_members values */
test_control_group_max_num_members(void)374 static void test_control_group_max_num_members(void)
375 {
376 int fds[3];
377
378 fprintf(stderr, "test: control multiple sockets, max_num_members\n");
379
380 /* expected failure on greater than PACKET_FANOUT_MAX */
381 cfg_max_num_members = (1 << 16) + 1;
382 if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
383 fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
384 exit(1);
385 }
386
387 cfg_max_num_members = 256;
388 fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
389 if (fds[0] == -1) {
390 fprintf(stderr, "ERROR: failed open\n");
391 exit(1);
392 }
393
394 /* expected failure on joining group with different max_num_members */
395 cfg_max_num_members = 257;
396 if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
397 fprintf(stderr, "ERROR: set different max_num_members\n");
398 exit(1);
399 }
400
401 /* success on joining group with same max_num_members */
402 cfg_max_num_members = 256;
403 fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
404 if (fds[1] == -1) {
405 fprintf(stderr, "ERROR: failed to join group\n");
406 exit(1);
407 }
408
409 /* success on joining group with max_num_members unspecified */
410 cfg_max_num_members = 0;
411 fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
412 if (fds[2] == -1) {
413 fprintf(stderr, "ERROR: failed to join group\n");
414 exit(1);
415 }
416
417 if (close(fds[2]) || close(fds[1]) || close(fds[0])) {
418 fprintf(stderr, "ERROR: closing sockets\n");
419 exit(1);
420 }
421 }
422
/*
 * Test creating fanout groups with PACKET_FANOUT_FLAG_UNIQUEID.
 *
 * Checks that such a group can be created, that the typeflags read back
 * equal PACKET_FANOUT_HASH, that the group can be joined by id only with
 * the matching type, that a second such group can be created, and that
 * requesting the flag together with an explicit group id is refused.
 */
static void test_unique_fanout_group_ids(void)
{
	uint16_t flags_read, id_first, id_second;
	int grp_first, grp_joined, grp_second;

	fprintf(stderr, "test: unique ids\n");

	grp_first = sock_fanout_open(PACKET_FANOUT_HASH |
				     PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (grp_first == -1) {
		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(grp_first, &flags_read, &id_first);
	if (flags_read != PACKET_FANOUT_HASH) {
		fprintf(stderr, "ERROR: unexpected typeflags %x\n", flags_read);
		exit(1);
	}

	/* joining by id with another type must be refused */
	if (sock_fanout_open(PACKET_FANOUT_CPU, id_first) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong type.\n");
		exit(1);
	}

	/* joining by id with the matching type must succeed */
	grp_joined = sock_fanout_open(PACKET_FANOUT_HASH, id_first);
	if (grp_joined == -1) {
		fprintf(stderr,
			"ERROR: failed to join previously created group.\n");
		exit(1);
	}

	grp_second = sock_fanout_open(PACKET_FANOUT_HASH |
				      PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (grp_second == -1) {
		fprintf(stderr,
			"ERROR: failed to create a second unique id group.\n");
		exit(1);
	}

	/* the unique-id flag combined with an explicit id must be refused */
	sock_fanout_getopts(grp_second, &flags_read, &id_second);
	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
			     id_second) != -1) {
		fprintf(stderr,
			"ERROR: specified a group id when requesting unique id\n");
		exit(1);
	}

	if (close(grp_first) || close(grp_joined) || close(grp_second)) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
477
test_datapath(uint16_t typeflags,int port_off,const int expect1[],const int expect2[])478 static int test_datapath(uint16_t typeflags, int port_off,
479 const int expect1[], const int expect2[])
480 {
481 const int expect0[] = { 0, 0 };
482 char *rings[2];
483 uint8_t type = typeflags & 0xFF;
484 int fds[2], fds_udp[2][2], ret;
485
486 fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
487 typeflags, (uint16_t)PORT_BASE,
488 (uint16_t)(PORT_BASE + port_off));
489
490 fds[0] = sock_fanout_open(typeflags, 0);
491 fds[1] = sock_fanout_open(typeflags, 0);
492 if (fds[0] == -1 || fds[1] == -1) {
493 fprintf(stderr, "ERROR: failed open\n");
494 exit(1);
495 }
496 if (type == PACKET_FANOUT_CBPF)
497 sock_fanout_set_cbpf(fds[0]);
498 else if (type == PACKET_FANOUT_EBPF)
499 sock_fanout_set_ebpf(fds[0]);
500
501 rings[0] = sock_fanout_open_ring(fds[0]);
502 rings[1] = sock_fanout_open_ring(fds[1]);
503 pair_udp_open(fds_udp[0], PORT_BASE);
504 pair_udp_open(fds_udp[1], PORT_BASE + port_off);
505 sock_fanout_read(fds, rings, expect0);
506
507 /* Send data, but not enough to overflow a queue */
508 pair_udp_send(fds_udp[0], 15);
509 pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
510 ret = sock_fanout_read(fds, rings, expect1);
511
512 /* Send more data, overflow the queue */
513 pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
514 /* TODO: ensure consistent order between expect1 and expect2 */
515 ret |= sock_fanout_read(fds, rings, expect2);
516
517 if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
518 munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
519 fprintf(stderr, "close rings\n");
520 exit(1);
521 }
522 if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
523 close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
524 close(fds[1]) || close(fds[0])) {
525 fprintf(stderr, "close datapath\n");
526 exit(1);
527 }
528
529 return ret;
530 }
531
/*
 * Restrict the calling process to the single CPU @cpuid.
 *
 * Returns 0 on success and 1 when sched_setaffinity() fails with EINVAL.
 * Any other failure prints the CPU id and exits with status 1.
 */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t cpus;

	CPU_ZERO(&cpus);
	CPU_SET(cpuid, &cpus);

	if (!sched_setaffinity(0, sizeof(cpus), &cpus))
		return 0;

	if (errno == EINVAL)
		return 1;

	fprintf(stderr, "setaffinity %d\n", cpuid);
	exit(1);
}
548
main(int argc,char ** argv)549 int main(int argc, char **argv)
550 {
551 const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
552 const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
553 const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
554 const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } };
555 const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
556 const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
557 const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
558 const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
559 int port_off = 2, tries = 20, ret;
560
561 test_unbound_fanout();
562 test_control_single();
563 test_control_group(0);
564 test_control_group(1);
565 test_control_group_max_num_members();
566 test_unique_fanout_group_ids();
567
568 /* PACKET_FANOUT_MAX */
569 cfg_max_num_members = 1 << 16;
570 /* find a set of ports that do not collide onto the same socket */
571 ret = test_datapath(PACKET_FANOUT_HASH, port_off,
572 expect_hash[0], expect_hash[1]);
573 while (ret) {
574 fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
575 ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
576 expect_hash[0], expect_hash[1]);
577 if (!--tries) {
578 fprintf(stderr, "too many collisions\n");
579 return 1;
580 }
581 }
582
583 ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
584 port_off, expect_hash_rb[0], expect_hash_rb[1]);
585 ret |= test_datapath(PACKET_FANOUT_LB,
586 port_off, expect_lb[0], expect_lb[1]);
587 ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
588 port_off, expect_rb[0], expect_rb[1]);
589
590 ret |= test_datapath(PACKET_FANOUT_CBPF,
591 port_off, expect_bpf[0], expect_bpf[1]);
592 ret |= test_datapath(PACKET_FANOUT_EBPF,
593 port_off, expect_bpf[0], expect_bpf[1]);
594
595 set_cpuaffinity(0);
596 ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
597 expect_cpu0[0], expect_cpu0[1]);
598 if (!set_cpuaffinity(1))
599 /* TODO: test that choice alternates with previous */
600 ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
601 expect_cpu1[0], expect_cpu1[1]);
602
603 ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
604 expect_uniqueid[0], expect_uniqueid[1]);
605
606 if (ret)
607 return 1;
608
609 printf("OK. All tests passed\n");
610 return 0;
611 }
612