xref: /linux/samples/bpf/do_hbm_test.sh (revision 2b64b2ed277ff23e785fbdb65098ee7e1252d64f)
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

# Print the help text on stdout and terminate the script.
# Globals:   name (read) - shown as the program name in the synopsis
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; calls 'exit' (status of the preceding cat)
Usage() {
  # Unquoted delimiter on purpose: $name must be expanded in the synopsis.
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and loads a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]
             [-d=<delay>|--delay=<delay>] [--debug] [-E]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress.
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     whether to update hbm stats (default is yes).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use

EOF
  exit
}

#set -x

# ---------------------------------------------------------------------------
# Defaults. processArgs overrides these from the command line.
# ---------------------------------------------------------------------------

# Command line as one space-joined string (this is what processArgs iterates
# over) and the script name shown by Usage.
args="$*"
name="$0"

# Options forwarded to ./hbm.
dir="-o"          # direction flag handed to hbm ("-o" = egress)
dir_name="out"    # direction suffix of the hbm.<id>.<dir_name> stats file
id=1              # cgroup id (-i)
rate=1000         # rate limit in Mbps (-r)
dur=5             # test duration in seconds (-t)
prog=""           # BPF program file (-b); empty means none is passed to hbm
flags=""          # extra hbm switches accumulated by --debug, -l, -S, -w
debug_flag=0      # 1 once --debug was given
do_stats=0        # 1 once -S/--stats was given

# Traffic generation.
flows=1           # number of concurrent flows (-f)
flow_cnt=1        # running flow counter used by the test loops
port=5201         # first iperf3 port (-p)
multi_iperf=0     # 1: one iperf3 instance per flow (-P)
use_netperf=0     # 1: use netperf instead of iperf3 (-N)
rr=0              # 1: netperf TCP_RR instead of TCP_STREAM (-R)
server=""         # remote netperf server (-s); empty means this host

# Network tuning.
cc=x              # congestion control (-c); "x" means leave it unchanged
ecn=0             # 1: enable ECN for the test (-E)
netem=0           # netem delay in ms (-d); 0 means no netem qdisc
qdisc=""          # root qdisc to install on lo (-q); empty means none

# Reporting.
details=0         # 1: print detailed output (-D)

# Launch ./hbm in the background and report its PID.
# Globals:   dir, id, rate, dur, flags, dbg, prog (read)
# Arguments: none
# Outputs:   the PID of the background hbm process on stdout;
#            hbm.out receives the command line, a separator line and then
#            everything hbm writes to stdout/stderr
# Note:      dbg is not assigned anywhere in the visible script, so it
#            expands to nothing unless it is inherited from the environment.
function start_hbm () {
  local -a cmd=(./hbm "$dir" -n "$id" -r "$rate" -t "$dur")

  # flags and dbg hold zero or more whitespace-separated switches, so they
  # are deliberately left unquoted to be split into separate arguments.
  # shellcheck disable=SC2206
  cmd+=($flags $dbg)

  # The program path is passed as a single argument (it may contain spaces)
  # and is omitted entirely when no program was selected.
  if [ -n "$prog" ] ; then
    cmd+=("$prog")
  fi

  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  "${cmd[@]}" >> hbm.out 2>&1  &
  echo $!
}

# Parse the command line and update the option globals.
# Globals:   args (read, used only when no arguments are passed);
#            dir, dir_name, prog, cc, flags, debug_flag, netem, details,
#            ecn, flows, id, use_netperf, port, multi_iperf, qdisc, rate,
#            rr, server, do_stats, dur (written)
# Arguments: optional list of option tokens. When called without arguments
#            the space-joined global $args is word-split and parsed instead.
# Outputs:   "Unknown arg:<token>" followed by the help text for an
#            unrecognised token
# Returns:   0; does not return after -h or an unknown token because Usage
#            exits the script
processArgs () {
  local -a argv
  local i

  if [ $# -gt 0 ] ; then
    argv=("$@")
  else
    # $args is a flat string; splitting it on whitespace is intended here.
    # shellcheck disable=SC2206
    argv=($args)
  fi

  for i in "${argv[@]}" ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    # Support for upcoming fq Early Departure Time egress rate limiting
    #--edt)
    # prog="hbm_out_edt_kern.o"
    # qdisc="fq"
    # ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -h|--help)
      # Documented in the help text; print it without the "Unknown arg" line.
      Usage
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}

# ---------------------------------------------------------------------------
# Main script body: start hbm, join its cgroup, generate traffic with netperf
# or iperf3, print the measured goodput, then undo every system change.
# ---------------------------------------------------------------------------

# Print the "Details for HBM" banner and, when stats were requested (-S) and
# the stats file for this cgroup/direction exists, dump that file.
show_hbm_stats() {
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ] ; then
    cat "hbm.$id.$dir_name"
  fi
}

# Print the final result held in $rate. With -D the average delay parsed from
# ping.out and the aggregate goodput are printed; otherwise only the goodput.
show_result() {
  local delay
  if [ "$details" -ne 0 ] ; then
    delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$")
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo "$rate"
  fi
}

processArgs

if [ "$debug_flag" -eq 1 ] ; then
  rm -f hbm_out.log
fi

hbm_pid=$(start_hbm)
# Give hbm a moment to come up. 'sleep 0.1' replaces 'usleep 100000', which
# is not installed on many current distributions.
sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell into the test cgroup; the traffic generators started below
# are its children.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"

if [ "$ecn" -ne 0 ] ; then
  # Remember the current setting so it can be restored instead of being
  # forced to 0 at the end of the run.
  cur_ecn=$(sysctl -n net.ipv4.tcp_ecn)
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ "$use_netperf" -eq 0 ] ; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
  # Left unquoted so that a value made of several words is handed to tc as
  # separate arguments.
  # shellcheck disable=SC2086
  tc qdisc add dev lo root $qdisc > /dev/null 2>&1
fi

# Ping the peer five times per second for the duration of the test; the
# average delay is reported by show_result when -D is given.
m=$((dur * 5))
hn="::1"
if [ "$use_netperf" -ne 0 ] && [ "$server" != "" ] ; then
  hn=$server
fi

( ping6 -i 0.2 -c "$m" "$hn" > ping.out 2>&1 ) &

if [ "$use_netperf" -ne 0 ] ; then
  # Start a local netserver only if none is running and no remote server
  # was requested. begNetserverPid is checked again during cleanup so that a
  # netserver that was already running is left alone.
  begNetserverPid=$(pgrep -x netserver)
  if [ "$begNetserverPid" == "" ] && [ "$server" == "" ] ; then
    ( ./netserver > /dev/null 2>&1) &
    sleep 0.1
  fi

  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi

  # Optional "-K <local cc>,<remote cc>" pair for netperf.
  np_cc=()
  if [ "$cc" != "x" ] ; then
    np_cc=(-K "$cc,$cc")
  fi

  replySize=1
  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    if [ "$rr" -ne 0 ] ; then
      # First flow sends 10K requests, the others 1M; for ingress the sizes
      # are swapped so the bulk data travels in the reply.
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r "$reqSize,$replySize" "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    elif [ "$dir" == "-i" ] ; then
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r 1,10M "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    else
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_STREAM -- \
          "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    fi
  done

  # sleep for duration of test (plus some buffer)
  n=$((dur + 2))
  sleep "$n"

  # force graceful termination of netperf
  for p in $(pgrep netperf) ; do
    kill -SIGALRM "$p"
  done

  rate=0
  if [ "$details" -ne 0 ] ; then
    show_hbm_stats
  fi
  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    if [ "$dir" == "-i" ] ; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    # A flow that produced no result counts as 0 instead of breaking the sum.
    rate=$((rate + ${r:-0}))
    if [ "$details" -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat "netperf.$id.$flow_cnt"
    fi
  done
  if [ "$details" -ne 0 ] ; then
    echo ""
  fi
  show_result
elif [ "$multi_iperf" -eq 0 ] ; then
  # One iperf3 server/client pair carrying all flows.
  (iperf3 -s -p "$port" -1 > /dev/null 2>&1) &
  sleep 0.1
  iperf3 -c "$host" -p "$port" -i 0 -P "$flows" -f m -t "$dur" > "iperf.$id"
  rates=$(grep receiver "iperf.$id" | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  # $rates is unquoted on purpose: the per-line values are joined onto one
  # line and the last one is taken as the result.
  # shellcheck disable=SC2086
  rate=$(echo $rates | grep -o "[0-9]*$")

  if [ "$details" -ne 0 ] ; then
    show_hbm_stats
  fi
  show_result
else
  # One iperf3 server/client pair per flow, each on its own port.
  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    (iperf3 -s -p "$port" -1 > /dev/null 2>&1) &
    ( iperf3 -c "$host" -p "$port" -i 0 -P 1 -f m -t "$dur" \
        | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" \
        | grep -o "[0-9]*$" > "iperf3.$id.$flow_cnt" ) &
    port=$((port + 1))
  done
  n=$((dur + 1))
  sleep "$n"

  rate=0
  if [ "$details" -ne 0 ] ; then
    show_hbm_stats
  fi

  for (( flow_cnt = 1; flow_cnt <= flows; flow_cnt++ )) ; do
    r=$(cat "iperf3.$id.$flow_cnt")
    if [ "$details" -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    # A flow that produced no result counts as 0 instead of breaking the sum.
    rate=$((rate + ${r:-0}))
  done
  show_result
fi

# ---------------------------------------------------------------------------
# Cleanup: restore sysctls and qdisc, stop hbm, detach leftover BPF programs.
# ---------------------------------------------------------------------------

if [ "$use_netperf" -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi
if [ "$ecn" -ne 0 ] ; then
  # Fall back to 0 (the value previously written unconditionally) if the
  # original setting could not be read.
  sysctl -w -q -n net.ipv4.tcp_ecn="${cur_ecn:-0}"
fi
# Remove the root qdisc on lo whether it was installed for -d or for -q.
if [ "$netem" -ne "0" ] || [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi

sleep 2

# Stop our hbm instance if it is still running. Only the PID returned by
# start_hbm is examined, so other hbm instances (for example tests on other
# cgroups running in parallel) do not interfere with the check.
if [ -n "$hbm_pid" ] && [ "$(ps -p "$hbm_pid" -o comm= 2>/dev/null)" == "hbm" ] ; then
  kill "$hbm_pid"
fi

sleep 1

# Detach any BPF programs that may have lingered.
# 'bpftool cgroup tree' is expected to print a cgroup path on a line of its
# own, followed by one "<id> <attach type> ..." line per attached program
# (see bpftool-cgroup(8)). Only lines containing "hbm" are examined.
cg=""
while read -r first second _ ; do
  if [[ "$first" == "$cg_base_dir/cgroup-test-work-dir/"* ]] ; then
    cg=$first
  elif [[ -n "$cg" && "$first" =~ ^[0-9]+$ && -n "$second" ]] ; then
    # stdin is redirected so the command cannot consume the loop's input.
    bpftool cgroup detach "$cg" "$second" id "$first" < /dev/null
  fi
done < <(bpftool cgroup tree | grep hbm)

# Stop the netserver only if this script started it.
if [ "$use_netperf" -ne 0 ] && [ "$server" == "" ] &&
   [ "$begNetserverPid" == "" ] ; then
  netserverPid=$(pgrep -x netserver)
  if [ "$netserverPid" != "" ] ; then
    # Unquoted: there may be several PIDs, one per line.
    # shellcheck disable=SC2086
    kill $netserverPid
  fi
fi
exit
