xref: /linux/samples/bpf/do_hbm_test.sh (revision 4201c9260a8d3c4ef238e51692a7e9b4e1e29efe)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Copyright (c) 2019 Facebook
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the terms of version 2 of the GNU General Public
8# License as published by the Free Software Foundation.
9
# Print the usage/help text for this script and exit.
# Invoked for -h and for any unrecognized argument.
Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and load a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]"
  echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  echo "             [-q=<qdisc>] [-R] [-r=<rate>|--rate=<rate>]"
  echo "             [-s=<server>|--server=<server>] [-S|--stats]"
  echo "             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo "  Where:"
  echo "    out               egress (default)"
  echo "    -b or --bpf       BPF program filename to load and attach."
  echo "                      Default is hbm_out_kern.o for egress,"
  echo "    -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "    --debug           print BPF trace buffer"
  echo "    -d or --delay     add a delay in ms using netem"
  echo "    -D                In addition to the goodput in Mbps, it also outputs"
  echo "                      other detailed information. This information is"
  echo "                      test dependent (i.e. iperf3 or netperf)."
  echo "    -E                enable ECN (not required for dctcp)"
  echo "    -f or --flows     number of concurrent flows (default=1)"
  echo "    -i or --id        cgroup id (an integer, default is 1)"
  echo "    -N                use netperf instead of iperf3"
  echo "    --no_cn           Do not return CN notifications"
  echo "    -l                do not limit flows using loopback"
  echo "    -h                Help"
  echo "    -p or --port      iperf3 port (default is 5201)"
  echo "    -P                use an iperf3 instance for each flow"
  echo "    -q                use the specified qdisc"
  echo "    -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "    -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                      size of 10KB, rest of 1MB. Reply in all"
  echo "                      cases is 1 byte."
  echo "                      More detailed output for each flow can be found"
  echo "                      in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                      cgroup id as specified with the -i flag, and <flow>"
  echo "                      is the flow id starting at 1 and increasing by 1 for"
  echo "                      flow (as specified by -f)."
  echo "    -s or --server    hostname of netperf server. Used to create netperf"
  echo "                      test traffic between two hosts (default is within host)"
  echo "                      netserver must be running on the host."
  echo "    -S or --stats     whether to update hbm stats (default is yes)."
  echo "    -t or --time      duration of iperf3 in seconds (default=5)"
  echo "    -w                Work conserving flag. cgroup can increase its"
  echo "                      bandwidth beyond the rate limit specified"
  echo "                      while there is available bandwidth. Current"
  echo "                      implementation assumes there is only one NIC"
  echo "                      (eth0), but can be extended to support multiple"
  echo "                      NICs."
  echo "    cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}
66
#set -x

# Global defaults; processArgs overrides these from the command line.
args="$@"             # raw command line, consumed by processArgs
name="$0"             # script name, used in the Usage text
debug_flag=0          # 1 when --debug given (dump BPF trace buffer)

# Traffic/test shape
dur=5                 # test duration in seconds (-t)
flows=1               # number of concurrent flows (-f)
flow_cnt=1            # running flow counter
rate=1000             # rate limit in Mbps (-r)
port=5201             # iperf3 base port (-p)
netem=0               # netem delay in ms; 0 = no netem (-d)
cc=x                  # TCP congestion control; "x" = system default (-c)
qdisc=""              # explicit qdisc for loopback (-q)
server=""             # remote netperf server; empty = within host (-s)

# Tool selection
use_netperf=0         # 1 = netperf instead of iperf3 (-N)
multi_iperf=0         # 1 = one iperf3 instance per flow (-P)
rr=0                  # 1 = netperf TCP_RR instead of TCP_STREAM (-R)

# hbm / cgroup configuration
dir="-o"              # hbm direction flag: -o = egress
dir_name="out"        # suffix used for per-cgroup stats files
id=1                  # cgroup id (-i)
prog=""               # BPF program file passed to ./hbm (-b)
flags=""              # extra flags accumulated for ./hbm
ecn=0                 # 1 = enable ECN via sysctl (-E)
details=0             # 1 = verbose per-flow output (-D)
do_stats=0            # 1 = hbm updates its stats file (-S)
92
# Launch ./hbm in the background with the globally configured options.
# The exact command line (followed by hbm's own output) goes to hbm.out;
# the background PID is printed on stdout so the caller can capture it.
start_hbm() {
  local cmd="./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"

  rm -f hbm.out
  {
    echo "$cmd"
    echo " "
  } > hbm.out
  $cmd >> hbm.out 2>&1 &
  echo $!
}
100
# Parse the command line captured in $args and set the corresponding
# globals.  Options use the -x=value / --opt=value form, so "${i#*=}"
# strips everything up to and including the first '='.  Unknown
# arguments print a diagnostic and fall through to Usage (which exits).
processArgs () {
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    # Support for upcoming fq Early Departure Time egress rate limiting
    #--edt)
    # prog="hbm_out_edt_kern.o"
    # qdisc="fq"
    # ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -h)
      # Documented help flag: show usage without the "Unknown arg" noise
      # that the default case would print.
      Usage
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}
193
# ---- Test setup: start hbm, join the test cgroup, prepare the host ----

processArgs

# Fresh debug log when BPF trace output was requested with --debug.
if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

# start_hbm echoes the background PID of ./hbm; give it a moment to
# load and attach the BPF program before generating traffic.
hbm_pid=$(start_hbm)
usleep 100000

host=`hostname`
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell (and hence all traffic-generator children) into the
# cgroup that hbm rate-limits.
echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited

# Remove stale output from previous runs.
rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# iperf3 mode sets the congestion control system-wide; save the current
# value so it can be restored afterwards (netperf passes CC per
# connection via -K instead).
if [ $use_netperf -eq 0 ] ; then
  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

# Install a netem delay qdisc (-d) or an explicit qdisc (-q) on
# loopback; -d takes precedence when both were given.
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root $qdisc > /dev/null 2>&1
fi
234
# ---- Traffic generation and measurement ----

# Ping in the background for the whole test window (5 samples/sec) so
# -D can report the average RTT afterwards.  Target is loopback unless
# a remote netperf server was specified.
n=0
m=$[$dur * 5]
hn="::1"
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

if [ $use_netperf -ne 0 ] ; then
  # netperf mode: start a local netserver only if none is already
  # running and no remote server was specified.
  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
                   awk '{ print $1 }'`
  if [ "$begNetserverPid" == "" ] ; then
    if [ "$server" == "" ] ; then
      ( ./netserver > /dev/null 2>&1) &
      usleep 100000
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  # cc == "x" means: leave the congestion control at the system default.
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi
  replySize=1
  # Launch one background netperf per flow.  With -R (TCP_RR) the first
  # flow uses a 10KB request and the rest 1MB, replies are 1 byte
  # (request/reply swapped for ingress).  Without -R a plain TCP_STREAM
  # is used.  Each flow's key=value output goes to netperf.<id>.<flow>.
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      # NOTE(review): "P50_lATENCY" (lower-case l) looks like a typo for
      # P50_LATENCY (cf. the ingress branch below) — confirm whether
      # netperf matches -k output selectors case-insensitively before
      # changing it.
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR  -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      else
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      fi
    fi
    flow_cnt=$[flow_cnt+1]
  done

# sleep for duration of test (plus some buffer)
  n=$[dur+2]
  sleep $n

# force graceful termination of netperf
  pids=`pgrep netperf`
  for p in $pids ; do
    kill -SIGALRM $p
  done

  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
  # Sum the per-flow throughput values scraped from the netperf
  # key=value output files; the reported goodput is the total.
  while [ $flow_cnt -le $flows ] ; do
    if [ "$dir" == "-i" ] ; then
      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    else
      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$[rate+r]
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    echo ""
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
  # iperf3 mode, single server instance (-1 = exit after one client):
  # one client with -P parallel streams; the goodput is taken from the
  # last "receiver" summary line in iperf.<id>.
  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  usleep 100000
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  rate=`echo $rates | grep -o "[0-9]*$"`

  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
else
  # iperf3 mode with -P: a dedicated server/client pair per flow, each
  # on its own port; per-flow rates land in iperf3.<id>.<flow>.
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$[port+1]
    flow_cnt=$[flow_cnt+1]
  done
  n=$[dur+1]
  sleep $n
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi

  # Sum the per-flow rates collected above into the aggregate goodput.
  while [ $flow_cnt -le $flows ] ; do
    r=`cat iperf3.$id.$flow_cnt`
#    echo "rate for flow $flow_cnt: $r"
  if [ $details -ne 0 ] ; then
    echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
  fi
    rate=$[rate+r]
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
fi
392
# ---- Teardown: restore system state and reap helper processes ----

# Restore the congestion control saved during setup (iperf3 mode
# changed it system-wide).
if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi

sleep 2

# Kill the background ./hbm only if the PID found by ps still matches
# the one we started (guards against killing an unrelated process that
# reused the PID).
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any BPF programs that may have lingered
# Small state machine over the whitespace-separated tokens of
# "bpftool cgroup tree | grep hbm": a token whose first 36 chars are
# our test cgroup prefix starts a record (v=0), the next token is the
# attachment id (v=1), and the one after that is the attach type, at
# which point the program is detached and the machine resets.
ttx=`bpftool cgroup tree | grep hbm`
v=2
for x in $ttx ; do
    if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
	cg=$x ; v=0
    else
	if [ $v -eq 0 ] ; then
	    id=$x ; v=1
	else
	    if [ $v -eq 1 ] ; then
		type=$x ; bpftool cgroup detach $cg $type id $id
		v=0
	    fi
	fi
    fi
done

# Stop the netserver we launched, but only if none was running before
# the test and no remote server was used.
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" == "" ] ; then
    if [ "$begNetserverPid" == "" ] ; then
      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
      if [ "$netserverPid" != "" ] ; then
        kill $netserverPid
      fi
    fi
  fi
fi
exit
441