xref: /freebsd/share/man/man4/dtrace_tcp.4 (revision b9128a37faafede823eb456aa65a11ac69997284)
1.\" Copyright (c) 2015 Mark Johnston <markj@FreeBSD.org>
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.Dd July 2, 2023
26.Dt DTRACE_TCP 4
27.Os
28.Sh NAME
29.Nm dtrace_tcp
30.Nd a DTrace provider for tracing events related to the
31.Xr tcp 4
32protocol
33.Sh SYNOPSIS
34.Fn tcp:::accept-established "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
35    "tcpsinfo_t *" "tcpinfo_t *"
36.Fn tcp:::accept-refused "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
37    "tcpsinfo_t *" "tcpinfo_t *"
38.Fn tcp:::connect-established "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
39    "tcpsinfo_t *" "tcpinfo_t *"
40.Fn tcp:::connect-refused "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
41    "tcpsinfo_t *" "tcpinfo_t *"
42.Fn tcp:::connect-request "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
43    "tcpsinfo_t *" "tcpinfo_t *"
44.Fn tcp:::receive "pktinfo_t *" "csinfo_t *" "ipinfo_t *" "tcpsinfo_t *" \
45    "tcpinfo_t *"
46.Fn tcp:::send "pktinfo_t *" "csinfo_t *" "ipinfo_t *" "tcpsinfo_t *" \
47    "tcpinfo_t *"
48.Fn tcp:::state-change "void *" "csinfo_t *" "void *" "tcpsinfo_t *" "void *" \
49    "tcplsinfo_t *"
50.Fn tcp:::siftr "siftrinfo_t *"
51.Sh DESCRIPTION
52The DTrace
53.Nm tcp
54provider allows users to trace events in the
55.Xr tcp 4
56protocol implementation.
57This provider is similar to the
58.Xr dtrace_ip 4
59and
60.Xr dtrace_udp 4
61providers, but additionally contains probes corresponding to protocol events at
62a level higher than packet reception and transmission.
63All
64.Nm tcp
65probes except for
66.Fn tcp:::state-change
67and
68.Fn tcp:::siftr
69have the same number and type of arguments.
70The last three arguments are used to describe a TCP segment: the
71.Vt ipinfo_t
72argument exposes the version-agnostic fields of the IP header, while the
73.Vt tcpinfo_t
74argument exposes the TCP header, and the
75.Vt tcpsinfo_t
76argument describes details of the corresponding TCP connection state, if any.
77Their fields are described in the ARGUMENTS section.
78.Pp
79The
80.Fn tcp:::accept-established
81probe fires when a remotely-initiated active TCP open succeeds.
82At this point the new connection is in the ESTABLISHED state, and the probe
83arguments expose the headers associated with the final ACK of the three-way
84handshake.
85The
86.Fn tcp:::accept-refused
87probe fires when a SYN arrives on a port without a listening socket.
88The probe arguments expose the headers associated with the RST to be transmitted
89to the remote host in response to the SYN segment.
90.Pp
91The
92.Fn tcp:::connect-established ,
93.Fn tcp:::connect-refused ,
94and
95.Fn tcp:::connect-request
96probes are similar to the
97.Ql accept
98probes, except that they correspond to locally-initiated TCP connections.
99The
100.Fn tcp:::connect-established
101probe fires when the SYN-ACK segment of a three-way handshake is received from
102the remote host and a final ACK is prepared for transmission.
103This occurs immediately after the local connection state transitions from
104SYN-SENT to ESTABLISHED.
105The probe arguments describe the headers associated with the received SYN-ACK
106segment.
107The
108.Fn tcp:::connect-refused
109probe fires when the local host receives a RST segment in response to a SYN
110segment, indicating that the remote host refused to open a connection.
111The probe arguments describe the IP and TCP headers associated with the received
112RST segment.
113The
114.Fn tcp:::connect-request
115probe fires as the kernel prepares to transmit the initial SYN segment of a
116three-way handshake.
117.Pp
118The
119.Fn tcp:::send
120and
121.Fn tcp:::receive
122probes fire when the host sends or receives a TCP packet, respectively.
123As with the
124.Xr dtrace_udp 4
125provider,
126.Nm tcp
127probes fire only for packets sent by or to the local host; forwarded packets are
128handled in the IP layer and are only visible to the
129.Xr dtrace_ip 4
130provider.
131.Pp
132The
133.Fn tcp:::state-change
134probe fires upon local TCP connection state transitions.
135Its first, third and fifth arguments are currently always
136.Dv NULL .
137Its last argument describes the from-state in the transition, and the to-state
138can be obtained from
139.Dv args[3]->tcps_state .
140.Pp
141The
142.Fn tcp:::siftr
143probe fires when a TCP segment is sent or received by the host.
144For a detailed description see
145.Xr siftr 4 .
146The
147.Vt siftrinfo_t
148argument provides the information about the TCP connection.
149.Sh ARGUMENTS
150The
151.Vt pktinfo_t
152argument is currently unimplemented and is included for compatibility with other
153implementations of this provider.
154Its fields are:
155.Bl -tag -width "uintptr_t pkt_addr" -offset indent
156.It Vt uintptr_t pkt_addr
157Always set to 0.
158.El
159.Pp
160The
161.Vt csinfo_t
162argument is currently unimplemented and is included for compatibility with other
163implementations of this provider.
164Its fields are:
165.Bl -tag -width "uintptr_t cs_addr" -offset indent
166.It Vt uintptr_t cs_addr
167Always set to 0.
168.It Vt uint64_t cs_cid
169A pointer to the
170.Vt struct inpcb
171for this packet, or
172.Dv NULL .
173.It Vt pid_t cs_pid
174Always set to 0.
175.El
176.Pp
177The
178.Vt ipinfo_t
179type is a version-agnostic representation of fields from an IP header.
180Its fields are described in the
181.Xr dtrace_ip 4
182manual page.
183.Pp
184The
185.Vt tcpsinfo_t
186type is used to provide a stable representation of TCP connection state.
187Some
188.Nm tcp
189probes, such as
190.Fn tcp:::accept-refused ,
191fire in a context where there is no TCP connection; this argument is
192.Dv NULL
193in that case.
194Its fields are:
195.Bl -tag -width "uint16_t tcps_lport" -offset indent
196.It Vt uintptr_t tcps_addr
197The address of the corresponding TCP control block.
198This is currently a pointer to a
199.Vt struct tcpcb .
200.It Vt int tcps_local
201A boolean indicating whether the connection is local to the host.
202Currently unimplemented and always set to -1.
203.It Vt int tcps_active
204A boolean indicating whether the connection was initiated by the local host.
205Currently unimplemented and always set to -1.
206.It Vt uint16_t tcps_lport
207Local TCP port.
208.It Vt uint16_t tcps_rport
209Remote TCP port.
210.It Vt string tcps_laddr
211Local address.
212.It Vt string tcps_raddr
213Remote address.
214.It Vt int32_t tcps_state
215Current TCP state.
216The valid TCP state values are given by the constants prefixed with
217.Ql TCPS_
218in
219.Pa /usr/lib/dtrace/tcp.d .
220.It Vt uint32_t tcps_iss
221Initial send sequence number.
222.It Vt uint32_t tcps_suna
223Lowest sequence number of sent but unacknowledged data.
224.It Vt uint32_t tcps_snxt
225Next sequence number for send.
226.It Vt uint32_t tcps_rack
227Sequence number of received and acknowledged data.
228.It Vt uint32_t tcps_rnxt
229Next expected sequence number for receive.
230.It Vt u_long tcps_swnd
231TCP send window size.
232.It Vt int32_t tcps_snd_ws
233Window scaling factor for the TCP send window.
234.It Vt u_long tcps_rwnd
235TCP receive window size.
236.It Vt int32_t tcps_rcv_ws
237Window scaling factor for the TCP receive window.
238.It Vt u_long tcps_cwnd
239TCP congestion window size.
240.It Vt u_long tcps_cwnd_ssthresh
241Congestion window threshold at which slow start ends and congestion avoidance
242begins.
243.It Vt uint32_t tcps_sack_fack
244Last sequence number selectively acknowledged by the receiver.
245.It Vt uint32_t tcps_sack_snxt
246Next selectively acknowledge sequence number at which to begin retransmitting.
247.It Vt uint32_t tcps_rto
248Round-trip timeout, in milliseconds.
249.It Vt uint32_t tcps_mss
250Maximum segment size.
251.It Vt int tcps_retransmit
252A boolean indicating that the local sender is retransmitting data.
253.It Vt int tcps_srtt
254Smoothed round-trip time.
255.El
256.Pp
257The
258.Vt tcpinfo_t
259type exposes the fields in a TCP segment header in host order.
260Its fields are:
261.Bl -tag -width "struct tcphdr *tcp_hdr" -offset indent
262.It Vt uint16_t tcp_sport
263Source TCP port.
264.It Vt uint16_t tcp_dport
265Destination TCP port.
266.It Vt uint32_t tcp_seq
267Sequence number.
268.It Vt uint32_t tcp_ack
269Acknowledgement number.
270.It Vt uint8_t tcp_offset
271Data offset, in bytes.
272.It Vt uint8_t tcp_flags
273TCP flags.
274.It Vt uint16_t tcp_window
275TCP window size.
276.It Vt uint16_t tcp_checksum
277Checksum.
278.It Vt uint16_t tcp_urgent
279Urgent data pointer.
280.It Vt struct tcphdr *tcp_hdr
281A pointer to the raw TCP header.
282.El
283.Pp
284The
285.Vt tcplsinfo_t
286type is used by the
287.Fn tcp:::state-change
288probe to provide the from-state of a transition.
289Its fields are:
290.Bl -tag -width "int32_t tcps_state" -offset indent
291.It Vt int32_t tcps_state
292A TCP state.
293The valid TCP state values are given by the constants prefixed with
294.Ql TCPS_
295in
296.Pa /usr/lib/dtrace/tcp.d .
297.El
298.Pp
299The
300.Vt siftrinfo_t
301type is used by the
302.Fn tcp:::siftr
303probe to provide the state of the TCP connection.
304Its fields are:
305.Bl -tag -width "u_int sent_inflight_bytes" -offset indent
306.It Vt uint8_t direction
307Direction of packet that triggered the log message.
308Either
309.Qq 0
310for in, or
311.Qq 1
312for out.
313.It Vt uint8_t ipver
314The version of the IP protocol being used.
315Either
316.Qq 1
317for IPv4, or
318.Qq 2
319for IPv6.
320.It Vt uint16_t lport
321The TCP port that the local host is communicating via.
322.It Vt uint16_t rport
323The TCP port that the remote host is communicating via.
324.It Vt string laddr
325The IPv4 or IPv6 address of the local host.
326.It Vt string raddr
327The IPv4 or IPv6 address of the remote host.
328.It Vt uint32_t snd_cwnd
329The current congestion window (CWND) for the flow, in bytes.
330.It Vt uint32_t snd_wnd
331The current sending window for the flow, in bytes.
332The post scaled value is reported, except during the initial handshake (first
333few packets), during which time the unscaled value is reported.
334.It Vt uint32_t rcv_wnd
335The current receive window for the flow, in bytes.
336The post scaled value is always reported.
337.It Vt uint32_t t_flags2
338The current value of the t_flags2 for the flow.
339.It Vt uint32_t snd_ssthresh
340The slow start threshold (SSTHRESH) for the flow, in bytes.
341.It Vt int conn_state
342A TCP state.
343The valid TCP state values are given by the constants prefixed with
344.Ql TCPS_
345in
346.Pa /usr/lib/dtrace/tcp.d .
347.It Vt uint32_t mss
348The maximum segment size (MSS) for the flow, in bytes.
349.It Vt uint32_t srtt
350The current smoothed RTT (SRTT) for the flow in microseconds.
351.It Vt u_char sack_enabled
352SACK enabled indicator: 1 if SACK is enabled, 0 otherwise.
353.It Vt u_char snd_scale
354The current window scaling factor for the sending window.
355.It Vt u_char rcv_scale
356The current window scaling factor for the receiving window.
357.It Vt u_int t_flags
358The current value of the t_flags for the flow.
359.It Vt uint32_t rto
360The current retransmission timeout (RTO) for the flow in microseconds.
361Divide by 1,000,000 to get the timeout length in seconds.
362.It Vt u_int snd_buf_hiwater
363The current size of the socket send buffer in bytes.
364.It Vt u_int snd_buf_cc
365The current number of bytes in the socket send buffer.
366.It Vt u_int rcv_buf_hiwater
367The current size of the socket receive buffer in bytes.
368.It Vt u_int rcv_buf_cc
369The current number of bytes in the socket receive buffer.
370.It Vt u_int sent_inflight_bytes
371The current number of unacknowledged bytes in-flight.
372Bytes acknowledged via SACK are not excluded from this count.
373.It Vt int t_segqlen
374The current number of segments in the reassembly queue.
375.It Vt u_int flowid
376Flowid for the connection.
377A caveat: Zero '0' either represents a valid flowid or a default value when
378the flowid is not being set.
379.It Vt u_int flowtype
380Flow type for the connection.
381Flowtype defines which protocol fields are hashed to produce the flowid.
382A complete listing is available in
383.Pa /usr/include/sys/mbuf.h
384under
385.Dv M_HASHTYPE_* .
386.El
387.Sh FILES
388.Bl -tag -width "/usr/lib/dtrace/siftr.d" -compact
389.It Pa /usr/lib/dtrace/tcp.d
390DTrace type and translator definitions for all the probes of the
391.Nm tcp
392provider except the
393.Nm siftr
394probe.
395.It Pa /usr/lib/dtrace/siftr.d
396DTrace type and translator definitions for the
397.Nm siftr
398probe of the
399.Nm tcp
400provider.
401.El
402.Sh EXAMPLES
403The following script logs TCP segments in real time:
404.Bd -literal -offset indent
405#pragma D option quiet
406#pragma D option switchrate=10hz
407
408dtrace:::BEGIN
409{
410        printf(" %3s %15s:%-5s      %15s:%-5s %6s  %s\\n", "CPU",
411            "LADDR", "LPORT", "RADDR", "RPORT", "BYTES", "FLAGS");
412}
413
414tcp:::send
415{
416        this->length = args[2]->ip_plength - args[4]->tcp_offset;
417        printf(" %3d %16s:%-5d -> %16s:%-5d %6d  (", cpu, args[2]->ip_saddr,
418            args[4]->tcp_sport, args[2]->ip_daddr, args[4]->tcp_dport,
419            this->length);
420        printf("%s", args[4]->tcp_flags & TH_FIN ? "FIN|" : "");
421        printf("%s", args[4]->tcp_flags & TH_SYN ? "SYN|" : "");
422        printf("%s", args[4]->tcp_flags & TH_RST ? "RST|" : "");
423        printf("%s", args[4]->tcp_flags & TH_PUSH ? "PUSH|" : "");
424        printf("%s", args[4]->tcp_flags & TH_ACK ? "ACK|" : "");
425        printf("%s", args[4]->tcp_flags & TH_URG ? "URG|" : "");
426        printf("%s", args[4]->tcp_flags == 0 ? "null " : "");
427        printf("\\b)\\n");
428}
429
430tcp:::receive
431{
432        this->length = args[2]->ip_plength - args[4]->tcp_offset;
433        printf(" %3d %16s:%-5d <- %16s:%-5d %6d  (", cpu,
434            args[2]->ip_daddr, args[4]->tcp_dport, args[2]->ip_saddr,
435            args[4]->tcp_sport, this->length);
436        printf("%s", args[4]->tcp_flags & TH_FIN ? "FIN|" : "");
437        printf("%s", args[4]->tcp_flags & TH_SYN ? "SYN|" : "");
438        printf("%s", args[4]->tcp_flags & TH_RST ? "RST|" : "");
439        printf("%s", args[4]->tcp_flags & TH_PUSH ? "PUSH|" : "");
440        printf("%s", args[4]->tcp_flags & TH_ACK ? "ACK|" : "");
441        printf("%s", args[4]->tcp_flags & TH_URG ? "URG|" : "");
442        printf("%s", args[4]->tcp_flags == 0 ? "null " : "");
443        printf("\\b)\\n");
444}
445.Ed
446The following script logs TCP connection state changes as they occur:
447.Bd -literal -offset indent
448#pragma D option quiet
449#pragma D option switchrate=25hz
450
451int last[int];
452
453dtrace:::BEGIN
454{
455        printf("   %12s %-20s    %-20s %s\\n",
456            "DELTA(us)", "OLD", "NEW", "TIMESTAMP");
457}
458
459tcp:::state-change /last[args[1]->cs_cid] != 0/
460{
461        this->elapsed = (timestamp - last[args[1]->cs_cid]) / 1000;
462        printf("   %12d %-20s -> %-20s %d\\n", this->elapsed,
463            tcp_state_string[args[5]->tcps_state],
464            tcp_state_string[args[3]->tcps_state], timestamp);
465        last[args[1]->cs_cid] = timestamp;
466}
467
468tcp:::state-change
469/last[args[1]->cs_cid] == 0/
470{
471        printf("   %12s %-20s -> %-20s %d\\n", "-",
472            tcp_state_string[args[5]->tcps_state],
473            tcp_state_string[args[3]->tcps_state], timestamp);
474        last[args[1]->cs_cid] = timestamp;
475}
476.Ed
477The following script uses the siftr probe to show the current value of CWND
478and SSTHRESH when a packet is sent or received:
479.Bd -literal -offset indent
480#pragma D option quiet
481#pragma D option switchrate=10hz
482
483dtrace:::BEGIN
484{
485        printf(" %3s %16s:%-5s %16s:%-5s %10s %10s\\n",
486            "DIR", "LADDR", "LPORT", "RADDR", "RPORT", "CWND", "SSTHRESH");
487}
488
489tcp:::siftr
490{
491        printf(" %3s %16s:%-5d %16s:%-5d %10u %10u\\n",
492            siftr_dir_string[args[0]->direction],
493            args[0]->laddr, args[0]->lport, args[0]->raddr, args[0]->rport,
494            args[0]->snd_cwnd, args[0]->snd_ssthresh);
495}
496.Ed
497.Sh COMPATIBILITY
498This provider is compatible with the
499.Nm tcp
500provider in Solaris.
501.Sh SEE ALSO
502.Xr dtrace 1 ,
503.Xr dtrace_ip 4 ,
504.Xr dtrace_sctp 4 ,
505.Xr dtrace_udp 4 ,
506.Xr dtrace_udplite 4 ,
507.Xr siftr 4 ,
508.Xr tcp 4 ,
509.Xr SDT 9
510.Sh HISTORY
511The
512.Nm tcp
513provider first appeared in
514.Fx
51510.0.
516.Sh AUTHORS
517This manual page was written by
518.An Mark Johnston Aq Mt markj@FreeBSD.org .
519.Sh BUGS
520The
521.Vt tcps_local
522and
523.Vt tcps_active
524fields of
525.Vt tcpsinfo_t
526are not filled in by the translator.
527