xref: /freebsd/share/man/man4/dtrace_tcp.4 (revision 43e29d03f416d7dda52112a29600a7c82ee1a91e)
1.\" Copyright (c) 2015 Mark Johnston <markj@FreeBSD.org>
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd July 2, 2023
28.Dt DTRACE_TCP 4
29.Os
30.Sh NAME
31.Nm dtrace_tcp
32.Nd a DTrace provider for tracing events related to the
33.Xr tcp 4
34protocol
35.Sh SYNOPSIS
36.Fn tcp:::accept-established "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
37    "tcpsinfo_t *" "tcpinfo_t *"
38.Fn tcp:::accept-refused "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
39    "tcpsinfo_t *" "tcpinfo_t *"
40.Fn tcp:::connect-established "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
41    "tcpsinfo_t *" "tcpinfo_t *"
42.Fn tcp:::connect-refused "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
43    "tcpsinfo_t *" "tcpinfo_t *"
44.Fn tcp:::connect-request "pktinfo_t *" "csinfo_t *" "ipinfo_t *" \
45    "tcpsinfo_t *" "tcpinfo_t *"
46.Fn tcp:::receive "pktinfo_t *" "csinfo_t *" "ipinfo_t *" "tcpsinfo_t *" \
47    "tcpinfo_t *"
48.Fn tcp:::send "pktinfo_t *" "csinfo_t *" "ipinfo_t *" "tcpsinfo_t *" \
49    "tcpinfo_t *"
50.Fn tcp:::state-change "void *" "csinfo_t *" "void *" "tcpsinfo_t *" "void *" \
51    "tcplsinfo_t *"
52.Fn tcp:::siftr "siftrinfo_t *"
53.Sh DESCRIPTION
54The DTrace
55.Nm tcp
56provider allows users to trace events in the
57.Xr tcp 4
58protocol implementation.
59This provider is similar to the
60.Xr dtrace_ip 4
61and
62.Xr dtrace_udp 4
63providers, but additionally contains probes corresponding to protocol events at
64a level higher than packet reception and transmission.
65All
66.Nm tcp
67probes except for
68.Fn tcp:::state-change
69and
70.Fn tcp:::siftr
71have the same number and type of arguments.
72The last three arguments are used to describe a TCP segment: the
73.Vt ipinfo_t
74argument exposes the version-agnostic fields of the IP header, while the
75.Vt tcpinfo_t
76argument exposes the TCP header, and the
77.Vt tcpsinfo_t
78argument describes details of the corresponding TCP connection state, if any.
79Their fields are described in the ARGUMENTS section.
80.Pp
81The
82.Fn tcp:::accept-established
83probe fires when a remotely-initiated active TCP open succeeds.
84At this point the new connection is in the ESTABLISHED state, and the probe
85arguments expose the headers associated with the final ACK of the three-way
86handshake.
87The
88.Fn tcp:::accept-refused
89probe fires when a SYN arrives on a port without a listening socket.
90The probe arguments expose the headers associated with the RST to be transmitted
91to the remote host in response to the SYN segment.
92.Pp
93The
94.Fn tcp:::connect-established ,
95.Fn tcp:::connect-refused ,
96and
97.Fn tcp:::connect-request
98probes are similar to the
99.Ql accept
100probes, except that they correspond to locally-initiated TCP connections.
101The
102.Fn tcp:::connect-established
103probe fires when the SYN-ACK segment of a three-way handshake is received from
104the remote host and a final ACK is prepared for transmission.
105This occurs immediately after the local connection state transitions from
106SYN-SENT to ESTABLISHED.
107The probe arguments describe the headers associated with the received SYN-ACK
108segment.
109The
110.Fn tcp:::connect-refused
111probe fires when the local host receives a RST segment in response to a SYN
112segment, indicating that the remote host refused to open a connection.
113The probe arguments describe the IP and TCP headers associated with the received
114RST segment.
115The
116.Fn tcp:::connect-request
117probe fires as the kernel prepares to transmit the initial SYN segment of a
118three-way handshake.
119.Pp
120The
121.Fn tcp:::send
122and
123.Fn tcp:::receive
124probes fire when the host sends or receives a TCP packet, respectively.
125As with the
126.Xr dtrace_udp 4
127provider,
128.Nm tcp
129probes fire only for packets sent by or to the local host; forwarded packets are
130handled in the IP layer and are only visible to the
131.Xr dtrace_ip 4
132provider.
133.Pp
134The
135.Fn tcp:::state-change
136probe fires upon local TCP connection state transitions.
137Its first, third and fifth arguments are currently always
138.Dv NULL .
139Its last argument describes the from-state in the transition, and the to-state
140can be obtained from
141.Dv args[3]->tcps_state .
142.Pp
143The
144.Fn tcp:::siftr
145probe fires when a TCP segment is sent or received by the host.
146For a detailed description see
147.Xr siftr 4 .
148The
149.Vt siftrinfo_t
150argument provides the information about the TCP connection.
151.Sh ARGUMENTS
152The
153.Vt pktinfo_t
154argument is currently unimplemented and is included for compatibility with other
155implementations of this provider.
156Its fields are:
157.Bl -tag -width "uintptr_t pkt_addr" -offset indent
158.It Vt uintptr_t pkt_addr
159Always set to 0.
160.El
161.Pp
162The
163.Vt csinfo_t
164argument is currently unimplemented and is included for compatibility with other
165implementations of this provider.
166Its fields are:
167.Bl -tag -width "uintptr_t cs_addr" -offset indent
168.It Vt uintptr_t cs_addr
169Always set to 0.
170.It Vt uint64_t cs_cid
171A pointer to the
172.Vt struct inpcb
173for this packet, or
174.Dv NULL .
175.It Vt pid_t cs_pid
176Always set to 0.
177.El
178.Pp
179The
180.Vt ipinfo_t
181type is a version-agnostic representation of fields from an IP header.
182Its fields are described in the
183.Xr dtrace_ip 4
184manual page.
185.Pp
186The
187.Vt tcpsinfo_t
188type is used to provide a stable representation of TCP connection state.
189Some
190.Nm tcp
191probes, such as
192.Fn tcp:::accept-refused ,
193fire in a context where there is no TCP connection; this argument is
194.Dv NULL
195in that case.
196Its fields are:
197.Bl -tag -width "uint16_t tcps_lport" -offset indent
198.It Vt uintptr_t tcps_addr
199The address of the corresponding TCP control block.
200This is currently a pointer to a
201.Vt struct tcpcb .
202.It Vt int tcps_local
203A boolean indicating whether the connection is local to the host.
204Currently unimplemented and always set to -1.
205.It Vt int tcps_active
206A boolean indicating whether the connection was initiated by the local host.
207Currently unimplemented and always set to -1.
208.It Vt uint16_t tcps_lport
209Local TCP port.
210.It Vt uint16_t tcps_rport
211Remote TCP port.
212.It Vt string tcps_laddr
213Local address.
214.It Vt string tcps_raddr
215Remote address.
216.It Vt int32_t tcps_state
217Current TCP state.
218The valid TCP state values are given by the constants prefixed with
219.Ql TCPS_
220in
221.Pa /usr/lib/dtrace/tcp.d .
222.It Vt uint32_t tcps_iss
223Initial send sequence number.
224.It Vt uint32_t tcps_suna
225Initial sequence number of sent but unacknowledged data.
226.It Vt uint32_t tcps_snxt
227Next sequence number for send.
228.It Vt uint32_t tcps_rack
229Sequence number of received and acknowledged data.
230.It Vt uint32_t tcps_rnxt
231Next expected sequence number for receive.
232.It Vt u_long tcps_swnd
233TCP send window size.
234.It Vt int32_t tcps_snd_ws
235Window scaling factor for the TCP send window.
236.It Vt u_long tcps_rwnd
237TCP receive window size.
238.It Vt int32_t tcps_rcv_ws
239Window scaling factor for the TCP receive window.
240.It Vt u_long tcps_cwnd
241TCP congestion window size.
242.It Vt u_long tcps_cwnd_ssthresh
243Congestion window threshold at which slow start ends and congestion avoidance
244begins.
245.It Vt uint32_t tcps_sack_fack
246Last sequence number selectively acknowledged by the receiver.
247.It Vt uint32_t tcps_sack_snxt
248Next selectively acknowledged sequence number at which to begin retransmitting.
249.It Vt uint32_t tcps_rto
250Retransmission timeout, in milliseconds.
251.It Vt uint32_t tcps_mss
252Maximum segment size.
253.It Vt int tcps_retransmit
254A boolean indicating that the local sender is retransmitting data.
255.It Vt int tcps_srtt
256Smoothed round-trip time.
257.El
258.Pp
259The
260.Vt tcpinfo_t
261type exposes the fields in a TCP segment header in host order.
262Its fields are:
263.Bl -tag -width "struct tcphdr *tcp_hdr" -offset indent
264.It Vt uint16_t tcp_sport
265Source TCP port.
266.It Vt uint16_t tcp_dport
267Destination TCP port.
268.It Vt uint32_t tcp_seq
269Sequence number.
270.It Vt uint32_t tcp_ack
271Acknowledgement number.
272.It Vt uint8_t tcp_offset
273Data offset, in bytes.
274.It Vt uint8_t tcp_flags
275TCP flags.
276.It Vt uint16_t tcp_window
277TCP window size.
278.It Vt uint16_t tcp_checksum
279Checksum.
280.It Vt uint16_t tcp_urgent
281Urgent data pointer.
282.It Vt struct tcphdr *tcp_hdr
283A pointer to the raw TCP header.
284.El
285.Pp
286The
287.Vt tcplsinfo_t
288type is used by the
289.Fn tcp:::state-change
290probe to provide the from-state of a transition.
291Its fields are:
292.Bl -tag -width "int32_t tcps_state" -offset indent
293.It Vt int32_t tcps_state
294A TCP state.
295The valid TCP state values are given by the constants prefixed with
296.Ql TCPS_
297in
298.Pa /usr/lib/dtrace/tcp.d .
299.El
300.Pp
301The
302.Vt siftrinfo_t
303type is used by the
304.Fn tcp:::siftr
305probe to provide the state of the TCP connection.
306Its fields are:
307.Bl -tag -width "u_int sent_inflight_bytes" -offset indent
308.It Vt uint8_t direction
309Direction of packet that triggered the log message.
310Either
311.Qq 0
312for in, or
313.Qq 1
314for out.
315.It Vt uint8_t ipver
316The version of the IP protocol being used.
317Either
318.Qq 1
319for IPv4, or
320.Qq 2
321for IPv6.
322.It Vt uint16_t lport
323The TCP port that the local host is communicating via.
324.It Vt uint16_t rport
325The TCP port that the remote host is communicating via.
326.It Vt string laddr
327The IPv4 or IPv6 address of the local host.
328.It Vt string raddr
329The IPv4 or IPv6 address of the remote host.
330.It Vt uint32_t snd_cwnd
331The current congestion window (CWND) for the flow, in bytes.
332.It Vt uint32_t snd_wnd
333The current sending window for the flow, in bytes.
334The post scaled value is reported, except during the initial handshake (first
335few packets), during which time the unscaled value is reported.
336.It Vt uint32_t rcv_wnd
337The current receive window for the flow, in bytes.
338The post scaled value is always reported.
339.It Vt uint32_t t_flags2
340The current value of the t_flags2 for the flow.
341.It Vt uint32_t snd_ssthresh
342The slow start threshold (SSTHRESH) for the flow, in bytes.
343.It Vt int conn_state
344A TCP state.
345The valid TCP state values are given by the constants prefixed with
346.Ql TCPS_
347in
348.Pa /usr/lib/dtrace/tcp.d .
349.It Vt uint32_t mss
350The maximum segment size (MSS) for the flow, in bytes.
351.It Vt uint32_t srtt
352The current smoothed RTT (SRTT) for the flow in microseconds.
353.It Vt u_char sack_enabled
354SACK enabled indicator: 1 if SACK is enabled, 0 otherwise.
355.It Vt u_char snd_scale
356The current window scaling factor for the sending window.
357.It Vt u_char rcv_scale
358The current window scaling factor for the receiving window.
359.It Vt u_int t_flags
360The current value of the t_flags for the flow.
361.It Vt uint32_t rto
362The current retransmission timeout (RTO) for the flow in microseconds.
363Divide by HZ to get the timeout length in seconds.
364.It Vt u_int snd_buf_hiwater
365The current size of the socket send buffer in bytes.
366.It Vt u_int snd_buf_cc
367The current number of bytes in the socket send buffer.
368.It Vt u_int rcv_buf_hiwater
369The current size of the socket receive buffer in bytes.
370.It Vt u_int rcv_buf_cc
371The current number of bytes in the socket receive buffer.
372.It Vt u_int sent_inflight_bytes
373The current number of unacknowledged bytes in-flight.
374Bytes acknowledged via SACK are not excluded from this count.
375.It Vt int t_segqlen
376The current number of segments in the reassembly queue.
377.It Vt u_int flowid
378Flowid for the connection.
379A caveat: Zero '0' either represents a valid flowid or a default value when
380the flowid is not being set.
381.It Vt u_int flowtype
382Flow type for the connection.
383Flowtype defines which protocol fields are hashed to produce the flowid.
384A complete listing is available in
385.Pa /usr/include/sys/mbuf.h
386under
387.Dv M_HASHTYPE_* .
388.El
389.Sh FILES
390.Bl -tag -width "/usr/lib/dtrace/siftr.d" -compact
391.It Pa /usr/lib/dtrace/tcp.d
392DTrace type and translator definitions for all the probes of the
393.Nm tcp
394provider except the
395.Nm siftr
396probe.
397.It Pa /usr/lib/dtrace/siftr.d
398DTrace type and translator definitions for the
399.Nm siftr
400probe of the
401.Nm tcp
402provider.
403.El
404.Sh EXAMPLES
405The following script logs TCP segments in real time:
406.Bd -literal -offset indent
407#pragma D option quiet
408#pragma D option switchrate=10hz
409
410dtrace:::BEGIN
411{
412        printf(" %3s %15s:%-5s      %15s:%-5s %6s  %s\\n", "CPU",
413            "LADDR", "LPORT", "RADDR", "RPORT", "BYTES", "FLAGS");
414}
415
416tcp:::send
417{
418        this->length = args[2]->ip_plength - args[4]->tcp_offset;
419        printf(" %3d %16s:%-5d -> %16s:%-5d %6d  (", cpu, args[2]->ip_saddr,
420            args[4]->tcp_sport, args[2]->ip_daddr, args[4]->tcp_dport,
421            this->length);
422        printf("%s", args[4]->tcp_flags & TH_FIN ? "FIN|" : "");
423        printf("%s", args[4]->tcp_flags & TH_SYN ? "SYN|" : "");
424        printf("%s", args[4]->tcp_flags & TH_RST ? "RST|" : "");
425        printf("%s", args[4]->tcp_flags & TH_PUSH ? "PUSH|" : "");
426        printf("%s", args[4]->tcp_flags & TH_ACK ? "ACK|" : "");
427        printf("%s", args[4]->tcp_flags & TH_URG ? "URG|" : "");
428        printf("%s", args[4]->tcp_flags == 0 ? "null " : "");
429        printf("\\b)\\n");
430}
431
432tcp:::receive
433{
434        this->length = args[2]->ip_plength - args[4]->tcp_offset;
435        printf(" %3d %16s:%-5d <- %16s:%-5d %6d  (", cpu,
436            args[2]->ip_daddr, args[4]->tcp_dport, args[2]->ip_saddr,
437            args[4]->tcp_sport, this->length);
438        printf("%s", args[4]->tcp_flags & TH_FIN ? "FIN|" : "");
439        printf("%s", args[4]->tcp_flags & TH_SYN ? "SYN|" : "");
440        printf("%s", args[4]->tcp_flags & TH_RST ? "RST|" : "");
441        printf("%s", args[4]->tcp_flags & TH_PUSH ? "PUSH|" : "");
442        printf("%s", args[4]->tcp_flags & TH_ACK ? "ACK|" : "");
443        printf("%s", args[4]->tcp_flags & TH_URG ? "URG|" : "");
444        printf("%s", args[4]->tcp_flags == 0 ? "null " : "");
445        printf("\\b)\\n");
446}
447.Ed
448The following script logs TCP connection state changes as they occur:
449.Bd -literal -offset indent
450#pragma D option quiet
451#pragma D option switchrate=25hz
452
453int last[int];
454
455dtrace:::BEGIN
456{
457        printf("   %12s %-20s    %-20s %s\\n",
458            "DELTA(us)", "OLD", "NEW", "TIMESTAMP");
459}
460
461tcp:::state-change /last[args[1]->cs_cid] != 0/
462{
463        this->elapsed = (timestamp - last[args[1]->cs_cid]) / 1000;
464        printf("   %12d %-20s -> %-20s %d\\n", this->elapsed,
465            tcp_state_string[args[5]->tcps_state],
466            tcp_state_string[args[3]->tcps_state], timestamp);
467        last[args[1]->cs_cid] = timestamp;
468}
469
470tcp:::state-change
471/last[args[1]->cs_cid] == 0/
472{
473        printf("   %12s %-20s -> %-20s %d\\n", "-",
474            tcp_state_string[args[5]->tcps_state],
475            tcp_state_string[args[3]->tcps_state], timestamp);
476        last[args[1]->cs_cid] = timestamp;
477}
478.Ed
479The following script uses the siftr probe to show the current value of CWND
480and SSTHRESH when a packet is sent or received:
481.Bd -literal -offset indent
482#pragma D option quiet
483#pragma D option switchrate=10hz
484
485dtrace:::BEGIN
486{
487        printf(" %3s %16s:%-5s %16s:%-5s %10s %10s\\n",
488            "DIR", "LADDR", "LPORT", "RADDR", "RPORT", "CWND", "SSTHRESH");
489}
490
491tcp:::siftr
492{
493        printf(" %3s %16s:%-5d %16s:%-5d %10u %10u\\n",
494            siftr_dir_string[args[0]->direction],
495            args[0]->laddr, args[0]->lport, args[0]->raddr, args[0]->rport,
496            args[0]->snd_cwnd, args[0]->snd_ssthresh);
497}
498.Ed
499.Sh COMPATIBILITY
500This provider is compatible with the
501.Nm tcp
502provider in Solaris.
503.Sh SEE ALSO
504.Xr dtrace 1 ,
505.Xr dtrace_ip 4 ,
506.Xr dtrace_sctp 4 ,
507.Xr dtrace_udp 4 ,
508.Xr dtrace_udplite 4 ,
509.Xr siftr 4 ,
510.Xr tcp 4 ,
511.Xr SDT 9
512.Sh HISTORY
513The
514.Nm tcp
515provider first appeared in
516.Fx
51710.0.
518.Sh AUTHORS
519This manual page was written by
520.An Mark Johnston Aq Mt markj@FreeBSD.org .
521.Sh BUGS
522The
523.Vt tcps_local
524and
525.Vt tcps_active
526fields of
527.Vt tcpsinfo_t
528are not filled in by the translator.
529