xref: /freebsd/share/man/man4/ioat.4 (revision bec7ff798a8688da71c4b29d426a0cc887be677c)
1.\" Copyright (c) 2015 EMC / Isilon Storage Division
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.\" $FreeBSD$
26.\"
27.Dd May 3, 2016
28.Dt IOAT 4
29.Os
30.Sh NAME
31.Nm I/OAT
32.Nd Intel I/O Acceleration Technology
33.Sh SYNOPSIS
34To compile this driver into your kernel,
35place the following line in your kernel configuration file:
36.Bd -ragged -offset indent
37.Cd "device ioat"
38.Ed
39.Pp
40Or, to load the driver as a module at boot, place the following line in
41.Xr loader.conf 5 :
42.Bd -literal -offset indent
43ioat_load="YES"
44.Ed
45.Pp
46In
47.Xr loader.conf 5 :
48.Pp
49.Cd hw.ioat.force_legacy_interrupts=0
50.Pp
51In
52.Xr loader.conf 5 or
53.Xr sysctl.conf 5 :
54.Pp
55.Cd hw.ioat.enable_ioat_test=0
56.Cd hw.ioat.debug_level=0
57(only critical errors; maximum of 3)
58.Pp
59.Ft typedef void
60.Fn (*bus_dmaengine_callback_t) "void *arg" "int error"
61.Pp
62.Ft bus_dmaengine_t
63.Fn ioat_get_dmaengine "uint32_t channel_index"
64.Ft void
65.Fn ioat_put_dmaengine "bus_dmaengine_t dmaengine"
66.Ft int
67.Fn ioat_get_hwversion "bus_dmaengine_t dmaengine"
68.Ft size_t
69.Fn ioat_get_max_io_size "bus_dmaengine_t dmaengine"
70.Ft int
71.Fn ioat_set_interrupt_coalesce "bus_dmaengine_t dmaengine" "uint16_t delay"
72.Ft uint16_t
73.Fn ioat_get_max_coalesce_period "bus_dmaengine_t dmaengine"
74.Ft void
75.Fn ioat_acquire "bus_dmaengine_t dmaengine"
76.Ft int
77.Fn ioat_acquire_reserve "bus_dmaengine_t dmaengine" "uint32_t n" "int mflags"
78.Ft void
79.Fn ioat_release "bus_dmaengine_t dmaengine"
80.Ft struct bus_dmadesc *
81.Fo ioat_copy
82.Fa "bus_dmaengine_t dmaengine"
83.Fa "bus_addr_t dst"
84.Fa "bus_addr_t src"
85.Fa "bus_size_t len"
86.Fa "bus_dmaengine_callback_t callback_fn"
87.Fa "void *callback_arg"
88.Fa "uint32_t flags"
89.Fc
90.Ft struct bus_dmadesc *
91.Fo ioat_copy_8k_aligned
92.Fa "bus_dmaengine_t dmaengine"
93.Fa "bus_addr_t dst1"
94.Fa "bus_addr_t dst2"
95.Fa "bus_addr_t src1"
96.Fa "bus_addr_t src2"
97.Fa "bus_dmaengine_callback_t callback_fn"
98.Fa "void *callback_arg"
99.Fa "uint32_t flags"
100.Fc
101.Ft struct bus_dmadesc *
102.Fo ioat_copy_crc
103.Fa "bus_dmaengine_t dmaengine"
104.Fa "bus_addr_t dst"
105.Fa "bus_addr_t src"
106.Fa "bus_size_t len"
107.Fa "uint32_t *initialseed"
108.Fa "bus_addr_t crcptr"
109.Fa "bus_dmaengine_callback_t callback_fn"
110.Fa "void *callback_arg"
111.Fa "uint32_t flags"
112.Fc
113.Ft struct bus_dmadesc *
114.Fo ioat_crc
115.Fa "bus_dmaengine_t dmaengine"
116.Fa "bus_addr_t src"
117.Fa "bus_size_t len"
118.Fa "uint32_t *initialseed"
119.Fa "bus_addr_t crcptr"
120.Fa "bus_dmaengine_callback_t callback_fn"
121.Fa "void *callback_arg"
122.Fa "uint32_t flags"
123.Fc
124.Ft struct bus_dmadesc *
125.Fo ioat_blockfill
126.Fa "bus_dmaengine_t dmaengine"
127.Fa "bus_addr_t dst"
128.Fa "uint64_t fillpattern"
129.Fa "bus_size_t len"
130.Fa "bus_dmaengine_callback_t callback_fn"
131.Fa "void *callback_arg"
132.Fa "uint32_t flags"
133.Fc
134.Ft struct bus_dmadesc *
135.Fo ioat_null
136.Fa "bus_dmaengine_t dmaengine"
137.Fa "bus_dmaengine_callback_t callback_fn"
138.Fa "void *callback_arg"
139.Fa "uint32_t flags"
140.Fc
141.Sh DESCRIPTION
142The
143.Nm
144driver provides a kernel API to a variety of DMA engines on some Intel server
145platforms.
146.Pp
147There are a number of DMA channels per CPU package.
148(Typically 4 or 8.)
149Each may be used independently.
150Operations on a single channel proceed sequentially.
151.Pp
152Blockfill operations can be used to write a 64-bit pattern to memory.
153.Pp
154Copy operations can be used to offload memory copies to the DMA engines.
155.Pp
156Null operations do nothing, but may be used to test the interrupt and callback
157mechanism.
158.Pp
159All operations can optionally trigger an interrupt at completion with the
160.Ar DMA_INT_EN
161flag.
162For example, a user might submit multiple operations to the same channel and
163only enable an interrupt and callback for the last operation.
164.Pp
165The hardware can delay and coalesce interrupts on a given channel for a
166configurable period of time, in microseconds.
167This may be desired to reduce the processing and interrupt overhead per
168descriptor, especially for workflows consisting of many small operations.
169Software can control this on a per-channel basis with the
170.Fn ioat_set_interrupt_coalesce
171API.
172The
173.Fn ioat_get_max_coalesce_period
174API can be used to determine the maximum coalescing period supported by the
175hardware, in microseconds.
176Current platforms support up to a 16.383 millisecond coalescing period.
177Optimal configuration will vary by workflow and desired operation latency.
178.Pp
179All operations are safe to use in a non-blocking context with the
180.Ar DMA_NO_WAIT
181flag.
182(Of course, allocations may fail and operations requested with
183.Ar DMA_NO_WAIT
184may return NULL.)
185.Pp
186Operations that depend on the result of prior operations should use
187.Ar DMA_FENCE .
188For example, such a scenario can happen when two related DMA operations are
189queued:
190first, a DMA copy to one location (A), followed directly by a DMA copy
191from A to B.
192In this scenario, some classes of I/OAT hardware may prefetch A for the second
193operation before it is written by the first operation.
194To avoid reading a stale value in sequences of dependent operations, use
195.Ar DMA_FENCE .
196.Pp
197All operations, as well as
198.Fn ioat_get_dmaengine ,
199can return NULL in special circumstances,
200such as when the
201.Nm
202driver is being unloaded, or the administrator has induced a hardware reset, or
203a usage error has resulted in a hardware error state that needs to be recovered
204from.
205.Pp
206It is invalid to attempt to submit new DMA operations in a
207.Fa bus_dmaengine_callback_t
208context.
209.Pp
210The CRC operations have three distinct modes.
211The default mode is to accumulate.
212By accumulating over multiple descriptors, a user may gather a CRC over several
213chunks of memory and only write out the result once.
214.Pp
215The
216.Ar DMA_CRC_STORE
217flag causes the operation to emit the CRC32C result.
218If
219.Ar DMA_CRC_INLINE
220is set, the result is written inline with the destination data (or source in
221.Fn ioat_crc
222mode).
223If
224.Ar DMA_CRC_INLINE
225is not set, the result is written to the provided
226.Fa crcptr .
227.Pp
228Similarly, the
229.Ar DMA_CRC_TEST
230flag causes the operation to compare the CRC32C result to an existing checksum.
231If
232.Ar DMA_CRC_INLINE
233is set, the result is compared against the inline four bytes trailing the
234source data.
235If it is not set, the result is compared against the value pointed to by
236.Fa crcptr .
237.Pp
238.Fn ioat_copy_crc
239calculates a CRC32C while copying data.
240.Fn ioat_crc
241only computes a CRC32C of some data.
242If the
243.Fa initialseed
244argument to either routine is non-NULL, the CRC32C engine is initialized with
245the value it points to.
246.Sh USAGE
247A typical user will look up the DMA engine object for a given channel with
248.Fn ioat_get_dmaengine .
249When the user wants to offload a copy, they will first
250.Fn ioat_acquire
251the
252.Ar bus_dmaengine_t
253object for exclusive access to enqueue operations on that channel.
254Optionally, the user can reserve space by using
255.Fn ioat_acquire_reserve
256instead.
257If
258.Fn ioat_acquire_reserve
259succeeds, there is guaranteed to be room for
260.Fa n
261new operations in the internal ring buffer.
262.Pp
263Then, they will submit one or more operations using
264.Fn ioat_blockfill ,
265.Fn ioat_copy ,
266.Fn ioat_copy_8k_aligned ,
267.Fn ioat_copy_crc ,
268.Fn ioat_crc ,
269or
270.Fn ioat_null .
271After queuing one or more individual DMA operations, they will
272.Fn ioat_release
273the
274.Ar bus_dmaengine_t
275to drop their exclusive access to the channel.
276The routine they provided for the
277.Fa callback_fn
278argument will be invoked with the provided
279.Fa callback_arg
280when the operation is complete.
281When they are finished with the
282.Ar bus_dmaengine_t ,
283the user should
284.Fn ioat_put_dmaengine .
285.Pp
286Users MUST NOT block between
287.Fn ioat_acquire
288and
289.Fn ioat_release .
290Users SHOULD NOT hold
291.Ar bus_dmaengine_t
292references for a very long time, since long-held references can delay fault
293recovery and kernel module unload.
294.Pp
295For an example of usage, see
296.Pa src/sys/dev/ioat/ioat_test.c .
297.Sh FILES
298.Bl -tag
299.It Pa /dev/ioat_test
300test device for
301.Xr ioatcontrol 8
302.El
303.Sh SEE ALSO
304.Xr ioatcontrol 8
305.Sh HISTORY
306The
307.Nm
308driver first appeared in
309.Fx 11.0 .
310.Sh AUTHORS
311The
312.Nm
313driver was developed by
314.An \&Jim Harris Aq Mt jimharris@FreeBSD.org ,
315.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com ,
316and
317.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
318This manual page was written by
319.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
320.Sh CAVEATS
321Copy operations take bus addresses as parameters, not virtual addresses.
322.Pp
323Buffers for individual copy operations must be physically contiguous.
324.Pp
325Copies larger than the maximum transfer size (1 MB, but may vary by hardware)
326are not supported.
327Future versions will likely support this by breaking up the transfer into
328smaller sizes.
329.Sh BUGS
330The
331.Nm
332driver only supports blockfill, copy, CRC, and null operations at this time.
333The driver does not yet support advanced DMA modes, such as XOR, that some
334I/OAT devices support.
335