xref: /linux/Documentation/netlink/specs/netdev.yaml (revision d9996de40b121d976a17515aada54c54350e3f21)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2
3name: netdev
4
5doc:
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc:
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc:
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc:
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc:
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc:
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc:
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc:
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc:
52          Device is capable of exposing receive HW timestamp via bpf_xdp_metadata_rx_timestamp().
53      -
54        name: hash
55        doc:
56          Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
57      -
58        name: vlan-tag
59        doc:
60          Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
61  -
62    type: flags
63    name: xsk-flags
64    entries:
65      -
66        name: tx-timestamp
67        doc:
68          HW timestamping egress packets is supported by the driver.
69      -
70        name: tx-checksum
71        doc:
72          L3 checksum HW offload is supported by the driver.
73  -
74    name: queue-type  # enum referenced by the `type` (queue set) and `queue-type` (qstats set) attributes
75    type: enum
76    entries: [ rx, tx ]
77  -
78    name: qstats-scope  # flags referenced by the `scope` attribute of the qstats set
79    type: flags
80    entries: [ queue ]
81
82attribute-sets:
83  -
84    name: dev
85    attributes:
86      -
87        name: ifindex
88        doc: netdev ifindex
89        type: u32
90        checks:
91          min: 1
92      -
93        name: pad
94        type: pad
95      -
96        name: xdp-features
97        doc: Bitmask of enabled xdp-features.
98        type: u64
99        enum: xdp-act
100      -
101        name: xdp-zc-max-segs
102        doc: max fragment count supported by ZC driver
103        type: u32
104        checks:
105          min: 1
106      -
107        name: xdp-rx-metadata-features
108        doc: Bitmask of supported XDP receive metadata features.
109             See Documentation/networking/xdp-rx-metadata.rst for more details.
110        type: u64
111        enum: xdp-rx-metadata
112      -
113        name: xsk-features
114        doc: Bitmask of enabled AF_XDP features.
115        type: u64
116        enum: xsk-flags
117  -
118    name: page-pool
119    attributes:
120      -
121        name: id
122        doc: Unique ID of a Page Pool instance.
123        type: uint
124        checks:
125          min: 1
126          max: u32-max
127      -
128        name: ifindex
129        doc: |
130          ifindex of the netdev to which the pool belongs.
131          May be reported as 0 if the page pool was allocated for a netdev
132          which got destroyed already (page pools may outlast their netdevs
133          because they wait for all memory to be returned).
134        type: u32
135        checks:
136          min: 1
137          max: s32-max
138      -
139        name: napi-id
140        doc: Id of NAPI using this Page Pool instance.
141        type: uint
142        checks:
143          min: 1
144          max: u32-max
145      -
146        name: inflight
147        type: uint
148        doc: |
149          Number of outstanding references to this page pool (allocated
150          but yet to be freed pages). Allocated pages may be held in
151          socket receive queues, driver receive ring, page pool recycling
152          ring, the page pool cache, etc.
153      -
154        name: inflight-mem
155        type: uint
156        doc: |
157          Amount of memory held by inflight pages.
158      -
159        name: detach-time
160        type: uint
161        doc: |
162          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
163          the driver. Once detached Page Pool can no longer be used to
164          allocate memory.
165          Page Pools wait for all the memory allocated from them to be freed
166          before truly disappearing. "Detached" Page Pools cannot be
167          "re-attached", they are just waiting to disappear.
168          Attribute is absent if Page Pool has not been detached, and
169          can still be used to allocate new memory.
170      -
171        name: dmabuf
172        doc: ID of the dmabuf this page-pool is attached to.
173        type: u32
174  -
175    name: page-pool-info
176    subset-of: page-pool
177    attributes:
178      -
179        name: id
180      -
181        name: ifindex
182  -
183    name: page-pool-stats
184    doc: |
185      Page pool statistics, see docs for struct page_pool_stats
186      for information about individual statistics.
187    attributes:
188      -
189        name: info
190        doc: Page pool identifying information.
191        type: nest
192        nested-attributes: page-pool-info
193      -
194        name: alloc-fast
195        type: uint
196        value: 8 # reserve some attr ids in case we need more metadata later
197      -
198        name: alloc-slow
199        type: uint
200      -
201        name: alloc-slow-high-order
202        type: uint
203      -
204        name: alloc-empty
205        type: uint
206      -
207        name: alloc-refill
208        type: uint
209      -
210        name: alloc-waive
211        type: uint
212      -
213        name: recycle-cached
214        type: uint
215      -
216        name: recycle-cache-full
217        type: uint
218      -
219        name: recycle-ring
220        type: uint
221      -
222        name: recycle-ring-full
223        type: uint
224      -
225        name: recycle-released-refcnt
226        type: uint
227
228  -
229    name: napi
230    attributes:
231      -
232        name: ifindex
233        doc: ifindex of the netdevice to which NAPI instance belongs.
234        type: u32
235        checks:
236          min: 1
237      -
238        name: id
239        doc: ID of the NAPI instance.
240        type: u32
241      -
242        name: irq
243        doc: The associated interrupt vector number for the NAPI instance.
244        type: u32
245      -
246        name: pid
247        doc: PID of the napi thread, if NAPI is configured to operate in
248             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
249             softirq context), the attribute will be absent.
250        type: u32
251  -
252    name: queue
253    attributes:
254      -
255        name: id
256        doc: Queue index; most queue types are indexed like a C array, with
257             indexes starting at 0 and ending at queue count - 1. Queue indexes
258             are scoped to an interface and queue type.
259        type: u32
260      -
261        name: ifindex
262        doc: ifindex of the netdevice to which the queue belongs.
263        type: u32
264        checks:
265          min: 1
266      -
267        name: type
268        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
269        type: u32
270        enum: queue-type
271      -
272        name: napi-id
273        doc: ID of the NAPI instance which services this queue.
274        type: u32
275      -
276        name: dmabuf
277        doc: ID of the dmabuf attached to this queue, if any.
278        type: u32
279
280  -
281    name: qstats
282    doc: |
283      Get device statistics, scoped to a device or a queue.
284      These statistics extend (and partially duplicate) statistics available
285      in struct rtnl_link_stats64.
286      Value of the `scope` attribute determines how statistics are
287      aggregated. When aggregated for the entire device the statistics
288      represent the total number of events since last explicit reset of
289      the device (i.e. not a reconfiguration like changing queue count).
290      When reported per-queue, however, the statistics may not add
291      up to the total number of events, will only be reported for currently
292      active objects, and will likely report the number of events since last
293      reconfiguration.
294    attributes:
295      -
296        name: ifindex
297        doc: ifindex of the netdevice to which stats belong.
298        type: u32
299        checks:
300          min: 1
301      -
302        name: queue-type
303        doc: Queue type as rx, tx, for queue-id.
304        type: u32
305        enum: queue-type
306      -
307        name: queue-id
308        doc: Queue ID, if stats are scoped to a single queue instance.
309        type: u32
310      -
311        name: scope
312        doc: |
313          What object type should be used to iterate over the stats.
314        type: uint
315        enum: qstats-scope
316      -
317        name: rx-packets
318        doc: |
319          Number of wire packets successfully received and passed to the stack.
320          For drivers supporting XDP, XDP is considered the first layer
321          of the stack, so packets consumed by XDP are still counted here.
322        type: uint
323        value: 8 # reserve some attr ids in case we need more metadata later
324      -
325        name: rx-bytes
326        doc: Successfully received bytes, see `rx-packets`.
327        type: uint
328      -
329        name: tx-packets
330        doc: |
331          Number of wire packets successfully sent. Packet is considered to be
332          successfully sent once it is in device memory (usually this means
333          the device has issued a DMA completion for the packet).
334        type: uint
335      -
336        name: tx-bytes
337        doc: Successfully sent bytes, see `tx-packets`.
338        type: uint
339      -
340        name: rx-alloc-fail
341        doc: |
342          Number of times skb or buffer allocation failed on the Rx datapath.
343          Allocation failure may, or may not result in a packet drop, depending
344          on driver implementation and whether system recovers quickly.
345        type: uint
346      -
347        name: rx-hw-drops
348        doc: |
349          Number of all packets which entered the device, but never left it,
350          including but not limited to: packets dropped due to lack of buffer
351          space, processing errors, explicit or implicit policies and packet
352          filters.
353        type: uint
354      -
355        name: rx-hw-drop-overruns
356        doc: |
357          Number of packets dropped due to transient lack of resources, such as
358          buffer space, host descriptors etc.
359        type: uint
360      -
361        name: rx-csum-complete
362        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
363        type: uint
364      -
365        name: rx-csum-unnecessary
366        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
367        type: uint
368      -
369        name: rx-csum-none
370        doc: Number of packets that were not checksummed by device.
371        type: uint
372      -
373        name: rx-csum-bad
374        doc: |
375          Number of packets with bad checksum. The packets are not discarded,
376          but still delivered to the stack.
377        type: uint
378      -
379        name: rx-hw-gro-packets
380        doc: |
381          Number of packets that were coalesced from smaller packets by the device.
382          Counts only packets coalesced with the HW-GRO netdevice feature,
383          LRO-coalesced packets are not counted.
384        type: uint
385      -
386        name: rx-hw-gro-bytes
387        doc: See `rx-hw-gro-packets`.
388        type: uint
389      -
390        name: rx-hw-gro-wire-packets
391        doc: |
392          Number of packets that were coalesced to bigger packets with the HW-GRO
393          netdevice feature. LRO-coalesced packets are not counted.
394        type: uint
395      -
396        name: rx-hw-gro-wire-bytes
397        doc: See `rx-hw-gro-wire-packets`.
398        type: uint
399      -
400        name: rx-hw-drop-ratelimits
401        doc: |
402          Number of the packets dropped by the device due to the received
403          packets bitrate exceeding the device rate limit.
404        type: uint
405      -
406        name: tx-hw-drops
407        doc: |
408          Number of packets that arrived at the device but never left it,
409          encompassing packets dropped for reasons such as processing errors, as
410          well as those affected by explicitly defined policies and packet
411          filtering criteria.
412        type: uint
413      -
414        name: tx-hw-drop-errors
415        doc: Number of packets dropped because they were invalid or malformed.
416        type: uint
417      -
418        name: tx-csum-none
419        doc: |
420          Number of packets that did not require the device to calculate the
421          checksum.
422        type: uint
423      -
424        name: tx-needs-csum
425        doc: |
426          Number of packets that required the device to calculate the checksum.
427        type: uint
428      -
429        name: tx-hw-gso-packets
430        doc: |
431          Number of packets that necessitated segmentation into smaller packets
432          by the device.
433        type: uint
434      -
435        name: tx-hw-gso-bytes
436        doc: See `tx-hw-gso-packets`.
437        type: uint
438      -
439        name: tx-hw-gso-wire-packets
440        doc: |
441          Number of wire-sized packets generated by processing
442          `tx-hw-gso-packets`.
443        type: uint
444      -
445        name: tx-hw-gso-wire-bytes
446        doc: See `tx-hw-gso-wire-packets`.
447        type: uint
448      -
449        name: tx-hw-drop-ratelimits
450        doc: |
451          Number of the packets dropped by the device due to the transmit
452          packets bitrate exceeding the device rate limit.
453        type: uint
454      -
455        name: tx-stop
456        doc: |
457          Number of times driver paused accepting new tx packets
458          from the stack to this queue, because the queue was full.
459          Note that if BQL is supported and enabled on the device
460          the networking stack will avoid queuing a lot of data at once.
461        type: uint
462      -
463        name: tx-wake
464        doc: |
465          Number of times driver re-started accepting send
466          requests to this queue from the stack.
467        type: uint
468  -
469    name: queue-id
470    subset-of: queue
471    attributes:
472      -
473        name: id
474      -
475        name: type
476  -
477    name: dmabuf
478    attributes:
479      -
480        name: ifindex
481        doc: netdev ifindex to bind the dmabuf to.
482        type: u32
483        checks:
484          min: 1
485      -
486        name: queues
487        doc: receive queues to bind the dmabuf to.
488        type: nest
489        nested-attributes: queue-id
490        multi-attr: true
491      -
492        name: fd
493        doc: dmabuf file descriptor to bind.
494        type: u32
495      -
496        name: id
497        doc: id of the dmabuf binding
498        type: u32
499        checks:
500          min: 1
501
502operations:
503  list:
504    -
505      name: dev-get
506      doc: Get / dump information about a netdev.
507      attribute-set: dev
508      do:
509        request:
510          attributes:
511            - ifindex
512        reply: &dev-all
513          attributes:
514            - ifindex
515            - xdp-features
516            - xdp-zc-max-segs
517            - xdp-rx-metadata-features
518            - xsk-features
519      dump:
520        reply: *dev-all
521    -
522      name: dev-add-ntf
523      doc: Notification about device appearing.
524      notify: dev-get
525      mcgrp: mgmt
526    -
527      name: dev-del-ntf
528      doc: Notification about device disappearing.
529      notify: dev-get
530      mcgrp: mgmt
531    -
532      name: dev-change-ntf
533      doc: Notification about device configuration being changed.
534      notify: dev-get
535      mcgrp: mgmt
536    -
537      name: page-pool-get
538      doc: |
539        Get / dump information about Page Pools.
540        (Only Page Pools associated with a net_device can be listed.)
541      attribute-set: page-pool
542      do:
543        request:
544          attributes:
545            - id
546        reply: &pp-reply
547          attributes:
548            - id
549            - ifindex
550            - napi-id
551            - inflight
552            - inflight-mem
553            - detach-time
554            - dmabuf
555      dump:
556        reply: *pp-reply
557      config-cond: page-pool
558    -
559      name: page-pool-add-ntf
560      doc: Notification about page pool appearing.
561      notify: page-pool-get
562      mcgrp: page-pool
563      config-cond: page-pool
564    -
565      name: page-pool-del-ntf
566      doc: Notification about page pool disappearing.
567      notify: page-pool-get
568      mcgrp: page-pool
569      config-cond: page-pool
570    -
571      name: page-pool-change-ntf
572      doc: Notification about page pool configuration being changed.
573      notify: page-pool-get
574      mcgrp: page-pool
575      config-cond: page-pool
576    -
577      name: page-pool-stats-get
578      doc: Get page pool statistics.
579      attribute-set: page-pool-stats
580      do:
581        request:
582          attributes:
583            - info
584        reply: &pp-stats-reply
585          attributes:
586            - info
587            - alloc-fast
588            - alloc-slow
589            - alloc-slow-high-order
590            - alloc-empty
591            - alloc-refill
592            - alloc-waive
593            - recycle-cached
594            - recycle-cache-full
595            - recycle-ring
596            - recycle-ring-full
597            - recycle-released-refcnt
598      dump:
599        reply: *pp-stats-reply
600      config-cond: page-pool-stats
601    -
602      name: queue-get
603      doc: Get queue information from the kernel.
604           Only configured queues will be reported (as opposed to all available
605           hardware queues).
606      attribute-set: queue
607      do:
608        request:
609          attributes:
610            - ifindex
611            - type
612            - id
613        reply: &queue-get-op
614          attributes:
615            - id
616            - type
617            - napi-id
618            - ifindex
619            - dmabuf
620      dump:
621        request:
622          attributes:
623            - ifindex
624        reply: *queue-get-op
625    -
626      name: napi-get
627      doc: Get information about NAPI instances configured on the system.
628      attribute-set: napi
629      do:
630        request:
631          attributes:
632            - id
633        reply: &napi-get-op
634          attributes:
635            - id
636            - ifindex
637            - irq
638            - pid
639      dump:
640        request:
641          attributes:
642            - ifindex
643        reply: *napi-get-op
644    -
645      name: qstats-get
646      doc: |
647        Get / dump fine grained statistics. Which statistics are reported
648        depends on the device and the driver, and whether the driver stores
649        software counters per-queue.
650      attribute-set: qstats
651      dump:
652        request:
653          attributes:
654            - ifindex
655            - scope
656        reply:
657          attributes:
658            - ifindex
659            - queue-type
660            - queue-id
661            - rx-packets
662            - rx-bytes
663            - tx-packets
664            - tx-bytes
665    -
666      name: bind-rx
667      doc: Bind dmabuf to netdev
668      attribute-set: dmabuf
669      flags: [ admin-perm ]
670      do:
671        request:
672          attributes:
673            - ifindex
674            - fd
675            - queues
676        reply:
677          attributes:
678            - id
679
680kernel-family:
681  headers: [ "linux/list.h"]
682  sock-priv: struct list_head
683
684mcast-groups:
685  list:
686    -
687      name: mgmt  # used by dev-add-ntf, dev-del-ntf and dev-change-ntf
688    -
689      name: page-pool  # used by page-pool-add-ntf, page-pool-del-ntf and page-pool-change-ntf
690