xref: /linux/Documentation/netlink/specs/netdev.yaml (revision 6be87fbb27763c2999e1c69bbec1f3a63cf05422)
1# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
2---
3name: netdev
4
5doc: >-
6  netdev configuration over generic netlink.
7
8definitions:
9  -
10    type: flags
11    name: xdp-act
12    render-max: true
13    entries:
14      -
15        name: basic
16        doc: >-
17          XDP features set supported by all drivers
18          (XDP_ABORTED, XDP_DROP, XDP_PASS, XDP_TX)
19      -
20        name: redirect
21        doc: >-
22          The netdev supports XDP_REDIRECT
23      -
24        name: ndo-xmit
25        doc: >-
26          This feature informs if netdev implements ndo_xdp_xmit callback.
27      -
28        name: xsk-zerocopy
29        doc: >-
30          This feature informs if netdev supports AF_XDP in zero copy mode.
31      -
32        name: hw-offload
33        doc: >-
34          This feature informs if netdev supports XDP hw offloading.
35      -
36        name: rx-sg
37        doc: >-
38          This feature informs if netdev implements non-linear XDP buffer
39          support in the driver napi callback.
40      -
41        name: ndo-xmit-sg
42        doc: >-
43          This feature informs if netdev implements non-linear XDP buffer
44          support in ndo_xdp_xmit callback.
45  -
    # Flag set used as the `enum` of the `xdp-rx-metadata-features`
    # attribute in the `dev` attribute set.
46    type: flags
47    name: xdp-rx-metadata
48    entries:
49      -
50        name: timestamp
51        doc: |
52          Device is capable of exposing receive HW timestamp via
53          bpf_xdp_metadata_rx_timestamp().
54      -
55        name: hash
56        doc: |
57          Device is capable of exposing receive packet hash via
58          bpf_xdp_metadata_rx_hash().
59      -
60        name: vlan-tag
61        doc: |
62          Device is capable of exposing receive packet VLAN tag via
63          bpf_xdp_metadata_rx_vlan_tag().
64  -
    # Flag set used as the `enum` of the `xsk-features` attribute in the
    # `dev` attribute set.
65    type: flags
66    name: xsk-flags
67    entries:
68      -
69        name: tx-timestamp
70        doc: >-
71          HW timestamping egress packets is supported by the driver.
72      -
73        name: tx-checksum
74        doc: >-
75          L3 checksum HW offload is supported by the driver.
76      -
77        name: tx-launch-time-fifo
78        doc: >-
79          Launch time HW offload is supported by the driver.
80  -
    # Enum used by the `type` attribute of the `queue` attribute set and by
    # the `queue-type` attribute of the `qstats` attribute set.
81    name: queue-type
82    type: enum
83    entries: [rx, tx]
84  -
    # Flag set used by the `scope` attribute of the `qstats` attribute set;
    # `queue` is its only entry.
85    name: qstats-scope
86    type: flags
87    entries: [queue]
88  -
    # Enum used by the `threaded` attribute of the `napi` attribute set.
89    name: napi-threaded
90    type: enum
91    entries: [disabled, enabled, busy-poll]
92
93attribute-sets:
94  -
95    name: dev
96    attributes:
97      -
98        name: ifindex
99        doc: netdev ifindex
100        type: u32
101        checks:
102          min: 1
103      -
104        name: pad
105        type: pad
106      -
107        name: xdp-features
108        doc: Bitmask of enabled xdp-features.
109        type: u64
110        enum: xdp-act
111      -
112        name: xdp-zc-max-segs
113        doc: max fragment count supported by ZC driver
114        type: u32
115        checks:
116          min: 1
117      -
118        name: xdp-rx-metadata-features
119        doc: Bitmask of supported XDP receive metadata features.
120             See Documentation/networking/xdp-rx-metadata.rst for more details.
121        type: u64
122        enum: xdp-rx-metadata
123      -
124        name: xsk-features
125        doc: Bitmask of enabled AF_XDP features.
126        type: u64
127        enum: xsk-flags
128  -
    # Defines no attributes of its own; used as the nested type of the
    # `io-uring` attribute in both the `page-pool` and `queue` sets.
129    name: io-uring-provider-info
130    attributes: []
131  -
132    name: page-pool
133    attributes:
134      -
135        name: id
136        doc: Unique ID of a Page Pool instance.
137        type: uint
138        checks:
139          min: 1
140          max: u32-max
141      -
142        name: ifindex
143        doc: |
144          ifindex of the netdev to which the pool belongs.
145          May not be reported if the page pool was allocated for a netdev
146          which got destroyed already (page pools may outlast their netdevs
147          because they wait for all memory to be returned).
148        type: u32
149        checks:
150          min: 1
151          max: s32-max
152      -
153        name: napi-id
154        doc: Id of NAPI using this Page Pool instance.
155        type: uint
156        checks:
157          min: 1
158          max: u32-max
159      -
160        name: inflight
161        type: uint
162        doc: |
163          Number of outstanding references to this page pool (allocated
164          but yet to be freed pages). Allocated pages may be held in
165          socket receive queues, driver receive ring, page pool recycling
166          ring, the page pool cache, etc.
167      -
168        name: inflight-mem
169        type: uint
170        doc: |
171          Amount of memory held by inflight pages.
172      -
173        name: detach-time
174        type: uint
175        doc: |
176          Seconds in CLOCK_BOOTTIME of when Page Pool was detached by
177          the driver. Once detached Page Pool can no longer be used to
178          allocate memory.
179          Page Pools wait for all the memory allocated from them to be freed
180          before truly disappearing. "Detached" Page Pools cannot be
181          "re-attached", they are just waiting to disappear.
182          Attribute is absent if Page Pool has not been detached, and
183          can still be used to allocate new memory.
184      -
185        name: dmabuf
186        doc: ID of the dmabuf this page-pool is attached to.
187        type: u32
188      -
189        name: io-uring
190        doc: io_uring memory provider information.
191        type: nest
192        nested-attributes: io-uring-provider-info
193  -
    # Subset of `page-pool` limited to `id` and `ifindex`; nested under the
    # `info` attribute of the `page-pool-stats` set.
194    name: page-pool-info
195    subset-of: page-pool
196    attributes:
197      -
198        name: id
199      -
200        name: ifindex
201  -
202    name: page-pool-stats
203    doc: |
204      Page pool statistics, see docs for struct page_pool_stats
205      for information about individual statistics.
206    attributes:
207      -
208        name: info
209        doc: Page pool identifying information.
210        type: nest
211        nested-attributes: page-pool-info
212      -
213        name: alloc-fast
214        type: uint
215        value: 8  # reserve some attr ids in case we need more metadata later
216      -
217        name: alloc-slow
218        type: uint
219      -
220        name: alloc-slow-high-order
221        type: uint
222      -
223        name: alloc-empty
224        type: uint
225      -
226        name: alloc-refill
227        type: uint
228      -
229        name: alloc-waive
230        type: uint
231      -
232        name: recycle-cached
233        type: uint
234      -
235        name: recycle-cache-full
236        type: uint
237      -
238        name: recycle-ring
239        type: uint
240      -
241        name: recycle-ring-full
242        type: uint
243      -
244        name: recycle-released-refcnt
245        type: uint
246
247  -
248    name: napi
249    attributes:
250      -
251        name: ifindex
252        doc: ifindex of the netdevice to which NAPI instance belongs.
253        type: u32
254        checks:
255          min: 1
256      -
257        name: id
258        doc: ID of the NAPI instance.
259        type: u32
260      -
261        name: irq
262        doc: The associated interrupt vector number for the NAPI
263        type: u32
264      -
265        name: pid
266        doc: PID of the NAPI thread, if NAPI is configured to operate in
267             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
268             softirq context), the attribute will be absent.
269        type: u32
270      -
271        name: defer-hard-irqs
272        doc: The number of consecutive empty polls before IRQ deferral ends
273             and hardware IRQs are re-enabled.
274        type: u32
275        checks:
276          max: s32-max
277      -
278        name: gro-flush-timeout
279        doc: The timeout, in nanoseconds, of when to trigger the NAPI watchdog
280             timer which schedules NAPI processing. Additionally, a non-zero
281             value will also prevent GRO from flushing recent super-frames at
282             the end of a NAPI cycle. This may add receive latency in exchange
283             for reducing the number of frames processed by the network stack.
284        type: uint
285      -
286        name: irq-suspend-timeout
287        doc: The timeout, in nanoseconds, of how long to suspend IRQ
288             processing, if event polling finds events.
289        type: uint
290      -
291        name: threaded
292        doc: Whether the NAPI is configured to operate in threaded polling
293             mode. If this is set to enabled then the NAPI context operates
294             in threaded polling mode. If this is set to busy-poll, then the
295             threaded polling mode also busy polls.
296        type: u32
297        enum: napi-threaded
298  -
    # Defines no attributes of its own; used as the nested type of the
    # `xsk` attribute in the `queue` set.
299    name: xsk-info
300    attributes: []
301  -
302    name: queue
303    attributes:
304      -
305        name: id
306        doc: Queue index; most queue types are indexed like a C array, with
307             indexes starting at 0 and ending at queue count - 1. Queue indexes
308             are scoped to an interface and queue type.
309        type: u32
310      -
311        name: ifindex
312        doc: ifindex of the netdevice to which the queue belongs.
313        type: u32
314        checks:
315          min: 1
316      -
317        name: type
318        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
319             XDP TX queues allocated in the kernel are not linked to NAPIs and
320             thus not listed. AF_XDP queues will have more information set in
321             the xsk attribute.
322        type: u32
323        enum: queue-type
324      -
325        name: napi-id
326        doc: ID of the NAPI instance which services this queue.
327        type: u32
328      -
329        name: dmabuf
330        doc: ID of the dmabuf attached to this queue, if any.
331        type: u32
332      -
333        name: io-uring
334        doc: io_uring memory provider information.
335        type: nest
336        nested-attributes: io-uring-provider-info
337      -
338        name: xsk
339        doc: XSK information for this queue, if any.
340        type: nest
341        nested-attributes: xsk-info
342      -
343        name: lease
344        doc: |
345          A queue from a virtual device can have a lease which refers to
346          another queue from a physical device. This is useful for memory
347          providers and AF_XDP operations which take an ifindex and queue id
348          to allow applications to bind against virtual devices in containers.
349        type: nest
350        nested-attributes: lease
351  -
352    name: qstats
353    doc: |
354      Get device statistics, scoped to a device or a queue.
355      These statistics extend (and partially duplicate) statistics available
356      in struct rtnl_link_stats64.
357      Value of the `scope` attribute determines how statistics are
358      aggregated. When aggregated for the entire device the statistics
359      represent the total number of events since last explicit reset of
360      the device (i.e. not a reconfiguration like changing queue count).
361      When reported per-queue, however, the statistics may not add
362      up to the total number of events, will only be reported for currently
363      active objects, and will likely report the number of events since last
364      reconfiguration.
365    attributes:
366      -
367        name: ifindex
368        doc: ifindex of the netdevice to which stats belong.
369        type: u32
370        checks:
371          min: 1
372      -
373        name: queue-type
374        doc: Queue type as rx, tx, for queue-id.
375        type: u32
376        enum: queue-type
377      -
378        name: queue-id
379        doc: Queue ID, if stats are scoped to a single queue instance.
380        type: u32
381      -
382        name: scope
383        doc: |
384          What object type should be used to iterate over the stats.
385        type: uint
386        enum: qstats-scope
387      -
388        name: rx-packets
389        doc: |
390          Number of wire packets successfully received and passed to the stack.
391          For drivers supporting XDP, XDP is considered the first layer
392          of the stack, so packets consumed by XDP are still counted here.
393        type: uint
394        value: 8  # reserve some attr ids in case we need more metadata later
395      -
396        name: rx-bytes
397        doc: Successfully received bytes, see `rx-packets`.
398        type: uint
399      -
400        name: tx-packets
401        doc: |
402          Number of wire packets successfully sent. Packet is considered to be
403          successfully sent once it is in device memory (usually this means
404          the device has issued a DMA completion for the packet).
405        type: uint
406      -
407        name: tx-bytes
408        doc: Successfully sent bytes, see `tx-packets`.
409        type: uint
410      -
411        name: rx-alloc-fail
412        doc: |
413          Number of times skb or buffer allocation failed on the Rx datapath.
414          Allocation failure may, or may not result in a packet drop, depending
415          on driver implementation and whether system recovers quickly.
416        type: uint
417      -
418        name: rx-hw-drops
419        doc: |
420          Number of all packets which entered the device, but never left it,
421          including but not limited to: packets dropped due to lack of buffer
422          space, processing errors, explicit or implicit policies and packet
423          filters.
424        type: uint
425      -
426        name: rx-hw-drop-overruns
427        doc: |
428          Number of packets dropped due to transient lack of resources, such as
429          buffer space, host descriptors etc.
430        type: uint
431      -
432        name: rx-csum-complete
433        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
434        type: uint
435      -
436        name: rx-csum-unnecessary
437        doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
438        type: uint
439      -
440        name: rx-csum-none
441        doc: Number of packets that were not checksummed by device.
442        type: uint
443      -
444        name: rx-csum-bad
445        doc: |
446          Number of packets with bad checksum. The packets are not discarded,
447          but still delivered to the stack.
448        type: uint
449      -
450        name: rx-hw-gro-packets
451        doc: |
452          Number of packets that were coalesced from smaller packets by the
453          device. Counts only packets coalesced with the HW-GRO netdevice
454          feature, LRO-coalesced packets are not counted.
455        type: uint
456      -
457        name: rx-hw-gro-bytes
458        doc: See `rx-hw-gro-packets`.
459        type: uint
460      -
461        name: rx-hw-gro-wire-packets
462        doc: |
463          Number of packets that were coalesced to bigger packets with the
464          HW-GRO netdevice feature. LRO-coalesced packets are not counted.
465        type: uint
466      -
467        name: rx-hw-gro-wire-bytes
468        doc: See `rx-hw-gro-wire-packets`.
469        type: uint
470      -
471        name: rx-hw-drop-ratelimits
472        doc: |
473          Number of packets dropped by the device due to the received
474          packets bitrate exceeding the device rate limit.
475        type: uint
476      -
477        name: tx-hw-drops
478        doc: |
479          Number of packets that arrived at the device but never left it,
480          encompassing packets dropped for reasons such as processing errors, as
481          well as those affected by explicitly defined policies and packet
482          filtering criteria.
483        type: uint
484      -
485        name: tx-hw-drop-errors
486        doc: Number of packets dropped because they were invalid or malformed.
487        type: uint
488      -
489        name: tx-csum-none
490        doc: |
491          Number of packets that did not require the device to calculate the
492          checksum.
493        type: uint
494      -
495        name: tx-needs-csum
496        doc: |
497          Number of packets that required the device to calculate the checksum.
498          This counter includes the number of GSO wire packets for which device
499          calculated the L4 checksum.
500        type: uint
501      -
502        name: tx-hw-gso-packets
503        doc: |
504          Number of packets that necessitated segmentation into smaller packets
505          by the device.
506        type: uint
507      -
508        name: tx-hw-gso-bytes
509        doc: See `tx-hw-gso-packets`.
510        type: uint
511      -
512        name: tx-hw-gso-wire-packets
513        doc: |
514          Number of wire-sized packets generated by processing
515          `tx-hw-gso-packets`
516        type: uint
517      -
518        name: tx-hw-gso-wire-bytes
519        doc: See `tx-hw-gso-wire-packets`.
520        type: uint
521      -
522        name: tx-hw-drop-ratelimits
523        doc: |
524          Number of packets dropped by the device due to the transmit
525          packets bitrate exceeding the device rate limit.
526        type: uint
527      -
528        name: tx-stop
529        doc: |
530          Number of times driver paused accepting new tx packets
531          from the stack to this queue, because the queue was full.
532          Note that if BQL is supported and enabled on the device
533          the networking stack will avoid queuing a lot of data at once.
534        type: uint
535      -
536        name: tx-wake
537        doc: |
538          Number of times driver re-started accepting send
539          requests to this queue from the stack.
540        type: uint
541  -
    # Subset of `queue` limited to `id` and `type`; nested under the `queue`
    # attribute of the `lease` set and the `queues` attribute of `dmabuf`.
542    name: queue-id
543    subset-of: queue
544    attributes:
545      -
546        name: id
547      -
548        name: type
549  -
    # Nested type of the `lease` attribute in the `queue` set; its own
    # `queue` attribute in turn nests the `queue-id` subset.
550    name: lease
551    attributes:
552      -
553        name: ifindex
554        doc: The netdev ifindex to lease the queue from.
555        type: u32
556        checks:
557          min: 1
558      -
559        name: queue
560        doc: The netdev queue to lease from.
561        type: nest
562        nested-attributes: queue-id
563      -
564        name: netns-id
565        doc: The network namespace id of the netdev.
566        type: s32
567  -
568    name: dmabuf
569    attributes:
570      -
571        name: ifindex
572        doc: netdev ifindex to bind the dmabuf to.
573        type: u32
574        checks:
575          min: 1
576      -
577        name: queues
578        doc: receive queues to bind the dmabuf to.
579        type: nest
580        nested-attributes: queue-id
581        multi-attr: true
582      -
583        name: fd
584        doc: dmabuf file descriptor to bind.
585        type: u32
586      -
587        name: id
588        doc: id of the dmabuf binding
589        type: u32
590        checks:
591          min: 1
592
593operations:
594  list:
595    -
596      name: dev-get
597      doc: Get / dump information about a netdev.
598      attribute-set: dev
599      do:
600        request:
601          attributes:
602            - ifindex
603        reply: &dev-all
604          attributes:
605            - ifindex
606            - xdp-features
607            - xdp-zc-max-segs
608            - xdp-rx-metadata-features
609            - xsk-features
610      dump:
611        reply: *dev-all
612    -
613      name: dev-add-ntf
614      doc: Notification about device appearing.
615      notify: dev-get
616      mcgrp: mgmt
617    -
618      name: dev-del-ntf
619      doc: Notification about device disappearing.
620      notify: dev-get
621      mcgrp: mgmt
622    -
623      name: dev-change-ntf
624      doc: Notification about device configuration being changed.
625      notify: dev-get
626      mcgrp: mgmt
627    -
628      name: page-pool-get
629      doc: |
630        Get / dump information about Page Pools.
631        Only Page Pools associated by the driver with a net_device
632        can be listed. ifindex will not be reported if the net_device
633        no longer exists.
634      attribute-set: page-pool
635      do:
636        request:
637          attributes:
638            - id
639        reply: &pp-reply
640          attributes:
641            - id
642            - ifindex
643            - napi-id
644            - inflight
645            - inflight-mem
646            - detach-time
647            - dmabuf
648            - io-uring
649      dump:
650        reply: *pp-reply
651      config-cond: page-pool
652    -
653      name: page-pool-add-ntf
654      doc: Notification about page pool appearing.
655      notify: page-pool-get
656      mcgrp: page-pool
657      config-cond: page-pool
658    -
659      name: page-pool-del-ntf
660      doc: Notification about page pool disappearing.
661      notify: page-pool-get
662      mcgrp: page-pool
663      config-cond: page-pool
664    -
665      name: page-pool-change-ntf
666      doc: Notification about page pool configuration being changed.
667      notify: page-pool-get
668      mcgrp: page-pool
669      config-cond: page-pool
670    -
671      name: page-pool-stats-get
672      doc: Get page pool statistics.
673      attribute-set: page-pool-stats
674      do:
675        request:
676          attributes:
677            - info
678        reply: &pp-stats-reply
679          attributes:
680            - info
681            - alloc-fast
682            - alloc-slow
683            - alloc-slow-high-order
684            - alloc-empty
685            - alloc-refill
686            - alloc-waive
687            - recycle-cached
688            - recycle-cache-full
689            - recycle-ring
690            - recycle-ring-full
691            - recycle-released-refcnt
692      dump:
693        reply: *pp-stats-reply
694      config-cond: page-pool-stats
695    -
696      name: queue-get
697      doc: Get queue information from the kernel.
698           Only configured queues will be reported (as opposed to all available
699           hardware queues).
700      attribute-set: queue
701      do:
702        request:
703          attributes:
704            - ifindex
705            - type
706            - id
707        reply: &queue-get-op
708          attributes:
709            - id
710            - type
711            - napi-id
712            - ifindex
713            - dmabuf
714            - io-uring
715            - xsk
716            - lease
717      dump:
718        request:
719          attributes:
720            - ifindex
721        reply: *queue-get-op
722    -
723      name: napi-get
724      doc: Get information about NAPI instances configured on the system.
725      attribute-set: napi
726      do:
727        request:
728          attributes:
729            - id
730        reply: &napi-get-op
731          attributes:
732            - id
733            - ifindex
734            - irq
735            - pid
736            - defer-hard-irqs
737            - gro-flush-timeout
738            - irq-suspend-timeout
739            - threaded
740      dump:
741        request:
742          attributes:
743            - ifindex
744        reply: *napi-get-op
745    -
746      name: qstats-get
747      doc: |
748        Get / dump fine grained statistics. Which statistics are reported
749        depends on the device and the driver, and whether the driver stores
750        software counters per-queue.
751      attribute-set: qstats
752      dump:
753        request:
754          attributes:
755            - ifindex
756            - scope
757        reply:
758          attributes:
759            - ifindex
760            - queue-type
761            - queue-id
762            - rx-packets
763            - rx-bytes
764            - tx-packets
765            - tx-bytes
766            - rx-alloc-fail
767            - rx-hw-drops
768            - rx-hw-drop-overruns
769            - rx-csum-complete
770            - rx-csum-unnecessary
771            - rx-csum-none
772            - rx-csum-bad
773            - rx-hw-gro-packets
774            - rx-hw-gro-bytes
775            - rx-hw-gro-wire-packets
776            - rx-hw-gro-wire-bytes
777            - rx-hw-drop-ratelimits
778            - tx-hw-drops
779            - tx-hw-drop-errors
780            - tx-csum-none
781            - tx-needs-csum
782            - tx-hw-gso-packets
783            - tx-hw-gso-bytes
784            - tx-hw-gso-wire-packets
785            - tx-hw-gso-wire-bytes
786            - tx-hw-drop-ratelimits
787            - tx-stop
788            - tx-wake
789    -
790      name: bind-rx
791      doc: Bind dmabuf to netdev
792      attribute-set: dmabuf
793      flags: [admin-perm]
794      do:
795        request:
796          attributes:
797            - ifindex
798            - fd
799            - queues
800        reply:
801          attributes:
802            - id
803    -
804      name: napi-set
805      doc: Set configurable NAPI instance settings.
806      attribute-set: napi
807      flags: [admin-perm]
808      do:
809        request:
810          attributes:
811            - id
812            - defer-hard-irqs
813            - gro-flush-timeout
814            - irq-suspend-timeout
815            - threaded
816    -
817      name: bind-tx
818      doc: Bind dmabuf to netdev for TX
819      attribute-set: dmabuf
820      do:
821        request:
822          attributes:
823            - ifindex
824            - fd
825        reply:
826          attributes:
827            - id
828    -
      # NOTE(review): the `&queue-create-op` anchor on the reply below has no
      # matching `*queue-create-op` alias in the visible part of this spec,
      # unlike the anchors on the other get operations - confirm whether it
      # is still needed.
829      name: queue-create
830      doc: |
831        Create a new queue for the given netdevice. Whether this operation
832        is supported depends on the device and the driver.
833      attribute-set: queue
834      flags: [admin-perm]
835      do:
836        request:
837          attributes:
838            - ifindex
839            - type
840            - lease
841        reply: &queue-create-op
842          attributes:
843            - id
844
845kernel-family:
846  headers: ["net/netdev_netlink.h"]
847  sock-priv: struct netdev_nl_sock
848
# Multicast groups referenced through the `mcgrp` key of the notification
# operations: `mgmt` by the dev-*-ntf operations and `page-pool` by the
# page-pool-*-ntf operations.
849mcast-groups:
850  list:
851    -
852      name: mgmt
853    -
854      name: page-pool
855